From dd95bd822b10bbea837e63764f5f4d70abeb7a8e Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Sat, 4 Apr 2026 22:26:07 -0700 Subject: [PATCH 1/5] feat: add checksum verification library (SHA-256, SHA-1, MD5) Add VerifyChecksum() to compute file hashes and compare against expected values, and ParseDigestHeader() to extract checksums from HTTP Digest response headers (RFC 3230). Supports hex and base64 encoded hashes. This is the core library for download integrity verification. Wiring into the download lifecycle and CLI flags will follow in a separate PR. Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/processing/checksum.go | 119 +++++++++++++++++++++++++++ internal/processing/checksum_test.go | 75 +++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 internal/processing/checksum.go create mode 100644 internal/processing/checksum_test.go diff --git a/internal/processing/checksum.go b/internal/processing/checksum.go new file mode 100644 index 00000000..24002aef --- /dev/null +++ b/internal/processing/checksum.go @@ -0,0 +1,119 @@ +package processing + +import ( + "crypto/md5" + "crypto/sha1" + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "fmt" + "hash" + "io" + "os" + "strings" +) + +// ChecksumResult holds the outcome of a checksum verification. +type ChecksumResult struct { + Algorithm string + Expected string + Actual string + Match bool +} + +// VerifyChecksum computes the hash of a file and compares it to the expected value. +// algorithm should be one of: md5, sha1, sha256. +// expected should be a hex-encoded hash string. +func VerifyChecksum(filepath string, algorithm string, expected string) (*ChecksumResult, error) { + if filepath == "" || algorithm == "" || expected == "" { + return nil, fmt.Errorf("filepath, algorithm, and expected hash are all required") + } + + algorithm = strings.ToLower(algorithm) + expected = strings.ToLower(strings.TrimSpace(expected)) + + var h hash.Hash + switch algorithm { + case "md5": + h = md5.New() + case "sha1", "sha-1": + h = sha1.New() + case "sha256", "sha-256": + h = sha256.New() + default: + return nil, fmt.Errorf("unsupported checksum algorithm: %s", algorithm) + } + + f, err := os.Open(filepath) + if err != nil { + return nil, fmt.Errorf("failed to open file for checksum: %w", err) + } + defer f.Close() + + if _, err := io.Copy(h, f); err != nil { + return nil, fmt.Errorf("failed to read file for checksum: %w", err) + } + + actual := hex.EncodeToString(h.Sum(nil)) + return &ChecksumResult{ + Algorithm: algorithm, + Expected: expected, + Actual: actual, + Match: actual == expected, + }, nil +} + +// ParseDigestHeader parses an HTTP Digest header (RFC 3230) and returns +// the algorithm and hex-encoded hash. Returns empty strings if not parseable. +// Example header: "sha-256=base64hash" or "SHA-256=base64hash" +func ParseDigestHeader(header string) (algorithm string, hexHash string) { + parts := strings.SplitN(header, "=", 2) + if len(parts) != 2 { + return "", "" + } + + algo := strings.ToLower(strings.TrimSpace(parts[0])) + value := strings.TrimSpace(parts[1]) + + switch algo { + case "sha-256": + algo = "sha256" + case "sha-1": + algo = "sha1" + case "md5": + // already correct + default: + return "", "" + } + + // Some servers provide hex directly in Digest; prefer that when it matches + // the expected hash length for the selected algorithm. + expectedHexLen := 0 + switch algo { + case "md5": + expectedHexLen = 32 + case "sha1": + expectedHexLen = 40 + case "sha256": + expectedHexLen = 64 + } + if len(value) == expectedHexLen { + if _, err := hex.DecodeString(value); err == nil { + return algo, strings.ToLower(value) + } + } + + // RFC 3230 uses base64 + decoded, err := base64.StdEncoding.DecodeString(value) + if err != nil { + decoded, err = base64.URLEncoding.DecodeString(value) + if err != nil { + // Maybe it's already hex + if _, hexErr := hex.DecodeString(value); hexErr == nil { + return algo, strings.ToLower(value) + } + return "", "" + } + } + return algo, hex.EncodeToString(decoded) +} diff --git a/internal/processing/checksum_test.go b/internal/processing/checksum_test.go new file mode 100644 index 00000000..faa41936 --- /dev/null +++ b/internal/processing/checksum_test.go @@ -0,0 +1,75 @@ +package processing + +import ( + "crypto/sha256" + "encoding/hex" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestVerifyChecksum_SHA256(t *testing.T) { + // Create a temp file with known content + dir := t.TempDir() + path := filepath.Join(dir, "test.bin") + content := []byte("hello surge") + require.NoError(t, os.WriteFile(path, content, 0o644)) + + // Compute expected hash + h := sha256.Sum256(content) + expected := hex.EncodeToString(h[:]) + + result, err := VerifyChecksum(path, "sha256", expected) + require.NoError(t, err) + assert.True(t, result.Match) + assert.Equal(t, expected, result.Actual) +} + +func TestVerifyChecksum_Mismatch(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.bin") + require.NoError(t, os.WriteFile(path, []byte("hello"), 0o644)) + + result, err := VerifyChecksum(path, "sha256", "0000000000000000000000000000000000000000000000000000000000000000") + require.NoError(t, err) + assert.False(t, result.Match) +} + +func TestVerifyChecksum_UnsupportedAlgorithm(t *testing.T) { + _, err := VerifyChecksum("/tmp/test", "sha512", "abc") + assert.Error(t, err) + assert.Contains(t, err.Error(), "unsupported") +} + +func TestVerifyChecksum_EmptyArgs(t *testing.T) { + _, err := VerifyChecksum("", "sha256", "abc") + assert.Error(t, err) +} + +func TestParseDigestHeader_SHA256Base64(t *testing.T) { + // sha256 of empty string in base64 + algo, hash := ParseDigestHeader("sha-256=47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=") + assert.Equal(t, "sha256", algo) + assert.NotEmpty(t, hash) +} + +func TestParseDigestHeader_MD5Hex(t *testing.T) { + algo, hash := ParseDigestHeader("md5=d41d8cd98f00b204e9800998ecf8427e") + assert.Equal(t, "md5", algo) + assert.Equal(t, "d41d8cd98f00b204e9800998ecf8427e", hash) +} + +func TestParseDigestHeader_Invalid(t *testing.T) { + algo, hash := ParseDigestHeader("invalid") + assert.Empty(t, algo) + assert.Empty(t, hash) +} + +func TestParseDigestHeader_UnsupportedAlgo(t *testing.T) { + algo, hash := ParseDigestHeader("sha-512=abc") + assert.Empty(t, algo) + assert.Empty(t, hash) +} From 0ec53707a3cdad00afd6822cfe5724cb6980f726 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Sun, 5 Apr 2026 20:34:18 -0700 Subject: [PATCH 2/5] fix(checksum): validate hash length in hex fallback and support unpadded base64 --- internal/processing/checksum.go | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/internal/processing/checksum.go b/internal/processing/checksum.go index 24002aef..10a6046e 100644 --- a/internal/processing/checksum.go +++ b/internal/processing/checksum.go @@ -103,17 +103,24 @@ func ParseDigestHeader(header string) (algorithm string, hexHash string) { } } - // RFC 3230 uses base64 - decoded, err := base64.StdEncoding.DecodeString(value) - if err != nil { - decoded, err = base64.URLEncoding.DecodeString(value) - if err != nil { - // Maybe it's already hex - if _, hexErr := hex.DecodeString(value); hexErr == nil { - return algo, strings.ToLower(value) + // RFC 3230 uses base64 (padded or unpadded, standard or URL-safe) + for _, enc := range []*base64.Encoding{ + base64.StdEncoding, + base64.URLEncoding, + base64.RawStdEncoding, + base64.RawURLEncoding, + } { + if decoded, err := enc.DecodeString(value); err == nil { + h := hex.EncodeToString(decoded) + if len(h) == expectedHexLen { + return algo, h } - return "", "" } } - return algo, hex.EncodeToString(decoded) + + // Hex fallback - only accept if length matches the expected hash size + if _, err := hex.DecodeString(value); err == nil && len(value) == expectedHexLen { + return algo, strings.ToLower(value) + } + return "", "" } From 0e35ef4a9319b4c0d9aa4856480968fab24398d5 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Thu, 9 Apr 2026 19:06:09 -0700 Subject: [PATCH 3/5] fix: normalize algorithm name in ChecksumResult, rename shadowed param --- internal/processing/checksum.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/internal/processing/checksum.go b/internal/processing/checksum.go index 10a6046e..15646849 100644 --- a/internal/processing/checksum.go +++ b/internal/processing/checksum.go @@ -24,8 +24,8 @@ type ChecksumResult struct { // VerifyChecksum computes the hash of a file and compares it to the expected value. // algorithm should be one of: md5, sha1, sha256. // expected should be a hex-encoded hash string. -func VerifyChecksum(filepath string, algorithm string, expected string) (*ChecksumResult, error) { - if filepath == "" || algorithm == "" || expected == "" { +func VerifyChecksum(filePath string, algorithm string, expected string) (*ChecksumResult, error) { + if filePath == "" || algorithm == "" || expected == "" { return nil, fmt.Errorf("filepath, algorithm, and expected hash are all required") } @@ -37,14 +37,16 @@ func VerifyChecksum(filepath string, algorithm string, expected string) (*Checks case "md5": h = md5.New() case "sha1", "sha-1": + algorithm = "sha1" h = sha1.New() case "sha256", "sha-256": + algorithm = "sha256" h = sha256.New() default: return nil, fmt.Errorf("unsupported checksum algorithm: %s", algorithm) } - f, err := os.Open(filepath) + f, err := os.Open(filePath) if err != nil { return nil, fmt.Errorf("failed to open file for checksum: %w", err) } @@ -81,7 +83,7 @@ func ParseDigestHeader(header string) (algorithm string, hexHash string) { case "sha-1": algo = "sha1" case "md5": - // already correct + // no normalization needed default: return "", "" } From 62ac7bd5d13982974617716bb1415d5a2a790e32 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Thu, 9 Apr 2026 20:57:04 -0700 Subject: [PATCH 4/5] fix: remove redundant hex fallback and strengthen checksum tests Remove dead code in ParseDigestHeader (hex fallback already handled by the earlier hex check). Strengthen base64 test assertion with exact expected hash. Add MD5 and SHA-1 happy-path tests with algorithm normalization verification. Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/processing/checksum.go | 4 ---- internal/processing/checksum_test.go | 36 +++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/internal/processing/checksum.go b/internal/processing/checksum.go index 15646849..27157496 100644 --- a/internal/processing/checksum.go +++ b/internal/processing/checksum.go @@ -120,9 +120,5 @@ func ParseDigestHeader(header string) (algorithm string, hexHash string) { } } - // Hex fallback - only accept if length matches the expected hash size - if _, err := hex.DecodeString(value); err == nil && len(value) == expectedHexLen { - return algo, strings.ToLower(value) - } return "", "" } diff --git a/internal/processing/checksum_test.go b/internal/processing/checksum_test.go index faa41936..d5d772da 100644 --- a/internal/processing/checksum_test.go +++ b/internal/processing/checksum_test.go @@ -1,6 +1,8 @@ package processing import ( + "crypto/md5" + "crypto/sha1" "crypto/sha256" "encoding/hex" "os" @@ -28,6 +30,38 @@ func TestVerifyChecksum_SHA256(t *testing.T) { assert.Equal(t, expected, result.Actual) } +func TestVerifyChecksum_MD5(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.bin") + content := []byte("hello surge") + require.NoError(t, os.WriteFile(path, content, 0o644)) + + h := md5.Sum(content) + expected := hex.EncodeToString(h[:]) + + result, err := VerifyChecksum(path, "md5", expected) + require.NoError(t, err) + assert.True(t, result.Match) + assert.Equal(t, "md5", result.Algorithm) + assert.Equal(t, expected, result.Actual) +} + +func TestVerifyChecksum_SHA1(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.bin") + content := []byte("hello surge") + require.NoError(t, os.WriteFile(path, content, 0o644)) + + h := sha1.Sum(content) + expected := hex.EncodeToString(h[:]) + + result, err := VerifyChecksum(path, "sha-1", expected) + require.NoError(t, err) + assert.True(t, result.Match) + assert.Equal(t, "sha1", result.Algorithm) + assert.Equal(t, expected, result.Actual) +} + func TestVerifyChecksum_Mismatch(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "test.bin") @@ -53,7 +87,7 @@ func TestParseDigestHeader_SHA256Base64(t *testing.T) { // sha256 of empty string in base64 algo, hash := ParseDigestHeader("sha-256=47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=") assert.Equal(t, "sha256", algo) - assert.NotEmpty(t, hash) + assert.Equal(t, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", hash) } func TestParseDigestHeader_MD5Hex(t *testing.T) { From 3a1a223a0fa1e4c8d65f1becb23880e187d6c0b9 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Sun, 12 Apr 2026 12:12:55 -0400 Subject: [PATCH 5/5] fix: validate hash length and support unpadded base64 in digest parsing Address greptile P1 findings: - Validate decoded hash byte length matches expected algorithm size to prevent wrong-length hashes from being silently accepted - Add base64.RawStdEncoding and RawURLEncoding fallbacks for services that return unpadded base64 digests - Return error from ParseDigestHeader on length mismatches - Add test for unpadded base64 and wrong-length hex detection - Fix deferred file close to handle error Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/processing/checksum.go | 37 ++++++++++++++-------------- internal/processing/checksum_test.go | 26 ++++++++++++++++--- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/internal/processing/checksum.go b/internal/processing/checksum.go index 27157496..5f05d0aa 100644 --- a/internal/processing/checksum.go +++ b/internal/processing/checksum.go @@ -50,7 +50,7 @@ func VerifyChecksum(filePath string, algorithm string, expected string) (*Checks if err != nil { return nil, fmt.Errorf("failed to open file for checksum: %w", err) } - defer f.Close() + defer func() { _ = f.Close() }() if _, err := io.Copy(h, f); err != nil { return nil, fmt.Errorf("failed to read file for checksum: %w", err) @@ -66,12 +66,12 @@ func VerifyChecksum(filePath string, algorithm string, expected string) (*Checks } // ParseDigestHeader parses an HTTP Digest header (RFC 3230) and returns -// the algorithm and hex-encoded hash. Returns empty strings if not parseable. +// the algorithm and hex-encoded hash. // Example header: "sha-256=base64hash" or "SHA-256=base64hash" -func ParseDigestHeader(header string) (algorithm string, hexHash string) { +func ParseDigestHeader(header string) (algorithm string, hexHash string, err error) { parts := strings.SplitN(header, "=", 2) if len(parts) != 2 { - return "", "" + return "", "", nil } algo := strings.ToLower(strings.TrimSpace(parts[0])) @@ -85,27 +85,28 @@ func ParseDigestHeader(header string) (algorithm string, hexHash string) { case "md5": // no normalization needed default: - return "", "" + return "", "", nil } - // Some servers provide hex directly in Digest; prefer that when it matches - // the expected hash length for the selected algorithm. - expectedHexLen := 0 + expectedBytes := 0 switch algo { case "md5": - expectedHexLen = 32 + expectedBytes = md5.Size case "sha1": - expectedHexLen = 40 + expectedBytes = sha1.Size case "sha256": - expectedHexLen = 64 + expectedBytes = sha256.Size } + expectedHexLen := expectedBytes * 2 if len(value) == expectedHexLen { - if _, err := hex.DecodeString(value); err == nil { - return algo, strings.ToLower(value) + if decoded, err := hex.DecodeString(value); err == nil { + if len(decoded) != expectedBytes { + return "", "", fmt.Errorf("digest length mismatch for %s", algo) + } + return algo, strings.ToLower(value), nil } } - // RFC 3230 uses base64 (padded or unpadded, standard or URL-safe) for _, enc := range []*base64.Encoding{ base64.StdEncoding, base64.URLEncoding, @@ -113,12 +114,12 @@ func ParseDigestHeader(header string) (algorithm string, hexHash string) { base64.RawURLEncoding, } { if decoded, err := enc.DecodeString(value); err == nil { - h := hex.EncodeToString(decoded) - if len(h) == expectedHexLen { - return algo, h + if len(decoded) != expectedBytes { + return "", "", fmt.Errorf("digest length mismatch for %s", algo) } + return algo, hex.EncodeToString(decoded), nil } } - return "", "" + return "", "", nil } diff --git a/internal/processing/checksum_test.go b/internal/processing/checksum_test.go index d5d772da..2e90d188 100644 --- a/internal/processing/checksum_test.go +++ b/internal/processing/checksum_test.go @@ -83,27 +83,45 @@ func TestVerifyChecksum_EmptyArgs(t *testing.T) { assert.Error(t, err) } +func mustParseDigestHeader(t *testing.T, header string) (string, string) { + t.Helper() + algo, hash, err := ParseDigestHeader(header) + require.NoError(t, err) + return algo, hash +} + func TestParseDigestHeader_SHA256Base64(t *testing.T) { // sha256 of empty string in base64 - algo, hash := ParseDigestHeader("sha-256=47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=") + algo, hash := mustParseDigestHeader(t, "sha-256=47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=") assert.Equal(t, "sha256", algo) assert.Equal(t, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", hash) } func TestParseDigestHeader_MD5Hex(t *testing.T) { - algo, hash := ParseDigestHeader("md5=d41d8cd98f00b204e9800998ecf8427e") + algo, hash := mustParseDigestHeader(t, "md5=d41d8cd98f00b204e9800998ecf8427e") assert.Equal(t, "md5", algo) assert.Equal(t, "d41d8cd98f00b204e9800998ecf8427e", hash) } func TestParseDigestHeader_Invalid(t *testing.T) { - algo, hash := ParseDigestHeader("invalid") + algo, hash := mustParseDigestHeader(t, "invalid") assert.Empty(t, algo) assert.Empty(t, hash) } func TestParseDigestHeader_UnsupportedAlgo(t *testing.T) { - algo, hash := ParseDigestHeader("sha-512=abc") + algo, hash := mustParseDigestHeader(t, "sha-512=abc") assert.Empty(t, algo) assert.Empty(t, hash) } + +func TestParseDigestHeader_SHA256UnpaddedBase64(t *testing.T) { + algo, hash := mustParseDigestHeader(t, "sha-256=47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU") + assert.Equal(t, "sha256", algo) + assert.Equal(t, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", hash) +} + +func TestParseDigestHeader_SHA256WrongLengthHex(t *testing.T) { + _, _, err := ParseDigestHeader("sha-256=d41d8cd98f00b204e9800998ecf8427e") + require.Error(t, err) +}