diff --git a/mcpp.toml b/mcpp.toml index 646b01cc..5cf446e8 100644 --- a/mcpp.toml +++ b/mcpp.toml @@ -1,6 +1,6 @@ [package] name = "xlings" -version = "0.4.50" +version = "0.4.51" description = "Universal package management infrastructure tool with SubOS isolation" license = "Apache-2.0" repo = "https://github.com/openxlings/xlings" diff --git a/src/core/config.cppm b/src/core/config.cppm index eef9c51f..bb614af2 100644 --- a/src/core/config.cppm +++ b/src/core/config.cppm @@ -13,7 +13,7 @@ import xlings.core.xvm.db; namespace xlings { export struct Info { - static constexpr std::string_view VERSION = "0.4.50"; + static constexpr std::string_view VERSION = "0.4.51"; static constexpr std::string_view REPO = "https://github.com/openxlings/xlings"; }; diff --git a/src/core/xim/downloader.cppm b/src/core/xim/downloader.cppm index 52705869..3cfe5e68 100644 --- a/src/core/xim/downloader.cppm +++ b/src/core/xim/downloader.cppm @@ -13,6 +13,7 @@ import xlings.core.config; import xlings.libs.tinyhttps; import xlings.runtime.cancellation; import xlings.core.mirror; +import xlings.libs.sha256; // Re-export extract_archive so existing importers (installer) keep working. export import xlings.core.xim.extract; @@ -51,6 +52,15 @@ bool looks_like_archive_filename_(const std::filesystem::path& path) { // exitCode=0. See .agents/docs/2026-05-22-cmd-install-silent-failure-analysis.md constexpr std::uintmax_t kMinPlausibleArchiveBytes_ = 1024; +// Lowercase a declared sha256 so it compares against our hex digests +// regardless of the recipe author's casing. 
// Returns a copy of `s` with the uppercase hex letters 'A'..'F' folded to
// 'a'..'f', so a recipe-declared sha256 compares equal to a lowercase hex
// digest regardless of the recipe author's casing.
//
// Only A-F are touched: digits, already-lowercase letters and any non-hex
// character pass through unchanged, so a malformed declaration still fails
// the comparison instead of being silently "repaired".
std::string lower_hex_(std::string_view s) {
    std::string out(s);
    for (char& c : out) {
        if (c >= 'A' && c <= 'F') {
            c = static_cast<char>(c - 'A' + 'a');
        }
    }
    return out;
}
opts.onUrlAttemptFailed = [](const std::string& u, const std::string& err) { - if (err.rfind("stalled:", 0) == 0) mirror::adaptive::penalize_host(u); + if (err.rfind("stalled:", 0) == 0 + || err.rfind("sha256 mismatch", 0) == 0) + mirror::adaptive::penalize_host(u); }; + // Per-candidate integrity acceptance: verify INSIDE the URL loop + // so a mirror that wins the latency race but serves corrupted + // bytes is rejected and the next candidate (ultimately the + // author URL) is tried — previously a single mismatch failed the + // whole download with the remaining candidates untried. + if (!task.sha256.empty()) { + auto want = lower_hex_(task.sha256); + opts.onVerify = [destFile, want, &task](const std::string& u) + -> std::string { + auto digest = sha256::hex_file(destFile); + if (digest && *digest == want) return {}; + return std::format( + "sha256 mismatch for {} (source {}): got {}, want {}", + task.name, u, digest ? *digest : "", want); + }; + } auto dlResult = tinyhttps::download_file(opts); if (!dlResult.success) { @@ -408,11 +439,12 @@ DownloadResult download_one(const DownloadTask& task, } } - // Verify SHA256 if provided + // Final SHA256 re-check (defense in depth — the per-candidate + // onVerify above already gated acceptance; in-process hash, no + // dependency on a host `sha256sum` binary, which stock macOS lacks). 
// Streaming SHA-256 (FIPS 180-4) context.
//
// Usage: construct, call update() any number of times with consecutive
// chunks of the message, then call hex_digest() once. hex_digest() consumes
// the context; call reset() before hashing another message.
class Hasher {
public:
    Hasher() { reset(); }

    // Restores the initial hash state and discards any buffered input.
    void reset() {
        h_ = {0x6a09e667u, 0xbb67ae85u, 0x3c6ef372u, 0xa54ff53au,
              0x510e527fu, 0x9b05688cu, 0x1f83d9abu, 0x5be0cd19u};
        buflen_ = 0;
        total_ = 0;
    }

    // Appends `len` message bytes starting at `data`. A zero `len` is a
    // no-op and never dereferences `data`.
    void update(const void* data, std::size_t len) {
        total_ += len;
        absorb_(static_cast<const unsigned char*>(data), len);
    }

    // Finalizes the hash and returns the 64-character lowercase hex digest.
    // The context is consumed; call reset() to reuse it.
    std::string hex_digest() {
        // Message length in bits, captured before any padding is absorbed.
        const std::uint64_t bits = total_ * 8;

        // Trailer = 0x80, zero bytes up to 56 (mod 64), then the 64-bit
        // big-endian bit length. Worst case is 64 + 8 = 72 bytes.
        unsigned char tail[72]{};
        tail[0] = 0x80;
        const std::size_t padlen =
            (buflen_ < 56) ? (56 - buflen_) : (120 - buflen_);
        for (int i = 0; i < 8; ++i) {
            tail[padlen + i] = static_cast<unsigned char>(bits >> (56 - 8 * i));
        }
        // Padding goes through absorb_ so it is never counted as message
        // bytes; this always ends exactly on a block boundary.
        absorb_(tail, padlen + 8);

        static constexpr char kHexDigits[] = "0123456789abcdef";
        std::string out;
        out.reserve(64);
        for (std::uint32_t word : h_) {
            for (int shift = 28; shift >= 0; shift -= 4) {
                out.push_back(kHexDigits[(word >> shift) & 0xF]);
            }
        }
        return out;
    }

private:
    // 32-bit rotate right; callers only pass n in 1..31.
    static std::uint32_t rotr_(std::uint32_t x, int n) {
        return (x >> n) | (x << (32 - n));
    }

    // Buffers bytes and runs the compression function on every full
    // 64-byte block. Does not touch the message-length counter.
    void absorb_(const unsigned char* p, std::size_t len) {
        while (len > 0) {
            const std::size_t take = std::min(len, sizeof(buf_) - buflen_);
            std::memcpy(buf_ + buflen_, p, take);
            buflen_ += take;
            p += take;
            len -= take;
            if (buflen_ == sizeof(buf_)) {
                compress_(buf_);
                buflen_ = 0;
            }
        }
    }

    // SHA-256 compression function over one 64-byte block.
    void compress_(const unsigned char* block) {
        static constexpr std::uint32_t K[64] = {
            0x428a2f98u,0x71374491u,0xb5c0fbcfu,0xe9b5dba5u,0x3956c25bu,0x59f111f1u,0x923f82a4u,0xab1c5ed5u,
            0xd807aa98u,0x12835b01u,0x243185beu,0x550c7dc3u,0x72be5d74u,0x80deb1feu,0x9bdc06a7u,0xc19bf174u,
            0xe49b69c1u,0xefbe4786u,0x0fc19dc6u,0x240ca1ccu,0x2de92c6fu,0x4a7484aau,0x5cb0a9dcu,0x76f988dau,
            0x983e5152u,0xa831c66du,0xb00327c8u,0xbf597fc7u,0xc6e00bf3u,0xd5a79147u,0x06ca6351u,0x14292967u,
            0x27b70a85u,0x2e1b2138u,0x4d2c6dfcu,0x53380d13u,0x650a7354u,0x766a0abbu,0x81c2c92eu,0x92722c85u,
            0xa2bfe8a1u,0xa81a664bu,0xc24b8b70u,0xc76c51a3u,0xd192e819u,0xd6990624u,0xf40e3585u,0x106aa070u,
            0x19a4c116u,0x1e376c08u,0x2748774cu,0x34b0bcb5u,0x391c0cb3u,0x4ed8aa4au,0x5b9cca4fu,0x682e6ff3u,
            0x748f82eeu,0x78a5636fu,0x84c87814u,0x8cc70208u,0x90befffau,0xa4506cebu,0xbef9a3f7u,0xc67178f2u,
        };

        // Message schedule: 16 big-endian words from the block, then 48
        // derived words.
        std::uint32_t w[64];
        for (int i = 0; i < 16; ++i) {
            w[i] = (std::uint32_t(block[i * 4]) << 24)
                 | (std::uint32_t(block[i * 4 + 1]) << 16)
                 | (std::uint32_t(block[i * 4 + 2]) << 8)
                 |  std::uint32_t(block[i * 4 + 3]);
        }
        for (int i = 16; i < 64; ++i) {
            const std::uint32_t s0 =
                rotr_(w[i - 15], 7) ^ rotr_(w[i - 15], 18) ^ (w[i - 15] >> 3);
            const std::uint32_t s1 =
                rotr_(w[i - 2], 17) ^ rotr_(w[i - 2], 19) ^ (w[i - 2] >> 10);
            w[i] = w[i - 16] + s0 + w[i - 7] + s1;
        }

        std::uint32_t a = h_[0], b = h_[1], c = h_[2], d = h_[3];
        std::uint32_t e = h_[4], f = h_[5], g = h_[6], h = h_[7];
        for (int i = 0; i < 64; ++i) {
            const std::uint32_t S1 = rotr_(e, 6) ^ rotr_(e, 11) ^ rotr_(e, 25);
            const std::uint32_t ch = (e & f) ^ (~e & g);
            const std::uint32_t t1 = h + S1 + ch + K[i] + w[i];
            const std::uint32_t S0 = rotr_(a, 2) ^ rotr_(a, 13) ^ rotr_(a, 22);
            const std::uint32_t mj = (a & b) ^ (a & c) ^ (b & c);
            const std::uint32_t t2 = S0 + mj;
            h = g; g = f; f = e; e = d + t1;
            d = c; c = b; b = a; a = t1 + t2;
        }
        h_[0] += a; h_[1] += b; h_[2] += c; h_[3] += d;
        h_[4] += e; h_[5] += f; h_[6] += g; h_[7] += h;
    }

    std::array<std::uint32_t, 8> h_{};  // running hash state
    unsigned char buf_[64]{};           // partial block awaiting compression
    std::size_t buflen_ = 0;            // bytes currently held in buf_
    std::uint64_t total_ = 0;           // message bytes seen via update()
};

// Hex digest of a memory buffer.
std::string hex(std::string_view data) {
    Hasher h;
    h.update(data.data(), data.size());
    return h.hex_digest();
}

// Hex digest of a file's contents, read in 64 KiB chunks. Returns an empty
// optional when the file cannot be opened or the stream reports badbit.
std::optional<std::string> hex_file(const std::filesystem::path& path) {
    std::ifstream in(path, std::ios::binary);
    if (!in) return std::nullopt;

    Hasher h;
    char buf[64 * 1024];
    // A short final read sets eof/fail but still reports its byte count via
    // gcount(), so that tail chunk is hashed before the loop ends.
    while (in.read(buf, sizeof(buf)) || in.gcount() > 0) {
        h.update(buf, static_cast<std::size_t>(in.gcount()));
    }
    if (in.bad()) return std::nullopt;
    return h.hex_digest();
}
Used by the downloader to penalize degraded hosts. std::function onUrlAttemptFailed; + // Per-URL acceptance hook: called after a successful transfer with the + // source URL. Return empty to accept; return an error message to + // REJECT the candidate — the file is removed, onUrlAttemptFailed + // fires, and the next candidate URL is tried (no same-URL retry: the + // payload is deterministic, so re-fetching the same source cannot + // change the verdict). Used for sha256 integrity: a mirror may win + // the latency race yet serve corrupted bytes. + std::function onVerify; + // TEST SEAM: when set, replaces the network transfer for one URL + // attempt (must write destFile on success). Lets unit tests exercise + // the candidate loop / verify fallback without sockets. + std::function + transferOverride; }; // Windowed-average stall detector (curl --speed-limit/--speed-time style). @@ -74,11 +93,6 @@ private: double winB_ { 0 }; }; -struct DownloadFileResult { - bool success { false }; - std::string error; -}; - // Result of a HEAD probe used by the cache to decide whether a previously // downloaded file is still current. `ok` is true only when the server // returned a 2xx response — header fields may still be empty if the @@ -311,11 +325,24 @@ DownloadFileResult download_file(const DownloadOptions& opts) { if (opts.isCancelled && opts.isCancelled()) return {false, "cancelled"}; for (int att = 0; att <= opts.retryCount; ++att) { if (opts.isCancelled && opts.isCancelled()) return {false, "cancelled"}; - auto r = detail_::download_once(url, opts.destFile, - opts.connectTimeoutSec, opts.maxTimeSec, - lowSpeedBytes, lowSpeedSecs, - opts.onProgress, opts.isCancelled); - if (r.success) return r; + auto r = opts.transferOverride + ? 
opts.transferOverride(url, opts.destFile) + : detail_::download_once(url, opts.destFile, + opts.connectTimeoutSec, opts.maxTimeSec, + lowSpeedBytes, lowSpeedSecs, + opts.onProgress, opts.isCancelled); + if (r.success) { + // Candidate acceptance: integrity failures are a property + // of the SOURCE, not the transfer — reject and move to + // the next URL rather than failing the whole download. + std::string verdict = + opts.onVerify ? opts.onVerify(url) : std::string{}; + if (verdict.empty()) return r; + lastErr = verdict; + if (opts.onUrlAttemptFailed) opts.onUrlAttemptFailed(url, verdict); + std::filesystem::remove(opts.destFile, ec); + break; // same bytes would fail again — next candidate + } lastErr = r.error; if (opts.onUrlAttemptFailed) opts.onUrlAttemptFailed(url, r.error); std::filesystem::remove(opts.destFile, ec); diff --git a/tests/unit/test_sha256.cpp b/tests/unit/test_sha256.cpp new file mode 100644 index 00000000..b277bfd4 --- /dev/null +++ b/tests/unit/test_sha256.cpp @@ -0,0 +1,142 @@ +// Unit tests for xlings.libs.sha256 (in-process SHA-256, FIPS 180-4) +// and the download_file per-candidate verification fallback. +// Context: the downloader used to shell out to `sha256sum`, absent on +// stock macOS — see the module header and mcpp issue #120. 
+#include + +import std; +import xlings.libs.sha256; +import xlings.libs.tinyhttps; + +namespace sha = xlings::sha256; + +// ── FIPS 180-4 / NIST test vectors ─────────────────────────────────── + +TEST(Sha256, EmptyString) { + EXPECT_EQ(sha::hex(""), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); +} + +TEST(Sha256, Abc) { + EXPECT_EQ(sha::hex("abc"), + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"); +} + +TEST(Sha256, TwoBlockMessage) { + EXPECT_EQ(sha::hex("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"), + "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1"); +} + +TEST(Sha256, MillionA) { + sha::Hasher h; + std::string chunk(1000, 'a'); + for (int i = 0; i < 1000; ++i) h.update(chunk.data(), chunk.size()); + EXPECT_EQ(h.hex_digest(), + "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0"); +} + +TEST(Sha256, PaddingBoundaries) { + // 55/56/63/64 bytes straddle the length-padding block boundary. + EXPECT_EQ(sha::hex(std::string(55, 'x')), + sha::hex(std::string(55, 'x'))); + std::string s56(56, 'a'); + sha::Hasher h; + h.update(s56.data(), 30); + h.update(s56.data() + 30, 26); + EXPECT_EQ(h.hex_digest(), sha::hex(s56)); // split == one-shot + std::string s64(64, 'b'); + EXPECT_EQ(sha::hex(s64).size(), 64u); +} + +TEST(Sha256, FileDigestMatchesBufferDigest) { + auto p = std::filesystem::temp_directory_path() / "xlings-sha256-test.bin"; + std::string payload = "the quick brown fox jumps over the lazy dog\n"; + { std::ofstream(p, std::ios::binary) << payload; } + auto fd = sha::hex_file(p); + ASSERT_TRUE(fd.has_value()); + EXPECT_EQ(*fd, sha::hex(payload)); + std::filesystem::remove(p); +} + +TEST(Sha256, MissingFileReturnsNullopt) { + EXPECT_FALSE(sha::hex_file("/nonexistent/xlings-sha256").has_value()); +} + +// ── download_file per-candidate verification fallback ──────────────── +// +// A mirror can win the latency race yet serve corrupted bytes (or the +// integrity check can 
fail for host reasons); the verify hook must +// reject that candidate and FALL THROUGH to the next URL instead of +// failing the download outright. + +namespace th = xlings::tinyhttps; + +TEST(DownloadVerify, RejectedCandidateFallsThroughToNext) { + auto dest = std::filesystem::temp_directory_path() / "xlings-dlverify-1.bin"; + th::DownloadOptions o; + o.destFile = dest; + o.urls = {"https://bad.mirror/x.tar.gz", "https://good.host/x.tar.gz"}; + o.transferOverride = [](const std::string& url, + const std::filesystem::path& d) -> th::DownloadFileResult { + std::ofstream(d, std::ios::binary) + << (url.starts_with("https://bad.") ? "garbage" : "payload"); + return {true, ""}; + }; + std::vector failures; + o.onUrlAttemptFailed = [&](const std::string& u, const std::string& e) { + failures.push_back(u + " | " + e); + }; + o.onVerify = [&](const std::string&) -> std::string { + std::ifstream f(dest, std::ios::binary); + std::string s((std::istreambuf_iterator(f)), {}); + return s == "payload" ? std::string{} : "sha256 mismatch (test)"; + }; + + auto r = th::download_file(o); + EXPECT_TRUE(r.success); + ASSERT_EQ(failures.size(), 1u); + EXPECT_TRUE(failures[0].starts_with("https://bad.mirror")); + { + // Scoped: Windows can't remove a file with an open handle. 
+ std::ifstream f(dest, std::ios::binary); + std::string s((std::istreambuf_iterator(f)), {}); + EXPECT_EQ(s, "payload"); + } + std::filesystem::remove(dest); +} + +TEST(DownloadVerify, AllCandidatesRejectedFails) { + auto dest = std::filesystem::temp_directory_path() / "xlings-dlverify-2.bin"; + th::DownloadOptions o; + o.destFile = dest; + o.urls = {"https://a/x", "https://b/x"}; + o.transferOverride = [](const std::string&, const std::filesystem::path& d) + -> th::DownloadFileResult { + std::ofstream(d, std::ios::binary) << "junk"; + return {true, ""}; + }; + int rejected = 0; + o.onVerify = [&](const std::string&) -> std::string { + ++rejected; + return "sha256 mismatch (test)"; + }; + auto r = th::download_file(o); + EXPECT_FALSE(r.success); + EXPECT_EQ(rejected, 2); + EXPECT_FALSE(std::filesystem::exists(dest)); +} + +TEST(DownloadVerify, NoVerifyHookKeepsFirstSuccess) { + auto dest = std::filesystem::temp_directory_path() / "xlings-dlverify-3.bin"; + th::DownloadOptions o; + o.destFile = dest; + o.urls = {"https://only/x"}; + o.transferOverride = [](const std::string&, const std::filesystem::path& d) + -> th::DownloadFileResult { + std::ofstream(d, std::ios::binary) << "anything"; + return {true, ""}; + }; + auto r = th::download_file(o); + EXPECT_TRUE(r.success); + std::filesystem::remove(dest); +}