diff --git a/Cargo.lock b/Cargo.lock index 81a264e..6d12b12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -11,15 +11,42 @@ dependencies = [ "memchr", ] +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener", + "futures-core", +] + [[package]] name = "async-trait" -version = "0.1.74" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.117", ] [[package]] @@ -59,7 +86,7 @@ dependencies = [ "hex", "http", "hyper", - "ring", + "ring 0.16.20", "time", "tokio", "tower", @@ -200,7 +227,7 @@ dependencies = [ "once_cell", "percent-encoding", "regex", - "ring", + "ring 0.16.20", "time", "tracing", ] @@ -253,7 +280,7 @@ dependencies = [ "http", "http-body", "hyper", - "hyper-rustls", + "hyper-rustls 0.23.2", "lazy_static", "pin-project-lite", "tokio", @@ -374,7 +401,7 @@ checksum = "acee9fd5073ab6b045a275b3e709c163dd36c90685219cb21804a147b58dba43" dependencies = [ "async-trait", "axum-core", - "bitflags", + 
"bitflags 1.3.2", "bytes", "futures-util", "http", @@ -419,12 +446,18 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6293dae2ec708e679da6736e857cf8532886ef258e92930f38279c12641628b8" dependencies = [ - "heck", + "heck 0.4.0", "proc-macro2", "quote", "syn 1.0.103", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.21.0" @@ -437,6 +470,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + [[package]] name = "block-buffer" version = "0.10.3" @@ -493,7 +532,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335867764ed2de42325fafe6d18b8af74ba97ee0c590fa016f157535b42ab04b" dependencies = [ "atty", - "bitflags", + "bitflags 1.3.2", "clap_derive", "clap_lex", "once_cell", @@ -507,7 +546,7 @@ version = "4.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16a1b0f6422af32d5da0c58e2703320f379216ee70198241c84173a8c5ac28f3" dependencies = [ - "heck", + "heck 0.4.0", "proc-macro-error", "proc-macro2", "quote", @@ -523,6 +562,15 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "core-foundation" version = "0.9.3" @@ -566,6 +614,30 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "crypto-common" version = "0.1.6" @@ -576,6 +648,38 @@ dependencies = [ "typenum", ] +[[package]] +name = "dashmap" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" +dependencies = [ + "cfg-if", + "hashbrown 0.12.3", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "deadpool" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421fe0f90f2ab22016f32a9881be5134fdd71c65298917084b0c7477cbc3856e" +dependencies = [ + "async-trait", + "deadpool-runtime", + "num_cpus", + "retain_mut", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "digest" version = "0.10.6" @@ -592,6 +696,21 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = 
[ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.2.8" @@ -613,6 +732,12 @@ dependencies = [ "libc", ] +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "fastrand" version = "1.8.0" @@ -628,6 +753,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = "1.1.0" @@ -637,6 +768,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38390104763dc37a5145a53c29c63c1290b5d316d6086ec32c293f6736051bb0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.25" @@ -644,6 +790,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -652,6 +799,38 @@ version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" +[[package]] +name = "futures-executor" +version = "0.3.25" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7acc85df6714c176ab5edf386123fafe217be88c0840ec11f199441134a074e2" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + [[package]] name = "futures-macro" version = "0.3.25" @@ -675,15 +854,25 @@ version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6" dependencies = [ + "futures-channel", "futures-core", + "futures-io", "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", "slab", @@ -699,6 +888,17 @@ dependencies = [ "version_check", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.8" @@ -707,7 +907,20 @@ checksum = 
"c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", ] [[package]] @@ -722,7 +935,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 1.9.2", "slab", "tokio", "tokio-util", @@ -735,12 +948,33 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" + [[package]] name = "heck" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -784,6 +1018,27 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" +[[package]] +name = "http-types" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6e9b187a72d63adbfba487f48095306ac823049cb504ee195541e91c7775f5ad" +dependencies = [ + "anyhow", + "async-channel", + "base64 0.13.1", + "futures-lite", + "http", + "infer", + "pin-project-lite", + "rand 0.7.3", + "serde", + "serde_json", + "serde_qs", + "serde_urlencoded", + "url", +] + [[package]] name = "httparse" version = "1.8.0" @@ -829,12 +1084,32 @@ dependencies = [ "http", "hyper", "log", - "rustls", + "rustls 0.20.8", "rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http", + "hyper", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "idna" version = "0.3.0" @@ -852,9 +1127,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.0", + "serde", + "serde_core", ] +[[package]] +name = "infer" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" + [[package]] name = "instant" version = "0.1.12" @@ -871,9 +1164,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" 
dependencies = [ "libc", - "windows-sys", + "windows-sys 0.42.0", ] +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + [[package]] name = "itoa" version = "1.0.4" @@ -882,10 +1181,11 @@ checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" [[package]] name = "js-sys" -version = "0.3.60" +version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -895,11 +1195,17 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" -version = "0.2.137" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "linux-raw-sys" @@ -970,8 +1276,25 @@ checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", - "wasi", - "windows-sys", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.42.0", +] + +[[package]] +name = "moka" +version = "0.12.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" +dependencies = [ + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "equivalent", + "parking_lot", + 
"portable-atomic", + "smallvec", + "tagptr", + "uuid", ] [[package]] @@ -1037,6 +1360,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.12.1" @@ -1057,7 +1386,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -1098,12 +1427,28 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "ppv-lite86" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.117", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -1130,9 +1475,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.70" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -1143,7 +1488,7 @@ version = "0.14.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b1de8dacb0873f77e6aefc6d71e044761fcc68060290f5b1089fcdf84626bb69" dependencies = [ - "bitflags", + "bitflags 1.3.2", "byteorder", "hex", "lazy_static", @@ -1175,13 +1520,32 @@ checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" [[package]] name = "quote" -version = "1.0.33" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + [[package]] name = "rand" version = "0.8.5" @@ -1189,8 +1553,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", ] [[package]] @@ -1200,7 +1574,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_core" 
+version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", ] [[package]] @@ -1209,7 +1592,16 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.8", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", ] [[package]] @@ -1218,7 +1610,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -1248,23 +1640,83 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" [[package]] -name = "ring" -version = "0.16.20" +name = "reqwest" +version = "0.11.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41" dependencies = [ - "cc", - "libc", - "once_cell", - "spin", - "untrusted", - "web-sys", - "winapi", -] - -[[package]] -name = "router" -version = "0.1.0" + "base64 0.21.0", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-rustls 0.24.2", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls 0.21.12", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "system-configuration", + "tokio", + "tokio-rustls 0.24.1", + 
"tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", + "winreg", +] + +[[package]] +name = "retain_mut" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" + +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin 0.5.2", + "untrusted 0.7.1", + "web-sys", + "winapi", +] + +[[package]] +name = "ring" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" +dependencies = [ + "cc", + "getrandom 0.2.8", + "libc", + "spin 0.9.8", + "untrusted 0.9.0", + "windows-sys 0.48.0", +] + +[[package]] +name = "router" +version = "0.1.0" dependencies = [ "async-trait", "aws-config", @@ -1272,24 +1724,34 @@ dependencies = [ "axum", "axum-macros", "clap", + "dashmap", + "hyper", + "ipnet", "lazy_static", "libc", + "moka", "prometheus", + "reqwest", "router-controller", + "serde", + "serde_json", "tokio", + "tower", "tracing", "tracing-subscriber", + "tracing-test", "url", + "wiremock", ] [[package]] name = "router-controller" version = "0.1.0" dependencies = [ - "base64", + "base64 0.21.0", "log", "parking_lot", - "rand", + "rand 0.8.5", "serde", "tracing", ] @@ -1309,12 +1771,12 @@ version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -1324,11 +1786,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" dependencies = [ "log", - "ring", + "ring 0.16.20", "sct", "webpki", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring 0.17.3", + "rustls-webpki", + "sct", +] + [[package]] name = "rustls-native-certs" version = "0.6.2" @@ -1347,9 +1821,25 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" dependencies = [ - "base64", + "base64 0.21.0", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring 0.17.3", + "untrusted 0.9.0", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "ryu" version = "1.0.11" @@ -1362,7 +1852,7 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" dependencies = [ - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -1377,8 +1867,8 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" dependencies = [ - "ring", - "untrusted", + "ring 0.16.20", + "untrusted 0.7.1", ] [[package]] @@ -1387,7 +1877,7 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "645926f31b250a2dca3c232496c2d898d91036e45ca0e97e0e2390c54e11be36" dependencies = [ - "bitflags", + "bitflags 1.3.2", 
"core-foundation", "core-foundation-sys", "libc", @@ -1412,22 +1902,32 @@ checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" [[package]] name = "serde" -version = "1.0.147" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.147" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 1.0.103", + "syn 2.0.117", ] [[package]] @@ -1441,6 +1941,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_qs" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" +dependencies = [ + "percent-encoding", + "serde", + "thiserror", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -1524,6 +2035,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "strsim" version = "0.10.0" @@ -1543,9 +2060,9 @@ dependencies = [ [[package]] name = "syn" -version = 
"2.0.41" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -1558,6 +2075,33 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + [[package]] name = "termcolor" version = "1.1.3" @@ -1674,11 +2218,21 @@ version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" dependencies = [ - "rustls", + "rustls 0.20.8", "tokio", "webpki", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.11" @@ -1726,7 +2280,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" dependencies = [ - "bitflags", + "bitflags 1.3.2", "bytes", "futures-core", "futures-util", @@ -1753,10 +2307,11 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" dependencies = [ + "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -1765,20 +2320,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 1.0.103", ] [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" dependencies = [ "once_cell", "valuable", @@ -1826,6 +2381,27 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "tracing-test" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a4c448db514d4f24c5ddb9f73f2ee71bfb24c526cf0c570ba142d1119e0051" +dependencies = [ + "tracing-core", + "tracing-subscriber", + "tracing-test-macro", +] + +[[package]] +name = "tracing-test-macro" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad06847b7afb65c7866a36664b75c40b895e318cea4f71299f013fb22965329d" +dependencies = [ + "quote", + "syn 2.0.117", +] + 
[[package]] name = "try-lock" version = "0.2.4" @@ -1859,12 +2435,24 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "untrusted" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.3.1" @@ -1874,6 +2462,7 @@ dependencies = [ "form_urlencoded", "idna", "percent-encoding", + "serde", ] [[package]] @@ -1882,6 +2471,17 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8db7427f936968176eaa7cdf81b7f98b980b18495ec28f1b5791ac3bfe3eea9" +[[package]] +name = "uuid" +version = "1.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "valuable" version = "0.1.0" @@ -1894,6 +2494,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "waker-fn" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = "want" version = "0.3.0" @@ -1904,42 +2510,66 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" dependencies = [ "cfg-if", + "once_cell", + "rustversion", "wasm-bindgen-macro", + "wasm-bindgen-shared", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.83" +name = "wasm-bindgen-futures" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn 1.0.103", - "wasm-bindgen-shared", + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = 
"eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1947,22 +2577,59 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" dependencies = [ + "bumpalo", "proc-macro2", "quote", - "syn 1.0.103", - "wasm-bindgen-backend", + "syn 2.0.117", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.14.0", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.1", + "hashbrown 0.15.5", + "indexmap 2.14.0", + "semver", +] [[package]] name = "web-sys" @@ -1980,10 +2647,16 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" dependencies = [ - "ring", - "untrusted", + "ring 0.16.20", + "untrusted 0.7.1", ] +[[package]] +name = "webpki-roots" +version = "0.25.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" + [[package]] name = "winapi" version = "0.3.9" @@ -2021,13 +2694,37 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.0", + "windows_aarch64_msvc 0.42.0", + "windows_i686_gnu 0.42.0", + "windows_i686_msvc 0.42.0", + "windows_x86_64_gnu 0.42.0", + "windows_x86_64_gnullvm 0.42.0", + "windows_x86_64_msvc 0.42.0", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -2036,42 +2733,210 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_i686_gnu" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_x86_64_gnu" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +[[package]] 
+name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "wiremock" +version = "0.5.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13a3a53eaf34f390dd30d7b1b078287dd05df2aa2e21a589ccb80f5c7253c2e9" +dependencies = [ + "assert-json-diff", + "async-trait", + "base64 0.21.0", + "deadpool", + "futures", + "futures-timer", + "http-types", + "hyper", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck 0.5.0", + 
"wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck 0.5.0", + "indexmap 2.14.0", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.1", + "indexmap 2.14.0", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap 2.14.0", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + [[package]] name = "xmlparser" version = "0.13.5" diff --git a/Dockerfile b/Dockerfile index 6a4c1c2..1691066 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,10 @@ -FROM rust:1.65.0 AS builder +# Builder pinned to Rust 1.89 on bullseye: +# - Cargo.lock is now v4 (introduced in Cargo 1.78, April 2024) due to the +# crates added for the v2 auth middleware (moka, dashmap, reqwest, ipnet, +# etc.). The previous rust:1.65.0 builder cannot parse v4 lockfiles. 
+# - Bullseye matches the runtime image below — keeps the binary's +# glibc-symbol set compatible with `debian:bullseye-slim` at runtime. +FROM rust:1.89-bullseye AS builder WORKDIR /archive-router COPY ./ . RUN cargo build --release diff --git a/crates/router/Cargo.toml b/crates/router/Cargo.toml index 097bbd3..14df1c4 100644 --- a/crates/router/Cargo.toml +++ b/crates/router/Cargo.toml @@ -11,7 +11,7 @@ aws-config = "0.51.0" axum = "0.5.17" axum-macros = "0.2.3" async-trait = "0.1.74" -clap = { version = "4.0.18", features = ["derive"] } +clap = { version = "4.0.18", features = ["derive", "env"] } tokio = { version = "1.21.2", features = ["full"] } url = "2.3.1" tracing = "0.1" @@ -19,5 +19,17 @@ tracing-subscriber = { version = "0.3.0", features = ["env-filter", "json"] } libc = "0.2" lazy_static = "1.4.0" prometheus = { version = "0.13.3", features = ["process"] } +moka = { version = "0.12", features = ["sync"] } +dashmap = "5" +reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +ipnet = "2" router-controller = { version = "0.1", path = "../router-controller" } + +[dev-dependencies] +wiremock = "0.5" +tower = { version = "0.4", features = ["util"] } +tracing-test = "0.2" +hyper = "0.14" diff --git a/crates/router/src/auth/cache.rs b/crates/router/src/auth/cache.rs new file mode 100644 index 0000000..9427952 --- /dev/null +++ b/crates/router/src/auth/cache.rs @@ -0,0 +1,244 @@ +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use super::clock::{Clock, SystemClock}; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum KeyState { + Exists { user_id: String }, + Deleted, + /// Network API was unreachable on the most recent attempt for this key. + /// Cached briefly so the singleflight queue can drain without each + /// waiter re-issuing a 250ms timeout. Recovery TTL is short (1s) so + /// when Network API comes back, the cache un-poisons quickly. 
+ FailedRecently, +} + +#[derive(Clone)] +struct Entry { + state: KeyState, + deadline: Instant, +} + +const TTL_EXISTS: Duration = Duration::from_secs(60); +const TTL_DELETED: Duration = Duration::from_secs(15); +const TTL_FAIL_OPEN: Duration = Duration::from_secs(1); + +pub struct KeyCache { + inner: moka::sync::Cache, + clock: Arc, +} + +impl KeyCache { + pub fn new(capacity: u64) -> Self { + Self::with_clock(capacity, Arc::new(SystemClock)) + } + + pub fn with_clock(capacity: u64, clock: Arc) -> Self { + // Configure moka with a TTL ceiling equal to the longest semantic + // TTL we use (TTL_EXISTS = 60s). Without it, expired entries stay + // physically resident until LRU eviction triggers under capacity + // pressure — which means a key flood evicts *valid* entries first + // (LRU sees them as "older" than freshly-touched expired ones). + // Per-entry semantic TTLs (Deleted=15s, FailedRecently=1s) are + // shorter than this ceiling and remain enforced by `get` returning + // None past the deadline; the moka TTL is just the upper bound that + // guarantees eventual physical eviction of any entry. + Self { + inner: moka::sync::Cache::builder() + .max_capacity(capacity) + .time_to_live(TTL_EXISTS) + .build(), + clock, + } + } + + /// Read the current state for `token`. + /// + /// Note: the deadline check and the entry return are not atomic — the + /// clock can advance past the deadline in the few ns between the check + /// and the return. For 1s+ TTLs this is inconsequential, and we never + /// rely on monotonic eviction at the boundary. + /// + /// We deliberately DO NOT call `invalidate(token)` on expired entries + /// here. Pre-fix that did, and was racy: between reading the expired + /// entry and the invalidate call, another writer could have replaced + /// the entry with a fresh one (different value, fresh deadline); + /// `invalidate` deletes by key and would wipe that fresh write. 
+ /// Instead we rely on (a) moka's `time_to_live` for eventual physical + /// eviction and (b) `put_*` overwriting the slot whenever the next + /// validate result arrives, so an expired entry is just a soft-miss + /// that the next request resolves through the normal cache-miss path. + pub fn get(&self, token: &str) -> Option { + let entry = self.inner.get(token)?; + if self.clock.now() >= entry.deadline { + None + } else { + Some(entry.state) + } + } + + pub fn put_exists(&self, token: String, user_id: String, expires_at: Option) { + let now = self.clock.now(); + if let Some(exp) = expires_at { + if exp <= now { + self.put_deleted(token); + return; + } + } + let default_deadline = now + TTL_EXISTS; + let deadline = match expires_at { + Some(exp) => default_deadline.min(exp), + None => default_deadline, + }; + let entry = Entry { + state: KeyState::Exists { user_id }, + deadline, + }; + self.inner.insert(token, entry); + } + + pub fn put_deleted(&self, token: String) { + let deadline = self.clock.now() + TTL_DELETED; + let entry = Entry { + state: KeyState::Deleted, + deadline, + }; + self.inner.insert(token, entry); + } + + /// Brief sentinel so concurrent waiters for the same token don't all + /// re-issue the validate call after the leader returns FailOpen. 
+ pub fn put_failed_recently(&self, token: String) { + let deadline = self.clock.now() + TTL_FAIL_OPEN; + let entry = Entry { + state: KeyState::FailedRecently, + deadline, + }; + self.inner.insert(token, entry); + } + + #[cfg(test)] + fn entry_count(&self) -> u64 { + self.inner.run_pending_tasks(); + self.inner.entry_count() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::auth::clock::TestClock; + + fn cache() -> (KeyCache, Arc) { + let clock = TestClock::new(); + let cache = KeyCache::with_clock(10_000, clock.clone()); + (cache, clock) + } + + fn exists(user: &str) -> KeyState { + KeyState::Exists { + user_id: user.into(), + } + } + + #[test] + fn get_undefined_returns_none() { + let (c, _) = cache(); + assert_eq!(c.get("unknown"), None, "UNDEFINED must be None, not Deleted"); + } + + #[test] + fn put_exists_then_get() { + let (c, _) = cache(); + c.put_exists("tok".into(), "u1".into(), None); + assert_eq!(c.get("tok"), Some(exists("u1"))); + } + + #[test] + fn put_deleted_then_get() { + let (c, _) = cache(); + c.put_deleted("tok".into()); + assert_eq!(c.get("tok"), Some(KeyState::Deleted)); + } + + #[test] + fn exists_default_ttl_60s() { + let (c, clock) = cache(); + c.put_exists("tok".into(), "u".into(), None); + clock.advance(Duration::from_secs(59)); + assert_eq!(c.get("tok"), Some(exists("u"))); + clock.advance(Duration::from_secs(2)); + assert_eq!(c.get("tok"), None); + } + + #[test] + fn deleted_default_ttl_15s() { + let (c, clock) = cache(); + c.put_deleted("tok".into()); + clock.advance(Duration::from_secs(14)); + assert_eq!(c.get("tok"), Some(KeyState::Deleted)); + clock.advance(Duration::from_secs(2)); + assert_eq!(c.get("tok"), None); + } + + #[test] + fn expires_at_clamped_inside_window() { + let (c, clock) = cache(); + let exp = clock.now() + Duration::from_secs(30); + c.put_exists("tok".into(), "u".into(), Some(exp)); + clock.advance(Duration::from_secs(29)); + assert_eq!(c.get("tok"), Some(exists("u"))); + 
clock.advance(Duration::from_secs(2)); + assert_eq!(c.get("tok"), None, "clamp should expire at 30s, not 60s"); + } + + #[test] + fn expires_at_already_expired_stores_deleted() { + let (c, clock) = cache(); + let exp = clock.now() - Duration::from_secs(1); + c.put_exists("tok".into(), "u".into(), Some(exp)); + assert_eq!( + c.get("tok"), + Some(KeyState::Deleted), + "already-expired must downgrade to Deleted" + ); + clock.advance(Duration::from_secs(14)); + assert_eq!(c.get("tok"), Some(KeyState::Deleted)); + clock.advance(Duration::from_secs(2)); + assert_eq!(c.get("tok"), None); + } + + #[test] + fn undefined_ne_deleted_regression() { + let (c, _) = cache(); + assert_eq!(c.get("tok"), None); + c.put_exists("tok".into(), "u".into(), None); + let g = c.get("tok"); + assert!(matches!(g, Some(KeyState::Exists { .. }))); + assert_ne!(g, Some(KeyState::Deleted)); + assert_ne!(g, None); + } + + #[test] + fn capacity_evicts_under_pressure() { + let clock = TestClock::new(); + let cache = KeyCache::with_clock(100, clock.clone()); + for i in 0..1_000 { + cache.put_exists(format!("tok{i}"), "u".into(), None); + } + let count = cache.entry_count(); + assert!( + count <= 100, + "cache must respect capacity bound; got {count}" + ); + // Evicted entries are UNDEFINED (None), not DELETED. 
+ let mut undefined = 0; + for i in 0..1_000 { + if cache.get(&format!("tok{i}")).is_none() { + undefined += 1; + } + } + assert!(undefined > 0, "some early entries must have been evicted"); + } +} diff --git a/crates/router/src/auth/client.rs b/crates/router/src/auth/client.rs new file mode 100644 index 0000000..895cb81 --- /dev/null +++ b/crates/router/src/auth/client.rs @@ -0,0 +1,315 @@ +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +use serde::Deserialize; +use url::Url; + +use super::clock::{system_clock, Clock}; +use crate::metrics::VALIDATE_CALL_TOTAL; + +const TIMEOUT: Duration = Duration::from_millis(250); +const BREAKER_THRESHOLD: u64 = 50; +const BREAKER_OPEN_DURATION: Duration = Duration::from_secs(30); + +#[derive(Debug, PartialEq, Eq)] +pub enum ValidateResult { + /// `expires_at` is the server-authoritative absolute expiry timestamp, + /// when present. Forwarded to the cache so a key revoked with a + /// short server-side TTL doesn't get silently cached for the full + /// `TTL_EXISTS` (60s) cache TTL. + Exists { + user_id: String, + expires_at: Option, + }, + Deleted, + FailOpen, +} + +#[derive(Deserialize)] +struct ValidateResponse { + user_id: String, + /// Optional Unix timestamp (seconds) at which the server says this key + /// stops being valid. Wired through to the cache so the entry expires + /// at `min(now + TTL_EXISTS, expires_at)`. Server may omit it; the + /// cache then uses the default 60s TTL. + #[serde(default)] + expires_at: Option, +} + +#[derive(Clone, Copy)] +enum OpenState { + Closed, + Open { until: Instant }, + /// A probe is in flight after the open window elapsed. No other + /// requests are admitted until the probe records success or failure. + HalfOpen, +} + +/// State + counter live in the SAME mutex so a probe-success that resets +/// the counter and a stale failure that increments it cannot interleave +/// to spuriously open the breaker. 
+struct State { + open: OpenState, + consecutive_errors: u64, +} + +pub struct CircuitBreaker { + state: Mutex, + threshold: u64, + open_duration: Duration, + clock: Arc, +} + +impl CircuitBreaker { + pub fn new(threshold: u64, open_duration: Duration, clock: Arc) -> Self { + Self { + state: Mutex::new(State { + open: OpenState::Closed, + consecutive_errors: 0, + }), + threshold, + open_duration, + clock, + } + } + + /// Acquire admission for a request. Returns `Some(Permit)` if the + /// caller may proceed; the permit MUST be turned into a success or + /// failure record before it is dropped, otherwise its Drop impl + /// records a failure. This makes probe acquisition cancellation-safe: + /// a dropped probe (client disconnect, panic, shutdown) re-opens the + /// breaker instead of leaving it stuck in HalfOpen forever. + pub fn acquire(self: &Arc) -> Option { + let mut state = self.state.lock().unwrap(); + let admit = match state.open { + OpenState::Closed => true, + OpenState::HalfOpen => false, + OpenState::Open { until } => { + if self.clock.now() >= until { + state.open = OpenState::HalfOpen; + true + } else { + false + } + } + }; + if admit { + Some(Permit { + breaker: Some(self.clone()), + }) + } else { + None + } + } + + fn record_success_internal(&self) { + let mut state = self.state.lock().unwrap(); + state.consecutive_errors = 0; + state.open = OpenState::Closed; + } + + fn record_failure_internal(&self) { + let mut state = self.state.lock().unwrap(); + state.consecutive_errors = state.consecutive_errors.saturating_add(1); + match state.open { + OpenState::HalfOpen => { + state.open = OpenState::Open { + until: self.clock.now() + self.open_duration, + }; + } + OpenState::Closed | OpenState::Open { .. } => { + if state.consecutive_errors >= self.threshold { + state.open = OpenState::Open { + until: self.clock.now() + self.open_duration, + }; + } + } + } + } +} + +/// RAII handle returned by [`CircuitBreaker::acquire`]. 
Must be consumed +/// via [`Permit::record_success`] / [`Permit::record_failure`]. If +/// dropped without being consumed, records a failure — this keeps the +/// breaker out of permanent HalfOpen on cancellation. +pub struct Permit { + breaker: Option>, +} + +impl Permit { + pub fn record_success(mut self) { + if let Some(b) = self.breaker.take() { + b.record_success_internal(); + } + } + + pub fn record_failure(mut self) { + if let Some(b) = self.breaker.take() { + b.record_failure_internal(); + } + } +} + +impl Drop for Permit { + fn drop(&mut self) { + if let Some(b) = self.breaker.take() { + b.record_failure_internal(); + } + } +} + +pub struct NetworkApiClient { + http: reqwest::Client, + base_url: Option, + breaker: Arc, + /// Same clock the breaker uses, kept here so we can project absolute + /// server-side `expires_at` (unix seconds) onto the monotonic timeline + /// the cache is keyed on. Tests inject a `TestClock` so deterministic + /// expiry is observable without `tokio::time::pause`. + clock: Arc, +} + +impl NetworkApiClient { + pub fn new(base_url: Url) -> Self { + Self::build(Some(base_url), system_clock()) + } + + pub fn disabled() -> Self { + Self::build(None, system_clock()) + } + + #[cfg(test)] + pub fn with_clock(base_url: Option, clock: Arc) -> Self { + Self::build(base_url, clock) + } + + fn build(base_url: Option, clock: Arc) -> Self { + let http = reqwest::Client::builder() + .timeout(TIMEOUT) + .build() + .expect("reqwest client build"); + // Normalise base_url to end with `/` so a relative `join` below + // preserves any subpath (e.g. `https://auth.example.com/api/v2/` + // is not silently truncated to root). RFC 3986 absolute-path + // references with a leading `/` would replace the base path — + // so we use a relative path on a slash-terminated base instead. 
+ let base_url = base_url.map(|mut u| { + if !u.path().ends_with('/') { + let path = format!("{}/", u.path()); + u.set_path(&path); + } + u + }); + Self { + http, + base_url, + clock: clock.clone(), + breaker: Arc::new(CircuitBreaker::new( + BREAKER_THRESHOLD, + BREAKER_OPEN_DURATION, + clock, + )), + } + } + + pub async fn validate(&self, token: &str) -> ValidateResult { + // Local short-circuits (auth disabled, breaker open) return FailOpen + // WITHOUT incrementing VALIDATE_CALL_TOTAL — that metric measures + // actual outbound traffic, not every fail-open outcome. + let Some(base_url) = self.base_url.as_ref() else { + return ValidateResult::FailOpen; + }; + let Some(permit) = self.breaker.acquire() else { + return ValidateResult::FailOpen; + }; + // Relative path (no leading `/`) so a base_url with a subpath + // like `/api/v2/` resolves to `/api/v2/internal/validate`. + let url = match base_url.join("internal/validate") { + Ok(u) => u, + Err(_) => { + permit.record_failure(); + return ValidateResult::FailOpen; + } + }; + // From here on we either send a request or hit a transport error + // partway through; both count as a real validate call attempt. + let resp = self + .http + .post(url) + .json(&serde_json::json!({ "token": token })) + .send() + .await; + match resp { + Ok(r) if r.status().is_success() => match r.json::().await { + Ok(body) => { + VALIDATE_CALL_TOTAL.with_label_values(&["ok"]).inc(); + permit.record_success(); + let expires_at = body.expires_at.and_then(|exp_unix| { + // unix -> Instant requires anchoring on a "now" + // pair. We compute the offset between server-time + // (unix seconds) and our clock's now() and project + // the absolute expiry into the same monotonic + // reference frame the cache uses. + let now_unix = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .ok()? 
+ .as_secs(); + if exp_unix <= now_unix { + // Already expired by server's clock — let the + // cache see the past instant and demote it to + // `Deleted` immediately (see `put_exists`). + return Some(self.clock.now()); + } + let remaining = Duration::from_secs(exp_unix - now_unix); + Some(self.clock.now() + remaining) + }); + ValidateResult::Exists { + user_id: body.user_id, + expires_at, + } + } + Err(_) => { + VALIDATE_CALL_TOTAL.with_label_values(&["fail_open"]).inc(); + permit.record_failure(); + ValidateResult::FailOpen + } + }, + Ok(r) if r.status() == reqwest::StatusCode::NOT_FOUND => { + // A bare 404 is ambiguous: the validate endpoint genuinely + // says "no such key", OR the URL is misconfigured (wrong + // base path, wrong subpath, missing reverse-proxy route). + // Treating both as `Deleted` would negatively cache valid + // keys for 15s under a deployment misconfig. We require a + // signal that the validate API itself answered: a JSON + // content-type. HTML/text 404s (typical of misrouted + // requests through nginx/Cloud LB) fail open instead. + let is_json = r + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .map(|s| s.starts_with("application/json")) + .unwrap_or(false); + if is_json { + VALIDATE_CALL_TOTAL.with_label_values(&["deleted"]).inc(); + permit.record_success(); + ValidateResult::Deleted + } else { + // Looks like a non-API 404 (misrouted, gateway error + // page, etc.). Fail open so a deployment problem + // doesn't masquerade as a flood of revoked keys. 
+ VALIDATE_CALL_TOTAL.with_label_values(&["fail_open"]).inc(); + permit.record_failure(); + ValidateResult::FailOpen + } + } + _ => { + VALIDATE_CALL_TOTAL.with_label_values(&["fail_open"]).inc(); + permit.record_failure(); + ValidateResult::FailOpen + } + } + } +} + +#[cfg(test)] +mod tests; diff --git a/crates/router/src/auth/client/tests.rs b/crates/router/src/auth/client/tests.rs new file mode 100644 index 0000000..11c7f66 --- /dev/null +++ b/crates/router/src/auth/client/tests.rs @@ -0,0 +1,572 @@ +use super::*; +use crate::auth::clock::TestClock; +use serde_json::json; +use std::sync::OnceLock; +use tokio::sync::{Mutex, MutexGuard}; +use wiremock::matchers::{body_json, method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +/// Serialise tests that read or write VALIDATE_CALL_TOTAL — the +/// counter is process-global and concurrent tests would race on it. +async fn metrics_lock() -> MutexGuard<'static, ()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())).lock().await +} + +async fn server() -> MockServer { + MockServer::start().await +} + +fn client(server: &MockServer, clock: Arc) -> NetworkApiClient { + NetworkApiClient::with_clock(Some(Url::parse(&server.uri()).unwrap()), clock) +} + +#[tokio::test] +async fn validate_200_returns_exists() { + let s = server().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({"user_id": "u1"}))) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + assert_eq!( + c.validate("sqd_data_abc_xyz").await, + ValidateResult::Exists { + user_id: "u1".into(), + expires_at: None, + } + ); +} + +// New: when the validate API returns an `expires_at` (server-side TTL), +// it must propagate through to ValidateResult so the cache can clamp the +// entry's lifetime below TTL_EXISTS. 
Server timestamp is unix seconds; we +// project onto the monotonic clock (TestClock here) by computing the +// remaining duration relative to "now" and adding to clock.now(). +#[tokio::test] +async fn validate_200_propagates_expires_at() { + let s = server().await; + // Server says: token expires 30s from now (unix seconds). + let now_unix = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(); + let exp = now_unix + 30; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with( + ResponseTemplate::new(200) + .set_body_json(json!({"user_id": "u1", "expires_at": exp})), + ) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + match c.validate("sqd_data_abc_xyz").await { + ValidateResult::Exists { + user_id, + expires_at: Some(_), + } => { + assert_eq!(user_id, "u1"); + // Specific value depends on TestClock starting reference; + // the existence of Some(_) is what we assert here, plus the + // cache test below which exercises the clamping behaviour. + } + other => panic!("expected Exists with expires_at, got {:?}", other), + } +} + +// `expires_at` already in the past (server says: this is dead) -> we +// return Some(now) so the cache demotes the entry to Deleted on insert. +#[tokio::test] +async fn validate_200_past_expires_at_returns_now() { + let s = server().await; + let now_unix = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(); + let past = now_unix - 60; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with( + ResponseTemplate::new(200) + .set_body_json(json!({"user_id": "u1", "expires_at": past})), + ) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + let result = c.validate("sqd_data_abc_xyz").await; + assert!( + matches!( + result, + ValidateResult::Exists { + expires_at: Some(_), + .. 
+ } + ), + "past expires_at must still surface as Some(_) so cache can demote" + ); +} + +#[tokio::test] +async fn validate_404_with_json_returns_deleted() { + let s = server().await; + // 404 + JSON content-type = the validate API itself answering "deleted". + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with( + ResponseTemplate::new(404) + .insert_header("content-type", "application/json") + .set_body_json(json!({"deleted": true})), + ) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + assert_eq!(c.validate("sqd_data_x_y").await, ValidateResult::Deleted); +} + +// 404 WITHOUT a JSON content-type is most likely a misrouted request +// (gateway error page, wrong subpath, etc.) — fail open so a deployment +// misconfig doesn't masquerade as a flood of revoked keys. +#[tokio::test] +async fn validate_404_without_json_fails_open() { + let s = server().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with( + ResponseTemplate::new(404) + .insert_header("content-type", "text/html") + .set_body_string("404 not found"), + ) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + assert_eq!(c.validate("sqd_data_x_y").await, ValidateResult::FailOpen); +} + +// Bare 404 with no content-type: ambiguous, also fail open. The validate +// API contract requires JSON; anything else looks like misrouting. 
+#[tokio::test] +async fn validate_404_no_content_type_fails_open() { + let s = server().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with(ResponseTemplate::new(404)) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + assert_eq!(c.validate("sqd_data_x_y").await, ValidateResult::FailOpen); +} + +#[tokio::test] +async fn validate_500_returns_fail_open() { + let s = server().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(500)) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + assert_eq!(c.validate("sqd_data_x_y").await, ValidateResult::FailOpen); +} + +#[tokio::test] +async fn validate_timeout_returns_fail_open() { + let s = server().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(200).set_delay(Duration::from_millis(500))) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + let start = std::time::Instant::now(); + assert_eq!(c.validate("sqd_data_x_y").await, ValidateResult::FailOpen); + assert!( + start.elapsed() < Duration::from_millis(450), + "must time out within ~250ms; took {:?}", + start.elapsed() + ); +} + +#[tokio::test] +async fn validate_connection_error_returns_fail_open() { + // Port 1 is reserved/refused on basically every host. + let c = NetworkApiClient::with_clock( + Some(Url::parse("http://127.0.0.1:1").unwrap()), + TestClock::new(), + ); + assert_eq!(c.validate("sqd_data_x_y").await, ValidateResult::FailOpen); +} + +#[tokio::test] +async fn breaker_opens_after_50_consecutive_errors() { + let s = server().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(500)) + .mount(&s) + .await; + let clock = TestClock::new(); + let c = client(&s, clock); + for _ in 0..50 { + let _ = c.validate("sqd_data_x_y").await; + } + let received_before = s.received_requests().await.unwrap().len(); + assert_eq!(received_before, 50); + // 51st must short-circuit (no HTTP call). 
+ assert_eq!(c.validate("sqd_data_x_y").await, ValidateResult::FailOpen); + let received_after = s.received_requests().await.unwrap().len(); + assert_eq!(received_after, 50, "breaker must short-circuit the 51st call"); +} + +#[tokio::test] +async fn breaker_probes_after_30s() { + let s = server().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(500)) + .mount(&s) + .await; + let clock = TestClock::new(); + let c = NetworkApiClient::with_clock(Some(Url::parse(&s.uri()).unwrap()), clock.clone()); + for _ in 0..50 { + let _ = c.validate("sqd_data_x_y").await; + } + // Breaker open. Advance 30s. + clock.advance(Duration::from_secs(30)); + // Probe should be admitted (and fail again, but the call hits wiremock). + let _ = c.validate("sqd_data_x_y").await; + let received = s.received_requests().await.unwrap().len(); + assert_eq!(received, 51, "one probe must be admitted after 30s"); +} + +#[tokio::test] +async fn breaker_resets_on_success() { + let s = server().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(500).set_body_string("")) + .up_to_n_times(10) + .mount(&s) + .await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({"user_id": "u"}))) + .mount(&s) + .await; + let clock = TestClock::new(); + let c = client(&s, clock); + for _ in 0..10 { + let _ = c.validate("sqd_data_x_y").await; + } + // 11th: success -> breaker counter resets. + assert!(matches!( + c.validate("sqd_data_x_y").await, + ValidateResult::Exists { .. } + )); + // We can now sustain another 49 errors before breaking. + // Mount 50 more 500s. 
+ s.reset().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(500)) + .mount(&s) + .await; + for _ in 0..49 { + let _ = c.validate("sqd_data_x_y").await; + } + let received = s.received_requests().await.unwrap().len(); + assert_eq!( + received, 49, + "breaker should still be closed after success reset" + ); +} + +// 200 OK with malformed body must not cache garbage. +#[tokio::test] +async fn validate_malformed_success_body_is_fail_open() { + let s = server().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with(ResponseTemplate::new(200).set_body_string("not json")) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + assert_eq!(c.validate("sqd_data_x_y").await, ValidateResult::FailOpen); +} + +// 200 OK with valid JSON but missing user_id must not be Exists. +#[tokio::test] +async fn validate_200_missing_user_id_is_fail_open() { + let s = server().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({"other": "field"}))) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + assert_eq!(c.validate("sqd_data_x_y").await, ValidateResult::FailOpen); +} + +// half-open: only ONE probe slips through after the open window. +#[tokio::test] +async fn breaker_half_open_admits_exactly_one_probe() { + let s = server().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(500)) + .mount(&s) + .await; + let clock = TestClock::new(); + let c = NetworkApiClient::with_clock(Some(Url::parse(&s.uri()).unwrap()), clock.clone()); + for _ in 0..50 { + let _ = c.validate("sqd_data_x_y").await; + } + clock.advance(Duration::from_secs(30)); + // 10 simultaneous calls after the open window: only 1 probe should be admitted. 
+ let before = s.received_requests().await.unwrap().len(); + let mut handles = Vec::new(); + let c = Arc::new(c); + for _ in 0..10 { + let c = c.clone(); + handles.push(tokio::spawn(async move { c.validate("sqd_data_x_y").await })); + } + for h in handles { + let _ = h.await; + } + let after = s.received_requests().await.unwrap().len(); + assert_eq!( + after - before, + 1, + "exactly one probe must reach the API after the window elapses" + ); +} + +/// base_url with a subpath (e.g. behind a reverse proxy at `/api/v2/`) +/// must be preserved when building the validate URL. +#[test] +fn build_url_preserves_subpath_with_trailing_slash() { + let c = NetworkApiClient::with_clock( + Some(Url::parse("http://auth.example.com/api/v2/").unwrap()), + TestClock::new(), + ); + let joined = c.base_url.as_ref().unwrap().join("internal/validate").unwrap(); + assert_eq!(joined.as_str(), "http://auth.example.com/api/v2/internal/validate"); +} + +/// Without a trailing slash on the input, the client should still +/// resolve to the subpath (we normalise at construction). +#[test] +fn build_url_preserves_subpath_without_trailing_slash() { + let c = NetworkApiClient::with_clock( + Some(Url::parse("http://auth.example.com/api/v2").unwrap()), + TestClock::new(), + ); + let joined = c.base_url.as_ref().unwrap().join("internal/validate").unwrap(); + assert_eq!(joined.as_str(), "http://auth.example.com/api/v2/internal/validate"); +} + +/// Plain root-only base still resolves correctly. +#[test] +fn build_url_root_base() { + let c = NetworkApiClient::with_clock( + Some(Url::parse("http://auth.example.com").unwrap()), + TestClock::new(), + ); + let joined = c.base_url.as_ref().unwrap().join("internal/validate").unwrap(); + assert_eq!(joined.as_str(), "http://auth.example.com/internal/validate"); +} + +/// End-to-end: validate against a wiremock mounted at a subpath +/// route. Pre-fix, the subpath was silently dropped. 
+#[tokio::test] +async fn validate_calls_subpath_route() { + let s = server().await; + Mock::given(method("POST")) + .and(path("/api/v2/internal/validate")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({"user_id": "u1"}))) + .mount(&s) + .await; + let c = NetworkApiClient::with_clock( + Some(Url::parse(&format!("{}/api/v2/", s.uri())).unwrap()), + TestClock::new(), + ); + assert!(matches!( + c.validate("sqd_data_x").await, + ValidateResult::Exists { .. } + )); + assert_eq!(s.received_requests().await.unwrap().len(), 1); +} + +fn validate_call_count(label: &str) -> u64 { + crate::metrics::VALIDATE_CALL_TOTAL + .with_label_values(&[label]) + .get() +} + +// VALIDATE_CALL_TOTAL must NOT increment when the +// client is disabled (no base_url). No network call → no count. +#[tokio::test] +async fn validate_call_total_silent_when_disabled() { + let _g = metrics_lock().await; + let c = NetworkApiClient::with_clock(None, TestClock::new()); + let before = validate_call_count("fail_open"); + let _ = c.validate("sqd_data_x_y").await; + let _ = c.validate("sqd_data_x_y").await; + let _ = c.validate("sqd_data_x_y").await; + assert_eq!( + validate_call_count("fail_open"), + before, + "disabled client must not inflate validate_call_total" + ); +} + +// Once the breaker is Open, validate() must short-circuit without +// touching the network. We verify via wiremock's local request log +// (no global metric reference) so the test is independent of any +// other test running in parallel. 
+#[tokio::test] +async fn validate_short_circuits_when_breaker_open() { + let s = server().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(500)) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + for _ in 0..50 { + let _ = c.validate("sqd_data_x_y").await; + } + for _ in 0..10 { + let _ = c.validate("sqd_data_x_y").await; + } + assert_eq!( + s.received_requests().await.unwrap().len(), + 50, + "open breaker must not issue any further network calls" + ); +} + +// A successful validate hits the wire (verified via wiremock). +#[tokio::test] +async fn validate_issues_a_real_send() { + let s = server().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({"user_id": "u1"}))) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + let _ = c.validate("sqd_data_x_y").await; + assert_eq!(s.received_requests().await.unwrap().len(), 1); +} + +// counter and state mutate under one lock. A success +// followed by a single failure must NOT push the breaker open if the +// success reset the counter, even when the threshold-1 failures +// preceded the success. +#[tokio::test] +async fn success_resets_counter_atomically_with_state() { + let clock = TestClock::new(); + let breaker = Arc::new(CircuitBreaker::new( + 3, + Duration::from_secs(30), + clock as Arc, + )); + // 2 failures (threshold 3, not yet open). + breaker.acquire().unwrap().record_failure(); + breaker.acquire().unwrap().record_failure(); + // A success here must reset consecutive_errors to 0. + breaker.acquire().unwrap().record_success(); + // A subsequent single failure must NOT open the breaker — the + // streak was broken. Pre-fix, a stale fetch_add could have left + // n=3 visible to record_failure even after the reset. 
+ breaker.acquire().unwrap().record_failure(); + assert!( + breaker.acquire().is_some(), + "single failure after a success must not open the breaker" + ); +} + +// RAII probe guard: dropping a Permit without calling +// record_success/record_failure must record a failure, so the breaker +// never gets stuck in HalfOpen if a probe is cancelled mid-flight. +#[tokio::test] +async fn dropped_permit_records_failure() { + let clock = TestClock::new(); + let breaker = Arc::new(CircuitBreaker::new( + 3, + Duration::from_secs(30), + clock.clone() as Arc, + )); + // Drop 3 permits without recording → breaker should open. + for _ in 0..3 { + let _permit = breaker.acquire(); + // implicit drop → record_failure_internal + } + assert!(breaker.acquire().is_none(), "breaker must be open"); +} + +// cancellation safety: a probe (HalfOpen permit) that is +// dropped without resolution re-opens the breaker for another full +// window, instead of leaving it stuck HalfOpen forever. +#[tokio::test] +async fn cancelled_probe_reopens_breaker() { + let clock = TestClock::new(); + let breaker = Arc::new(CircuitBreaker::new( + 3, + Duration::from_secs(30), + clock.clone() as Arc, + )); + // Open the breaker via 3 failures. + for _ in 0..3 { + breaker.acquire().unwrap().record_failure(); + } + assert!(breaker.acquire().is_none()); + clock.advance(Duration::from_secs(30)); + // Acquire the probe permit and drop it without recording (simulates + // client-cancelled future, panic, or shutdown). + let probe = breaker.acquire().expect("probe should be admitted"); + drop(probe); + // Breaker must NOT be stuck HalfOpen — should be Open again. + // After only 30s elapsed (already past the first window), advance + // another 30s and verify a new probe is admitted exactly once. 
+ clock.advance(Duration::from_secs(30)); + assert!(breaker.acquire().is_some()); + assert!( + breaker.acquire().is_none(), + "second concurrent probe must be denied" + ); +} + +// half-open probe failure re-opens the breaker for another full window. +#[tokio::test] +async fn breaker_half_open_failure_reopens() { + let s = server().await; + Mock::given(method("POST")) + .respond_with(ResponseTemplate::new(500)) + .mount(&s) + .await; + let clock = TestClock::new(); + let c = NetworkApiClient::with_clock(Some(Url::parse(&s.uri()).unwrap()), clock.clone()); + for _ in 0..50 { + let _ = c.validate("sqd_data_x_y").await; + } + clock.advance(Duration::from_secs(30)); + let _ = c.validate("sqd_data_x_y").await; // probe fails + let received = s.received_requests().await.unwrap().len(); + // Immediately after a failed probe, breaker is Open again. + let _ = c.validate("sqd_data_x_y").await; + assert_eq!( + s.received_requests().await.unwrap().len(), + received, + "breaker must re-open after probe failure" + ); +} + +#[tokio::test] +async fn request_body_is_token_only() { + let s = server().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .and(body_json(json!({"token": "sqd_data_abc_xyz"}))) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({"user_id": "u"}))) + .mount(&s) + .await; + let c = client(&s, TestClock::new()); + assert!(matches!( + c.validate("sqd_data_abc_xyz").await, + ValidateResult::Exists { .. 
} + )); +} diff --git a/crates/router/src/auth/clock.rs b/crates/router/src/auth/clock.rs new file mode 100644 index 0000000..d2aee64 --- /dev/null +++ b/crates/router/src/auth/clock.rs @@ -0,0 +1,44 @@ +use std::sync::Arc; +use std::time::Instant; + +pub trait Clock: Send + Sync + 'static { + fn now(&self) -> Instant; +} + +pub struct SystemClock; + +impl Clock for SystemClock { + fn now(&self) -> Instant { + Instant::now() + } +} + +#[cfg(test)] +pub struct TestClock { + now: std::sync::Mutex, +} + +#[cfg(test)] +impl TestClock { + pub fn new() -> Arc { + Arc::new(Self { + now: std::sync::Mutex::new(Instant::now()), + }) + } + + pub fn advance(&self, d: std::time::Duration) { + let mut g = self.now.lock().unwrap(); + *g += d; + } +} + +#[cfg(test)] +impl Clock for TestClock { + fn now(&self) -> Instant { + *self.now.lock().unwrap() + } +} + +pub fn system_clock() -> Arc { + Arc::new(SystemClock) +} diff --git a/crates/router/src/auth/middleware.rs b/crates/router/src/auth/middleware.rs new file mode 100644 index 0000000..d302a11 --- /dev/null +++ b/crates/router/src/auth/middleware.rs @@ -0,0 +1,331 @@ +use std::net::{IpAddr, SocketAddr}; +use std::sync::Arc; + +use axum::extract::ConnectInfo; +use axum::http::{HeaderValue, Request, StatusCode}; +use axum::middleware::Next; +use axum::response::{IntoResponse, Response}; +use axum::Json; +use serde_json::json; +use tracing::warn; + +use crate::auth::cache::KeyState; +use crate::auth::client::ValidateResult; +use crate::auth::AuthState; +use crate::metrics::{ + AUTH_LATENCY_SECONDS, AUTH_TOTAL, CACHE_HIT_TOTAL, CACHE_MISS_TOTAL, REQUESTS_BY_KEY, +}; + +const TOKEN_PREFIX: &str = "sqd_data_"; + +/// Fixed user_id for IP-allowlist bypass requests. Single label keeps the +/// top-keys sketch (REQUESTS_BY_KEY) bounded — see `decide` step 3. +const INTERNAL_BYPASS_USER_ID: &str = "internal"; + +/// Header inspected for the upstream-supplied forwarded chain. 
+/// +/// We deliberately read `X-Original-Forwarded-For` (set by ingress-nginx as a +/// verbatim copy of the inbound `X-Forwarded-For`) rather than the +/// nginx-rewritten `X-Forwarded-For` / `X-Real-IP`. Empirical check +/// (see PR rollout plan) showed nginx with `use-forwarded-headers: true` and +/// default `proxy-real-ip-cidr=0.0.0.0/0` happily promotes a client-supplied +/// leftmost XFF to `X-Real-IP` — i.e. those headers are spoofable. The XOFF +/// preserves the full chain `[client-supplied..., real-client, upstream-LB]`, +/// so we can walk rightmost stripping our own `trusted_ips` and recover the +/// real source IP that the upstream LB observed at TCP handshake. +const ORIGINAL_FORWARDED_FOR: &str = "X-Original-Forwarded-For"; + +/// Value attached to the request via Extensions on a successful auth pass. +/// Downstream handlers can read `user_id` for audit attribution. +#[derive(Clone, Debug)] +pub struct AuthContext { + #[allow(dead_code)] + pub user_id: String, +} + +#[derive(Debug)] +enum Outcome { + Ok(AuthContext), + Missing, + Invalid, + FailOpen, + /// Kill switch active (`DISABLE_V2_AUTH=true`). Counted under its own + /// label so the dashboard makes it obvious that auth is bypassed + /// globally, not silently absorbed into `ok`. + Disabled, +} + +impl Outcome { + fn label(&self) -> &'static str { + match self { + Outcome::Ok(_) => "ok", + Outcome::Missing => "missing", + Outcome::Invalid => "invalid", + Outcome::FailOpen => "fail_open", + Outcome::Disabled => "disabled", + } + } +} + +pub async fn auth(mut req: Request, next: Next) -> Response +where + B: Send + 'static, +{ + let state = req + .extensions() + .get::>() + .cloned() + .expect("AuthState extension is required by auth middleware"); + + // Kill switch — short-circuits BEFORE the latency timer / decide so the + // disabled path is genuinely ~zero work. We still emit one metric + // sample so dashboards can see the switch is engaged. 
+ if state.disabled { + AUTH_TOTAL.with_label_values(&[Outcome::Disabled.label()]).inc(); + return next.run(req).await; + } + + let timer = AUTH_LATENCY_SECONDS.start_timer(); + let (outcome, real_ip) = decide(&state, &mut req).await; + AUTH_TOTAL.with_label_values(&[outcome.label()]).inc(); + timer.observe_duration(); + + let allowed = match &outcome { + Outcome::Ok(_) | Outcome::FailOpen | Outcome::Disabled => true, + Outcome::Missing | Outcome::Invalid => !should_enforce(&state, real_ip), + }; + + if let Outcome::Ok(ctx) = &outcome { + // Sketch update + label add/remove are performed under the + // top-keys lock so that a concurrent eviction can't be silently + // undone by a stale `with_label_values` call from another task. + // We label by user_id (the only token-derived value safe to + // expose: it identifies the tenant, not the secret material). + state.top_keys.observe_into(&ctx.user_id, &REQUESTS_BY_KEY); + req.extensions_mut().insert(ctx.clone()); + } + + if allowed { + next.run(req).await + } else { + deny() + } +} + +/// Whether the request should be enforced (denied on `Missing`/`Invalid`). +/// +/// One rule: enforce iff the resolved real-client IP matches any CIDR in +/// `state.enforce_for_ips`. Empty list -> never enforce. The wildcard +/// `0.0.0.0/0` + `::/0` (written `*` in the env var) -> enforce for every +/// source. Specific CIDRs -> canary scope. +/// +/// When `real_ip` is `None` (no XOFF, no `ConnectInfo`) we still enforce iff +/// the policy is "enforce for everyone" — the `*` shorthand is a catch-all +/// (`prefix_len == 0`) and shouldn't depend on resolving a source IP. In +/// strictly canary mode (only narrow CIDRs), `None` means "can't tell if +/// in scope" and we play it safe by NOT enforcing. 
+fn should_enforce(state: &AuthState, real_ip: Option) -> bool { + if state.enforce_for_ips.is_empty() { + return false; + } + match real_ip { + Some(ip) => state.enforce_for_ips.iter().any(|net| net.contains(&ip)), + None => state.enforce_for_ips.iter().any(|net| net.prefix_len() == 0), + } +} + +async fn decide(state: &AuthState, req: &mut Request) -> (Outcome, Option) { + // 1. Resolve the real client IP up-front. Every Outcome — including + // early rejections like duplicate-Authorization — needs to thread it + // out so `should_enforce` can decide canary scope correctly. + // Pre-fix: real_ip was extracted AFTER the duplicate-header check, so + // a request that hit (Outcome::Invalid, None) would pass through + // `should_enforce(state, None) == false` under any narrow-CIDR scope + // (no catch-all to short-circuit). That was an auth bypass: an + // in-scope client could send duplicate Authorization headers and + // sneak past enforcement. + let real_ip = extract_real_client_ip(req, &state.trusted_ips); + + // 2. Pull Authorization header. Bearer scheme only — no Token: fallback. + // Multiple Authorization headers are ambiguous (RFC 6750 §3.1 + // classifies them as invalid_request); reject as Invalid so proxies + // or load balancers that reorder headers can't smuggle a credential. + // We do this before the IP-bypass check so a request with both a + // smuggled extra Authorization AND a trusted source IP is still + // rejected — the duplicate header is itself the signal of trouble. + let auth_headers = req.headers().get_all(axum::http::header::AUTHORIZATION); + let mut iter = auth_headers.iter(); + let first = iter.next(); + if iter.next().is_some() { + return (Outcome::Invalid, real_ip); + } + + // 3. Source-IP bypass. Disabled when allowlist is empty. + // + // We attribute every bypass request under a single fixed label + // (`INTERNAL_BYPASS_USER_ID`) instead of `internal:` per source. 
+ // Reason: the top-keys sketch (REQUESTS_BY_KEY) is bounded to 100 + // entries; if internal pods come from many distinct IPs (we've seen + // dozens of /16 pod-CIDR blocks in main GKE), each would claim a + // sketch slot and could evict real-tenant entries. The IP-level + // detail is preserved in access logs / traces, not in this metric. + if !state.internal_allowlist.is_empty() { + if let Some(ip) = real_ip { + if state.internal_allowlist.iter().any(|net| net.contains(&ip)) { + return ( + Outcome::Ok(AuthContext { + user_id: INTERNAL_BYPASS_USER_ID.to_string(), + }), + Some(ip), + ); + } + } + } + + let Some(token) = first + .and_then(|v| v.to_str().ok()) + .and_then(extract_bearer) + else { + return (Outcome::Missing, real_ip); + }; + + // 4. Cheap reject: must carry the sqd_data_ scope prefix. The full + // `sqd_data_` string is what we use as cache key and what + // we send to /internal/validate. + if !token.starts_with(TOKEN_PREFIX) || token.len() <= TOKEN_PREFIX.len() { + return (Outcome::Invalid, real_ip); + } + + // 5. Cache lookup. Maps the cached state to a middleware Outcome, + // or falls through to the Network API call on UNDEFINED. + if let Some(outcome) = lookup(state, token) { + return (outcome, real_ip); + } + + // 6. Singleflight + Network API. Concurrent misses for the same token + // serialise here; the second waiter re-checks the cache and (typically) + // hits the entry the leader wrote — including a brief `FailedRecently` + // sentinel that drains the queue without re-issuing 250ms timeouts. + let _sf = state.inflight.acquire(token).await; + if let Some(outcome) = lookup(state, token) { + return (outcome, real_ip); + } + CACHE_MISS_TOTAL.inc(); + // VALIDATE_CALL_TOTAL is emitted by `client.validate` itself — only + // on outcomes that actually attempted a network round-trip. Local + // short-circuits (auth disabled, breaker open) don't inflate it. 
+ let outcome = match state.client.validate(token).await { + ValidateResult::Exists { user_id, expires_at } => { + state + .cache + .put_exists(token.to_string(), user_id.clone(), expires_at); + Outcome::Ok(AuthContext { user_id }) + } + ValidateResult::Deleted => { + state.cache.put_deleted(token.to_string()); + Outcome::Invalid + } + ValidateResult::FailOpen => { + state.cache.put_failed_recently(token.to_string()); + warn!("Network API unreachable; failing open"); + Outcome::FailOpen + } + }; + (outcome, real_ip) +} + +/// Cache lookup translated to an Outcome, or `None` if the slot is UNDEFINED. +fn lookup(state: &AuthState, token: &str) -> Option { + match state.cache.get(token)? { + KeyState::Exists { user_id } => { + CACHE_HIT_TOTAL.with_label_values(&["exists"]).inc(); + Some(Outcome::Ok(AuthContext { user_id })) + } + KeyState::Deleted => { + CACHE_HIT_TOTAL.with_label_values(&["deleted"]).inc(); + Some(Outcome::Invalid) + } + KeyState::FailedRecently => { + CACHE_HIT_TOTAL.with_label_values(&["fail_open"]).inc(); + Some(Outcome::FailOpen) + } + } +} + +fn extract_bearer(header: &str) -> Option<&str> { + let mut parts = header.splitn(2, ' '); + let scheme = parts.next()?; + if !scheme.eq_ignore_ascii_case("Bearer") { + return None; + } + let token = parts.next()?.trim(); + if token.is_empty() { + None + } else { + Some(token) + } +} + +/// Resolve the real client IP for IP-allowlist purposes. +/// +/// The chain `X-Original-Forwarded-For` is laid out left-to-right as: +/// +/// ```text +/// [X (?spoof?), ...], CLIENT_IP, GLB +/// └ payload supplied by │ │ +/// the client itself │ └ trusted upstream LB (in TRUSTED_IPS) +/// (do NOT trust) │ appended itself last +/// └ real source IP, written by GLB at TCP handshake; +/// this is what we want +/// ``` +/// +/// We walk rightmost-first, skip entries that match `trusted_ips`, and +/// return the first non-trusted IP. 
If the header is absent or yields no +/// non-trusted IP, fall back to the connection peer (`ConnectInfo`) — that +/// case covers direct in-cluster traffic via ClusterIP, where no XFF +/// chain is present and the peer IS the client pod. +fn extract_real_client_ip(req: &Request, trusted_ips: &[ipnet::IpNet]) -> Option { + if let Some(chain) = req + .headers() + .get(ORIGINAL_FORWARDED_FOR) + .and_then(|v| v.to_str().ok()) + { + // Walk rightmost. The first IP that is NOT in trusted_ips is the + // real source — anything to the left of it is client-supplied + // payload (potentially spoofed). + for segment in chain.split(',').rev() { + let parsed = match segment.trim().parse::() { + Ok(ip) => ip, + Err(_) => continue, + }; + if !trusted_ips.iter().any(|net| net.contains(&parsed)) { + return Some(parsed); + } + } + // All entries matched trusted_ips, or the chain was malformed. + // Fall through to ConnectInfo — better to short-circuit than to + // accidentally trust the LB's own IP as "the client". + } + + req.extensions() + .get::>() + .map(|ci| ci.0.ip()) +} + +fn deny() -> Response { + let body = Json(json!({ + "error": "CREDENTIALS_INVALID", + "message": "API key required or invalid. 
Get one at https://portal.sqd.dev", + "docs": "https://docs.sqd.dev/v2-keys", + })); + let mut resp = (StatusCode::FORBIDDEN, body).into_response(); + resp.headers_mut().insert( + axum::http::header::WWW_AUTHENTICATE, + HeaderValue::from_static("Bearer realm=\"sqd-archive\""), + ); + resp +} + + +#[cfg(test)] +mod tests; diff --git a/crates/router/src/auth/middleware/tests.rs b/crates/router/src/auth/middleware/tests.rs new file mode 100644 index 0000000..184e7bf --- /dev/null +++ b/crates/router/src/auth/middleware/tests.rs @@ -0,0 +1,1594 @@ +use super::*; +use crate::auth::clock::TestClock; +use crate::metrics::{ + AUTH_LATENCY_SECONDS, AUTH_TOTAL, CACHE_HIT_TOTAL, CACHE_MISS_TOTAL, VALIDATE_CALL_TOTAL, +}; +use axum::body::Body; +use axum::extract::Extension; +use axum::http::{header, Request, StatusCode}; +use axum::middleware::from_fn; +use axum::routing::get; +use axum::Router; +use serde_json::Value; +use std::sync::OnceLock; +use std::time::Duration; +use tokio::sync::{Mutex, MutexGuard}; +use tower::ServiceExt; +use url::Url; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +/// Prometheus counters are process-global; serialise tests that assert on +/// metric deltas so concurrent tests don't shift the baseline. 
+async fn metrics_lock() -> MutexGuard<'static, ()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())).lock().await +} + +async fn downstream_handler(req: Request) -> Response { + match req.extensions().get::() { + Some(ctx) => format!("ok:{}", ctx.user_id).into_response(), + None => "ok:no-ctx".into_response(), + } +} + +fn app(state: Arc) -> Router { + Router::new() + .route("/test", get(downstream_handler).layer(from_fn(super::auth))) + .layer(Extension(state)) +} + +fn req(uri: &str) -> axum::http::request::Builder { + Request::builder().uri(uri) +} + +async fn body_string(resp: Response) -> String { + let bytes = hyper::body::to_bytes(resp.into_body()).await.unwrap(); + String::from_utf8(bytes.to_vec()).unwrap() +} + +fn good_validate_mock(user_id: &str) -> Mock { + Mock::given(method("POST")).and(path("/internal/validate")).respond_with( + ResponseTemplate::new(200) + .set_body_json(serde_json::json!({"user_id": user_id})), + ) +} + +fn nf_validate_mock() -> Mock { + // Validate API answer for "key deleted": 404 with a JSON body. The + // application/json content-type is what tells us this is a real + // API response (vs a misrouted 404 from a gateway / proxy error + // page). See `client.rs` for the heuristic. 
+ Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with( + ResponseTemplate::new(404) + .insert_header("content-type", "application/json") + .set_body_json(serde_json::json!({"deleted": true})), + ) +} + +fn count_auth(label: &str) -> u64 { + AUTH_TOTAL.with_label_values(&[label]).get() +} +fn count_cache_hit(label: &str) -> u64 { + CACHE_HIT_TOTAL.with_label_values(&[label]).get() +} +fn count_validate(label: &str) -> u64 { + VALIDATE_CALL_TOTAL.with_label_values(&[label]).get() +} + +#[tokio::test] +async fn bearer_header_extracts_key() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u1").mount(&s).await; + let state = + AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new()); + + let before_ok = count_auth("ok"); + let resp = app(state.clone()) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(body_string(resp).await, "ok:u1"); + assert_eq!(count_auth("ok") - before_ok, 1); +} + +// `Token:` header is NOT a fallback. Treated as missing. +#[tokio::test] +async fn no_token_header_fallback() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u1").mount(&s).await; + let state = + AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new()); + + let resp = app(state) + .oneshot( + req("/test") + .header("Token", "sqd_data_abc_xyz") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + // No Authorization header -> Missing -> 403 (enforce=true). + assert_eq!(resp.status(), StatusCode::FORBIDDEN); + // Wiremock must not have been hit. 
+ assert_eq!(s.received_requests().await.unwrap().len(), 0); +} + +#[tokio::test] +async fn non_bearer_scheme_treated_as_missing() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u").mount(&s).await; + let state = AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), false, TestClock::new()); + + let before = count_auth("missing"); + let resp = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Basic dXNlcjpwYXNz") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(count_auth("missing") - before, 1); +} + +#[tokio::test] +async fn missing_prefix_short_circuits() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u").mount(&s).await; + let state = AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new()); + + let before_invalid = count_auth("invalid"); + let resp = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer not_an_sqd_key") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::FORBIDDEN); + assert_eq!(count_auth("invalid") - before_invalid, 1); + assert_eq!(s.received_requests().await.unwrap().len(), 0, "cheap-reject must not call API"); +} + +#[tokio::test] +async fn missing_token_passes_when_disabled() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u").mount(&s).await; + let state = AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), false, TestClock::new()); + + let before = count_auth("missing"); + let resp = app(state) + .oneshot(req("/test").body(Body::empty()).unwrap()) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(count_auth("missing") - before, 1); +} + +#[tokio::test] +async fn missing_token_403_when_enabled() { + let _g = metrics_lock().await; + let state = AuthState::for_test(None, 
true, TestClock::new()); + let resp = app(state) + .oneshot(req("/test").body(Body::empty()).unwrap()) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::FORBIDDEN); + assert_eq!( + resp.headers() + .get(header::WWW_AUTHENTICATE) + .unwrap() + .to_str() + .unwrap(), + "Bearer realm=\"sqd-archive\"" + ); + let body: Value = serde_json::from_str(&body_string(resp).await).unwrap(); + assert_eq!(body["error"], "CREDENTIALS_INVALID"); + assert!(body["message"].as_str().unwrap().contains("API key")); +} + +// the flag changes only the action, not the work. +#[tokio::test] +async fn enforce_disabled_still_does_full_work() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; // 404 -> Deleted + let state = + AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), false, TestClock::new()); + + let before_calls = count_validate("deleted"); + let before_invalid = count_auth("invalid"); + let resp = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + // enforce=false -> request passes. + assert_eq!(resp.status(), StatusCode::OK); + // ...but Network API was called and the metric incremented. + assert_eq!(s.received_requests().await.unwrap().len(), 1); + assert_eq!(count_validate("deleted") - before_calls, 1); + assert_eq!(count_auth("invalid") - before_invalid, 1); +} + +// bad-key flood: one Network API call per 15s. 
+#[tokio::test] +async fn bad_key_flood_one_call_per_15s() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + let clock = TestClock::new(); + let state = + AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, clock.clone()); + + for _ in 0..100 { + let resp = app(state.clone()) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_bad_xxx") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::FORBIDDEN); + } + assert_eq!( + s.received_requests().await.unwrap().len(), + 1, + "100 reqs with same bad key within 15s -> 1 API call" + ); + + // Past the 15s Deleted TTL -> next request re-checks. + clock.advance(Duration::from_secs(16)); + let _ = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_bad_xxx") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(s.received_requests().await.unwrap().len(), 2); +} + +// Timeout fails open and writes a brief FailedRecently sentinel so a +// burst of concurrent waiters doesn't each issue a 250ms timeout. +// After the sentinel TTL elapses the cache un-poisons. 
+#[tokio::test]
+async fn network_api_timeout_fails_open() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    // Validate API slower than the 250ms client timeout -> timeout path.
+    Mock::given(method("POST"))
+        .and(path("/internal/validate"))
+        .respond_with(ResponseTemplate::new(200).set_delay(Duration::from_millis(500)))
+        .mount(&s)
+        .await;
+    let clock = TestClock::new();
+    let state =
+        AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, clock.clone());
+
+    let before_fail = count_auth("fail_open");
+    let resp = app(state.clone())
+        .oneshot(
+            req("/test")
+                .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz")
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(
+        resp.status(),
+        StatusCode::OK,
+        "fail-open must pass even when enforcing"
+    );
+    assert_eq!(count_auth("fail_open") - before_fail, 1);
+
+    // 2nd request within the 1s sentinel: served from cache, no API call.
+    let _ = app(state.clone())
+        .oneshot(
+            req("/test")
+                .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz")
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(s.received_requests().await.unwrap().len(), 1);
+
+    // After the sentinel TTL elapses, the cache un-poisons and the next
+    // request retries the API (we still recover quickly from outage).
+    clock.advance(Duration::from_secs(2));
+    let _ = app(state)
+        .oneshot(
+            req("/test")
+                .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz")
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(s.received_requests().await.unwrap().len(), 2);
+}
+
+// cached Deleted denies even during outage.
+#[tokio::test]
+async fn cached_deleted_denies_during_outage() {
+    let _g = metrics_lock().await;
+    // No wiremock at all - any call would fail.
+    // (port 1 is unroutable, so a dial attempt errors immediately)
+    let state = AuthState::for_test(
+        Some(Url::parse("http://127.0.0.1:1").unwrap()),
+        true,
+        TestClock::new(),
+    );
+    state.cache.put_deleted("sqd_data_abc_xyz".into());
+
+    let resp = app(state)
+        .oneshot(
+            req("/test")
+                .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz")
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(resp.status(), StatusCode::FORBIDDEN);
+}
+
+// Repeated upstream 5xx opens the circuit breaker; once open, requests
+// fail open without dialling the validate API at all.
+// NOTE(review): assumes the breaker's error threshold is <= 50 — confirm
+// against the breaker config in client.rs.
+#[tokio::test]
+async fn breaker_open_passes_through_without_dial() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    Mock::given(method("POST"))
+        .and(path("/internal/validate"))
+        .respond_with(ResponseTemplate::new(500))
+        .mount(&s)
+        .await;
+    let state =
+        AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new());
+
+    // Drive 50 distinct keys to get 50 cache misses + 50 errors -> breaker opens.
+    for i in 0..50 {
+        let token = format!("Bearer sqd_data_k{i}_xxx");
+        let _ = app(state.clone())
+            .oneshot(req("/test").header(header::AUTHORIZATION, token).body(Body::empty()).unwrap())
+            .await
+            .unwrap();
+    }
+    let before = s.received_requests().await.unwrap().len();
+    assert_eq!(before, 50);
+
+    // Next request: breaker open; expect fail-open (200, enforcing or not).
+    let resp = app(state)
+        .oneshot(
+            req("/test")
+                .header(header::AUTHORIZATION, "Bearer sqd_data_kNEW_xxx")
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(resp.status(), StatusCode::OK);
+    assert_eq!(s.received_requests().await.unwrap().len(), 50, "breaker must short-circuit");
+}
+
+// The validated user id is propagated to the handler via request extensions.
+#[tokio::test]
+async fn key_id_in_request_extensions() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    good_validate_mock("user42").mount(&s).await;
+    let state =
+        AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new());
+
+    let resp = app(state)
+        .oneshot(
+            req("/test")
+                .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz")
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(resp.status(), StatusCode::OK);
+    assert_eq!(body_string(resp).await, "ok:user42");
+}
+
+// First lookup misses and dials the API; second is served from cache.
+#[tokio::test]
+async fn cache_miss_then_hit() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    good_validate_mock("u").mount(&s).await;
+    let state =
+        AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new());
+
+    let before_miss = CACHE_MISS_TOTAL.get();
+    let before_hit = count_cache_hit("exists");
+    for _ in 0..2 {
+        let resp = app(state.clone())
+            .oneshot(
+                req("/test")
+                    .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+        assert_eq!(resp.status(), StatusCode::OK);
+    }
+    assert_eq!(s.received_requests().await.unwrap().len(), 1, "second req must be cached");
+    assert_eq!(CACHE_MISS_TOTAL.get() - before_miss, 1);
+    assert_eq!(count_cache_hit("exists") - before_hit, 1);
+}
+
+// Every decide pass records one latency sample, even on the Missing path.
+#[tokio::test]
+async fn auth_latency_histogram_observes() {
+    let _g = metrics_lock().await;
+    let state = AuthState::for_test(None, true, TestClock::new());
+    let before = AUTH_LATENCY_SECONDS.get_sample_count();
+    let _ = app(state)
+        .oneshot(req("/test").body(Body::empty()).unwrap())
+        .await
+        .unwrap();
+    assert!(AUTH_LATENCY_SECONDS.get_sample_count() > before);
+}
+
+// full token must never appear in any log line; only key_id may.
+// (No metrics_lock here: this test only inspects captured logs.)
+#[tokio::test]
+#[tracing_test::traced_test]
+async fn full_token_never_logged() {
+    let s = MockServer::start().await;
+    // Force the fail-open warn path so we know a log line was emitted.
+    Mock::given(method("POST"))
+        .and(path("/internal/validate"))
+        .respond_with(ResponseTemplate::new(500))
+        .mount(&s)
+        .await;
+    let state =
+        AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new());
+    let secret = "VERYRANDOMSECRET12345";
+    let token = format!("Bearer sqd_data_abc_{secret}");
+    let _ = app(state)
+        .oneshot(req("/test").header(header::AUTHORIZATION, &token).body(Body::empty()).unwrap())
+        .await
+        .unwrap();
+    assert!(!logs_contain(secret), "log captured the random token suffix");
+    assert!(!logs_contain(&token), "log captured the full Authorization header");
+}
+
+// concurrent miss flood: the singleflight + cache combine to make
+// N concurrent requests for the same key produce exactly ONE
+// Network API call.
+#[tokio::test]
+async fn concurrent_miss_flood_coalesces_to_one_call() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    // Hold the response briefly so all 32 racers pile up at the singleflight.
+    Mock::given(method("POST"))
+        .and(path("/internal/validate"))
+        .respond_with(
+            ResponseTemplate::new(200)
+                .set_body_json(serde_json::json!({"user_id": "u1"}))
+                .set_delay(Duration::from_millis(50)),
+        )
+        .mount(&s)
+        .await;
+    let state =
+        AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new());
+
+    let mut handles = Vec::new();
+    for _ in 0..32 {
+        let state = state.clone();
+        handles.push(tokio::spawn(async move {
+            app(state)
+                .oneshot(
+                    req("/test")
+                        .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz")
+                        .body(Body::empty())
+                        .unwrap(),
+                )
+                .await
+                .unwrap()
+        }));
+    }
+    for h in handles {
+        let resp = h.await.unwrap();
+        assert_eq!(resp.status(), StatusCode::OK);
+    }
+    assert_eq!(
+        s.received_requests().await.unwrap().len(),
+        1,
+        "32 concurrent misses for the same key must produce exactly 1 API call"
+    );
+}
+
+// sqd_data_ prefix only — empty token after the prefix is rejected.
+#[tokio::test]
+async fn empty_token_after_prefix_is_invalid() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    good_validate_mock("u").mount(&s).await;
+    let state =
+        AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new());
+    let resp = app(state)
+        .oneshot(
+            req("/test")
+                .header(header::AUTHORIZATION, "Bearer sqd_data_")
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(resp.status(), StatusCode::FORBIDDEN);
+    assert_eq!(s.received_requests().await.unwrap().len(), 0);
+}
+
+// Malformed success body must never cache a positive (Exists) entry —
+// it gets the FailedRecently sentinel like any other parse failure.
+#[tokio::test] +async fn malformed_validate_body_does_not_cache_exists() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with(ResponseTemplate::new(200).set_body_string("garbage")) + .mount(&s) + .await; + let state = + AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new()); + let _ = app(state.clone()) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + // No Exists / Deleted entry; only the brief FailedRecently sentinel. + let cached = state.cache.get("sqd_data_abc_xyz"); + assert!( + matches!(cached, Some(KeyState::FailedRecently) | None), + "got {cached:?}" + ); +} + +// FailOpen sentinel drains the singleflight queue without +// serialising N × 250ms timeouts. With a delayed wiremock, 32 concurrent +// racers should produce at most 1 upstream call (the leader's), with +// followers reading the FailedRecently sentinel. 
+#[tokio::test] +async fn fail_open_sentinel_drains_queue_without_serialised_timeouts() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with(ResponseTemplate::new(200).set_delay(Duration::from_millis(500))) + .mount(&s) + .await; + let state = + AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new()); + + let mut handles = Vec::new(); + for _ in 0..32 { + let state = state.clone(); + handles.push(tokio::spawn(async move { + app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap() + })); + } + for h in handles { + assert_eq!(h.await.unwrap().status(), StatusCode::OK); + } + assert_eq!( + s.received_requests().await.unwrap().len(), + 1, + "FailedRecently sentinel must short-circuit all followers" + ); +} + +// duplicate Authorization headers are ambiguous +// (RFC 6750 §3.1). Reject as Invalid so a proxy that reorders or +// deduplicates headers can't smuggle a credential. +#[tokio::test] +async fn duplicate_authorization_headers_rejected() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u").mount(&s).await; + let state = AuthState::for_test( + Some(Url::parse(&s.uri()).unwrap()), + true, + TestClock::new(), + ); + let resp = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz") + .header(header::AUTHORIZATION, "Bearer sqd_data_def_uvw") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::FORBIDDEN); + assert_eq!( + s.received_requests().await.unwrap().len(), + 0, + "duplicate Authorization headers must not reach the API" + ); +} + +// Distinct tokens are independent cache entries — a stale entry for +// one token never affects another's lookup. 
+#[tokio::test] +async fn distinct_tokens_are_isolated_cache_entries() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u").mount(&s).await; + let state = AuthState::for_test( + Some(Url::parse(&s.uri()).unwrap()), + true, + TestClock::new(), + ); + state.cache.put_deleted("sqd_data_OLD".into()); + // A different token is a different cache key — must validate via API. + let resp = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_NEW") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(s.received_requests().await.unwrap().len(), 1); +} + +// CACHE_MISS_TOTAL is incremented exactly once per +// *true* miss. Concurrent followers that wake up to a populated cache +// count as hits, not misses. +#[tokio::test] +async fn cache_miss_counter_does_not_double_count_under_singleflight() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u").mount(&s).await; + let state = + AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new()); + + let before_miss = CACHE_MISS_TOTAL.get(); + let before_hit = count_cache_hit("exists"); + + let mut handles = Vec::new(); + for _ in 0..16 { + let state = state.clone(); + handles.push(tokio::spawn(async move { + app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap() + })); + } + for h in handles { + let _ = h.await; + } + // 1 leader = 1 miss; 15 followers = 15 cache hits. + assert_eq!(CACHE_MISS_TOTAL.get() - before_miss, 1); + assert_eq!(count_cache_hit("exists") - before_hit, 15); + assert_eq!(s.received_requests().await.unwrap().len(), 1); +} + +// Error paths must never log the token (or any of its bytes). 
+#[tokio::test] +#[tracing_test::traced_test] +async fn warn_log_does_not_carry_token() { + let s = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/internal/validate")) + .respond_with(ResponseTemplate::new(500)) + .mount(&s) + .await; + let state = + AuthState::for_test(Some(Url::parse(&s.uri()).unwrap()), true, TestClock::new()); + let secret = "VERYSECRETMATERIAL12345"; + let _ = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, format!("Bearer sqd_data_{secret}")) + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert!(!logs_contain(secret), "warn log must not carry the token"); + assert!(!logs_contain("sqd_data_VERYSECRETMATERIAL")); +} + +// ─── IP-allowlist bypass ─────────────────────────────────────────────── +// +// These cover the bypass branch added in `decide` (after duplicate-Auth +// rejection, before Bearer extraction). They mirror the empirical +// T1-T4 scenarios captured during prod debugging: +// +// T1 baseline : XOFF chain ends with the trusted upstream LB, +// second-from-right is the real client. +// T2 spoof XFF : client prepends a fake IP; walk-rightmost must +// still resolve to the LB-attested real client. +// T3 direct ClusterIP: no XOFF — peer IP from ConnectInfo is used. +// T4 spoof both : same as T2; client-controlled X-Real-IP / +// X-Forwarded-For are NEVER consulted, only XOFF. + +use ipnet::IpNet; +use std::net::SocketAddr; + +fn netv(s: &str) -> IpNet { + s.parse().unwrap() +} + +/// Build an AuthState wired for IP-bypass. `s` provides the validate-API +/// URL that the standard Bearer path uses if we fall through. 
+fn bypass_state( + s: &MockServer, + enforce: bool, + trusted: Vec, + allow: Vec, +) -> Arc { + AuthState::for_test_with_bypass( + Some(Url::parse(&s.uri()).unwrap()), + enforce, + TestClock::new(), + trusted, + allow, + ) +} + +/// Wrap a request in `ConnectInfo` so the middleware can +/// read the connection peer (mimicking what +/// `into_make_service_with_connect_info` does in prod). +fn with_connect_info(mut req: Request, peer: &str) -> Request { + let addr: SocketAddr = peer.parse().unwrap(); + req.extensions_mut().insert(ConnectInfo(addr)); + req +} + +// T1-equivalent: chain `, ` with client in allowlist => bypass. +#[tokio::test] +async fn bypass_xoff_real_client_in_allowlist() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; // would-be-404 if we fell through + + let state = bypass_state( + &s, + true, + vec![netv("34.149.211.238/32")], + vec![netv("10.0.0.0/8")], + ); + + let resp = app(state) + .oneshot( + req("/test") + .header(ORIGINAL_FORWARDED_FOR, "10.4.5.5, 34.149.211.238") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(body_string(resp).await, "ok:internal"); + // Bypass must NOT touch the Network API. + assert_eq!(s.received_requests().await.unwrap().len(), 0); +} + +// T1-equivalent inverse: real client outside allowlist -> standard Bearer +// path runs. With no Authorization header, that yields Missing -> 403. 
+#[tokio::test] +async fn bypass_xoff_real_client_outside_allowlist() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = bypass_state( + &s, + true, + vec![netv("34.149.211.238/32")], + vec![netv("10.0.0.0/8")], + ); + + let resp = app(state) + .oneshot( + req("/test") + .header(ORIGINAL_FORWARDED_FOR, "94.43.76.236, 34.149.211.238") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +// T2-equivalent (THE security test): client-supplied XFF leftmost is in +// allowlist, but the LB-attested real client (second from right) is not. +// walk-rightmost must surface the real client, NOT the spoof. +#[tokio::test] +async fn bypass_xoff_spoof_leftmost_is_rejected() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = bypass_state( + &s, + true, + vec![netv("34.149.211.238/32")], + vec![netv("10.0.0.0/8")], + ); + + // Attacker tries to look like 10.4.5.5 by prepending it. GLB then + // appends real client (94.43.76.236) and itself. + let resp = app(state) + .oneshot( + req("/test") + .header( + ORIGINAL_FORWARDED_FOR, + "10.4.5.5, 94.43.76.236, 34.149.211.238", + ) + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + // Walk rightmost: skip 34.149.211.238 (trusted), next is 94.43.76.236 + // -> NOT in allowlist -> standard auth -> Missing -> 403. + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +// T3-equivalent: direct ClusterIP path. No XOFF; peer IP from ConnectInfo +// is the client pod IP. If in allowlist -> bypass. 
+#[tokio::test]
+async fn bypass_clusterip_peer_in_allowlist() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    nf_validate_mock().mount(&s).await;
+
+    // No trusted-LB list: bypass decision rests on the connection peer.
+    let state = bypass_state(&s, true, vec![], vec![netv("10.0.0.0/8")]);
+
+    let request = with_connect_info(
+        req("/test").body(Body::empty()).unwrap(),
+        "10.4.1.202:54321",
+    );
+    let resp = app(state).oneshot(request).await.unwrap();
+
+    assert_eq!(resp.status(), StatusCode::OK);
+    assert_eq!(body_string(resp).await, "ok:internal");
+}
+
+// ConnectInfo present but peer NOT in allowlist -> standard auth.
+#[tokio::test]
+async fn bypass_clusterip_peer_outside_allowlist() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    nf_validate_mock().mount(&s).await;
+
+    let state = bypass_state(&s, true, vec![], vec![netv("10.0.0.0/8")]);
+
+    let request = with_connect_info(
+        req("/test").body(Body::empty()).unwrap(),
+        "8.8.8.8:443",
+    );
+    let resp = app(state).oneshot(request).await.unwrap();
+
+    assert_eq!(resp.status(), StatusCode::FORBIDDEN);
+}
+
+// T4-equivalent: client smuggles BOTH X-Real-IP and X-Forwarded-For with
+// an in-allowlist value. We must ignore those (we only consult XOFF) and
+// resolve the real client from the trusted-stripped XOFF chain.
+#[tokio::test]
+async fn bypass_ignores_x_real_ip_and_xff_after_nginx() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    nf_validate_mock().mount(&s).await;
+
+    let state = bypass_state(
+        &s,
+        true,
+        vec![netv("34.149.211.238/32")],
+        vec![netv("10.0.0.0/8")],
+    );
+
+    let resp = app(state)
+        .oneshot(
+            req("/test")
+                .header("X-Real-IP", "10.4.5.5")
+                .header("X-Forwarded-For", "10.4.5.5")
+                .header(
+                    ORIGINAL_FORWARDED_FOR,
+                    "10.4.5.5, 94.43.76.236, 34.149.211.238",
+                )
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+
+    // Despite spoofed X-Real-IP / X-Forwarded-For, walk-rightmost on
+    // XOFF resolves real client to 94.43.76.236 -> outside allowlist.
+    assert_eq!(resp.status(), StatusCode::FORBIDDEN);
+}
+
+// Empty allowlist disables bypass entirely. Even a "trusted-looking"
+// request goes through the standard Bearer path.
+#[tokio::test]
+async fn bypass_disabled_when_allowlist_empty() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    nf_validate_mock().mount(&s).await;
+
+    let state = bypass_state(&s, true, vec![netv("34.149.211.238/32")], vec![]);
+
+    let resp = app(state)
+        .oneshot(
+            req("/test")
+                .header(ORIGINAL_FORWARDED_FOR, "10.4.5.5, 34.149.211.238")
+                .body(Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+
+    assert_eq!(resp.status(), StatusCode::FORBIDDEN);
+}
+
+// Malformed XOFF segments are skipped, not used as IPs. With a chain of
+// garbage we fall back to ConnectInfo.
+#[tokio::test]
+async fn bypass_malformed_xoff_falls_back_to_connect_info() {
+    let _g = metrics_lock().await;
+    let s = MockServer::start().await;
+    nf_validate_mock().mount(&s).await;
+
+    let state = bypass_state(&s, true, vec![], vec![netv("10.0.0.0/8")]);
+
+    let request = with_connect_info(
+        req("/test")
+            .header(ORIGINAL_FORWARDED_FOR, "not-an-ip, also-bad")
+            .body(Body::empty())
+            .unwrap(),
+        "10.4.1.202:54321",
+    );
+    let resp = app(state).oneshot(request).await.unwrap();
+
+    // Malformed entries skipped; chain yields no valid IP; fall back to
+    // peer (ConnectInfo) which IS in allowlist.
+    assert_eq!(resp.status(), StatusCode::OK);
+    assert_eq!(body_string(resp).await, "ok:internal");
+}
+
+// Chain consisting entirely of trusted IPs -> no real-client extraction
+// possible. Don't trust the LB's own IP as "the client" — fall back to
+// ConnectInfo (which here is also trusted, so no bypass).
+#[tokio::test] +async fn bypass_chain_all_trusted_falls_back() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = bypass_state( + &s, + true, + vec![netv("34.149.211.238/32")], + vec![netv("10.0.0.0/8")], + ); + + let request = with_connect_info( + req("/test") + .header(ORIGINAL_FORWARDED_FOR, "34.149.211.238") + .body(Body::empty()) + .unwrap(), + "8.8.8.8:443", + ); + let resp = app(state).oneshot(request).await.unwrap(); + + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +// Multiple allowlist CIDRs: any of them matches -> bypass. +#[tokio::test] +async fn bypass_multiple_allowlist_cidrs() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = bypass_state( + &s, + true, + vec![netv("34.149.211.238/32")], + vec![netv("10.0.0.0/8"), netv("35.1.2.0/29")], + ); + + // Hits second CIDR (dev NAT pool example). + let resp = app(state) + .oneshot( + req("/test") + .header(ORIGINAL_FORWARDED_FOR, "35.1.2.5, 34.149.211.238") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(body_string(resp).await, "ok:internal"); +} + +// Duplicate-Authorization rejection runs FIRST, even when the source IP +// would otherwise qualify for bypass. The duplicate header is a smuggling +// signal in itself; bypass must not paper over it. 
+#[tokio::test] +async fn bypass_does_not_override_duplicate_auth_rejection() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u1").mount(&s).await; + + let state = bypass_state( + &s, + true, + vec![netv("34.149.211.238/32")], + vec![netv("10.0.0.0/8")], + ); + + let resp = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_a_b") + .header(header::AUTHORIZATION, "Bearer sqd_data_c_d") + .header(ORIGINAL_FORWARDED_FOR, "10.4.5.5, 34.149.211.238") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + // Duplicate Authorization -> Invalid -> 403 even with allowlisted source. + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +// Bypass with a valid Bearer token still bypasses (and uses the +// internal: user_id, not the token's user_id) — IP precedence is +// intentional: trusted-network identity overrides token identity. +#[tokio::test] +async fn bypass_takes_precedence_over_valid_bearer() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u-from-token").mount(&s).await; + + let state = bypass_state( + &s, + true, + vec![netv("34.149.211.238/32")], + vec![netv("10.0.0.0/8")], + ); + + let resp = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz") + .header(ORIGINAL_FORWARDED_FOR, "10.4.5.5, 34.149.211.238") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(body_string(resp).await, "ok:internal"); + // Network API must NOT have been called — we short-circuited on IP. + assert_eq!(s.received_requests().await.unwrap().len(), 0); +} + +// No XOFF and no ConnectInfo -> middleware can't resolve a real IP. +// Bypass cannot fire; standard auth runs (here: no token -> Missing). 
+#[tokio::test] +async fn bypass_without_xoff_or_connect_info_falls_through() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = bypass_state(&s, true, vec![], vec![netv("10.0.0.0/8")]); + + let resp = app(state) + .oneshot(req("/test").body(Body::empty()).unwrap()) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +// ─── DISABLE_V2_AUTH (kill switch) ────────────────────────────────────── +// +// `disabled=true` short-circuits at the very top of `auth()`: no decide, +// no cache work, no Network API call. One metric sample under the +// `disabled` label so the dashboard shows the switch is engaged. + +fn all_ips_v() -> Vec { + vec![netv("0.0.0.0/0"), netv("::/0")] +} + +#[tokio::test] +async fn disable_short_circuits_missing_token() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + // Validate API would 404 if we ever reached it; we shouldn't. + nf_validate_mock().mount(&s).await; + + // Kill switch on; enforcement also globally on, but should be irrelevant. + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + true, + all_ips_v(), + vec![], + vec![], + ); + + let before = AUTH_TOTAL.with_label_values(&["disabled"]).get(); + let resp = app(state) + .oneshot(req("/test").body(Body::empty()).unwrap()) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!( + AUTH_TOTAL.with_label_values(&["disabled"]).get() - before, + 1, + "kill switch must increment auth_total{{disabled}} exactly once" + ); + // Network API must NOT be called. 
+ assert_eq!(s.received_requests().await.unwrap().len(), 0); +} + +#[tokio::test] +async fn disable_short_circuits_invalid_token() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + true, + all_ips_v(), + vec![], + vec![], + ); + + let resp = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer not_an_sqd_key") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + // Even an obviously invalid token passes when the kill switch is on. + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(s.received_requests().await.unwrap().len(), 0); +} + +#[tokio::test] +async fn disable_short_circuits_duplicate_authorization() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + true, + all_ips_v(), + vec![], + vec![], + ); + + // Duplicate Authorization is normally a hard 403 (RFC 6750 §3.1). + // With the kill switch on, even that is suppressed — auth path is OFF. 
+ let resp = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_a_b") + .header(header::AUTHORIZATION, "Bearer sqd_data_c_d") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); +} + +#[tokio::test] +async fn disable_does_not_emit_decide_metrics() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + good_validate_mock("u1").mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + true, + all_ips_v(), + vec![], + vec![], + ); + + let before_ok = count_auth("ok"); + let before_missing = count_auth("missing"); + let before_lat = AUTH_LATENCY_SECONDS.get_sample_count(); + + let _ = app(state) + .oneshot( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_abc_xyz") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + // Kill switch path emits exactly the `disabled` counter — neither + // `ok` (which would imply a full decide), nor the latency histogram. + assert_eq!(count_auth("ok"), before_ok, "no ok metric on kill switch"); + assert_eq!(count_auth("missing"), before_missing); + assert_eq!( + AUTH_LATENCY_SECONDS.get_sample_count(), + before_lat, + "kill switch must skip the latency timer entirely" + ); +} + +// ─── ENFORCE_V2_AUTH_FOR_IPS (canary by IP) ───────────────────────────── +// +// The list `enforce_for_ips` decides who gets denied on Missing/Invalid: +// - empty -> never deny (observe-only mode); +// - `*` -> deny everyone (catch-all CIDRs 0.0.0.0/0 + ::/0); +// - narrow -> deny only sources matching a CIDR (canary scope). + +#[tokio::test] +async fn canary_enforces_for_ip_in_scope() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + // Only enforce for 10.4.0.0/16; everyone else fails open. 
+ let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + vec![netv("10.4.0.0/16")], + vec![], + vec![], + ); + + let request = with_connect_info( + req("/test").body(Body::empty()).unwrap(), + "10.4.5.5:54321", + ); + let resp = app(state).oneshot(request).await.unwrap(); + + // In-scope source, no token -> Missing -> deny. + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +#[tokio::test] +async fn canary_passes_for_ip_out_of_scope() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + vec![netv("10.4.0.0/16")], + vec![], + vec![], + ); + + let request = with_connect_info( + req("/test").body(Body::empty()).unwrap(), + "8.8.8.8:443", + ); + let resp = app(state).oneshot(request).await.unwrap(); + + // Out-of-scope source -> fail open even though the policy is "enforce". + assert_eq!(resp.status(), StatusCode::OK); +} + +// In canary mode, the metric still records what WOULD have happened — +// `Missing`/`Invalid` count the same way regardless of whether they were +// actually denied. The dashboard can split denied vs allowed by comparing +// auth_total against handler-side counters. 
+#[tokio::test] +async fn canary_meters_outcome_even_when_not_enforcing() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + vec![netv("10.4.0.0/16")], + vec![], + vec![], + ); + + let before_missing = count_auth("missing"); + let request = with_connect_info( + req("/test").body(Body::empty()).unwrap(), + "8.8.8.8:443", + ); + let _ = app(state).oneshot(request).await.unwrap(); + + assert_eq!( + count_auth("missing") - before_missing, + 1, + "canary fail-open must still record the would-be outcome" + ); +} + +// `*` (wildcard) is parsed by CLI as `0.0.0.0/0,::/0`, which equals +// "enforce for everyone" — same as the legacy `ENFORCE_V2_AUTH=true`. +#[tokio::test] +async fn canary_wildcard_enforces_for_every_source() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + all_ips_v(), + vec![], + vec![], + ); + + // Out-of-RFC1918 source still gets denied — wildcard catches it. + let request = with_connect_info( + req("/test").body(Body::empty()).unwrap(), + "8.8.8.8:443", + ); + let resp = app(state).oneshot(request).await.unwrap(); + + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +// Wildcard + no ConnectInfo: when the policy is "enforce for everyone", +// the absence of a resolvable IP must NOT degrade to fail-open. The +// catch-all bypass in `should_enforce` (prefix_len == 0) is what handles +// this — without it, every test that uses oneshot() without ConnectInfo +// would silently pass even under a "deny all" policy. 
+#[tokio::test] +async fn canary_wildcard_enforces_even_without_connect_info() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + all_ips_v(), + vec![], + vec![], + ); + + let resp = app(state) + .oneshot(req("/test").body(Body::empty()).unwrap()) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +// Narrow canary (no catch-all) + missing IP -> fail open. +// This is the safety property: when we can't tell whether a request is +// in scope, we don't pretend it is. +#[tokio::test] +async fn canary_narrow_does_not_enforce_when_ip_unresolvable() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + vec![netv("10.4.0.0/16")], + vec![], + vec![], + ); + + let resp = app(state) + .oneshot(req("/test").body(Body::empty()).unwrap()) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); +} + +// Canary scope reads the same XOFF chain as the bypass path — so the +// real-client extraction is consistent. A request that arrives via nginx +// with XOFF must be checked against `enforce_for_ips` using the +// LB-attested IP, NOT the (untrusted) connection peer (= nginx pod). +#[tokio::test] +async fn canary_scope_reads_xoff_real_client() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + vec![netv("94.43.0.0/16")], + vec![netv("34.149.211.238/32")], + vec![], + ); + + // nginx pod connects in 10.6.x.x; chain shows real client 94.43.x.x. 
+ let request = with_connect_info( + req("/test") + .header(ORIGINAL_FORWARDED_FOR, "94.43.76.236, 34.149.211.238") + .body(Body::empty()) + .unwrap(), + "10.6.2.3:443", + ); + let resp = app(state).oneshot(request).await.unwrap(); + + // Real client (94.43.x.x) is in scope -> deny on missing token. + assert_eq!(resp.status(), StatusCode::FORBIDDEN); +} + +// Regression for the canary-bypass-via-duplicate-Authorization issue: +// pre-fix, `decide` rejected duplicate Authorization with +// `(Outcome::Invalid, None)` — the real_ip was never resolved. Then in +// `should_enforce`, a narrow-CIDR scope (no catch-all) returned false +// under None real_ip, so the request was allowed despite being in +// canary scope. The fix moves real_ip extraction above the +// duplicate-header check so the IP is always available for scope eval. +#[tokio::test] +async fn canary_narrow_enforces_duplicate_auth_for_in_scope_ip() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + // Narrow canary: enforce ONLY for 10.4.0.0/16. No catch-all — the + // bypass below is exactly the case the prior code mishandled. + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + vec![netv("10.4.0.0/16")], + vec![], + vec![], + ); + + // Source IP IS in scope; client smuggles two Authorization headers. + let request = with_connect_info( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_a_b") + .header(header::AUTHORIZATION, "Bearer sqd_data_c_d") + .body(Body::empty()) + .unwrap(), + "10.4.5.5:54321", + ); + let resp = app(state).oneshot(request).await.unwrap(); + + // Must be 403: duplicate-auth -> Invalid, in-scope IP -> enforce. + assert_eq!(resp.status(), StatusCode::FORBIDDEN); + // Network API must NOT have been called — duplicate-auth short-circuits. 
+ assert_eq!(s.received_requests().await.unwrap().len(), 0); +} + +// Mirror of the regression above, asserting the negative case: out-of-scope +// IP + duplicate-auth still falls through (canary policy says "don't +// enforce for this source"). Confirms the fix didn't accidentally widen +// enforcement to all sources. +#[tokio::test] +async fn canary_narrow_does_not_enforce_duplicate_auth_for_out_of_scope_ip() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + vec![netv("10.4.0.0/16")], + vec![], + vec![], + ); + + let request = with_connect_info( + req("/test") + .header(header::AUTHORIZATION, "Bearer sqd_data_a_b") + .header(header::AUTHORIZATION, "Bearer sqd_data_c_d") + .body(Body::empty()) + .unwrap(), + "8.8.8.8:443", + ); + let resp = app(state).oneshot(request).await.unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); +} + +// Internal allowlist still wins over enforce-scope. A source that the +// allowlist accepts must short-circuit to `Ok` regardless of whether it +// would also have been "in canary scope". +#[tokio::test] +async fn canary_internal_allowlist_takes_precedence() { + let _g = metrics_lock().await; + let s = MockServer::start().await; + nf_validate_mock().mount(&s).await; + + let state = AuthState::for_test_full( + Some(Url::parse(&s.uri()).unwrap()), + TestClock::new(), + false, + all_ips_v(), // would deny everyone… + vec![], + vec![netv("10.0.0.0/8")], // …but internal pods bypass. 
+ ); + + let request = with_connect_info( + req("/test").body(Body::empty()).unwrap(), + "10.4.5.5:54321", + ); + let resp = app(state).oneshot(request).await.unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(body_string(resp).await, "ok:internal"); +} diff --git a/crates/router/src/auth/mod.rs b/crates/router/src/auth/mod.rs new file mode 100644 index 0000000..1ef24a6 --- /dev/null +++ b/crates/router/src/auth/mod.rs @@ -0,0 +1,132 @@ +pub mod cache; +pub mod client; +pub mod clock; +pub mod middleware; +pub mod singleflight; +pub mod topkeys; + +use std::sync::Arc; + +use ipnet::IpNet; + +pub use cache::KeyCache; +pub use client::NetworkApiClient; +pub use singleflight::Singleflight; +pub use topkeys::TopKeys; + +pub struct AuthState { + pub cache: KeyCache, + pub client: NetworkApiClient, + pub top_keys: TopKeys, + pub inflight: Singleflight, + /// Global kill switch. When true, the middleware bails out before any + /// header parsing or cache work and counts the request as `disabled`. + /// Overrides every other knob. + pub disabled: bool, + /// CIDRs whose source IPs trigger enforcement (deny on missing/invalid). + /// One knob — empty means "never enforce", `*` (expanded to + /// `0.0.0.0/0,::/0` at parse time) means "enforce for everyone", + /// specific CIDRs mean "canary scope". + pub enforce_for_ips: Vec, + /// Trusted upstream proxies. Used by the middleware to walk + /// `X-Original-Forwarded-For` rightmost-first and discard hops we put + /// there ourselves; the first non-trusted IP from the right is treated + /// as the real client. Empty -> XOFF is taken at face value. + pub trusted_ips: Vec, + /// Source IPs allowed to bypass Bearer auth. Matched against the + /// resolved real-client IP (see `trusted_ips`). Empty -> bypass disabled. 
+ pub internal_allowlist: Vec, +} + +impl AuthState { + pub fn new( + client: NetworkApiClient, + disabled: bool, + enforce_for_ips: Vec, + trusted_ips: Vec, + internal_allowlist: Vec, + ) -> Arc { + Arc::new(Self { + cache: KeyCache::new(10_000), + client, + top_keys: TopKeys::new(100), + inflight: Singleflight::new(), + disabled, + enforce_for_ips, + trusted_ips, + internal_allowlist, + }) + } + + /// Test helper. `enforce` mirrors the old boolean: `true` -> enforce for + /// every source (expand to `0.0.0.0/0` + `::/0`), `false` -> never enforce. + /// Existing tests keep their boolean call shape; richer scope tests use + /// [`AuthState::for_test_full`]. + #[cfg(test)] + pub fn for_test( + base_url: Option, + enforce: bool, + clock: Arc, + ) -> Arc { + Self::for_test_full( + base_url, + clock, + false, + if enforce { all_ips() } else { Vec::new() }, + Vec::new(), + Vec::new(), + ) + } + + #[cfg(test)] + pub fn for_test_with_bypass( + base_url: Option, + enforce: bool, + clock: Arc, + trusted_ips: Vec, + internal_allowlist: Vec, + ) -> Arc { + Self::for_test_full( + base_url, + clock, + false, + if enforce { all_ips() } else { Vec::new() }, + trusted_ips, + internal_allowlist, + ) + } + + /// Test helper exposing every knob directly. Use when verifying the + /// kill switch or narrow-scope canary; otherwise prefer the simpler + /// `for_test` / `for_test_with_bypass`. + #[cfg(test)] + pub fn for_test_full( + base_url: Option, + clock: Arc, + disabled: bool, + enforce_for_ips: Vec, + trusted_ips: Vec, + internal_allowlist: Vec, + ) -> Arc { + Arc::new(Self { + cache: KeyCache::with_clock(10_000, clock.clone()), + client: NetworkApiClient::with_clock(base_url, clock), + top_keys: TopKeys::new(100), + inflight: Singleflight::new(), + disabled, + enforce_for_ips, + trusted_ips, + internal_allowlist, + }) + } +} + +/// `0.0.0.0/0` + `::/0` — the "enforce for every source" set used both by +/// the CLI parser (when the user writes `*`) and by the test boolean shim. 
+#[cfg(test)] +fn all_ips() -> Vec { + vec![ + "0.0.0.0/0".parse().unwrap(), + "::/0".parse().unwrap(), + ] +} diff --git a/crates/router/src/auth/singleflight.rs b/crates/router/src/auth/singleflight.rs new file mode 100644 index 0000000..ec7f941 --- /dev/null +++ b/crates/router/src/auth/singleflight.rs @@ -0,0 +1,144 @@ +use std::sync::Arc; + +use dashmap::DashMap; +use tokio::sync::Mutex; + +/// Per-key serialisation for cache-miss validation calls. +/// +/// When N concurrent requests arrive for the same `key_id` and the cache +/// is `UNDEFINED`, only the first one calls Network API; the others wait +/// on this mutex and re-check the cache once they acquire it. +/// +/// This bounds the bad-key flood guarantee from "1 call per 15s under +/// sequential load" to "1 call per 15s under any load shape". +pub struct Singleflight { + inner: DashMap>>, +} + +impl Singleflight { + pub fn new() -> Self { + Self { + inner: DashMap::new(), + } + } + + /// Acquire the per-key lock. The caller must drop the returned guard + /// before the next `acquire` for this key proceeds. + pub async fn acquire(&self, key: &str) -> SingleflightGuard { + let lock = self + .inner + .entry(key.to_string()) + .or_insert_with(|| Arc::new(Mutex::new(()))) + .clone(); + let guard = lock.clone().lock_owned().await; + SingleflightGuard { + guard: Some(guard), + map: &self.inner, + key: key.to_string(), + lock, + } + } +} + +impl Default for Singleflight { + fn default() -> Self { + Self::new() + } +} + +pub struct SingleflightGuard<'a> { + guard: Option>, + map: &'a DashMap>>, + key: String, + lock: Arc>, +} + +impl Drop for SingleflightGuard<'_> { + fn drop(&mut self) { + // Release the inner mutex so its Arc clone is decremented BEFORE + // remove_if runs its strong_count check. Then remove_if itself is + // the only authoritative read (the DashMap shard is locked while + // it runs, so no concurrent acquire can race in between). 
+ self.guard.take(); + self.map + .remove_if(&self.key, |_, v| Arc::strong_count(v) <= 2); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::atomic::{AtomicU32, Ordering}; + use std::time::Duration; + + #[tokio::test] + async fn coalesces_concurrent_acquires() { + let sf = Arc::new(Singleflight::new()); + let in_flight = Arc::new(AtomicU32::new(0)); + let max_observed = Arc::new(AtomicU32::new(0)); + + let mut handles = Vec::new(); + for _ in 0..32 { + let sf = sf.clone(); + let inflight = in_flight.clone(); + let max = max_observed.clone(); + handles.push(tokio::spawn(async move { + let _g = sf.acquire("k").await; + let n = inflight.fetch_add(1, Ordering::SeqCst) + 1; + max.fetch_max(n, Ordering::SeqCst); + tokio::time::sleep(Duration::from_millis(2)).await; + inflight.fetch_sub(1, Ordering::SeqCst); + })); + } + for h in handles { + h.await.unwrap(); + } + assert_eq!( + max_observed.load(Ordering::SeqCst), + 1, + "only one task may hold the per-key slot at a time" + ); + } + + #[tokio::test] + async fn distinct_keys_run_in_parallel() { + let sf = Arc::new(Singleflight::new()); + // Both should acquire concurrently — no contention across keys. + let g1 = sf.acquire("k1").await; + let g2 = sf.acquire("k2").await; + drop(g1); + drop(g2); + } + + #[tokio::test] + async fn entries_are_cleaned_up() { + let sf = Singleflight::new(); + let g = sf.acquire("k").await; + drop(g); + assert_eq!(sf.inner.len(), 0, "map must not leak idle entries"); + } + + // TOCTOU collapse: under a tight pattern of acquire/drop/ + // acquire on the same key, the cleanup must never tear down an entry + // that another acquirer is mid-claim of. The DashMap shard lock taken + // by `remove_if` is the only authoritative read. 
+ #[tokio::test] + async fn drop_then_immediate_reacquire_is_safe() { + let sf = Arc::new(Singleflight::new()); + let mut handles = Vec::new(); + for _ in 0..64 { + let sf = sf.clone(); + handles.push(tokio::spawn(async move { + for _ in 0..32 { + let _g = sf.acquire("k").await; + tokio::task::yield_now().await; + } + })); + } + for h in handles { + h.await.unwrap(); + } + // After the storm completes, the map must end empty (no leak). + assert_eq!(sf.inner.len(), 0); + } +} diff --git a/crates/router/src/auth/topkeys.rs b/crates/router/src/auth/topkeys.rs new file mode 100644 index 0000000..36682ec --- /dev/null +++ b/crates/router/src/auth/topkeys.rs @@ -0,0 +1,240 @@ +use std::collections::HashMap; +use std::sync::Mutex; + +use prometheus::IntCounterVec; + +/// Bounded-cardinality "top-N" sketch (Space-Saving / Metwally, with one +/// deliberate deviation). Keeps at most `capacity` keys; on overflow, the +/// lowest-count entry is replaced with the incoming key, which starts at +/// **count 1** — not `min_count + 1` as in the canonical Metwally +/// algorithm. The deviation prevents a flood of unique one-hit keys from +/// inheriting and inflating the min count, which would otherwise let them +/// crowd out a stable heavy hitter. The trade-off (slower convergence for +/// late-arriving genuine heavy hitters) is acceptable here because the +/// goal is bounded Prometheus label cardinality, not exact rank. +/// Returns the evicted key so the caller can clear any external labels +/// for it (e.g. Prometheus). +pub struct TopKeys { + inner: Mutex, +} + +struct Inner { + capacity: usize, + counts: HashMap, +} + +#[derive(Debug, Clone)] +pub struct ObserveResult { + /// Whether the caller should emit a metric for this `key_id`. + pub emit: bool, + /// If non-None, the caller should clear any external label for this id. 
+ pub evicted: Option, +} + +impl TopKeys { + pub fn new(capacity: usize) -> Self { + Self { + inner: Mutex::new(Inner { + capacity, + counts: HashMap::new(), + }), + } + } + + /// Update the sketch and emit the corresponding Prometheus metric + /// updates atomically under the same lock. This prevents a race where + /// two concurrent observers' eviction-then-emit sequence could + /// interleave such that an evicted label is re-added (silently + /// breaking the cardinality bound). + pub fn observe_into(&self, key_id: &str, counter: &IntCounterVec) -> ObserveResult { + let mut g = self.inner.lock().unwrap(); + let result = Self::update_locked(&mut g, key_id); + if let Some(ref evicted) = result.evicted { + let _ = counter.remove_label_values(&[evicted.as_str()]); + } + if result.emit { + counter.with_label_values(&[key_id]).inc(); + } + result + } + + /// Sketch-only update without metric side effects. Exposed for unit + /// tests; production callers should use `observe_into` so the metric + /// mutation is serialised with the sketch update. + #[cfg(test)] + pub fn observe(&self, key_id: &str) -> ObserveResult { + let mut g = self.inner.lock().unwrap(); + Self::update_locked(&mut g, key_id) + } + + fn update_locked(g: &mut Inner, key_id: &str) -> ObserveResult { + if let Some(c) = g.counts.get_mut(key_id) { + *c += 1; + return ObserveResult { + emit: true, + evicted: None, + }; + } + if g.counts.len() < g.capacity { + g.counts.insert(key_id.to_string(), 1); + return ObserveResult { + emit: true, + evicted: None, + }; + } + // Cap full → replace the min entry. New keys start at count 1 + // (rather than `min_count + 1` per Metwally) so a flood of unique + // one-hit keys cannot inflate their own counts and crowd out + // stable heavy hitters. The cost: a genuine late-arriving heavy + // hitter starts from 1 and may take many observations to climb. + // Acceptable here — we want stable bounded Prometheus labels, not + // Space-Saving rank optimality. 
+ let min_key = g + .counts + .iter() + .min_by_key(|(_, c)| *c) + .map(|(k, _)| k.clone()) + .expect("counts non-empty when capacity reached"); + g.counts.remove(&min_key); + g.counts.insert(key_id.to_string(), 1); + ObserveResult { + emit: true, + evicted: Some(min_key), + } + } + + #[cfg(test)] + pub fn contains(&self, key_id: &str) -> bool { + self.inner.lock().unwrap().counts.contains_key(key_id) + } + + #[cfg(test)] + pub fn len(&self) -> usize { + self.inner.lock().unwrap().counts.len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashSet; + + #[test] + fn requests_by_key_bounded_cardinality() { + let tk = TopKeys::new(100); + let mut tracked: HashSet = HashSet::new(); + for i in 0..10_000 { + let k = format!("k{i}"); + let r = tk.observe(&k); + if r.emit { + tracked.insert(k); + } + if let Some(e) = r.evicted { + tracked.remove(&e); + } + } + assert!( + tracked.len() <= 100, + "tracked label set must stay ≤ capacity (got {})", + tracked.len() + ); + assert!(tk.len() <= 100); + } + + #[test] + fn top_keys_are_present() { + let tk = TopKeys::new(10); + for _ in 0..100 { + tk.observe("A"); + } + // Now churn with many one-hit keys. + for i in 0..1_000 { + tk.observe(&format!("nobody{i}")); + } + assert!( + tk.contains("A"), + "high-count key A must remain tracked through churn" + ); + assert!(tk.len() <= 10); + } + + #[test] + fn observe_returns_evicted_on_overflow() { + let tk = TopKeys::new(2); + let r1 = tk.observe("a"); + assert!(r1.emit && r1.evicted.is_none()); + let r2 = tk.observe("b"); + assert!(r2.emit && r2.evicted.is_none()); + let r3 = tk.observe("c"); + assert!(r3.emit && r3.evicted.is_some()); + } + + // sketch update + Prometheus mutation are atomic. + // Many threads observing distinct keys must end with the registered + // label set bounded by capacity. Pre-fix, a stale `with_label_values` + // racing with another thread's `remove_label_values` could resurrect + // an evicted label. 
+ #[test] + fn observe_into_keeps_prometheus_cardinality_bounded_under_concurrency() { + use prometheus::core::Collector; + use prometheus::{opts, register_int_counter_vec, IntCounterVec}; + use std::sync::Arc; + use std::thread; + + // Use a unique metric per test to avoid colliding with the global + // REQUESTS_BY_KEY across other tests. + let counter: IntCounterVec = register_int_counter_vec!( + opts!( + "topkeys_concurrency_test_counter", + "Concurrency test for TopKeys::observe_into" + ), + &["key"] + ) + .unwrap(); + let tk = Arc::new(TopKeys::new(50)); + let mut handles = Vec::new(); + for t in 0..16 { + let tk = tk.clone(); + let counter = counter.clone(); + handles.push(thread::spawn(move || { + for i in 0..1_000 { + let k = format!("t{t}_k{i}"); + tk.observe_into(&k, &counter); + } + })); + } + for h in handles { + h.join().unwrap(); + } + let metrics = counter.collect(); + let total_labels: usize = metrics.iter().map(|m| m.get_metric().len()).sum(); + assert!( + total_labels <= 50, + "Prometheus cardinality must stay ≤ capacity even under \ + concurrent observe_into calls; got {total_labels}" + ); + } + + // new keys arriving on overflow start at count 1, not at + // `min_count + 1`. A flood of distinct keys cannot inflate their own + // counts and crowd out a stable heavy hitter (A:100 here). + #[test] + fn new_keys_start_at_one_not_inheriting_min_count() { + let tk = TopKeys::new(3); + for _ in 0..100 { + tk.observe("A"); // A reaches count 100 + } + for _ in 0..2 { + tk.observe("filler"); // ensure capacity is full + } + // At this point counts = {A:100, filler:2, ...} or similar; cap=3. + // Overflow with a new key: + let _ = tk.observe("first_overflow"); + // first_overflow should have count 1, not min+1 (=3) — meaning the + // next overflow will evict it back out, not promote it past A. 
+ for i in 0..50 { + tk.observe(&format!("nobody{i}")); + } + assert!(tk.contains("A"), "A must remain through churn"); + } +} diff --git a/crates/router/src/cli.rs b/crates/router/src/cli.rs index 757a7c9..c61ee6b 100644 --- a/crates/router/src/cli.rs +++ b/crates/router/src/cli.rs @@ -1,7 +1,39 @@ +use std::str::FromStr; + use clap::Parser; +use ipnet::IpNet; +use url::Url; use crate::dataset::Dataset; +/// Comma-separated list of CIDRs (e.g. "10.0.0.0/8,35.1.2.0/29"). +/// Bare IPs are accepted as /32 (IPv4) or /128 (IPv6). Empty string -> empty list. +/// The wildcard token `*` expands to both `0.0.0.0/0` and `::/0` so it matches +/// any source — used as the "enforce for everyone" shorthand. +#[derive(Clone, Debug, Default)] +pub struct CidrList(pub Vec); + +impl FromStr for CidrList { + type Err = String; + + fn from_str(s: &str) -> Result { + let mut nets = Vec::new(); + for item in s.split(',').map(str::trim).filter(|s| !s.is_empty()) { + if item == "*" { + nets.push("0.0.0.0/0".parse().expect("static CIDR")); + nets.push("::/0".parse().expect("static CIDR")); + continue; + } + let parsed = item + .parse::() + .or_else(|_| item.parse::().map(IpNet::from)) + .map_err(|e| format!("invalid CIDR `{}`: {}", item, e))?; + nets.push(parsed); + } + Ok(CidrList(nets)) + } +} + fn parse_dataset(s: &str) -> Result { let pos = s .find('=') @@ -53,4 +85,48 @@ pub struct Cli { /// Scheduling interval (in seconds) #[clap(short = 'i', long, default_value = "300", value_name = "N")] pub scheduling_interval: u64, + + /// Comma-separated CIDRs that scope enforcement. The middleware returns + /// 403 on missing/invalid keys ONLY when the resolved real-client IP + /// matches one of these CIDRs; any other source falls through to the + /// handler (parsing, cache, and Network API calls run either way — the + /// list changes only the deny action). 
+ /// + /// - empty (default) -> never enforce (observe-only canary mode); + /// - `*` -> enforce for everyone (expands to `0.0.0.0/0,::/0` internally); + /// - specific CIDRs -> canary scope (enforce just for these clients). + /// + /// Replaces the older `ENFORCE_V2_AUTH` boolean — one knob, no ambiguity. + #[clap(long, env = "ENFORCE_V2_AUTH_FOR_IPS", + value_name = "CIDR,CIDR,...", default_value = "")] + pub enforce_v2_auth_for_ips: CidrList, + + /// Global kill switch. When true, the auth middleware short-circuits at + /// the very top of `decide`: every request is allowed without parsing + /// headers, touching the cache, or calling the Network API. Counted as + /// `sqd_v2_auth_total{result="disabled"}` so the dashboard shows the + /// switch is active. Use only as an emergency disable when something is + /// broken and we need to drop the auth path entirely until it's fixed. + #[clap(long, env = "DISABLE_V2_AUTH", default_value = "false")] + pub disable_v2_auth: bool, + + /// Base URL of the Network API exposing POST /internal/validate. + /// When unset, the auth middleware fails open on every cache miss. + #[clap(long, env = "NETWORK_API_URL", value_name = "URL")] + pub network_api_url: Option, + + /// Comma-separated CIDRs of trusted upstream proxies (e.g. the public LB + /// in front of nginx-ingress). Used to walk `X-Original-Forwarded-For` + /// rightmost-first and discard hops we put there ourselves; the first + /// non-trusted IP from the right is treated as the real client. + /// Without this list, the rightmost element of XOFF is taken verbatim, + /// which may be the LB itself rather than the real client. + #[clap(long, env = "TRUSTED_IPS", value_name = "CIDR,CIDR,...", default_value = "")] + pub trusted_ips: CidrList, + + /// Comma-separated CIDRs allowed to bypass Bearer auth based on source IP + /// (after `TRUSTED_IPS` stripping). Empty disables the IP-based bypass — + /// every request goes through the standard Bearer path. 
+ #[clap(long, env = "INTERNAL_ALLOWLIST", value_name = "CIDR,CIDR,...", default_value = "")] + pub internal_allowlist: CidrList, } diff --git a/crates/router/src/main.rs b/crates/router/src/main.rs index 426d35c..7f93815 100644 --- a/crates/router/src/main.rs +++ b/crates/router/src/main.rs @@ -8,6 +8,7 @@ use std::time::Duration; use storage::{S3Storage, Storage}; use url::Url; +mod auth; mod cli; mod logger; mod metrics; @@ -39,7 +40,19 @@ async fn main() { let scheduling_interval = Duration::from_secs(args.scheduling_interval); scheduler::start(controller.clone(), args.dataset, scheduling_interval); - Server::new(controller).run().await; + let api_client = match args.network_api_url { + Some(url) => auth::NetworkApiClient::new(url), + None => auth::NetworkApiClient::disabled(), + }; + let auth_state = auth::AuthState::new( + api_client, + args.disable_v2_auth, + args.enforce_v2_auth_for_ips.0, + args.trusted_ips.0, + args.internal_allowlist.0, + ); + + Server::new(controller).run(auth_state).await; } async fn create_storage(dataset: &str) -> Arc { diff --git a/crates/router/src/metrics.rs b/crates/router/src/metrics.rs index 77962f0..b996546 100644 --- a/crates/router/src/metrics.rs +++ b/crates/router/src/metrics.rs @@ -1,6 +1,8 @@ use lazy_static::lazy_static; use prometheus::{ - opts, register_int_counter_vec, register_int_gauge_vec, IntCounterVec, IntGaugeVec, + exponential_buckets, histogram_opts, opts, register_histogram, register_int_counter, + register_int_counter_vec, register_int_gauge_vec, Histogram, IntCounter, IntCounterVec, + IntGaugeVec, }; lazy_static! { @@ -16,4 +18,66 @@ lazy_static! { pub static ref DATASET_HEIGHT: IntGaugeVec = register_int_gauge_vec!(opts!("sqd_dataset_height", "Dataset height"), &["dataset"]) .expect("Can't create a metric"); + + /// Outcome of every auth-checked request: ok | missing | invalid | fail_open. 
+ pub static ref AUTH_TOTAL: IntCounterVec = register_int_counter_vec!( + opts!("sqd_v2_auth_total", "v2 archive auth outcomes"), + &["result"] + ) + .expect("Can't create a metric"); + + /// Time spent in the auth middleware (parse + cache + Network API). + pub static ref AUTH_LATENCY_SECONDS: Histogram = register_histogram!(histogram_opts!( + "sqd_v2_auth_latency_seconds", + "v2 archive auth middleware latency", + exponential_buckets(0.0005, 2.0, 12).unwrap() + )) + .expect("Can't create a metric"); + + /// Cache hit broken down by entry state: exists | deleted. + pub static ref CACHE_HIT_TOTAL: IntCounterVec = register_int_counter_vec!( + opts!("sqd_v2_cache_hit_total", "v2 archive auth cache hits by state"), + &["state"] + ) + .expect("Can't create a metric"); + + /// Cache miss (UNDEFINED — no entry). + pub static ref CACHE_MISS_TOTAL: IntCounter = + register_int_counter!("sqd_v2_cache_miss_total", "v2 archive auth cache misses") + .expect("Can't create a metric"); + + /// Outbound /internal/validate call outcome: ok | deleted | fail_open. + pub static ref VALIDATE_CALL_TOTAL: IntCounterVec = register_int_counter_vec!( + opts!( + "sqd_v2_validate_call_total", + "Outbound /internal/validate calls by outcome" + ), + &["result"] + ) + .expect("Can't create a metric"); + + /// Per-key request count. Cardinality is bounded by the top-keys sketch. + pub static ref REQUESTS_BY_KEY: IntCounterVec = register_int_counter_vec!( + opts!( + "sqd_v2_requests_by_key", + "Requests by API key (top-100 by traffic)" + ), + // The label carries the raw `user_id` (from the validate API) or + // `internal:` for IP-bypass requests. It is NOT hashed — the + // value identifies a tenant, never the secret token material, and + // is intentionally exposed for attribution dashboards. The earlier + // name `key_id_hash` was misleading; this is the canonical key_id. + &["key_id"] + ) + .expect("Can't create a metric"); + + /// Worker URLs handed out to clients (Worker:Router ratio canary). 
+ pub static ref WORKER_URLS_HANDED_TOTAL: IntCounterVec = register_int_counter_vec!( + opts!( + "sqd_router_worker_urls_handed_total", + "Worker URLs handed out by Router; compare to Worker incoming RPS" + ), + &["dataset"] + ) + .expect("Can't create a metric"); } diff --git a/crates/router/src/server.rs b/crates/router/src/server.rs index 0ea5dd7..224ee8e 100644 --- a/crates/router/src/server.rs +++ b/crates/router/src/server.rs @@ -42,14 +42,19 @@ async fn get_worker( Extension(controller): Extension<Arc<Controller>>, ) -> Response { match controller.get_worker(&dataset, start_block) { - Ok(Some(url)) => url.into_response(), + Ok(Some(url)) => { + crate::metrics::WORKER_URLS_HANDED_TOTAL + .with_label_values(&[dataset.as_str()]) + .inc(); + url.into_response() + } Ok(None) => { let status = StatusCode::SERVICE_UNAVAILABLE; let msg = format!("not ready to serve block {} of dataset {}", start_block, dataset); NETWORK_ERRORS.with_label_values(&[&dataset, "get_worker", status.as_str()]).inc(); (status, msg).into_response() }, - Err(err) => (StatusCode::NOT_FOUND, err).into_response() + Err(err) => (StatusCode::NOT_FOUND, err).into_response(), } } @@ -93,18 +98,56 @@ impl Server { Server { controller } } - pub async fn run(&self) { + pub async fn run(&self, auth_state: Arc<AuthState>) { let app = Router::new() .route("/ping", post(ping)) - .route("/network/:dataset/:start_block/worker", get(get_worker)) + .route( + "/network/:dataset/:start_block/worker", + get(get_worker).layer(from_fn(crate::auth::middleware::auth)), + ) .route("/network/:dataset/height", get(get_height)) .route("/metrics", get(get_metrics)) .layer(from_fn(logging)) - .layer(Extension(self.controller.clone())); + .layer(Extension(self.controller.clone())) + .layer(Extension(auth_state)); let addr = SocketAddr::from(([0, 0, 0, 0], 3000)); + // `into_make_service_with_connect_info::<SocketAddr>` is required so + // `ConnectInfo<SocketAddr>` is available as a request extension — + // the auth middleware reads it as the fallback "real client IP" when
+ // no `X-Original-Forwarded-For` is present (direct ClusterIP path). axum::Server::bind(&addr) - .serve(app.into_make_service()) + .serve(app.into_make_service_with_connect_info::<SocketAddr>()) .await .unwrap() } } + +#[cfg(test)] +mod tests { + use super::*; + use router_controller::controller::ControllerBuilder; + + // Failure paths (404 unknown dataset, 503 not-ready) must not bump the + // worker-URL handout counter — only successful Ok(Some(url)) does. + #[tokio::test] + async fn worker_url_handout_does_not_increment_on_failure() { + let controller = Arc::new(ControllerBuilder::new().build()); + let dataset = "ds-no-such-thing"; + let before = crate::metrics::WORKER_URLS_HANDED_TOTAL + .with_label_values(&[dataset]) + .get(); + let resp = get_worker( + Path((dataset.to_string(), 0)), + Extension(controller), + ) + .await; + assert_ne!(resp.status(), StatusCode::OK); + let after = crate::metrics::WORKER_URLS_HANDED_TOTAL + .with_label_values(&[dataset]) + .get(); + assert_eq!( + after, before, + "failure paths must not increment the handout counter" + ); + } +}