diff --git a/CHANGELOG.md b/CHANGELOG.md index 24e38a89..1f08055f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,44 @@ explain user-visible changes in plain language instead of only mirroring PR titl Older release history remains available in GitHub Releases. +## [0.1.7] - 2026-05-16 + +`0.1.7` 主要完成了控制面的出站化收口:节点现在推荐只保留业务 80/443 端口,控制通讯改为由 `rginx-agent` 主动连向控制中心;同时,本地运维命令、远程状态/快照/指令链路、文档和发布准备都已经同步到这一模型。 + +### 新增 + +- 新增出站 agent 控制面路径,节点主动向控制中心建立 HTTPS / WebSocket 通讯,不再需要额外的节点控制端口、控制域名或节点侧控制证书。 +- 新增本地 `agent status`、`agent disable`、`agent enable` 运维命令,与现有的 `status`、`snapshot`、`delta`、`wait`、`traffic`、`upstreams`、`cache`、`system` 等命令形成统一的节点操作面。 +- 新增控制中心与节点之间的结构化命令、结果与快照轨道,覆盖注册、心跳、轮询、结果回报以及日常运维动作的传输语义。 + +### 更新与改进 + +- 继续收口 legacy `control_plane` 兼容路径,并把 README、OpenAPI、发布说明和计划文档同步到 outbound agent 模式。 +- 继续拆分大文件和测试模块,让 modularization gate 保持无 warning,也让后续版本维护、回归定位和代码阅读更轻。 +- release prep、packaging 和版本号文档同步到 `0.1.7`,发布前检查链路现在能直接对齐当前 workspace 版本。 + +### 问题修复 + +- 修复 agent、control plane、CLI、snapshot 和路由视图上的若干回归,让状态导出和本地控制语义保持一致。 +- 修复 release prep 所需的 changelog、release notes 和 workspace version 一致性问题,避免发布时再出现版本漂移。 + +## New + +- Added the outbound-agent control plane path so nodes connect to the control center directly and keep only business 80/443 ports open. +- Added local `agent status`, `agent disable`, and `agent enable` commands alongside the existing `status`, `snapshot`, `delta`, `wait`, `traffic`, `upstreams`, `cache`, and `system` commands. +- Added structured command, result, and snapshot flows between the control center and nodes for registration, heartbeat, polling, result reporting, and everyday operational actions. + +## Update & Improvement + +- Continued narrowing the legacy `control_plane` compatibility path and synced the README, OpenAPI, release notes, and planning docs to the outbound-agent model. 
+- Continued splitting large files and test modules so the modularization gate stays warning-free and future releases remain easier to maintain. +- Synced release-prep, packaging, and version-reference docs to `0.1.7`. + +## Bug Fixes + +- Fixed several agent, control-plane, CLI, snapshot, and routing-view regressions so state export and local control semantics stay aligned. +- Fixed the consistency between changelog, release notes, and workspace version required by release prep. + ## [0.1.6] - 2026-05-13 `0.1.6` 主要把边缘控制面、结构化配置变更和静态文件主路径一起收口到“可发布、可观测、可持续迭代”的状态。一方面,节点现在同时具备本地 admin socket 和远程 HTTPS control plane 两套对齐的观测/控制入口;另一方面,static file、runtime state 和 release baseline 也继续补齐,方便后续继续往多节点编排和性能迭代推进。 diff --git a/Cargo.lock b/Cargo.lock index 741e699e..6a387eba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -398,6 +398,12 @@ dependencies = [ "cc", ] +[[package]] +name = "cmov" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f88a43d011fc4a6876cb7344703e297c71dda42494fee094d5f7c76bf13f746" + [[package]] name = "colorchoice" version = "1.0.5" @@ -541,6 +547,15 @@ dependencies = [ "linktime-proc-macro", ] +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + [[package]] name = "data-encoding" version = "2.11.0" @@ -589,6 +604,7 @@ dependencies = [ "block-buffer 0.12.0", "const-oid", "crypto-common 0.2.1", + "ctutils", ] [[package]] @@ -968,6 +984,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hmac" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" +dependencies = [ + "digest 0.11.2", +] + [[package]] name = "http" version = "1.4.0" @@ -2101,7 +2126,7 @@ checksum = 
"1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" [[package]] name = "rginx" -version = "0.1.6" +version = "0.1.7" dependencies = [ "anyhow", "base64", @@ -2138,11 +2163,12 @@ dependencies = [ [[package]] name = "rginx-agent" -version = "0.1.6" +version = "0.1.7" dependencies = [ "bytes", "futures-util", "hex", + "hmac", "http", "http-body-util", "hyper", @@ -2168,11 +2194,12 @@ dependencies = [ "tokio-tungstenite", "tracing", "tungstenite", + "uuid", ] [[package]] name = "rginx-config" -version = "0.1.6" +version = "0.1.7" dependencies = [ "http", "ipnet", @@ -2189,7 +2216,7 @@ dependencies = [ [[package]] name = "rginx-core" -version = "0.1.6" +version = "0.1.7" dependencies = [ "http", "ipnet", @@ -2199,7 +2226,7 @@ dependencies = [ [[package]] name = "rginx-http" -version = "0.1.6" +version = "0.1.7" dependencies = [ "aws-lc-rs", "base64", @@ -2245,14 +2272,14 @@ dependencies = [ [[package]] name = "rginx-observability" -version = "0.1.6" +version = "0.1.7" dependencies = [ "tracing-subscriber", ] [[package]] name = "rginx-runtime" -version = "0.1.6" +version = "0.1.7" dependencies = [ "bytes", "ctor", @@ -2280,7 +2307,7 @@ dependencies = [ [[package]] name = "rginx-sdk" -version = "0.1.6" +version = "0.1.7" dependencies = [ "futures-util", "mockito", @@ -2986,8 +3013,11 @@ checksum = "8f72a05e828585856dacd553fba484c242c46e391fb0e58917c942ee9202915c" dependencies = [ "futures-util", "log", + "rustls", "rustls-native-certs", + "rustls-pki-types", "tokio", + "tokio-rustls", "tungstenite", ] @@ -3128,6 +3158,8 @@ dependencies = [ "httparse", "log", "rand 0.9.4", + "rustls", + "rustls-pki-types", "sha1 0.10.6", "thiserror 2.0.18", ] diff --git a/Cargo.toml b/Cargo.toml index 1f0177b8..b3ff1391 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ default-members = ["crates/rginx-app"] resolver = "2" [workspace.package] -version = "0.1.6" +version = "0.1.7" edition = "2024" authors = ["vansour"] license = "MIT OR Apache-2.0" diff --git 
a/README.md b/README.md index 36634ac8..796fe1cd 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ `rginx` 是一个面向 Linux 的 Rust 边缘反向代理单二进制项目。 -当前版本:`0.1.6` +当前版本:`0.1.7` ## 能力概览 @@ -18,7 +18,8 @@ - gRPC、grpc-web、trailers、`grpc-timeout` - 压缩、限流、CIDR allow/deny、`trusted_proxies`、`client_ip_header` - 热重载、优雅重启、平滑退出 -- 本地只读运维命令:`check`、`status`、`snapshot`、`snapshot-version`、`delta`、`wait`、`counters`、`traffic`、`peers`、`upstreams` +- 出站 agent 控制面:节点主动连接控制中心,不需要额外节点控制端口、控制域名或节点侧控制证书 +- 本地运维命令:`check`、`status`、`agent status`、`agent disable`、`agent enable`、`snapshot`、`snapshot-version`、`delta`、`wait`、`counters`、`traffic`、`peers`、`upstreams` ## 平台与交付 @@ -75,6 +76,12 @@ TLS 证书、局部 `upstreams` 和 `locations`。不要把网站配置重新内 写法如 `"X-Foo": "bar"` 仍然有效,动态值需使用 RON enum 写法如 `"X-Real-IP": ClientIp` 或 `"Origin": Template("https://{host}")`。 +控制面推荐使用 `agent` 出站模式,而不是让每个节点暴露旧的 +`control_plane.listen` 管理端口。迁移后,节点只需要监听自己网站的 +80/443 等业务端口;控制中心通过节点主动发起的 HTTPS/WebSocket 连接下发命令。 +保留旧 `control_plane` 配置仅用于兼容窗口,`rginx check` 会在详细输出中提示 +是否仍在打开额外节点控制端口。 + 静态文件能力当前已经可以直接承载常见站点、文档站和下载站主路径,稳定支持: - `root` / `alias` / `index` @@ -146,6 +153,9 @@ rginx -t rginx -s reload rginx check rginx status +rginx agent status +rginx agent disable +rginx agent enable rginx snapshot --include status --include traffic rginx snapshot-version rginx delta --since-version --include status diff --git a/configs/control-plane-mtls.example.ron b/configs/control-plane-mtls.example.ron index 678d4b36..89aa5205 100644 --- a/configs/control-plane-mtls.example.ron +++ b/configs/control-plane-mtls.example.ron @@ -1,12 +1,14 @@ -// Example configuration for mTLS client certificate authentication -// This enables mutual TLS authentication for the control plane +// Legacy example for mTLS client certificate authentication on the node-side +// control-plane server. New deployments should prefer `agent` outbound control +// so nodes do not open an extra control port or maintain a control-plane server +// certificate. 
Config( - control_plane: Some(ControlPlane( + control_plane: Some(ControlPlaneConfig( enabled: Some(true), listen: Some("0.0.0.0:9443"), - tls: Some(ControlPlaneTls( + tls: Some(ControlPlaneTlsConfig( // Server certificate and key cert_path: "/etc/rginx/control-plane.crt", key_path: "/etc/rginx/control-plane.key", diff --git a/crates/rginx-agent/Cargo.toml b/crates/rginx-agent/Cargo.toml index b7ce3416..07e12bd1 100644 --- a/crates/rginx-agent/Cargo.toml +++ b/crates/rginx-agent/Cargo.toml @@ -18,9 +18,11 @@ rginx-core = { path = "../rginx-core" } bytes.workspace = true futures-util = "0.3" hex = "0.4" +hmac = "0.13" http.workspace = true http-body-util.workspace = true hyper.workspace = true +hyper-rustls.workspace = true hyper-util.workspace = true ipnet.workspace = true libc.workspace = true @@ -32,13 +34,13 @@ sha2.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["io-util", "net", "time", "fs"] } tokio-rustls.workspace = true -tokio-tungstenite = "0.29" +tokio-tungstenite = { version = "0.29", features = ["rustls-tls-native-roots"] } tracing.workspace = true tungstenite = "0.29" prometheus = "0.14" lazy_static = "1.5" +uuid.workspace = true [dev-dependencies] -hyper-rustls.workspace = true rcgen = "0.14" tempfile = "3.27" diff --git a/crates/rginx-agent/src/agent_core.rs b/crates/rginx-agent/src/agent_core.rs new file mode 100644 index 00000000..dd18d2f7 --- /dev/null +++ b/crates/rginx-agent/src/agent_core.rs @@ -0,0 +1,287 @@ +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use rginx_config::managed::ManagedResourceMutation; +use rginx_http::{ + ApplyResultSnapshot, CacheInvalidationResult, CachePurgeResult, ReloadOutcomeSnapshot, + ReloadResultSnapshot, SharedState, SnapshotModule, +}; + +use crate::error::{Error, Result}; +use crate::model::{ + ConfigApplyResultView, NodeActionStatusView, NodeCacheView, NodeControlResultView, + NodeDeltaView, NodeRevisionView, NodeSnapshotView, NodeStatusView, NodeSystemView, + 
NodeTrafficView, NodeUpstreamsView, NodeWaitView, +}; +use crate::server::control::{ConfigApplyExecutor, ReloadExecutor, UnsupportedConfigApplyExecutor}; +use crate::system::collect_system_view; + +const RELOAD_COMPLETION_TIMEOUT: Duration = Duration::from_secs(30); + +#[derive(Clone)] +pub struct AgentCore { + state: SharedState, + reload_executor: Arc, + config_apply_executor: Arc, +} + +impl AgentCore { + pub fn new(state: SharedState, reload_executor: Arc) -> Self { + Self { + state, + reload_executor, + config_apply_executor: Arc::new(UnsupportedConfigApplyExecutor), + } + } + + pub fn with_config_apply_executor( + mut self, + config_apply_executor: Arc, + ) -> Self { + self.config_apply_executor = config_apply_executor; + self + } + + pub fn shared_state(&self) -> &SharedState { + &self.state + } + + pub async fn status(&self) -> Result { + Ok(NodeStatusView::from(self.state.status_snapshot().await)) + } + + pub async fn snapshot(&self, window_secs: Option) -> Result { + Ok(NodeSnapshotView { + snapshot_version: self.state.current_snapshot_version(), + status: self.state.status_snapshot().await, + counters: self.state.counters_snapshot(), + traffic: self.state.traffic_stats_snapshot_with_window(window_secs), + peer_health: self.state.peer_health_snapshot().await, + upstreams: self.state.upstream_stats_snapshot_with_window(window_secs), + cache: self.state.cache_stats_snapshot().await, + }) + } + + pub async fn delta_since( + &self, + since_version: u64, + window_secs: Option, + ) -> Result { + let delta = self.state.snapshot_delta_since( + since_version, + Some(&SnapshotModule::all()), + window_secs, + ); + Ok(NodeDeltaView::from(delta)) + } + + pub async fn wait_for_snapshot_change( + &self, + since_version: u64, + timeout: Option, + ) -> Result { + let snapshot_version = self.state.wait_for_snapshot_change(since_version, timeout).await; + Ok(NodeWaitView { snapshot_version }) + } + + pub async fn traffic(&self, window_secs: Option) -> Result { + 
Ok(NodeTrafficView::from(self.state.traffic_stats_snapshot_with_window(window_secs))) + } + + pub async fn upstreams(&self, window_secs: Option) -> Result { + Ok(NodeUpstreamsView { + peer_health: self.state.peer_health_snapshot().await, + upstreams: self.state.upstream_stats_snapshot_with_window(window_secs), + }) + } + + pub async fn cache(&self) -> Result { + Ok(NodeCacheView::from(self.state.cache_stats_snapshot().await)) + } + + pub async fn system(&self) -> Result { + let config = self.state.current_config().await; + let cache_zone_paths = + config.cache_zones.values().map(|zone| zone.path.clone()).collect::>(); + tokio::task::spawn_blocking(move || collect_system_view(&cache_zone_paths)) + .await + .map_err(|error| Error::Server(error.to_string()))? + } + + pub async fn revision(&self) -> Result { + Ok(NodeRevisionView::from(self.state.revision_status_snapshot().await)) + } + + pub async fn reload(&self) -> Result { + let initial_status = self.state.status_snapshot().await.reload; + let fallback_revision = self.state.current_revision().await; + self.reload_executor.execute().await?; + self.wait_for_reload_attempt(initial_status.attempts_total).await?; + Ok(self.reload_action_status(fallback_revision).await) + } + + pub async fn action_status(&self, accepted_revision: u64) -> NodeActionStatusView { + NodeActionStatusView { + accepted_revision, + revision: self.state.revision_status_snapshot().await, + last_reload_result: last_reload_result(&self.state).await, + last_apply_result: last_apply_result(&self.state), + } + } + + pub async fn wrap_result(&self, result: T) -> NodeControlResultView { + let current_revision = self.state.current_revision().await; + NodeControlResultView { status: self.action_status(current_revision).await, result } + } + + pub async fn apply_config( + &self, + request: ManagedResourceMutation, + ) -> Result> { + let outcome = self.config_apply_executor.execute(request).await?; + Ok(NodeControlResultView { + status: 
self.action_status(outcome.accepted_revision).await, + result: outcome.result, + }) + } + + pub async fn purge_cache( + &self, + command: CachePurgeCommand, + ) -> Result> { + let result = match command.target { + CachePurgeTarget::Zone => self.state.purge_cache_zone(&command.zone_name).await, + CachePurgeTarget::Key(key) => { + self.state.purge_cache_key(&command.zone_name, &key).await + } + CachePurgeTarget::Prefix(prefix) => { + self.state.purge_cache_prefix(&command.zone_name, &prefix).await + } + } + .map_err(Error::InvalidRequest)?; + Ok(self.wrap_result(result).await) + } + + pub async fn invalidate_cache( + &self, + command: CacheInvalidateCommand, + ) -> Result> { + let result = match command.target { + CacheInvalidateTarget::Zone => { + self.state.invalidate_cache_zone(&command.zone_name).await + } + CacheInvalidateTarget::Key(key) => { + self.state.invalidate_cache_key(&command.zone_name, &key).await + } + CacheInvalidateTarget::Prefix(prefix) => { + self.state.invalidate_cache_prefix(&command.zone_name, &prefix).await + } + CacheInvalidateTarget::Tag(tag) => { + self.state.invalidate_cache_tag(&command.zone_name, &tag).await + } + } + .map_err(Error::InvalidRequest)?; + Ok(self.wrap_result(result).await) + } + + pub async fn clear_cache_invalidations( + &self, + command: CacheClearInvalidationsCommand, + ) -> Result> { + let result = self + .state + .clear_cache_invalidations(&command.zone_name) + .await + .map_err(Error::InvalidRequest)?; + Ok(self.wrap_result(result).await) + } + + pub async fn set_desired_revision( + &self, + desired_revision: u64, + ) -> Result { + self.state.set_desired_revision(desired_revision); + Ok(self.action_status(desired_revision).await) + } + + async fn reload_action_status(&self, fallback_revision: u64) -> NodeActionStatusView { + let current_revision = self.state.current_revision().await; + let last_reload_result = last_reload_result(&self.state).await; + let accepted_revision = match 
last_reload_result.as_ref().map(|result| &result.outcome) { + Some(ReloadOutcomeSnapshot::Success { revision }) => *revision, + _ => current_revision.max(fallback_revision), + }; + NodeActionStatusView { + accepted_revision, + revision: self.state.revision_status_snapshot().await, + last_reload_result, + last_apply_result: last_apply_result(&self.state), + } + } + + async fn wait_for_reload_attempt(&self, attempts_before: u64) -> Result<()> { + let started = Instant::now(); + let mut observed_version = self.state.current_snapshot_version(); + loop { + let reload = self.state.status_snapshot().await.reload; + if reload.attempts_total > attempts_before { + return Ok(()); + } + + let Some(remaining) = RELOAD_COMPLETION_TIMEOUT.checked_sub(started.elapsed()) else { + return Err(Error::Server( + "timed out waiting for runtime reload completion".to_string(), + )); + }; + + let changed_version = + self.state.wait_for_snapshot_change(observed_version, Some(remaining)).await; + if changed_version == observed_version { + return Err(Error::Server( + "timed out waiting for runtime reload completion".to_string(), + )); + } + observed_version = changed_version; + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CachePurgeCommand { + pub zone_name: String, + pub target: CachePurgeTarget, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CachePurgeTarget { + Zone, + Key(String), + Prefix(String), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CacheInvalidateCommand { + pub zone_name: String, + pub target: CacheInvalidateTarget, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CacheInvalidateTarget { + Zone, + Key(String), + Prefix(String), + Tag(String), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CacheClearInvalidationsCommand { + pub zone_name: String, +} + +async fn last_reload_result(state: &SharedState) -> Option { + state.status_snapshot().await.reload.last_result +} + +fn last_apply_result(state: &SharedState) -> Option { + 
state.apply_status_snapshot().last_result +} diff --git a/crates/rginx-agent/src/circuit_breaker.rs b/crates/rginx-agent/src/circuit_breaker.rs index 3f9c7150..d465c70c 100644 --- a/crates/rginx-agent/src/circuit_breaker.rs +++ b/crates/rginx-agent/src/circuit_breaker.rs @@ -283,149 +283,4 @@ fn current_timestamp() -> u64 { } #[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn test_circuit_breaker_closed_state() { - let config = CircuitBreakerConfig { - failure_threshold: 3, - success_threshold: 2, - timeout_secs: 5, - half_open_max_requests: 2, - }; - let breaker = CircuitBreaker::new(config); - - let result = breaker.call(async { Ok::<_, ()>(42) }).await; - assert!(result.is_ok()); - assert_eq!(breaker.get_state().await, CircuitState::Closed); - } - - #[tokio::test] - async fn test_circuit_breaker_opens_on_failures() { - let config = CircuitBreakerConfig { - failure_threshold: 3, - success_threshold: 2, - timeout_secs: 5, - half_open_max_requests: 2, - }; - let breaker = CircuitBreaker::new(config); - - for _ in 0..3 { - let _ = breaker.call(async { Err::<(), _>("error") }).await; - } - - assert_eq!(breaker.get_state().await, CircuitState::Open); - } - - #[tokio::test] - async fn test_circuit_breaker_rejects_when_open() { - let config = CircuitBreakerConfig { - failure_threshold: 2, - success_threshold: 2, - timeout_secs: 60, - half_open_max_requests: 2, - }; - let breaker = CircuitBreaker::new(config); - - for _ in 0..2 { - let _ = breaker.call(async { Err::<(), _>("error") }).await; - } - - let result = breaker.call(async { Ok::<_, ()>(42) }).await; - assert!(matches!(result, Err(CircuitBreakerError::CircuitOpen))); - } - - #[tokio::test] - async fn test_circuit_breaker_half_open_transition() { - let config = CircuitBreakerConfig { - failure_threshold: 2, - success_threshold: 2, - timeout_secs: 1, - half_open_max_requests: 2, - }; - let breaker = CircuitBreaker::new(config); - - for _ in 0..2 { - let _ = breaker.call(async { Err::<(), 
_>("error") }).await; - } - - assert_eq!(breaker.get_state().await, CircuitState::Open); - - tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; - - let result = breaker.call(async { Ok::<_, ()>(42) }).await; - assert!(result.is_ok()); - assert_eq!(breaker.get_state().await, CircuitState::HalfOpen); - } - - #[tokio::test] - async fn test_circuit_breaker_closes_after_success() { - let config = CircuitBreakerConfig { - failure_threshold: 2, - success_threshold: 2, - timeout_secs: 1, - half_open_max_requests: 3, - }; - let breaker = CircuitBreaker::new(config); - - for _ in 0..2 { - let _ = breaker.call(async { Err::<(), _>("error") }).await; - } - - tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; - - for _ in 0..2 { - let _ = breaker.call(async { Ok::<_, ()>(42) }).await; - } - - assert_eq!(breaker.get_state().await, CircuitState::Closed); - } - - #[tokio::test] - async fn test_circuit_breaker_stats() { - let config = CircuitBreakerConfig::default(); - let breaker = CircuitBreaker::new(config); - - let _ = breaker.call(async { Ok::<_, ()>(42) }).await; - let _ = breaker.call(async { Err::<(), _>("error") }).await; - - let stats = breaker.get_stats().await; - assert_eq!(stats.total_requests, 2); - assert!(stats.last_failure_time.is_some()); - } - - #[tokio::test] - async fn test_circuit_breaker_registry() { - let registry = CircuitBreakerRegistry::default(); - - let breaker1 = registry.get_or_create("service1").await; - let breaker2 = registry.get_or_create("service1").await; - - assert!(Arc::ptr_eq(&breaker1, &breaker2)); - - let breakers = registry.list().await; - assert_eq!(breakers.len(), 1); - assert!(breakers.contains(&"service1".to_string())); - } - - #[tokio::test] - async fn test_circuit_breaker_reset() { - let config = CircuitBreakerConfig { - failure_threshold: 2, - success_threshold: 2, - timeout_secs: 60, - half_open_max_requests: 2, - }; - let breaker = CircuitBreaker::new(config); - - for _ in 0..2 { - let _ = 
breaker.call(async { Err::<(), _>("error") }).await; - } - - assert_eq!(breaker.get_state().await, CircuitState::Open); - - breaker.reset().await; - assert_eq!(breaker.get_state().await, CircuitState::Closed); - } -} +mod tests; diff --git a/crates/rginx-agent/src/circuit_breaker/tests.rs b/crates/rginx-agent/src/circuit_breaker/tests.rs new file mode 100644 index 00000000..ef2c0155 --- /dev/null +++ b/crates/rginx-agent/src/circuit_breaker/tests.rs @@ -0,0 +1,144 @@ +use super::*; + +#[tokio::test] +async fn test_circuit_breaker_closed_state() { + let config = CircuitBreakerConfig { + failure_threshold: 3, + success_threshold: 2, + timeout_secs: 5, + half_open_max_requests: 2, + }; + let breaker = CircuitBreaker::new(config); + + let result = breaker.call(async { Ok::<_, ()>(42) }).await; + assert!(result.is_ok()); + assert_eq!(breaker.get_state().await, CircuitState::Closed); +} + +#[tokio::test] +async fn test_circuit_breaker_opens_on_failures() { + let config = CircuitBreakerConfig { + failure_threshold: 3, + success_threshold: 2, + timeout_secs: 5, + half_open_max_requests: 2, + }; + let breaker = CircuitBreaker::new(config); + + for _ in 0..3 { + let _ = breaker.call(async { Err::<(), _>("error") }).await; + } + + assert_eq!(breaker.get_state().await, CircuitState::Open); +} + +#[tokio::test] +async fn test_circuit_breaker_rejects_when_open() { + let config = CircuitBreakerConfig { + failure_threshold: 2, + success_threshold: 2, + timeout_secs: 60, + half_open_max_requests: 2, + }; + let breaker = CircuitBreaker::new(config); + + for _ in 0..2 { + let _ = breaker.call(async { Err::<(), _>("error") }).await; + } + + let result = breaker.call(async { Ok::<_, ()>(42) }).await; + assert!(matches!(result, Err(CircuitBreakerError::CircuitOpen))); +} + +#[tokio::test] +async fn test_circuit_breaker_half_open_transition() { + let config = CircuitBreakerConfig { + failure_threshold: 2, + success_threshold: 2, + timeout_secs: 1, + half_open_max_requests: 2, + }; + let 
breaker = CircuitBreaker::new(config); + + for _ in 0..2 { + let _ = breaker.call(async { Err::<(), _>("error") }).await; + } + + assert_eq!(breaker.get_state().await, CircuitState::Open); + + tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; + + let result = breaker.call(async { Ok::<_, ()>(42) }).await; + assert!(result.is_ok()); + assert_eq!(breaker.get_state().await, CircuitState::HalfOpen); +} + +#[tokio::test] +async fn test_circuit_breaker_closes_after_success() { + let config = CircuitBreakerConfig { + failure_threshold: 2, + success_threshold: 2, + timeout_secs: 1, + half_open_max_requests: 3, + }; + let breaker = CircuitBreaker::new(config); + + for _ in 0..2 { + let _ = breaker.call(async { Err::<(), _>("error") }).await; + } + + tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; + + for _ in 0..2 { + let _ = breaker.call(async { Ok::<_, ()>(42) }).await; + } + + assert_eq!(breaker.get_state().await, CircuitState::Closed); +} + +#[tokio::test] +async fn test_circuit_breaker_stats() { + let config = CircuitBreakerConfig::default(); + let breaker = CircuitBreaker::new(config); + + let _ = breaker.call(async { Ok::<_, ()>(42) }).await; + let _ = breaker.call(async { Err::<(), _>("error") }).await; + + let stats = breaker.get_stats().await; + assert_eq!(stats.total_requests, 2); + assert!(stats.last_failure_time.is_some()); +} + +#[tokio::test] +async fn test_circuit_breaker_registry() { + let registry = CircuitBreakerRegistry::default(); + + let breaker1 = registry.get_or_create("service1").await; + let breaker2 = registry.get_or_create("service1").await; + + assert!(Arc::ptr_eq(&breaker1, &breaker2)); + + let breakers = registry.list().await; + assert_eq!(breakers.len(), 1); + assert!(breakers.contains(&"service1".to_string())); +} + +#[tokio::test] +async fn test_circuit_breaker_reset() { + let config = CircuitBreakerConfig { + failure_threshold: 2, + success_threshold: 2, + timeout_secs: 60, + half_open_max_requests: 2, + }; + 
let breaker = CircuitBreaker::new(config); + + for _ in 0..2 { + let _ = breaker.call(async { Err::<(), _>("error") }).await; + } + + assert_eq!(breaker.get_state().await, CircuitState::Open); + + breaker.reset().await; + assert_eq!(breaker.get_state().await, CircuitState::Closed); +} diff --git a/crates/rginx-agent/src/config_history.rs b/crates/rginx-agent/src/config_history.rs index d118cafb..46587409 100644 --- a/crates/rginx-agent/src/config_history.rs +++ b/crates/rginx-agent/src/config_history.rs @@ -3,12 +3,15 @@ use std::path::PathBuf; use std::sync::Arc; use serde::{Deserialize, Serialize}; -use sha2::{Digest, Sha256}; use tokio::sync::RwLock; use crate::error::{Error, Result}; use crate::registry::current_timestamp_ms; +mod diff; + +use diff::{calculate_diff, calculate_hash}; + /// Configuration revision record #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ConfigRevision { @@ -236,151 +239,5 @@ impl ConfigHistory { } } -fn calculate_hash(config: &serde_json::Value) -> String { - let content = serde_json::to_string(config).unwrap_or_default(); - let hash = Sha256::digest(content.as_bytes()); - hex::encode(hash) -} - -fn calculate_diff(old: &serde_json::Value, new: &serde_json::Value) -> ConfigDiff { - let mut changes = Vec::new(); - let mut additions = 0; - let mut removals = 0; - let mut modifications = 0; - - // Simple diff implementation - compare JSON values - diff_values("", old, new, &mut changes, &mut additions, &mut removals, &mut modifications); - - ConfigDiff { changes, summary: DiffSummary { additions, removals, modifications } } -} - -fn diff_values( - path: &str, - old: &serde_json::Value, - new: &serde_json::Value, - changes: &mut Vec, - additions: &mut usize, - removals: &mut usize, - modifications: &mut usize, -) { - use serde_json::Value; - - match (old, new) { - (Value::Object(old_map), Value::Object(new_map)) => { - // Check for removed and modified keys - for (key, old_val) in old_map { - let new_path = - if 
path.is_empty() { format!("/{}", key) } else { format!("{}/{}", path, key) }; - - if let Some(new_val) = new_map.get(key) { - if old_val != new_val { - diff_values( - &new_path, - old_val, - new_val, - changes, - additions, - removals, - modifications, - ); - } - } else { - *removals += 1; - changes.push(ConfigChange { - op: ChangeOperation::Remove, - path: new_path, - old_value: Some(old_val.clone()), - new_value: None, - }); - } - } - - // Check for added keys - for (key, new_val) in new_map { - if !old_map.contains_key(key) { - let new_path = if path.is_empty() { - format!("/{}", key) - } else { - format!("{}/{}", path, key) - }; - *additions += 1; - changes.push(ConfigChange { - op: ChangeOperation::Add, - path: new_path, - old_value: None, - new_value: Some(new_val.clone()), - }); - } - } - } - _ if old != new => { - *modifications += 1; - changes.push(ConfigChange { - op: ChangeOperation::Replace, - path: path.to_string(), - old_value: Some(old.clone()), - new_value: Some(new.clone()), - }); - } - _ => {} - } -} - #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_calculate_hash() { - let config = serde_json::json!({"key": "value"}); - let hash = calculate_hash(&config); - assert!(!hash.is_empty()); - assert_eq!(hash.len(), 64); // SHA256 produces 64 hex characters - } - - #[test] - fn test_calculate_diff_add() { - let old = serde_json::json!({"a": 1}); - let new = serde_json::json!({"a": 1, "b": 2}); - let diff = calculate_diff(&old, &new); - assert_eq!(diff.summary.additions, 1); - assert_eq!(diff.summary.removals, 0); - assert_eq!(diff.summary.modifications, 0); - } - - #[test] - fn test_calculate_diff_remove() { - let old = serde_json::json!({"a": 1, "b": 2}); - let new = serde_json::json!({"a": 1}); - let diff = calculate_diff(&old, &new); - assert_eq!(diff.summary.additions, 0); - assert_eq!(diff.summary.removals, 1); - assert_eq!(diff.summary.modifications, 0); - } - - #[test] - fn test_calculate_diff_replace() { - let old = 
serde_json::json!({"a": 1}); - let new = serde_json::json!({"a": 2}); - let diff = calculate_diff(&old, &new); - assert_eq!(diff.summary.additions, 0); - assert_eq!(diff.summary.removals, 0); - assert_eq!(diff.summary.modifications, 1); - } - - #[tokio::test] - async fn test_config_history() { - let temp_dir = tempfile::tempdir().unwrap(); - let history = ConfigHistory::new(temp_dir.path().to_path_buf(), 10); - - let config = serde_json::json!({"test": "value"}); - history - .record(1, "test-user".to_string(), config, ConfigMetadata::default()) - .await - .unwrap(); - - let revision = history.get(1).await.unwrap(); - assert_eq!(revision.revision, 1); - assert_eq!(revision.applied_by, "test-user"); - } -} +mod tests; diff --git a/crates/rginx-agent/src/config_history/diff.rs b/crates/rginx-agent/src/config_history/diff.rs new file mode 100644 index 00000000..c200306c --- /dev/null +++ b/crates/rginx-agent/src/config_history/diff.rs @@ -0,0 +1,97 @@ +use sha2::{Digest, Sha256}; + +use super::{ChangeOperation, ConfigChange, ConfigDiff, DiffSummary}; + +pub(super) fn calculate_hash(config: &serde_json::Value) -> String { + let content = serde_json::to_string(config).unwrap_or_default(); + let hash = Sha256::digest(content.as_bytes()); + hex::encode(hash) +} + +pub(super) fn calculate_diff(old: &serde_json::Value, new: &serde_json::Value) -> ConfigDiff { + let mut changes = Vec::new(); + let mut additions = 0; + let mut removals = 0; + let mut modifications = 0; + + diff_values("", old, new, &mut changes, &mut additions, &mut removals, &mut modifications); + + ConfigDiff { changes, summary: DiffSummary { additions, removals, modifications } } +} + +fn diff_values( + path: &str, + old: &serde_json::Value, + new: &serde_json::Value, + changes: &mut Vec, + additions: &mut usize, + removals: &mut usize, + modifications: &mut usize, +) { + use serde_json::Value; + + match (old, new) { + (Value::Object(old_map), Value::Object(new_map)) => { + diff_object_values(path, old_map, 
new_map, changes, additions, removals, modifications); + } + _ if old != new => { + *modifications += 1; + changes.push(ConfigChange { + op: ChangeOperation::Replace, + path: path.to_string(), + old_value: Some(old.clone()), + new_value: Some(new.clone()), + }); + } + _ => {} + } +} + +fn diff_object_values( + path: &str, + old_map: &serde_json::Map, + new_map: &serde_json::Map, + changes: &mut Vec, + additions: &mut usize, + removals: &mut usize, + modifications: &mut usize, +) { + for (key, old_val) in old_map { + let next_path = if path.is_empty() { format!("/{key}") } else { format!("{path}/{key}") }; + if let Some(new_val) = new_map.get(key) { + if old_val != new_val { + diff_values( + &next_path, + old_val, + new_val, + changes, + additions, + removals, + modifications, + ); + } + } else { + *removals += 1; + changes.push(ConfigChange { + op: ChangeOperation::Remove, + path: next_path, + old_value: Some(old_val.clone()), + new_value: None, + }); + } + } + + for (key, new_val) in new_map { + if !old_map.contains_key(key) { + let next_path = + if path.is_empty() { format!("/{key}") } else { format!("{path}/{key}") }; + *additions += 1; + changes.push(ConfigChange { + op: ChangeOperation::Add, + path: next_path, + old_value: None, + new_value: Some(new_val.clone()), + }); + } + } +} diff --git a/crates/rginx-agent/src/config_history/tests.rs b/crates/rginx-agent/src/config_history/tests.rs new file mode 100644 index 00000000..4afe8c19 --- /dev/null +++ b/crates/rginx-agent/src/config_history/tests.rs @@ -0,0 +1,52 @@ +use super::*; + +#[test] +fn test_calculate_hash() { + let config = serde_json::json!({"key": "value"}); + let hash = calculate_hash(&config); + assert!(!hash.is_empty()); + assert_eq!(hash.len(), 64); // SHA256 produces 64 hex characters +} + +#[test] +fn test_calculate_diff_add() { + let old = serde_json::json!({"a": 1}); + let new = serde_json::json!({"a": 1, "b": 2}); + let diff = calculate_diff(&old, &new); + assert_eq!(diff.summary.additions, 
1); + assert_eq!(diff.summary.removals, 0); + assert_eq!(diff.summary.modifications, 0); +} + +#[test] +fn test_calculate_diff_remove() { + let old = serde_json::json!({"a": 1, "b": 2}); + let new = serde_json::json!({"a": 1}); + let diff = calculate_diff(&old, &new); + assert_eq!(diff.summary.additions, 0); + assert_eq!(diff.summary.removals, 1); + assert_eq!(diff.summary.modifications, 0); +} + +#[test] +fn test_calculate_diff_replace() { + let old = serde_json::json!({"a": 1}); + let new = serde_json::json!({"a": 2}); + let diff = calculate_diff(&old, &new); + assert_eq!(diff.summary.additions, 0); + assert_eq!(diff.summary.removals, 0); + assert_eq!(diff.summary.modifications, 1); +} + +#[tokio::test] +async fn test_config_history() { + let temp_dir = tempfile::tempdir().unwrap(); + let history = ConfigHistory::new(temp_dir.path().to_path_buf(), 10); + + let config = serde_json::json!({"test": "value"}); + history.record(1, "test-user".to_string(), config, ConfigMetadata::default()).await.unwrap(); + + let revision = history.get(1).await.unwrap(); + assert_eq!(revision.revision, 1); + assert_eq!(revision.applied_by, "test-user"); +} diff --git a/crates/rginx-agent/src/config_validator.rs b/crates/rginx-agent/src/config_validator.rs index a6f4a748..abf4f0bd 100644 --- a/crates/rginx-agent/src/config_validator.rs +++ b/crates/rginx-agent/src/config_validator.rs @@ -209,48 +209,4 @@ pub struct ImpactAssessment { } #[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn test_validate_syntax_valid() { - let validator = ConfigValidator::new(); - let config = serde_json::json!({"key": "value"}); - assert!(validator.validate_syntax(&config).is_ok()); - } - - #[tokio::test] - async fn test_validate_syntax_invalid() { - let validator = ConfigValidator::new(); - let config = serde_json::json!("not an object"); - assert!(validator.validate_syntax(&config).is_err()); - } - - #[tokio::test] - async fn test_validate_dry_run() { - let validator = 
ConfigValidator::new(); - let config = serde_json::json!({"test": "config"}); - let result = validator.validate_dry_run(&config).await.unwrap(); - assert!(result.valid); - } - - #[tokio::test] - async fn test_assess_impact_no_change() { - let validator = ConfigValidator::new(); - let config = serde_json::json!({"test": "config"}); - let impact = validator.assess_impact(&config, &config).await; - assert!(!impact.requires_reload); - assert!(!impact.affects_traffic); - } - - #[tokio::test] - async fn test_assess_impact_with_change() { - let validator = ConfigValidator::new(); - let old_config = serde_json::json!({"upstreams": {"api": {"peers": []}}}); - let new_config = - serde_json::json!({"upstreams": {"api": {"peers": [{"addr": "127.0.0.1:8080"}]}}}); - let impact = validator.assess_impact(&old_config, &new_config).await; - assert!(impact.requires_reload); - assert!(impact.affects_traffic); - } -} +mod tests; diff --git a/crates/rginx-agent/src/config_validator/tests.rs b/crates/rginx-agent/src/config_validator/tests.rs new file mode 100644 index 00000000..0155ff03 --- /dev/null +++ b/crates/rginx-agent/src/config_validator/tests.rs @@ -0,0 +1,43 @@ +use super::*; + +#[tokio::test] +async fn test_validate_syntax_valid() { + let validator = ConfigValidator::new(); + let config = serde_json::json!({"key": "value"}); + assert!(validator.validate_syntax(&config).is_ok()); +} + +#[tokio::test] +async fn test_validate_syntax_invalid() { + let validator = ConfigValidator::new(); + let config = serde_json::json!("not an object"); + assert!(validator.validate_syntax(&config).is_err()); +} + +#[tokio::test] +async fn test_validate_dry_run() { + let validator = ConfigValidator::new(); + let config = serde_json::json!({"test": "config"}); + let result = validator.validate_dry_run(&config).await.unwrap(); + assert!(result.valid); +} + +#[tokio::test] +async fn test_assess_impact_no_change() { + let validator = ConfigValidator::new(); + let config = serde_json::json!({"test": 
"config"}); + let impact = validator.assess_impact(&config, &config).await; + assert!(!impact.requires_reload); + assert!(!impact.affects_traffic); +} + +#[tokio::test] +async fn test_assess_impact_with_change() { + let validator = ConfigValidator::new(); + let old_config = serde_json::json!({"upstreams": {"api": {"peers": []}}}); + let new_config = + serde_json::json!({"upstreams": {"api": {"peers": [{"addr": "127.0.0.1:8080"}]}}}); + let impact = validator.assess_impact(&old_config, &new_config).await; + assert!(impact.requires_reload); + assert!(impact.affects_traffic); +} diff --git a/crates/rginx-agent/src/control_center/mod.rs b/crates/rginx-agent/src/control_center/mod.rs new file mode 100644 index 00000000..d08a251b --- /dev/null +++ b/crates/rginx-agent/src/control_center/mod.rs @@ -0,0 +1,21 @@ +//! Minimal control-center-side state model for outbound agents. +//! +//! This module is the in-process MVP behind the Phase 7 control-center API: +//! node registration, heartbeats, command queueing, command results, events, +//! and rollout target selection. 
+ +mod model; +mod query; +mod rollout; +mod store; +mod trait_adapter; + +pub use model::{ + ControlCenterCommandCreate, ControlCenterCommandRecord, ControlCenterCommandState, + ControlCenterEvent, ControlCenterEventCreate, ControlCenterNode, ControlCenterNodeFilter, + ControlCenterNodeHealth, HeartbeatRecord, NodeSession, +}; +pub use rollout::{ + ControlCenterRollout, ControlCenterRolloutCreate, RolloutTarget, RolloutTargetSelector, +}; +pub use store::ControlCenterStore; diff --git a/crates/rginx-agent/src/control_center/model.rs b/crates/rginx-agent/src/control_center/model.rs new file mode 100644 index 00000000..e0a6c086 --- /dev/null +++ b/crates/rginx-agent/src/control_center/model.rs @@ -0,0 +1,162 @@ +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +use crate::{AgentCommand, AgentCommandResult, AgentCommandStatus, AgentCommandType}; + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct ControlCenterNode { + pub node_id: String, + pub version: String, + pub region: Option, + pub pop: Option, + pub labels: BTreeMap, + pub capabilities: Vec, + pub health: ControlCenterNodeHealth, + pub registered_at_unix_ms: u64, + pub last_heartbeat_at_unix_ms: u64, + pub snapshot_version: u64, + pub current_revision: u64, + pub desired_revision: u64, + pub converged: bool, +} + +impl ControlCenterNode { + pub fn matches(&self, filter: &ControlCenterNodeFilter) -> bool { + if let Some(region) = &filter.region + && self.region.as_ref() != Some(region) + { + return false; + } + if let Some(pop) = &filter.pop + && self.pop.as_ref() != Some(pop) + { + return false; + } + if let Some(health) = filter.health + && self.health != health + { + return false; + } + filter.labels.iter().all(|(key, value)| self.labels.get(key) == Some(value)) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ControlCenterNodeHealth { + Healthy, + Degraded, + Offline, +} + 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct NodeSession { + pub session_id: String, + pub node_id: String, + pub started_at_unix_ms: u64, + pub last_seen_at_unix_ms: u64, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct HeartbeatRecord { + pub node_id: String, + pub recorded_at_unix_ms: u64, + pub snapshot_version: u64, + pub current_revision: u64, + pub desired_revision: u64, + pub converged: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ControlCenterCommandState { + Queued, + Delivered, + Succeeded, + Failed, + Expired, +} + +impl From for ControlCenterCommandState { + fn from(status: AgentCommandStatus) -> Self { + match status { + AgentCommandStatus::Succeeded => Self::Succeeded, + AgentCommandStatus::Failed => Self::Failed, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct ControlCenterCommandCreate { + #[serde(rename = "type")] + pub command_type: AgentCommandType, + #[serde(default)] + pub revision: Option, + #[serde(default)] + pub expires_at_unix_ms: Option, + #[serde(default)] + pub payload: serde_json::Value, + #[serde(default)] + pub signature: Option, +} + +impl ControlCenterCommandCreate { + pub fn reload() -> Self { + Self { + command_type: AgentCommandType::Reload, + revision: None, + expires_at_unix_ms: None, + payload: serde_json::Value::Null, + signature: None, + } + } + + pub fn set_desired_revision(revision: u64) -> Self { + Self { + command_type: AgentCommandType::SetDesiredRevision, + revision: Some(revision), + expires_at_unix_ms: None, + payload: serde_json::json!({ "desired_revision": revision }), + signature: None, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct ControlCenterCommandRecord { + pub command: AgentCommand, + pub state: ControlCenterCommandState, + pub created_at_unix_ms: u64, + pub delivered_at_unix_ms: 
Option, + pub completed_at_unix_ms: Option, + pub result: Option, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct ControlCenterNodeFilter { + #[serde(default)] + pub region: Option, + #[serde(default)] + pub pop: Option, + #[serde(default)] + pub health: Option, + #[serde(default)] + pub labels: BTreeMap, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct ControlCenterEventCreate { + pub event_type: String, + #[serde(default)] + pub payload: serde_json::Value, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct ControlCenterEvent { + pub id: String, + pub node_id: String, + pub event_type: String, + pub payload: serde_json::Value, + pub created_at_unix_ms: u64, +} diff --git a/crates/rginx-agent/src/control_center/query.rs b/crates/rginx-agent/src/control_center/query.rs new file mode 100644 index 00000000..624296e9 --- /dev/null +++ b/crates/rginx-agent/src/control_center/query.rs @@ -0,0 +1,118 @@ +use std::cmp::Reverse; + +use uuid::Uuid; + +use crate::{AgentCommandResult, Result}; + +use super::model::{ + ControlCenterCommandRecord, ControlCenterEvent, ControlCenterNode, ControlCenterNodeHealth, + HeartbeatRecord, NodeSession, +}; +use super::rollout::{ + ControlCenterRollout, ControlCenterRolloutCreate, RolloutTarget, RolloutTargetSelector, +}; +use super::store::{ControlCenterStore, ensure_node, unix_ms}; + +impl ControlCenterStore { + pub async fn get_command(&self, command_id: &str) -> Option { + self.state.read().await.commands.get(command_id).cloned() + } + + pub async fn recent_results(&self, node_id: &str, limit: usize) -> Vec { + let state = self.state.read().await; + let mut results = state + .commands + .values() + .filter_map(|record| record.result.clone()) + .filter(|result| result.node_id == node_id) + .collect::>(); + results.sort_by_key(|result| Reverse(result.finished_at_unix_ms)); + results.truncate(limit); + results + } + + pub async fn 
sessions_for_node(&self, node_id: &str) -> Vec { + let state = self.state.read().await; + let mut sessions = state + .node_sessions + .values() + .filter(|session| session.node_id == node_id) + .cloned() + .collect::>(); + sessions.sort_by_key(|session| session.started_at_unix_ms); + sessions + } + + pub async fn heartbeats_for_node(&self, node_id: &str) -> Vec { + self.state + .read() + .await + .heartbeats + .get(node_id) + .cloned() + .unwrap_or_default() + .into_iter() + .collect() + } + + pub async fn mark_node_health( + &self, + node_id: &str, + health: ControlCenterNodeHealth, + ) -> Result<()> { + let mut state = self.state.write().await; + ensure_node(&state, node_id)?; + state.nodes.get_mut(node_id).expect("node was checked").health = health; + Ok(()) + } + + pub async fn select_rollout_targets( + &self, + selector: RolloutTargetSelector, + ) -> Vec { + let state = self.state.read().await; + selected_targets(&state.nodes, &selector) + } + + pub async fn create_rollout( + &self, + request: ControlCenterRolloutCreate, + ) -> Result { + let mut state = self.state.write().await; + let target_node_ids = selected_targets(&state.nodes, &request.selector) + .into_iter() + .map(|target| target.node_id) + .collect(); + let rollout = ControlCenterRollout { + id: Uuid::now_v7().to_string(), + name: request.name, + selector: request.selector, + target_node_ids, + created_at_unix_ms: unix_ms(), + }; + state.rollouts.insert(rollout.id.clone(), rollout.clone()); + Ok(rollout) + } + + pub async fn get_rollout(&self, rollout_id: &str) -> Option { + self.state.read().await.rollouts.get(rollout_id).cloned() + } + + pub async fn events_for_node(&self, node_id: &str) -> Vec { + self.state + .read() + .await + .events + .iter() + .filter(|event| event.node_id == node_id) + .cloned() + .collect() + } +} + +fn selected_targets( + nodes: &std::collections::BTreeMap, + selector: &RolloutTargetSelector, +) -> Vec { + nodes.values().filter(|node| 
selector.matches(node)).map(RolloutTarget::from).collect() +} diff --git a/crates/rginx-agent/src/control_center/rollout.rs b/crates/rginx-agent/src/control_center/rollout.rs new file mode 100644 index 00000000..003221ed --- /dev/null +++ b/crates/rginx-agent/src/control_center/rollout.rs @@ -0,0 +1,90 @@ +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +use super::model::{ControlCenterNode, ControlCenterNodeHealth}; + +#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct RolloutTargetSelector { + #[serde(default)] + pub region: Option, + #[serde(default)] + pub pop: Option, + #[serde(default)] + pub labels: BTreeMap, + #[serde(default)] + pub require_healthy: bool, + #[serde(default)] + pub require_converged: bool, + #[serde(default)] + pub desired_revision: Option, +} + +impl RolloutTargetSelector { + pub fn matches(&self, node: &ControlCenterNode) -> bool { + if let Some(region) = &self.region + && node.region.as_ref() != Some(region) + { + return false; + } + if let Some(pop) = &self.pop + && node.pop.as_ref() != Some(pop) + { + return false; + } + if self.require_healthy && node.health != ControlCenterNodeHealth::Healthy { + return false; + } + if self.require_converged && !node.converged { + return false; + } + if let Some(revision) = self.desired_revision + && node.desired_revision != revision + { + return false; + } + self.labels.iter().all(|(key, value)| node.labels.get(key) == Some(value)) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct RolloutTarget { + pub node_id: String, + pub region: Option, + pub pop: Option, + pub labels: BTreeMap, + pub current_revision: u64, + pub desired_revision: u64, + pub converged: bool, + pub health: ControlCenterNodeHealth, +} + +impl From<&ControlCenterNode> for RolloutTarget { + fn from(node: &ControlCenterNode) -> Self { + Self { + node_id: node.node_id.clone(), + region: node.region.clone(), + pop: node.pop.clone(), + labels: 
node.labels.clone(), + current_revision: node.current_revision, + desired_revision: node.desired_revision, + converged: node.converged, + health: node.health, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct ControlCenterRolloutCreate { + pub name: String, + pub selector: RolloutTargetSelector, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct ControlCenterRollout { + pub id: String, + pub name: String, + pub selector: RolloutTargetSelector, + pub target_node_ids: Vec, + pub created_at_unix_ms: u64, +} diff --git a/crates/rginx-agent/src/control_center/store.rs b/crates/rginx-agent/src/control_center/store.rs new file mode 100644 index 00000000..0d795241 --- /dev/null +++ b/crates/rginx-agent/src/control_center/store.rs @@ -0,0 +1,289 @@ +use std::collections::{BTreeMap, VecDeque}; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; + +use tokio::sync::RwLock; +use uuid::Uuid; + +use crate::{ + AgentCommand, AgentCommandResult, AgentHeartbeatRequest, AgentPollResponse, + AgentRegisterRequest, Error, Result, sign_agent_command, +}; + +use super::model::{ + ControlCenterCommandCreate, ControlCenterCommandRecord, ControlCenterCommandState, + ControlCenterEvent, ControlCenterEventCreate, ControlCenterNode, ControlCenterNodeFilter, + ControlCenterNodeHealth, HeartbeatRecord, NodeSession, +}; +use super::rollout::ControlCenterRollout; + +const EVENT_LIMIT: usize = 1024; +const HEARTBEAT_LIMIT_PER_NODE: usize = 128; +const DEFAULT_SIGNED_COMMAND_TTL_MS: u64 = 5 * 60 * 1000; + +#[derive(Clone, Default)] +pub struct ControlCenterStore { + pub(super) state: Arc>, + command_signing_key: Option>, +} + +#[derive(Default)] +pub(super) struct ControlCenterState { + pub(super) nodes: BTreeMap, + pub(super) node_sessions: BTreeMap, + pub(super) commands: BTreeMap, + pub(super) node_command_order: BTreeMap>, + pub(super) desired_revisions: BTreeMap, + pub(super) heartbeats: BTreeMap>, + pub(super) 
events: VecDeque, + pub(super) rollouts: BTreeMap, +} + +impl ControlCenterStore { + pub fn new() -> Self { + Self::default() + } + + pub fn with_command_signing_key(mut self, key: impl Into) -> Self { + let key = key.into().trim().to_string(); + if !key.is_empty() { + self.command_signing_key = Some(Arc::from(key)); + } + self + } + + pub async fn register(&self, request: AgentRegisterRequest) -> Result { + let now = unix_ms(); + let mut state = self.state.write().await; + let previous = state.nodes.get(&request.node_id).cloned(); + let desired = previous.as_ref().map(|node| node.desired_revision).unwrap_or(0); + let node = ControlCenterNode { + node_id: request.node_id.clone(), + version: request.version, + region: request.region, + pop: request.pop, + labels: request.labels, + capabilities: request.capabilities, + health: ControlCenterNodeHealth::Healthy, + registered_at_unix_ms: previous.as_ref().map_or(now, |node| node.registered_at_unix_ms), + last_heartbeat_at_unix_ms: now, + snapshot_version: previous.as_ref().map_or(0, |node| node.snapshot_version), + current_revision: previous.as_ref().map_or(0, |node| node.current_revision), + desired_revision: desired, + converged: previous.as_ref().is_none_or(|node| node.converged), + }; + let session = NodeSession { + session_id: Uuid::now_v7().to_string(), + node_id: node.node_id.clone(), + started_at_unix_ms: now, + last_seen_at_unix_ms: now, + }; + state.node_sessions.insert(session.session_id.clone(), session); + state.nodes.insert(node.node_id.clone(), node.clone()); + Ok(node) + } + + pub async fn heartbeat(&self, request: AgentHeartbeatRequest) -> Result { + let now = unix_ms(); + let mut state = self.state.write().await; + ensure_node(&state, &request.node_id)?; + let desired = *state + .desired_revisions + .entry(request.node_id.clone()) + .or_insert(request.desired_revision); + let updated_node = { + let node = state.nodes.get_mut(&request.node_id).expect("node was checked"); + node.last_heartbeat_at_unix_ms 
= now; + node.snapshot_version = request.snapshot_version; + node.current_revision = request.current_revision; + node.desired_revision = desired; + node.converged = request.converged + && request.desired_revision == desired + && request.current_revision == desired; + node.health = ControlCenterNodeHealth::Healthy; + node.clone() + }; + + let record = HeartbeatRecord { + node_id: request.node_id.clone(), + recorded_at_unix_ms: now, + snapshot_version: request.snapshot_version, + current_revision: request.current_revision, + desired_revision: desired, + converged: updated_node.converged, + }; + let records = state.heartbeats.entry(request.node_id.clone()).or_default(); + records.push_back(record); + while records.len() > HEARTBEAT_LIMIT_PER_NODE { + records.pop_front(); + } + for session in + state.node_sessions.values_mut().filter(|item| item.node_id == request.node_id) + { + session.last_seen_at_unix_ms = now; + } + Ok(updated_node) + } + + pub async fn create_command( + &self, + node_id: &str, + request: ControlCenterCommandCreate, + ) -> Result { + let now = unix_ms(); + let mut state = self.state.write().await; + ensure_node(&state, node_id)?; + let mut command = AgentCommand { + id: Uuid::now_v7().to_string(), + command_type: request.command_type, + target_node_id: node_id.to_string(), + revision: request.revision, + expires_at_unix_ms: request.expires_at_unix_ms, + payload: request.payload, + signature: request.signature, + }; + if self.command_signing_key.is_some() && command.expires_at_unix_ms.is_none() { + command.expires_at_unix_ms = Some(now.saturating_add(DEFAULT_SIGNED_COMMAND_TTL_MS)); + } + if let Some(key) = self.command_signing_key.as_deref() { + command.signature = Some(sign_agent_command(key, &command)?); + } else if command.signature.is_some() { + return Err(Error::InvalidRequest( + "command signature requires a configured control-center signing key".to_string(), + )); + } + Self::finish_command_create(node_id, now, command, &mut state) + } + + 
fn finish_command_create( + node_id: &str, + now: u64, + command: AgentCommand, + state: &mut ControlCenterState, + ) -> Result { + if let Some(revision) = command.revision { + state.desired_revisions.insert(node_id.to_string(), revision); + if let Some(node) = state.nodes.get_mut(node_id) { + node.desired_revision = revision; + node.converged = node.current_revision == revision && node.converged; + } + } + let record = ControlCenterCommandRecord { + command: command.clone(), + state: ControlCenterCommandState::Queued, + created_at_unix_ms: now, + delivered_at_unix_ms: None, + completed_at_unix_ms: None, + result: None, + }; + state.node_command_order.entry(node_id.to_string()).or_default().push(command.id.clone()); + state.commands.insert(command.id.clone(), record.clone()); + Ok(record) + } + + pub async fn poll_commands( + &self, + node_id: &str, + cursor: Option, + ) -> Result { + let now = unix_ms(); + let mut state = self.state.write().await; + ensure_node(&state, node_id)?; + let order = state.node_command_order.get(node_id).cloned().unwrap_or_default(); + let start = cursor + .as_ref() + .and_then(|id| order.iter().position(|candidate| candidate == id)) + .map_or(0, |index| index + 1); + let mut commands = Vec::new(); + for command_id in order.iter().skip(start) { + let Some(record) = state.commands.get_mut(command_id) else { + continue; + }; + match record.state { + ControlCenterCommandState::Queued => { + if record.command.expires_at_unix_ms.is_some_and(|expires| expires <= now) { + record.state = ControlCenterCommandState::Expired; + continue; + } + record.state = ControlCenterCommandState::Delivered; + record.delivered_at_unix_ms = Some(now); + commands.push(record.command.clone()); + } + ControlCenterCommandState::Delivered => { + // Replay delivered commands until the agent accepts the result. 
+ if record.delivered_at_unix_ms.is_none() { + record.delivered_at_unix_ms = Some(now); + } + commands.push(record.command.clone()); + } + ControlCenterCommandState::Succeeded + | ControlCenterCommandState::Failed + | ControlCenterCommandState::Expired => continue, + } + } + let next_cursor = commands.last().map(|command| command.id.clone()); + Ok(AgentPollResponse { commands, next_cursor }) + } + + pub async fn post_result( + &self, + result: AgentCommandResult, + ) -> Result { + let mut state = self.state.write().await; + let record = state + .commands + .get_mut(&result.command_id) + .ok_or_else(|| Error::NotFound(format!("command `{}` not found", result.command_id)))?; + if record.command.target_node_id != result.node_id { + return Err(Error::InvalidRequest(format!( + "command `{}` targets `{}` but result came from `{}`", + result.command_id, record.command.target_node_id, result.node_id + ))); + } + record.state = result.status.into(); + record.completed_at_unix_ms = Some(result.finished_at_unix_ms); + record.result = Some(result); + Ok(record.clone()) + } + + pub async fn record_event( + &self, + node_id: &str, + request: ControlCenterEventCreate, + ) -> Result { + let mut state = self.state.write().await; + ensure_node(&state, node_id)?; + let event = ControlCenterEvent { + id: Uuid::now_v7().to_string(), + node_id: node_id.to_string(), + event_type: request.event_type, + payload: request.payload, + created_at_unix_ms: unix_ms(), + }; + state.events.push_back(event.clone()); + while state.events.len() > EVENT_LIMIT { + state.events.pop_front(); + } + Ok(event) + } + + pub async fn list_nodes(&self, filter: ControlCenterNodeFilter) -> Vec { + let state = self.state.read().await; + state.nodes.values().filter(|node| node.matches(&filter)).cloned().collect() + } + + pub async fn get_node(&self, node_id: &str) -> Option { + self.state.read().await.nodes.get(node_id).cloned() + } +} + +pub(super) fn ensure_node(state: &ControlCenterState, node_id: &str) -> 
Result<()> { + if state.nodes.contains_key(node_id) { + return Ok(()); + } + Err(Error::NotFound(format!("node `{node_id}` not registered"))) +} + +pub(super) fn unix_ms() -> u64 { + SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_millis() as u64 +} diff --git a/crates/rginx-agent/src/control_center/trait_adapter.rs b/crates/rginx-agent/src/control_center/trait_adapter.rs new file mode 100644 index 00000000..4eb6223d --- /dev/null +++ b/crates/rginx-agent/src/control_center/trait_adapter.rs @@ -0,0 +1,46 @@ +use std::future::Future; +use std::pin::Pin; +use std::time::Duration; + +use crate::{ + AgentCommandResult, AgentHeartbeatRequest, AgentPollResponse, AgentRegisterRequest, + OutboundControlPlaneClient, Result, +}; + +use super::store::ControlCenterStore; + +impl OutboundControlPlaneClient for ControlCenterStore { + fn register( + &self, + request: AgentRegisterRequest, + ) -> Pin> + Send + 'static>> { + let store = self.clone(); + Box::pin(async move { ControlCenterStore::register(&store, request).await.map(|_| ()) }) + } + + fn heartbeat( + &self, + request: AgentHeartbeatRequest, + ) -> Pin> + Send + 'static>> { + let store = self.clone(); + Box::pin(async move { ControlCenterStore::heartbeat(&store, request).await.map(|_| ()) }) + } + + fn poll_commands( + &self, + node_id: String, + cursor: Option, + _timeout: Duration, + ) -> Pin> + Send + 'static>> { + let store = self.clone(); + Box::pin(async move { ControlCenterStore::poll_commands(&store, &node_id, cursor).await }) + } + + fn post_result( + &self, + result: AgentCommandResult, + ) -> Pin> + Send + 'static>> { + let store = self.clone(); + Box::pin(async move { ControlCenterStore::post_result(&store, result).await.map(|_| ()) }) + } +} diff --git a/crates/rginx-agent/src/events.rs b/crates/rginx-agent/src/events.rs index 986687ba..58eb7b4e 100644 --- a/crates/rginx-agent/src/events.rs +++ b/crates/rginx-agent/src/events.rs @@ -196,65 +196,4 @@ impl EventBus { } #[cfg(test)] -mod 
tests { - use super::*; - - #[test] - fn test_event_type() { - let event = ControlPlaneEvent::ReloadCompleted { - node_id: "test-node".to_string(), - revision: 1, - success: true, - duration_ms: 100, - timestamp: 1000, - }; - assert_eq!(event.event_type(), "reload_completed"); - } - - #[test] - fn test_event_filter_matches() { - let filter = EventFilter { - event_types: vec!["reload_completed".to_string()], - node_ids: vec!["test-node".to_string()], - regions: vec![], - }; - - let event = ControlPlaneEvent::ReloadCompleted { - node_id: "test-node".to_string(), - revision: 1, - success: true, - duration_ms: 100, - timestamp: 1000, - }; - - assert!(filter.matches(&event)); - - let event2 = ControlPlaneEvent::ReloadCompleted { - node_id: "other-node".to_string(), - revision: 1, - success: true, - duration_ms: 100, - timestamp: 1000, - }; - - assert!(!filter.matches(&event2)); - } - - #[tokio::test] - async fn test_event_bus_publish() { - let bus = EventBus::new(100); - let mut rx = bus.subscribe_channel(); - - let event = ControlPlaneEvent::NodeStatusChanged { - node_id: "test-node".to_string(), - old_status: NodeStatus::Healthy, - new_status: NodeStatus::Offline, - timestamp: 1000, - }; - - bus.publish(event.clone()).await; - - let received = rx.recv().await.unwrap(); - assert_eq!(received.event_type(), event.event_type()); - } -} +mod tests; diff --git a/crates/rginx-agent/src/events/tests.rs b/crates/rginx-agent/src/events/tests.rs new file mode 100644 index 00000000..c3b7b050 --- /dev/null +++ b/crates/rginx-agent/src/events/tests.rs @@ -0,0 +1,60 @@ +use super::*; + +#[test] +fn test_event_type() { + let event = ControlPlaneEvent::ReloadCompleted { + node_id: "test-node".to_string(), + revision: 1, + success: true, + duration_ms: 100, + timestamp: 1000, + }; + assert_eq!(event.event_type(), "reload_completed"); +} + +#[test] +fn test_event_filter_matches() { + let filter = EventFilter { + event_types: vec!["reload_completed".to_string()], + node_ids: 
vec!["test-node".to_string()], + regions: vec![], + }; + + let event = ControlPlaneEvent::ReloadCompleted { + node_id: "test-node".to_string(), + revision: 1, + success: true, + duration_ms: 100, + timestamp: 1000, + }; + + assert!(filter.matches(&event)); + + let event2 = ControlPlaneEvent::ReloadCompleted { + node_id: "other-node".to_string(), + revision: 1, + success: true, + duration_ms: 100, + timestamp: 1000, + }; + + assert!(!filter.matches(&event2)); +} + +#[tokio::test] +async fn test_event_bus_publish() { + let bus = EventBus::new(100); + let mut rx = bus.subscribe_channel(); + + let event = ControlPlaneEvent::NodeStatusChanged { + node_id: "test-node".to_string(), + old_status: NodeStatus::Healthy, + new_status: NodeStatus::Offline, + timestamp: 1000, + }; + + bus.publish(event.clone()).await; + + let received = rx.recv().await.unwrap(); + assert_eq!(received.event_type(), event.event_type()); +} diff --git a/crates/rginx-agent/src/gradual_rollout.rs b/crates/rginx-agent/src/gradual_rollout.rs index f5a0ae76..ee8ce31c 100644 --- a/crates/rginx-agent/src/gradual_rollout.rs +++ b/crates/rginx-agent/src/gradual_rollout.rs @@ -3,6 +3,8 @@ use std::collections::HashMap; use std::sync::Arc; use tokio::sync::RwLock; +mod status; + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum RolloutStrategy { Canary, @@ -257,73 +259,6 @@ impl GradualRolloutManager { let node_states = self.node_states.read().await; node_states.get(node_id).cloned() } - - pub async fn get_rollout_status(&self, rollout_id: &str) -> Option { - let rollouts = self.rollouts.read().await; - let rollout = rollouts.get(rollout_id)?; - - let node_states = self.node_states.read().await; - let rollout_nodes: Vec<_> = - node_states.values().filter(|s| s.rollout_id == rollout_id).collect(); - - let nodes_updated = rollout_nodes.len() as u32; - let nodes_total = rollout.stages.iter().map(|s| s.target_nodes.len() as u32).sum(); - - let healthy_nodes = - rollout_nodes.iter().filter(|s| 
s.health_status == HealthStatus::Healthy).count(); - - let success_rate = - if nodes_updated > 0 { healthy_nodes as f64 / nodes_updated as f64 } else { 0.0 }; - - let started_at = rollout.stages.first().and_then(|s| s.started_at); - - let completed_at = if rollout.phase == RolloutPhase::Completed { - rollout.stages.last().and_then(|s| s.completed_at) - } else { - None - }; - - Some(RolloutStatus { - rollout_id: rollout_id.to_string(), - phase: rollout.phase.clone(), - current_stage: rollout.current_stage, - total_stages: rollout.stages.len() as u32, - nodes_updated, - nodes_total, - success_rate, - started_at, - completed_at, - error_message: None, - }) - } - - pub async fn check_stage_health(&self, rollout_id: &str) -> Result { - let rollouts = self.rollouts.read().await; - let rollout = - rollouts.get(rollout_id).ok_or_else(|| format!("Rollout {} not found", rollout_id))?; - - let current_stage_idx = rollout.current_stage as usize; - let current_stage = rollout - .stages - .get(current_stage_idx) - .ok_or_else(|| "Invalid current stage".to_string())?; - - let node_states = self.node_states.read().await; - let stage_nodes: Vec<_> = node_states - .values() - .filter(|s| s.rollout_id == rollout_id && s.stage_id == current_stage.stage_id) - .collect(); - - if stage_nodes.is_empty() { - return Ok(true); - } - - let healthy_count = - stage_nodes.iter().filter(|s| s.health_status == HealthStatus::Healthy).count(); - - let success_rate = healthy_count as f64 / stage_nodes.len() as f64; - Ok(success_rate >= current_stage.success_threshold) - } } impl Default for GradualRolloutManager { @@ -337,154 +272,4 @@ fn current_timestamp() -> u64 { } #[cfg(test)] -mod tests { - use super::*; - - fn create_test_plan() -> RolloutPlan { - RolloutPlan { - rollout_id: "test-rollout-1".to_string(), - strategy: RolloutStrategy::Canary, - config_revision: 100, - stages: vec![ - RolloutStage { - stage_id: 1, - target_percentage: 10, - target_nodes: vec!["node1".to_string()], - 
duration_secs: 300, - health_check_interval_secs: 30, - success_threshold: 0.95, - started_at: None, - completed_at: None, - status: RolloutPhase::Pending, - }, - RolloutStage { - stage_id: 2, - target_percentage: 90, - target_nodes: vec!["node2".to_string(), "node3".to_string()], - duration_secs: 600, - health_check_interval_secs: 60, - success_threshold: 0.95, - started_at: None, - completed_at: None, - status: RolloutPhase::Pending, - }, - ], - auto_rollback_on_failure: true, - created_at: current_timestamp(), - created_by: "admin".to_string(), - current_stage: 0, - phase: RolloutPhase::Pending, - } - } - - #[tokio::test] - async fn test_create_rollout() { - let manager = GradualRolloutManager::new(); - let plan = create_test_plan(); - let result = manager.create_rollout(plan).await; - assert!(result.is_ok()); - } - - #[tokio::test] - async fn test_invalid_percentage() { - let manager = GradualRolloutManager::new(); - let mut plan = create_test_plan(); - plan.stages[0].target_percentage = 50; - let result = manager.create_rollout(plan).await; - assert!(result.is_err()); - } - - #[tokio::test] - async fn test_start_rollout() { - let manager = GradualRolloutManager::new(); - let plan = create_test_plan(); - let rollout_id = plan.rollout_id.clone(); - manager.create_rollout(plan).await.unwrap(); - - let result = manager.start_rollout(&rollout_id).await; - assert!(result.is_ok()); - - let rollout = manager.get_rollout(&rollout_id).await.unwrap(); - assert_eq!(rollout.phase, RolloutPhase::InProgress); - assert_eq!(rollout.current_stage, 0); - } - - #[tokio::test] - async fn test_advance_stage() { - let manager = GradualRolloutManager::new(); - let plan = create_test_plan(); - let rollout_id = plan.rollout_id.clone(); - manager.create_rollout(plan).await.unwrap(); - manager.start_rollout(&rollout_id).await.unwrap(); - - let result = manager.advance_stage(&rollout_id).await; - assert!(result.is_ok()); - - let rollout = manager.get_rollout(&rollout_id).await.unwrap(); - 
assert_eq!(rollout.current_stage, 1); - } - - #[tokio::test] - async fn test_pause_resume() { - let manager = GradualRolloutManager::new(); - let plan = create_test_plan(); - let rollout_id = plan.rollout_id.clone(); - manager.create_rollout(plan).await.unwrap(); - manager.start_rollout(&rollout_id).await.unwrap(); - - manager.pause_rollout(&rollout_id).await.unwrap(); - let rollout = manager.get_rollout(&rollout_id).await.unwrap(); - assert_eq!(rollout.phase, RolloutPhase::Paused); - - manager.resume_rollout(&rollout_id).await.unwrap(); - let rollout = manager.get_rollout(&rollout_id).await.unwrap(); - assert_eq!(rollout.phase, RolloutPhase::InProgress); - } - - #[tokio::test] - async fn test_rollback() { - let manager = GradualRolloutManager::new(); - let plan = create_test_plan(); - let rollout_id = plan.rollout_id.clone(); - manager.create_rollout(plan).await.unwrap(); - manager.start_rollout(&rollout_id).await.unwrap(); - - let result = manager.rollback(&rollout_id, "test failure").await; - assert!(result.is_ok()); - - let rollout = manager.get_rollout(&rollout_id).await.unwrap(); - assert_eq!(rollout.phase, RolloutPhase::RolledBack); - } - - #[tokio::test] - async fn test_node_state() { - let manager = GradualRolloutManager::new(); - let state = NodeRolloutState { - node_id: "node1".to_string(), - rollout_id: "rollout1".to_string(), - stage_id: 1, - config_revision: 100, - applied_at: current_timestamp(), - health_status: HealthStatus::Healthy, - error_count: 0, - }; - - manager.update_node_state(state.clone()).await.unwrap(); - let retrieved = manager.get_node_state("node1").await.unwrap(); - assert_eq!(retrieved.node_id, "node1"); - assert_eq!(retrieved.health_status, HealthStatus::Healthy); - } - - #[tokio::test] - async fn test_rollout_status() { - let manager = GradualRolloutManager::new(); - let plan = create_test_plan(); - let rollout_id = plan.rollout_id.clone(); - manager.create_rollout(plan).await.unwrap(); - 
manager.start_rollout(&rollout_id).await.unwrap(); - - let status = manager.get_rollout_status(&rollout_id).await.unwrap(); - assert_eq!(status.phase, RolloutPhase::InProgress); - assert_eq!(status.current_stage, 0); - } -} +mod tests; diff --git a/crates/rginx-agent/src/gradual_rollout/status.rs b/crates/rginx-agent/src/gradual_rollout/status.rs new file mode 100644 index 00000000..037207d4 --- /dev/null +++ b/crates/rginx-agent/src/gradual_rollout/status.rs @@ -0,0 +1,67 @@ +use super::*; + +impl GradualRolloutManager { + pub async fn get_rollout_status(&self, rollout_id: &str) -> Option { + let rollouts = self.rollouts.read().await; + let rollout = rollouts.get(rollout_id)?; + + let node_states = self.node_states.read().await; + let rollout_nodes = + node_states.values().filter(|state| state.rollout_id == rollout_id).collect::>(); + + let nodes_updated = rollout_nodes.len() as u32; + let nodes_total = rollout.stages.iter().map(|stage| stage.target_nodes.len() as u32).sum(); + let healthy_nodes = rollout_nodes + .iter() + .filter(|state| state.health_status == HealthStatus::Healthy) + .count(); + let success_rate = + if nodes_updated > 0 { healthy_nodes as f64 / nodes_updated as f64 } else { 0.0 }; + let started_at = rollout.stages.first().and_then(|stage| stage.started_at); + let completed_at = (rollout.phase == RolloutPhase::Completed) + .then(|| rollout.stages.last().and_then(|stage| stage.completed_at)) + .flatten(); + + Some(RolloutStatus { + rollout_id: rollout_id.to_string(), + phase: rollout.phase.clone(), + current_stage: rollout.current_stage, + total_stages: rollout.stages.len() as u32, + nodes_updated, + nodes_total, + success_rate, + started_at, + completed_at, + error_message: None, + }) + } + + pub async fn check_stage_health(&self, rollout_id: &str) -> Result { + let rollouts = self.rollouts.read().await; + let rollout = + rollouts.get(rollout_id).ok_or_else(|| format!("Rollout {rollout_id} not found"))?; + + let current_stage_idx = 
rollout.current_stage as usize; + let current_stage = rollout + .stages + .get(current_stage_idx) + .ok_or_else(|| "Invalid current stage".to_string())?; + + let node_states = self.node_states.read().await; + let stage_nodes = node_states + .values() + .filter(|state| { + state.rollout_id == rollout_id && state.stage_id == current_stage.stage_id + }) + .collect::>(); + + if stage_nodes.is_empty() { + return Ok(true); + } + + let healthy_count = + stage_nodes.iter().filter(|state| state.health_status == HealthStatus::Healthy).count(); + let success_rate = healthy_count as f64 / stage_nodes.len() as f64; + Ok(success_rate >= current_stage.success_threshold) + } +} diff --git a/crates/rginx-agent/src/gradual_rollout/tests.rs b/crates/rginx-agent/src/gradual_rollout/tests.rs new file mode 100644 index 00000000..1b4ab3c6 --- /dev/null +++ b/crates/rginx-agent/src/gradual_rollout/tests.rs @@ -0,0 +1,149 @@ +use super::*; + +fn create_test_plan() -> RolloutPlan { + RolloutPlan { + rollout_id: "test-rollout-1".to_string(), + strategy: RolloutStrategy::Canary, + config_revision: 100, + stages: vec![ + RolloutStage { + stage_id: 1, + target_percentage: 10, + target_nodes: vec!["node1".to_string()], + duration_secs: 300, + health_check_interval_secs: 30, + success_threshold: 0.95, + started_at: None, + completed_at: None, + status: RolloutPhase::Pending, + }, + RolloutStage { + stage_id: 2, + target_percentage: 90, + target_nodes: vec!["node2".to_string(), "node3".to_string()], + duration_secs: 600, + health_check_interval_secs: 60, + success_threshold: 0.95, + started_at: None, + completed_at: None, + status: RolloutPhase::Pending, + }, + ], + auto_rollback_on_failure: true, + created_at: current_timestamp(), + created_by: "admin".to_string(), + current_stage: 0, + phase: RolloutPhase::Pending, + } +} + +#[tokio::test] +async fn test_create_rollout() { + let manager = GradualRolloutManager::new(); + let plan = create_test_plan(); + let result = 
manager.create_rollout(plan).await; + assert!(result.is_ok()); +} + +#[tokio::test] +async fn test_invalid_percentage() { + let manager = GradualRolloutManager::new(); + let mut plan = create_test_plan(); + plan.stages[0].target_percentage = 50; + let result = manager.create_rollout(plan).await; + assert!(result.is_err()); +} + +#[tokio::test] +async fn test_start_rollout() { + let manager = GradualRolloutManager::new(); + let plan = create_test_plan(); + let rollout_id = plan.rollout_id.clone(); + manager.create_rollout(plan).await.unwrap(); + + let result = manager.start_rollout(&rollout_id).await; + assert!(result.is_ok()); + + let rollout = manager.get_rollout(&rollout_id).await.unwrap(); + assert_eq!(rollout.phase, RolloutPhase::InProgress); + assert_eq!(rollout.current_stage, 0); +} + +#[tokio::test] +async fn test_advance_stage() { + let manager = GradualRolloutManager::new(); + let plan = create_test_plan(); + let rollout_id = plan.rollout_id.clone(); + manager.create_rollout(plan).await.unwrap(); + manager.start_rollout(&rollout_id).await.unwrap(); + + let result = manager.advance_stage(&rollout_id).await; + assert!(result.is_ok()); + + let rollout = manager.get_rollout(&rollout_id).await.unwrap(); + assert_eq!(rollout.current_stage, 1); +} + +#[tokio::test] +async fn test_pause_resume() { + let manager = GradualRolloutManager::new(); + let plan = create_test_plan(); + let rollout_id = plan.rollout_id.clone(); + manager.create_rollout(plan).await.unwrap(); + manager.start_rollout(&rollout_id).await.unwrap(); + + manager.pause_rollout(&rollout_id).await.unwrap(); + let rollout = manager.get_rollout(&rollout_id).await.unwrap(); + assert_eq!(rollout.phase, RolloutPhase::Paused); + + manager.resume_rollout(&rollout_id).await.unwrap(); + let rollout = manager.get_rollout(&rollout_id).await.unwrap(); + assert_eq!(rollout.phase, RolloutPhase::InProgress); +} + +#[tokio::test] +async fn test_rollback() { + let manager = GradualRolloutManager::new(); + let plan = 
create_test_plan(); + let rollout_id = plan.rollout_id.clone(); + manager.create_rollout(plan).await.unwrap(); + manager.start_rollout(&rollout_id).await.unwrap(); + + let result = manager.rollback(&rollout_id, "test failure").await; + assert!(result.is_ok()); + + let rollout = manager.get_rollout(&rollout_id).await.unwrap(); + assert_eq!(rollout.phase, RolloutPhase::RolledBack); +} + +#[tokio::test] +async fn test_node_state() { + let manager = GradualRolloutManager::new(); + let state = NodeRolloutState { + node_id: "node1".to_string(), + rollout_id: "rollout1".to_string(), + stage_id: 1, + config_revision: 100, + applied_at: current_timestamp(), + health_status: HealthStatus::Healthy, + error_count: 0, + }; + + manager.update_node_state(state.clone()).await.unwrap(); + let retrieved = manager.get_node_state("node1").await.unwrap(); + assert_eq!(retrieved.node_id, "node1"); + assert_eq!(retrieved.health_status, HealthStatus::Healthy); +} + +#[tokio::test] +async fn test_rollout_status() { + let manager = GradualRolloutManager::new(); + let plan = create_test_plan(); + let rollout_id = plan.rollout_id.clone(); + manager.create_rollout(plan).await.unwrap(); + manager.start_rollout(&rollout_id).await.unwrap(); + + let status = manager.get_rollout_status(&rollout_id).await.unwrap(); + assert_eq!(status.phase, RolloutPhase::InProgress); + assert_eq!(status.current_stage, 0); +} diff --git a/crates/rginx-agent/src/lib.rs b/crates/rginx-agent/src/lib.rs index adad41a5..92e8a2c5 100644 --- a/crates/rginx-agent/src/lib.rs +++ b/crates/rginx-agent/src/lib.rs @@ -1,14 +1,24 @@ +//! rginx control-plane primitives. +//! +//! The current HTTP server adapter is the legacy node-side control-plane model: +//! it listens on a node-local control port and accepts inbound management +//! requests. New control-plane communication should target the outbound agent +//! model described in `docs/AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md`. 
+ +pub mod agent_core; pub mod api; mod audit; pub mod auth; pub mod circuit_breaker; pub mod config_history; pub mod config_validator; +pub mod control_center; pub mod error; pub mod events; pub mod gradual_rollout; pub mod metrics; pub mod model; +pub mod outbound; pub mod rate_limit; pub mod registry; mod server; @@ -16,6 +26,10 @@ mod system; mod tls; mod websocket; +pub use agent_core::{ + AgentCore, CacheClearInvalidationsCommand, CacheInvalidateCommand, CacheInvalidateTarget, + CachePurgeCommand, CachePurgeTarget, +}; pub use api::CONTROL_PLANE_API_VERSION; pub use auth::{ActionScope, ApiKeyStatus, AuthDecision, AuthMethod, AuthorizationRequirement}; pub use circuit_breaker::{ @@ -28,6 +42,12 @@ pub use config_history::{ pub use config_validator::{ ConfigValidator, ImpactAssessment, IssueSeverity, ValidationIssue, ValidationResult, }; +pub use control_center::{ + ControlCenterCommandCreate, ControlCenterCommandRecord, ControlCenterCommandState, + ControlCenterEvent, ControlCenterEventCreate, ControlCenterNode, ControlCenterNodeFilter, + ControlCenterNodeHealth, ControlCenterRollout, ControlCenterRolloutCreate, ControlCenterStore, + HeartbeatRecord, NodeSession, RolloutTarget, RolloutTargetSelector, +}; pub use error::{Error, Result}; pub use events::{ControlPlaneEvent, EventBus, EventFilter}; pub use gradual_rollout::{ @@ -35,6 +55,15 @@ pub use gradual_rollout::{ RolloutStatus, RolloutStrategy, }; pub use model::{ControlPlaneResource, NodeControlAction, NodeObservabilityView}; +pub use outbound::{ + AgentCommand, AgentCommandExecutionState, AgentCommandResult, AgentCommandStatus, + AgentCommandType, AgentConnectionState, AgentHeartbeatRequest, AgentInFlightCommand, + AgentPersistentState, AgentPollResponse, AgentRegisterRequest, AgentStateStore, + AgentStreamCommandBatch, AgentStreamHello, AgentStreamMessage, AuthenticatedRequestHeaders, + HttpOutboundControlPlaneClient, OutboundAgent, OutboundAgentCycleOutcome, OutboundAuthVerifier, + 
OutboundControlPlaneClient, OutboundRequestSigner, OutboundStreamClient, + WebSocketOutboundStreamClient, sign_agent_command, verify_agent_command_signature, +}; pub use rate_limit::{RateLimit, RateLimitConfig, RateLimiter}; pub use registry::{NodeFilter, NodeHealth, NodeInfo, NodeRegistration, NodeRegistry, NodeStatus}; pub use server::control::{ diff --git a/crates/rginx-agent/src/metrics.rs b/crates/rginx-agent/src/metrics.rs index 86d90055..451487ab 100644 --- a/crates/rginx-agent/src/metrics.rs +++ b/crates/rginx-agent/src/metrics.rs @@ -141,66 +141,4 @@ pub fn record_config_rollback(success: bool) { } #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_record_request() { - record_request("GET", 200, Some("node1")); - record_request("POST", 201, None); - } - - #[test] - fn test_record_request_duration() { - record_request_duration("GET", 200, 0.123); - record_request_duration("POST", 500, 1.456); - } - - #[test] - fn test_websocket_connections() { - increment_websocket_connections(); - decrement_websocket_connections(); - } - - #[test] - fn test_registered_nodes() { - set_registered_nodes(10.0); - set_registered_nodes(5.0); - } - - #[test] - fn test_config_operations() { - record_config_push("node1", true); - record_config_push("node2", false); - record_config_validation(true); - record_config_validation(false); - record_config_rollback(true); - } - - #[test] - fn test_auth_and_rate_limit() { - record_auth_failure("invalid_token"); - record_auth_failure("expired_token"); - record_rate_limit_hit("/api/config"); - } - - #[test] - fn test_events() { - record_event_published("NodeRegistered"); - record_event_published("ConfigApplied"); - } - - #[test] - fn test_metrics_collector() { - // Record some metrics first - record_request("GET", 200, Some("test-node")); - record_config_validation(true); - record_event_published("TestEvent"); - - let collector = MetricsCollector::new(); - let output = collector.gather(); - - // Should contain at least the 
metrics we just recorded - assert!(output.contains("rginx_control_plane_requests_total")); - } -} +mod tests; diff --git a/crates/rginx-agent/src/metrics/tests.rs b/crates/rginx-agent/src/metrics/tests.rs new file mode 100644 index 00000000..6900b90d --- /dev/null +++ b/crates/rginx-agent/src/metrics/tests.rs @@ -0,0 +1,61 @@ +use super::*; + +#[test] +fn test_record_request() { + record_request("GET", 200, Some("node1")); + record_request("POST", 201, None); +} + +#[test] +fn test_record_request_duration() { + record_request_duration("GET", 200, 0.123); + record_request_duration("POST", 500, 1.456); +} + +#[test] +fn test_websocket_connections() { + increment_websocket_connections(); + decrement_websocket_connections(); +} + +#[test] +fn test_registered_nodes() { + set_registered_nodes(10.0); + set_registered_nodes(5.0); +} + +#[test] +fn test_config_operations() { + record_config_push("node1", true); + record_config_push("node2", false); + record_config_validation(true); + record_config_validation(false); + record_config_rollback(true); +} + +#[test] +fn test_auth_and_rate_limit() { + record_auth_failure("invalid_token"); + record_auth_failure("expired_token"); + record_rate_limit_hit("/api/config"); +} + +#[test] +fn test_events() { + record_event_published("NodeRegistered"); + record_event_published("ConfigApplied"); +} + +#[test] +fn test_metrics_collector() { + // Record some metrics first + record_request("GET", 200, Some("test-node")); + record_config_validation(true); + record_event_published("TestEvent"); + + let collector = MetricsCollector::new(); + let output = collector.gather(); + + // Should contain at least the metrics we just recorded + assert!(output.contains("rginx_control_plane_requests_total")); +} diff --git a/crates/rginx-agent/src/outbound/auth.rs b/crates/rginx-agent/src/outbound/auth.rs new file mode 100644 index 00000000..8f903903 --- /dev/null +++ b/crates/rginx-agent/src/outbound/auth.rs @@ -0,0 +1,200 @@ +use 
std::collections::HashMap; +use std::time::Duration; + +use hmac::{Hmac, KeyInit, Mac}; +use http::Method; +use sha2::{Digest, Sha256}; +use uuid::Uuid; + +use crate::error::{Error, Result}; + +use super::command::unix_ms; +use super::model::AgentCommand; + +pub const TIMESTAMP_HEADER: &str = "x-rginx-timestamp"; +pub const NONCE_HEADER: &str = "x-rginx-nonce"; +pub const BODY_SHA256_HEADER: &str = "x-rginx-body-sha256"; +pub const SIGNATURE_HEADER: &str = "x-rginx-signature"; + +#[derive(Clone, PartialEq, Eq)] +pub struct AuthenticatedRequestHeaders { + pub authorization: String, + pub timestamp: String, + pub nonce: String, + pub body_sha256: String, + pub signature: String, +} + +#[derive(Clone)] +pub struct OutboundRequestSigner { + token: String, +} + +impl OutboundRequestSigner { + pub fn new(token: impl Into<String>) -> Result<Self> { + let token = token.into().trim().to_string(); + if token.is_empty() { + return Err(Error::Unauthorized("outbound agent token is missing".to_string())); + } + Ok(Self { token }) + } + + pub fn sign( + &self, + method: &Method, + path_and_query: &str, + body: &[u8], + ) -> Result<AuthenticatedRequestHeaders> { + self.sign_with_nonce(method.as_str(), path_and_query, body, unix_ms(), Uuid::now_v7()) + } + + pub(crate) fn sign_with_nonce( + &self, + method: &str, + path_and_query: &str, + body: &[u8], + timestamp: u64, + nonce: Uuid, + ) -> Result<AuthenticatedRequestHeaders> { + let body_sha256 = sha256_hex(body); + let nonce = nonce.to_string(); + let timestamp = timestamp.to_string(); + let material = + request_signing_material(method, path_and_query, &timestamp, &nonce, &body_sha256); + Ok(AuthenticatedRequestHeaders { + authorization: format!("Bearer {}", self.token), + timestamp, + nonce, + body_sha256, + signature: hmac_sha256_hex(self.token.as_bytes(), material.as_bytes())?, + }) + } + + pub fn sign_command(&self, command: &AgentCommand) -> Result<String> { + sign_agent_command(&self.token, command) + } +} + +pub struct OutboundAuthVerifier { + token: String, + max_clock_skew: Duration, + seen_nonces: 
HashMap<String, u64>, +} + +impl OutboundAuthVerifier { + pub fn new(token: impl Into<String>, max_clock_skew: Duration) -> Result<Self> { + let token = token.into().trim().to_string(); + if token.is_empty() { + return Err(Error::Unauthorized("outbound agent token is missing".to_string())); + } + Ok(Self { token, max_clock_skew, seen_nonces: HashMap::new() }) + } + + pub fn verify( + &mut self, + method: &str, + path_and_query: &str, + body: &[u8], + headers: &AuthenticatedRequestHeaders, + now_unix_ms: u64, + ) -> Result<()> { + if headers.authorization != format!("Bearer {}", self.token) { + return Err(Error::Unauthorized("outbound agent token was rejected".to_string())); + } + let timestamp = headers + .timestamp + .parse::<u64>() + .map_err(|_| Error::Unauthorized("outbound agent timestamp is invalid".to_string()))?; + let skew_ms = self.max_clock_skew.as_millis().min(u128::from(u64::MAX)) as u64; + if timestamp.abs_diff(now_unix_ms) > skew_ms { + return Err(Error::Unauthorized("outbound agent timestamp expired".to_string())); + } + let body_sha256 = sha256_hex(body); + if headers.body_sha256 != body_sha256 { + return Err(Error::Unauthorized("outbound agent body hash mismatch".to_string())); + } + let material = request_signing_material( + method, + path_and_query, + &headers.timestamp, + &headers.nonce, + &headers.body_sha256, + ); + let expected = hmac_sha256_hex(self.token.as_bytes(), material.as_bytes())?; + if !constant_time_eq(headers.signature.as_bytes(), expected.as_bytes()) { + return Err(Error::Unauthorized("outbound agent signature mismatch".to_string())); + } + self.prune_seen_nonces(now_unix_ms, skew_ms); + if self.seen_nonces.contains_key(&headers.nonce) { + return Err(Error::Unauthorized("outbound agent replay request rejected".to_string())); + } + self.seen_nonces.insert(headers.nonce.clone(), timestamp); + Ok(()) + } + + fn prune_seen_nonces(&mut self, now_unix_ms: u64, skew_ms: u64) { + self.seen_nonces.retain(|_, timestamp| now_unix_ms.saturating_sub(*timestamp) <= 
skew_ms); + } +} + +pub fn sign_agent_command(token: &str, command: &AgentCommand) -> Result<String> { + let token = token.trim(); + if token.is_empty() { + return Err(Error::Unauthorized("agent command signing token is missing".to_string())); + } + hmac_sha256_hex(token.as_bytes(), command_signing_material(command)?.as_bytes()) +} + +pub fn verify_agent_command_signature(token: &str, command: &AgentCommand) -> Result<()> { + let Some(signature) = command.signature.as_deref() else { + return Err(Error::Unauthorized("agent command signature is missing".to_string())); + }; + let expected = sign_agent_command(token, command)?; + if constant_time_eq(signature.as_bytes(), expected.as_bytes()) { + Ok(()) + } else { + Err(Error::Unauthorized("agent command signature mismatch".to_string())) + } +} + +fn request_signing_material( + method: &str, + path: &str, + timestamp: &str, + nonce: &str, + body_sha256: &str, +) -> String { + format!("{method}\n{path}\n{timestamp}\n{nonce}\n{body_sha256}") +} + +fn command_signing_material(command: &AgentCommand) -> Result<String> { + Ok(format!( + "{}\n{}\n{}\n{}\n{}\n{}", + command.id, + command.command_type.as_str(), + command.target_node_id, + command.revision.map(|v| v.to_string()).unwrap_or_default(), + command.expires_at_unix_ms.map(|v| v.to_string()).unwrap_or_default(), + sha256_hex(&serde_json::to_vec(&command.payload)?), + )) +} + +fn sha256_hex(bytes: &[u8]) -> String { + hex::encode(Sha256::digest(bytes)) +} + +fn hmac_sha256_hex(key: &[u8], message: &[u8]) -> Result<String> { + type HmacSha256 = Hmac<Sha256>; + + let mut mac = HmacSha256::new_from_slice(key) + .map_err(|_| Error::Server("failed to construct HMAC-SHA256".to_string()))?; + mac.update(message); + Ok(hex::encode(mac.finalize().into_bytes())) +} + +fn constant_time_eq(left: &[u8], right: &[u8]) -> bool { + if left.len() != right.len() { + return false; + } + left.iter().zip(right).fold(0u8, |acc, (a, b)| acc | (a ^ b)) == 0 +} diff --git a/crates/rginx-agent/src/outbound/client.rs 
b/crates/rginx-agent/src/outbound/client.rs new file mode 100644 index 00000000..e788e962 --- /dev/null +++ b/crates/rginx-agent/src/outbound/client.rs @@ -0,0 +1,246 @@ +use std::future::Future; +use std::pin::Pin; +use std::time::Duration; + +use bytes::Bytes; +use http::header::{AUTHORIZATION, CONTENT_TYPE, HeaderName, HeaderValue}; +use http::{Method, Request, StatusCode, Uri}; +use http_body_util::{BodyExt, Full}; +use hyper_rustls::HttpsConnector; +use hyper_rustls::HttpsConnectorBuilder; +use hyper_util::client::legacy::Client; +use hyper_util::client::legacy::connect::HttpConnector; +use hyper_util::rt::TokioExecutor; + +use crate::error::{Error, Result}; + +use super::auth::{ + BODY_SHA256_HEADER, NONCE_HEADER, OutboundRequestSigner, SIGNATURE_HEADER, TIMESTAMP_HEADER, +}; +use super::encoding::{encode_path_segment, encode_query_component}; +use super::model::{ + AgentCommandResult, AgentHeartbeatRequest, AgentPollResponse, AgentRegisterRequest, +}; + +const NODE_ID_HEADER: HeaderName = HeaderName::from_static("x-rginx-node-id"); + +pub type OutboundClientFuture = Pin> + Send + 'static>>; + +pub trait OutboundControlPlaneClient: Send + Sync { + fn register(&self, request: AgentRegisterRequest) -> OutboundClientFuture<()>; + fn heartbeat(&self, request: AgentHeartbeatRequest) -> OutboundClientFuture<()>; + fn poll_commands( + &self, + node_id: String, + cursor: Option, + timeout: Duration, + ) -> OutboundClientFuture; + fn post_result(&self, result: AgentCommandResult) -> OutboundClientFuture<()>; +} + +#[derive(Clone)] +pub struct HttpOutboundControlPlaneClient { + endpoint: Uri, + signer: OutboundRequestSigner, + request_timeout: Duration, + client: Client, Full>, +} + +impl HttpOutboundControlPlaneClient { + pub fn new(endpoint: Uri, token: String) -> Self { + Self::with_request_timeout(endpoint, token, Duration::from_secs(30)) + } + + pub fn with_request_timeout(endpoint: Uri, token: String, request_timeout: Duration) -> Self { + let connector = 
HttpsConnectorBuilder::new() + .with_native_roots() + .expect("native TLS roots should load") + .https_or_http() + .enable_http1() + .enable_http2() + .build(); + Self { + endpoint, + signer: OutboundRequestSigner::new(token) + .expect("outbound agent token should be validated before client construction"), + request_timeout, + client: Client::builder(TokioExecutor::new()).build(connector), + } + } + + async fn send_json( + &self, + method: Method, + node_id: &str, + path_and_query: String, + body: Option<&T>, + timeout: Duration, + ) -> Result> { + let body = match body { + Some(body) => serde_json::to_vec(body)?, + None => Vec::new(), + }; + let signed_path = self.path_for(&path_and_query); + let uri = self.uri_for_path(&signed_path)?; + let signed = self.signer.sign(&method, &signed_path, &body)?; + let request = Request::builder() + .method(method) + .uri(uri) + .header(AUTHORIZATION, header_value(&signed.authorization)?) + .header(NODE_ID_HEADER, HeaderValue::from_str(node_id).map_err(header_error)?) + .header(TIMESTAMP_HEADER, header_value(&signed.timestamp)?) + .header(NONCE_HEADER, header_value(&signed.nonce)?) + .header(BODY_SHA256_HEADER, header_value(&signed.body_sha256)?) + .header(SIGNATURE_HEADER, header_value(&signed.signature)?) + .header(CONTENT_TYPE, "application/json") + .body(Full::new(Bytes::from(body)))?; + let response = tokio::time::timeout(timeout, self.client.request(request)) + .await + .map_err(|_| Error::Server("outbound agent request timed out".to_string()))? + .map_err(|error| Error::Server(format!("outbound agent request failed: {error}")))?; + let status = response.status(); + let bytes = response + .into_body() + .collect() + .await + .map_err(|error| Error::Server(format!("outbound agent response failed: {error}")))? 
+ .to_bytes(); + + if status == StatusCode::NO_CONTENT { + return Ok(None); + } + if !status.is_success() { + return Err(Error::Server(format!("control center returned HTTP {status}"))); + } + if bytes.is_empty() { + return Ok(None); + } + Ok(Some(serde_json::from_slice::(&bytes)?)) + } + + fn uri_for_path(&self, path_and_query: &str) -> Result { + let scheme = self.endpoint.scheme_str().unwrap_or("https"); + let authority = self + .endpoint + .authority() + .ok_or_else(|| Error::InvalidRequest("agent endpoint must include authority".into()))?; + format!("{scheme}://{authority}{path_and_query}") + .parse::() + .map_err(|error| Error::InvalidRequest(error.to_string())) + } + + fn path_for(&self, path_and_query: &str) -> String { + let base_path = self.endpoint.path().trim_end_matches('/'); + format!("{base_path}{path_and_query}") + } +} + +impl OutboundControlPlaneClient for HttpOutboundControlPlaneClient { + fn register(&self, request: AgentRegisterRequest) -> OutboundClientFuture<()> { + let client = self.clone(); + Box::pin(async move { + client + .send_json::<_, serde_json::Value>( + Method::POST, + &request.node_id, + "/v1/agents/register".to_string(), + Some(&request), + client.request_timeout, + ) + .await?; + Ok(()) + }) + } + + fn heartbeat(&self, request: AgentHeartbeatRequest) -> OutboundClientFuture<()> { + let client = self.clone(); + Box::pin(async move { + let path = heartbeat_path(&request.node_id); + client + .send_json::<_, serde_json::Value>( + Method::POST, + &request.node_id, + path, + Some(&request), + client.request_timeout, + ) + .await?; + Ok(()) + }) + } + + fn poll_commands( + &self, + node_id: String, + cursor: Option, + timeout: Duration, + ) -> OutboundClientFuture { + let client = self.clone(); + Box::pin(async move { + let path = commands_path(&node_id, cursor.as_deref(), timeout); + Ok(client + .send_json::( + Method::GET, + &node_id, + path, + None, + timeout + Duration::from_secs(5), + ) + .await? 
+ .unwrap_or_else(AgentPollResponse::empty)) + }) + } + + fn post_result(&self, result: AgentCommandResult) -> OutboundClientFuture<()> { + let client = self.clone(); + Box::pin(async move { + let path = command_result_path(&result.node_id, &result.command_id); + client + .send_json::<_, serde_json::Value>( + Method::POST, + &result.node_id, + path, + Some(&result), + client.request_timeout, + ) + .await?; + Ok(()) + }) + } +} + +fn header_value(value: &str) -> Result { + HeaderValue::from_str(value).map_err(header_error) +} + +fn header_error(error: http::header::InvalidHeaderValue) -> Error { + Error::InvalidRequest(format!("invalid outbound agent header value: {error}")) +} + +fn heartbeat_path(node_id: &str) -> String { + format!("/v1/agents/{}/heartbeat", encode_path_segment(node_id)) +} + +fn commands_path(node_id: &str, cursor: Option<&str>, timeout: Duration) -> String { + let mut path = format!( + "/v1/agents/{}/commands?timeout={}s", + encode_path_segment(node_id), + timeout.as_secs() + ); + if let Some(cursor) = cursor { + path.push_str("&after="); + path.push_str(&encode_query_component(cursor)); + } + path +} + +fn command_result_path(node_id: &str, command_id: &str) -> String { + format!( + "/v1/agents/{}/commands/{}/result", + encode_path_segment(node_id), + encode_path_segment(command_id) + ) +} + +#[cfg(test)] +mod tests; diff --git a/crates/rginx-agent/src/outbound/client/tests.rs b/crates/rginx-agent/src/outbound/client/tests.rs new file mode 100644 index 00000000..290dbbd2 --- /dev/null +++ b/crates/rginx-agent/src/outbound/client/tests.rs @@ -0,0 +1,16 @@ +use std::time::Duration; + +use super::{command_result_path, commands_path, heartbeat_path}; + +#[test] +fn outbound_paths_encode_agent_identifiers_and_cursors() { + assert_eq!(heartbeat_path("edge/sfo 1"), "/v1/agents/edge%2Fsfo%201/heartbeat"); + assert_eq!( + commands_path("edge/sfo 1", Some("cmd?next=1/2"), Duration::from_secs(30)), + 
"/v1/agents/edge%2Fsfo%201/commands?timeout=30s&after=cmd%3Fnext%3D1%2F2" + ); + assert_eq!( + command_result_path("edge/sfo 1", "cmd/result #1"), + "/v1/agents/edge%2Fsfo%201/commands/cmd%2Fresult%20%231/result" + ); +} diff --git a/crates/rginx-agent/src/outbound/command.rs b/crates/rginx-agent/src/outbound/command.rs new file mode 100644 index 00000000..dc9e74cf --- /dev/null +++ b/crates/rginx-agent/src/outbound/command.rs @@ -0,0 +1,157 @@ +use std::time::{SystemTime, UNIX_EPOCH}; + +use rginx_config::managed::ManagedResourceMutation; +use serde::Deserialize; + +use crate::agent_core::{ + AgentCore, CacheInvalidateCommand, CacheInvalidateTarget, CachePurgeCommand, CachePurgeTarget, +}; +use crate::error::{Error, Result}; + +use super::model::{AgentCommand, AgentCommandType}; + +pub(super) async fn execute( + core: &AgentCore, + node_id: &str, + command: &AgentCommand, + command_signing_key: Option<&str>, +) -> Result { + validate_command(node_id, command, command_signing_key)?; + match command.command_type { + AgentCommandType::Reload => to_value(core.reload().await?), + AgentCommandType::ApplyConfig => { + let request = payload::(&command.payload)?; + to_value(core.apply_config(request).await?) + } + AgentCommandType::SetDesiredRevision => { + let desired_revision = desired_revision(command)?; + to_value(core.set_desired_revision(desired_revision).await?) + } + AgentCommandType::CachePurge => { + to_value(core.purge_cache(cache_purge(&command.payload)?).await?) + } + AgentCommandType::CacheInvalidate => { + to_value(core.invalidate_cache(cache_invalidate(&command.payload)?).await?) + } + AgentCommandType::CollectSnapshot => { + let payload = payload::(&command.payload)?; + to_value(core.snapshot(payload.window_secs).await?) 
+ } + } +} + +pub(super) fn unix_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() + .min(u128::from(u64::MAX)) as u64 +} + +fn validate_command( + node_id: &str, + command: &AgentCommand, + command_signing_key: Option<&str>, +) -> Result<()> { + if command.id.trim().is_empty() { + return Err(Error::InvalidRequest("agent command id must not be empty".to_string())); + } + if command.target_node_id != node_id { + return Err(Error::Forbidden(format!( + "agent command targets `{}` but this node is `{node_id}`", + command.target_node_id + ))); + } + if command.expires_at_unix_ms.is_some_and(|expires| expires < unix_ms()) { + return Err(Error::InvalidRequest("agent command expired".to_string())); + } + if let Some(key) = command_signing_key { + if command.expires_at_unix_ms.is_none() { + return Err(Error::InvalidRequest( + "agent command expires_at_unix_ms is missing".to_string(), + )); + } + super::auth::verify_agent_command_signature(key, command)?; + } + Ok(()) +} + +#[derive(Deserialize)] +struct SnapshotPayload { + #[serde(default)] + window_secs: Option, +} + +#[derive(Deserialize)] +struct DesiredRevisionPayload { + desired_revision: Option, +} + +#[derive(Deserialize)] +struct CachePurgePayload { + zone_name: String, + key: Option, + prefix: Option, +} + +#[derive(Deserialize)] +struct CacheInvalidatePayload { + zone_name: String, + key: Option, + prefix: Option, + tag: Option, +} + +fn desired_revision(command: &AgentCommand) -> Result { + let payload = payload::(&command.payload)?; + payload + .desired_revision + .or(command.revision) + .ok_or_else(|| Error::InvalidRequest("desired revision command requires revision".into())) +} + +fn cache_purge(value: &serde_json::Value) -> Result { + let payload = payload::(value)?; + let target = match (payload.key, payload.prefix) { + (Some(key), None) => CachePurgeTarget::Key(non_empty("key", key)?), + (None, Some(prefix)) => CachePurgeTarget::Prefix(non_empty("prefix", 
prefix)?), + (None, None) => CachePurgeTarget::Zone, + (Some(_), Some(_)) => { + return Err(Error::InvalidRequest("cache_purge accepts only one selector".to_string())); + } + }; + Ok(CachePurgeCommand { zone_name: non_empty("zone_name", payload.zone_name)?, target }) +} + +fn cache_invalidate(value: &serde_json::Value) -> Result { + let payload = payload::(value)?; + let target = match (payload.key, payload.prefix, payload.tag) { + (Some(key), None, None) => CacheInvalidateTarget::Key(non_empty("key", key)?), + (None, Some(prefix), None) => CacheInvalidateTarget::Prefix(non_empty("prefix", prefix)?), + (None, None, Some(tag)) => CacheInvalidateTarget::Tag(non_empty("tag", tag)?), + (None, None, None) => CacheInvalidateTarget::Zone, + _ => { + return Err(Error::InvalidRequest( + "cache_invalidate accepts only one selector".to_string(), + )); + } + }; + Ok(CacheInvalidateCommand { zone_name: non_empty("zone_name", payload.zone_name)?, target }) +} + +fn payload(value: &serde_json::Value) -> Result { + serde_json::from_value(value.clone()).map_err(Error::Serde) +} + +fn non_empty(field: &str, value: String) -> Result { + let value = value.trim().to_string(); + if value.is_empty() { + Err(Error::InvalidRequest(format!("{field} must not be empty"))) + } else { + Ok(value) + } +} + +fn to_value(value: T) -> Result { + serde_json::to_value(value).map_err(Error::Serde) +} diff --git a/crates/rginx-agent/src/outbound/encoding.rs b/crates/rginx-agent/src/outbound/encoding.rs new file mode 100644 index 00000000..ee6a31fc --- /dev/null +++ b/crates/rginx-agent/src/outbound/encoding.rs @@ -0,0 +1,26 @@ +pub(super) fn encode_path_segment(value: &str) -> String { + percent_encode_unreserved(value) +} + +pub(super) fn encode_query_component(value: &str) -> String { + percent_encode_unreserved(value) +} + +fn percent_encode_unreserved(value: &str) -> String { + let mut encoded = String::with_capacity(value.len()); + for byte in value.bytes() { + if is_unreserved(byte) { + 
encoded.push(byte as char); + } else { + const HEX: &[u8; 16] = b"0123456789ABCDEF"; + encoded.push('%'); + encoded.push(HEX[(byte >> 4) as usize] as char); + encoded.push(HEX[(byte & 0x0f) as usize] as char); + } + } + encoded +} + +fn is_unreserved(byte: u8) -> bool { + byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~') +} diff --git a/crates/rginx-agent/src/outbound/mod.rs b/crates/rginx-agent/src/outbound/mod.rs new file mode 100644 index 00000000..b373aa0f --- /dev/null +++ b/crates/rginx-agent/src/outbound/mod.rs @@ -0,0 +1,32 @@ +mod auth; +mod client; +mod command; +mod encoding; +mod model; +mod outcome; +mod requests; +mod runner; +mod state; +mod status; +mod stream; +mod timing; + +pub use auth::{ + AuthenticatedRequestHeaders, OutboundAuthVerifier, OutboundRequestSigner, sign_agent_command, + verify_agent_command_signature, +}; +pub use client::{HttpOutboundControlPlaneClient, OutboundControlPlaneClient}; +pub use model::{ + AgentCommand, AgentCommandResult, AgentCommandStatus, AgentCommandType, AgentHeartbeatRequest, + AgentPollResponse, AgentRegisterRequest, +}; +pub use outcome::OutboundAgentCycleOutcome; +pub use runner::OutboundAgent; +pub use state::{ + AgentCommandExecutionState, AgentConnectionState, AgentInFlightCommand, AgentPersistentState, + AgentStateStore, +}; +pub use stream::{ + AgentStreamCommandBatch, AgentStreamHello, AgentStreamMessage, OutboundStreamClient, + WebSocketOutboundStreamClient, +}; diff --git a/crates/rginx-agent/src/outbound/model.rs b/crates/rginx-agent/src/outbound/model.rs new file mode 100644 index 00000000..7baf3695 --- /dev/null +++ b/crates/rginx-agent/src/outbound/model.rs @@ -0,0 +1,94 @@ +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct AgentRegisterRequest { + pub node_id: String, + pub version: String, + pub region: Option, + pub pop: Option, + pub labels: BTreeMap, + pub capabilities: 
Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct AgentHeartbeatRequest { + pub node_id: String, + pub snapshot_version: u64, + pub current_revision: u64, + pub desired_revision: u64, + pub converged: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct AgentPollResponse { + #[serde(default)] + pub commands: Vec, + #[serde(default)] + pub next_cursor: Option, +} + +impl AgentPollResponse { + pub fn empty() -> Self { + Self { commands: Vec::new(), next_cursor: None } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct AgentCommand { + pub id: String, + #[serde(rename = "type")] + pub command_type: AgentCommandType, + pub target_node_id: String, + #[serde(default)] + pub revision: Option, + #[serde(default)] + pub expires_at_unix_ms: Option, + #[serde(default)] + pub payload: serde_json::Value, + #[serde(default)] + pub signature: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum AgentCommandType { + Reload, + ApplyConfig, + SetDesiredRevision, + CachePurge, + CacheInvalidate, + CollectSnapshot, +} + +impl AgentCommandType { + pub fn as_str(self) -> &'static str { + match self { + Self::Reload => "reload", + Self::ApplyConfig => "apply_config", + Self::SetDesiredRevision => "set_desired_revision", + Self::CachePurge => "cache_purge", + Self::CacheInvalidate => "cache_invalidate", + Self::CollectSnapshot => "collect_snapshot", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AgentCommandStatus { + Succeeded, + Failed, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct AgentCommandResult { + pub command_id: String, + pub node_id: String, + pub status: AgentCommandStatus, + pub started_at_unix_ms: u64, + pub finished_at_unix_ms: u64, + pub result: serde_json::Value, + pub error: 
Option, +} diff --git a/crates/rginx-agent/src/outbound/outcome.rs b/crates/rginx-agent/src/outbound/outcome.rs new file mode 100644 index 00000000..105ef4b9 --- /dev/null +++ b/crates/rginx-agent/src/outbound/outcome.rs @@ -0,0 +1,5 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct OutboundAgentCycleOutcome { + pub commands_received: usize, + pub results_posted: usize, +} diff --git a/crates/rginx-agent/src/outbound/requests.rs b/crates/rginx-agent/src/outbound/requests.rs new file mode 100644 index 00000000..e01091c5 --- /dev/null +++ b/crates/rginx-agent/src/outbound/requests.rs @@ -0,0 +1,37 @@ +use rginx_core::AgentSettings; + +use crate::agent_core::AgentCore; +use crate::error::Result; + +use super::model::{AgentHeartbeatRequest, AgentRegisterRequest}; + +pub(super) fn register_request(settings: &AgentSettings) -> AgentRegisterRequest { + AgentRegisterRequest { + node_id: settings.node_id.clone(), + version: env!("CARGO_PKG_VERSION").to_string(), + region: settings.region.clone(), + pop: settings.pop.clone(), + labels: settings.labels.clone(), + capabilities: vec![ + "reload".to_string(), + "config.apply".to_string(), + "cache.purge".to_string(), + "cache.invalidate".to_string(), + "snapshot.delta".to_string(), + ], + } +} + +pub(super) async fn heartbeat_request( + settings: &AgentSettings, + core: &AgentCore, +) -> Result { + let revision = core.revision().await?.0; + Ok(AgentHeartbeatRequest { + node_id: settings.node_id.clone(), + snapshot_version: core.shared_state().current_snapshot_version(), + current_revision: revision.current_revision, + desired_revision: revision.desired_revision, + converged: revision.converged, + }) +} diff --git a/crates/rginx-agent/src/outbound/runner.rs b/crates/rginx-agent/src/outbound/runner.rs new file mode 100644 index 00000000..2a5ba750 --- /dev/null +++ b/crates/rginx-agent/src/outbound/runner.rs @@ -0,0 +1,299 @@ +use std::collections::VecDeque; +use std::path::PathBuf; +use std::sync::Arc; +use 
std::time::{Duration, Instant}; + +use rginx_core::AgentSettings; +use tokio::sync::watch; + +use crate::agent_core::AgentCore; +use crate::error::Result; + +use super::client::{HttpOutboundControlPlaneClient, OutboundControlPlaneClient}; +use super::command::{execute, unix_ms}; +use super::model::{AgentCommand, AgentCommandResult, AgentCommandStatus, AgentPollResponse}; +use super::outcome::OutboundAgentCycleOutcome; +use super::requests::{heartbeat_request, register_request}; +use super::state::{ + AgentCommandExecutionState, AgentConnectionState, AgentInFlightCommand, AgentPersistentState, + AgentStateStore, +}; +use super::status::publish_agent_runtime; +use super::stream::{AgentStreamHello, OutboundStreamClient}; +use super::timing::{wait_or_shutdown, wait_while_locally_disabled}; + +pub struct OutboundAgent { + settings: AgentSettings, + core: AgentCore, + client: Arc, + stream_client: Option>, + registered: bool, + next_heartbeat_due: Option, + state: AgentPersistentState, + state_store: Option, + pending_results: VecDeque, + command_signing_key: Option>, + local_disable: Option>, +} + +impl OutboundAgent { + pub fn with_default_client(settings: AgentSettings, token: String, core: AgentCore) -> Self { + let client = Arc::new(HttpOutboundControlPlaneClient::with_request_timeout( + settings.endpoint.clone(), + token.clone(), + settings.request_timeout, + )); + let stream_client = + Arc::new(super::stream::WebSocketOutboundStreamClient::with_connect_timeout( + settings.endpoint.clone(), + token.clone(), + settings.connect_timeout, + )); + Self::new(settings, core, client) + .with_stream_client(stream_client) + .with_command_signing_key(token) + } + + pub fn new( + settings: AgentSettings, + core: AgentCore, + client: Arc, + ) -> Self { + let node_id = settings.node_id.clone(); + Self { + settings, + core, + client, + stream_client: None, + registered: false, + next_heartbeat_due: None, + state: AgentPersistentState::default_for_node(node_id), + state_store: 
None, + pending_results: VecDeque::new(), + command_signing_key: None, + local_disable: None, + } + } + + pub fn with_command_signing_key(mut self, key: impl Into) -> Self { + let key = key.into().trim().to_string(); + if !key.is_empty() { + self.command_signing_key = Some(Arc::from(key)); + } + self + } + + pub fn with_stream_client(mut self, client: Arc) -> Self { + self.stream_client = Some(client); + self + } + + pub fn with_state_path(self, path: impl Into) -> Result { + self.with_state_store(AgentStateStore::new(path)) + } + + pub fn with_local_disable(mut self, disabled: watch::Receiver) -> Self { + self.local_disable = Some(disabled); + self + } + + pub fn with_state_store(mut self, store: AgentStateStore) -> Result { + self.state = store.load_or_default(&self.settings.node_id)?; + if let Some(result) = self.state.recover_in_flight_as_failure() { + self.state.remember_result(result.clone(), store.recent_results_limit()); + self.pending_results.push_back(result); + } + store.save(&self.state)?; + self.state_store = Some(store); + Ok(self) + } + + pub async fn run(mut self, mut shutdown: watch::Receiver) -> Result<()> { + let mut backoff = self.settings.backoff_initial; + while !*shutdown.borrow() { + if wait_while_locally_disabled(&mut self.local_disable, &mut shutdown).await { + break; + } + match self.run_cycle().await { + Ok(outcome) => { + backoff = self.settings.backoff_initial; + if outcome.commands_received == 0 + && wait_or_shutdown(Duration::from_millis(100), &mut shutdown).await + { + break; + } + } + Err(error) => { + self.registered = false; + self.state.connection_state = AgentConnectionState::OfflineRetrying; + let _ = self.persist_state(); + tracing::warn!(%error, "outbound agent control center request failed"); + if wait_or_shutdown(backoff, &mut shutdown).await { + break; + } + backoff = backoff.saturating_mul(2).min(self.settings.backoff_max); + } + } + } + Ok(()) + } + + pub async fn run_cycle(&mut self) -> Result { + if !self.registered { 
+ self.state.connection_state = AgentConnectionState::Registering; + self.persist_state()?; + self.client.register(register_request(&self.settings)).await?; + self.registered = true; + self.state.last_register_success_unix_ms = Some(unix_ms()); + self.state.connection_state = AgentConnectionState::Connected; + self.next_heartbeat_due = Some(Instant::now()); + self.persist_state()?; + } + + if self.heartbeat_due() { + self.client.heartbeat(heartbeat_request(&self.settings, &self.core).await?).await?; + self.state.last_heartbeat_success_unix_ms = Some(unix_ms()); + self.state.connection_state = AgentConnectionState::Connected; + self.next_heartbeat_due = Some(Instant::now() + self.settings.heartbeat_interval); + self.persist_state()?; + } + + self.post_pending_results().await?; + + let (response, stream_client) = self.command_response().await?; + self.process_poll_response(response, stream_client).await + } + + async fn command_response( + &self, + ) -> Result<(AgentPollResponse, Option>)> { + if let Some(stream_client) = &self.stream_client { + let hello = AgentStreamHello { + node_id: self.settings.node_id.clone(), + version: env!("CARGO_PKG_VERSION").to_string(), + cursor: self.state.command_cursor.clone(), + capabilities: register_request(&self.settings).capabilities, + }; + match stream_client.receive_commands(hello, self.settings.poll_timeout).await { + Ok(batch) => return Ok((batch.into(), Some(stream_client.clone()))), + Err(error) => tracing::warn!( + %error, + "outbound agent stream failed; falling back to long polling" + ), + } + } + Ok(( + self.client + .poll_commands( + self.settings.node_id.clone(), + self.state.command_cursor.clone(), + self.settings.poll_timeout, + ) + .await?, + None, + )) + } + + async fn process_poll_response( + &mut self, + response: AgentPollResponse, + stream_client: Option>, + ) -> Result { + let commands_received = response.commands.len(); + let mut results_posted = 0usize; + let mut last_command_id = None; + + for command in 
response.commands { + let command_id = command.id.clone(); + let result = self.command_result(command).await?; + if let Some(stream_client) = &stream_client { + stream_client.post_result(result).await?; + } else { + self.client.post_result(result).await?; + } + self.state.command_cursor = Some(command_id.clone()); + self.persist_state()?; + last_command_id = Some(command_id); + results_posted += 1; + } + + if let Some(next_cursor) = response.next_cursor.or(last_command_id) { + self.state.command_cursor = Some(next_cursor); + self.persist_state()?; + } + + Ok(OutboundAgentCycleOutcome { commands_received, results_posted }) + } + + async fn post_pending_results(&mut self) -> Result<()> { + while let Some(result) = self.pending_results.front().cloned() { + self.client.post_result(result).await?; + self.pending_results.pop_front(); + } + Ok(()) + } + + async fn command_result(&mut self, command: AgentCommand) -> Result { + if let Some(result) = self.state.cached_result(&command.id) { + return Ok(result); + } + + self.state.in_flight_command = Some(AgentInFlightCommand::received(command.clone())); + self.persist_state()?; + if let Some(in_flight) = self.state.in_flight_command.as_mut() { + in_flight.mark(AgentCommandExecutionState::Executing); + } + self.persist_state()?; + + let started_at_unix_ms = unix_ms(); + let outcome = execute( + &self.core, + &self.settings.node_id, + &command, + self.command_signing_key.as_deref(), + ) + .await; + let finished_at_unix_ms = unix_ms(); + + let result = match outcome { + Ok(result) => AgentCommandResult { + command_id: command.id, + node_id: self.settings.node_id.clone(), + status: AgentCommandStatus::Succeeded, + started_at_unix_ms, + finished_at_unix_ms, + result, + error: None, + }, + Err(error) => AgentCommandResult { + command_id: command.id, + node_id: self.settings.node_id.clone(), + status: AgentCommandStatus::Failed, + started_at_unix_ms, + finished_at_unix_ms, + result: serde_json::Value::Null, + error: 
Some(error.to_string()), + }, + }; + self.state.in_flight_command = None; + self.state.remember_result(result.clone(), self.recent_results_limit()); + self.persist_state()?; + Ok(result) + } + + fn heartbeat_due(&self) -> bool { + self.next_heartbeat_due.is_none_or(|due| Instant::now() >= due) + } + + fn recent_results_limit(&self) -> usize { + self.state_store.as_ref().map(AgentStateStore::recent_results_limit).unwrap_or(128) + } + + fn persist_state(&self) -> Result<()> { + if let Some(store) = &self.state_store { + store.save(&self.state)?; + } + publish_agent_runtime(&self.core, &self.state); + Ok(()) + } +} diff --git a/crates/rginx-agent/src/outbound/state.rs b/crates/rginx-agent/src/outbound/state.rs new file mode 100644 index 00000000..bbfad1dc --- /dev/null +++ b/crates/rginx-agent/src/outbound/state.rs @@ -0,0 +1,189 @@ +use std::path::{Path, PathBuf}; + +use serde::{Deserialize, Serialize}; + +use crate::error::{Error, Result}; + +use super::command::unix_ms; +use super::model::{AgentCommand, AgentCommandResult, AgentCommandStatus}; + +const STATE_VERSION: u32 = 1; +const DEFAULT_RECENT_RESULTS_LIMIT: usize = 128; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +#[derive(Default)] +pub enum AgentConnectionState { + #[default] + Starting, + Registering, + Connected, + Degraded, + OfflineRetrying, +} + +impl AgentConnectionState { + pub fn as_str(self) -> &'static str { + match self { + Self::Starting => "starting", + Self::Registering => "registering", + Self::Connected => "connected", + Self::Degraded => "degraded", + Self::OfflineRetrying => "offline_retrying", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AgentCommandExecutionState { + Received, + Accepted, + Executing, + Succeeded, + Failed, + Rejected, + TimedOut, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct 
AgentInFlightCommand { + pub command: AgentCommand, + pub state: AgentCommandExecutionState, + pub received_at_unix_ms: u64, + pub updated_at_unix_ms: u64, +} + +impl AgentInFlightCommand { + pub fn received(command: AgentCommand) -> Self { + let now = unix_ms(); + Self { + command, + state: AgentCommandExecutionState::Received, + received_at_unix_ms: now, + updated_at_unix_ms: now, + } + } + + pub fn mark(&mut self, state: AgentCommandExecutionState) { + self.state = state; + self.updated_at_unix_ms = unix_ms(); + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct AgentPersistentState { + pub version: u32, + pub node_id: String, + pub connection_state: AgentConnectionState, + #[serde(default)] + pub command_cursor: Option, + #[serde(default)] + pub in_flight_command: Option, + #[serde(default)] + pub recent_results: Vec, + #[serde(default)] + pub last_register_success_unix_ms: Option, + #[serde(default)] + pub last_heartbeat_success_unix_ms: Option, +} + +impl AgentPersistentState { + pub fn default_for_node(node_id: impl Into) -> Self { + Self { + version: STATE_VERSION, + node_id: node_id.into(), + connection_state: AgentConnectionState::Starting, + command_cursor: None, + in_flight_command: None, + recent_results: Vec::new(), + last_register_success_unix_ms: None, + last_heartbeat_success_unix_ms: None, + } + } + + pub fn cached_result(&self, command_id: &str) -> Option { + self.recent_results.iter().find(|result| result.command_id == command_id).cloned() + } + + pub fn remember_result(&mut self, result: AgentCommandResult, limit: usize) { + self.recent_results.retain(|stored| stored.command_id != result.command_id); + self.recent_results.push(result); + let limit = limit.max(1); + if self.recent_results.len() > limit { + let drain_count = self.recent_results.len() - limit; + self.recent_results.drain(0..drain_count); + } + } + + pub fn recover_in_flight_as_failure(&mut self) -> Option { + let in_flight = 
self.in_flight_command.take()?; + if self.cached_result(&in_flight.command.id).is_some() { + return None; + } + let now = unix_ms(); + Some(AgentCommandResult { + command_id: in_flight.command.id, + node_id: self.node_id.clone(), + status: AgentCommandStatus::Failed, + started_at_unix_ms: in_flight.received_at_unix_ms, + finished_at_unix_ms: now, + result: serde_json::Value::Null, + error: Some(format!( + "agent restarted while command was in flight ({:?})", + in_flight.state + )), + }) + } +} + +#[derive(Debug, Clone)] +pub struct AgentStateStore { + path: PathBuf, + recent_results_limit: usize, +} + +impl AgentStateStore { + pub fn new(path: impl Into) -> Self { + Self { path: path.into(), recent_results_limit: DEFAULT_RECENT_RESULTS_LIMIT } + } + + pub fn path(&self) -> &Path { + &self.path + } + + pub fn recent_results_limit(&self) -> usize { + self.recent_results_limit + } + + pub fn load_or_default(&self, node_id: &str) -> Result { + let bytes = match std::fs::read(&self.path) { + Ok(bytes) => bytes, + Err(error) if error.kind() == std::io::ErrorKind::NotFound => { + return Ok(AgentPersistentState::default_for_node(node_id)); + } + Err(error) => return Err(error.into()), + }; + let state: AgentPersistentState = serde_json::from_slice(&bytes)?; + if state.version != STATE_VERSION { + return Err(Error::InvalidRequest(format!( + "unsupported outbound agent state version {}", + state.version + ))); + } + if state.node_id == node_id { + Ok(state) + } else { + Ok(AgentPersistentState::default_for_node(node_id)) + } + } + + pub fn save(&self, state: &AgentPersistentState) -> Result<()> { + let parent = self.path.parent().unwrap_or_else(|| Path::new(".")); + std::fs::create_dir_all(parent)?; + let tmp_path = self.path.with_extension("json.tmp"); + std::fs::write(&tmp_path, serde_json::to_vec_pretty(state)?)?; + std::fs::rename(tmp_path, &self.path)?; + Ok(()) + } +} diff --git a/crates/rginx-agent/src/outbound/status.rs b/crates/rginx-agent/src/outbound/status.rs new 
file mode 100644 index 00000000..5cba4cd5 --- /dev/null +++ b/crates/rginx-agent/src/outbound/status.rs @@ -0,0 +1,16 @@ +use crate::agent_core::AgentCore; + +use super::state::AgentPersistentState; + +pub(super) fn publish_agent_runtime(core: &AgentCore, state: &AgentPersistentState) { + core.shared_state().update_agent_runtime(rginx_http::AgentRuntimeUpdate { + connection_state: state.connection_state.as_str().to_string(), + command_cursor: state.command_cursor.clone(), + in_flight_command_id: state + .in_flight_command + .as_ref() + .map(|in_flight| in_flight.command.id.clone()), + last_register_success_unix_ms: state.last_register_success_unix_ms, + last_heartbeat_success_unix_ms: state.last_heartbeat_success_unix_ms, + }); +} diff --git a/crates/rginx-agent/src/outbound/stream.rs b/crates/rginx-agent/src/outbound/stream.rs new file mode 100644 index 00000000..5b2e4a44 --- /dev/null +++ b/crates/rginx-agent/src/outbound/stream.rs @@ -0,0 +1,260 @@ +use std::time::Duration; + +use futures_util::{SinkExt, StreamExt}; +use http::{Method, Uri}; +use serde::{Deserialize, Serialize}; +use tokio_tungstenite::connect_async; +use tokio_tungstenite::tungstenite::{ClientRequestBuilder, Message}; + +use crate::error::{Error, Result}; + +use super::auth::{ + BODY_SHA256_HEADER, NONCE_HEADER, OutboundRequestSigner, SIGNATURE_HEADER, TIMESTAMP_HEADER, +}; +use super::client::OutboundClientFuture; +use super::encoding::encode_query_component; +use super::model::{AgentCommand, AgentCommandResult, AgentPollResponse}; + +const NODE_ID_HEADER: &str = "x-rginx-node-id"; +const STREAM_PATH: &str = "/v1/agents/stream"; + +pub trait OutboundStreamClient: Send + Sync { + fn receive_commands( + &self, + hello: AgentStreamHello, + timeout: Duration, + ) -> OutboundClientFuture; + fn post_result(&self, result: AgentCommandResult) -> OutboundClientFuture<()>; +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct AgentStreamHello { + pub node_id: String, + pub 
version: String, + #[serde(default)] + pub cursor: Option, + pub capabilities: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AgentStreamCommandBatch { + pub commands: Vec, + pub next_cursor: Option, +} + +impl AgentStreamCommandBatch { + pub fn empty() -> Self { + Self { commands: Vec::new(), next_cursor: None } + } +} + +impl From for AgentPollResponse { + fn from(batch: AgentStreamCommandBatch) -> Self { + Self { commands: batch.commands, next_cursor: batch.next_cursor } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum AgentStreamMessage { + Hello(AgentStreamHello), + Registered { + #[serde(default)] + node_id: Option, + }, + Heartbeat { + #[serde(default)] + payload: serde_json::Value, + }, + Command { + command: AgentCommand, + #[serde(default)] + next_cursor: Option, + }, + Commands { + commands: Vec, + #[serde(default)] + next_cursor: Option, + }, + CommandResult { + result: AgentCommandResult, + }, + SnapshotDelta { + #[serde(default)] + payload: serde_json::Value, + }, + Event { + #[serde(default)] + payload: serde_json::Value, + }, + Ping { + #[serde(default)] + id: Option, + }, + Pong { + #[serde(default)] + id: Option, + }, + Error { + message: String, + }, +} + +#[derive(Clone)] +pub struct WebSocketOutboundStreamClient { + endpoint: Uri, + signer: OutboundRequestSigner, + connect_timeout: Duration, +} + +impl WebSocketOutboundStreamClient { + pub fn new(endpoint: Uri, token: String) -> Self { + Self::with_connect_timeout(endpoint, token, Duration::from_secs(30)) + } + + pub fn with_connect_timeout(endpoint: Uri, token: String, connect_timeout: Duration) -> Self { + Self { + endpoint, + signer: OutboundRequestSigner::new(token) + .expect("outbound agent token should be validated before stream construction"), + connect_timeout, + } + } + + async fn connect( + &self, + node_id: &str, + cursor: Option<&str>, + ) -> Result< + 
tokio_tungstenite::WebSocketStream< + tokio_tungstenite::MaybeTlsStream, + >, + > { + let signed_path = self.path_for(&stream_path(cursor)); + let uri = self.stream_uri_for_path(&signed_path)?; + let signed = self.signer.sign(&Method::GET, &signed_path, &[])?; + let request = ClientRequestBuilder::new(uri) + .with_header("Authorization", signed.authorization) + .with_header(NODE_ID_HEADER, node_id.to_string()) + .with_header(TIMESTAMP_HEADER, signed.timestamp) + .with_header(NONCE_HEADER, signed.nonce) + .with_header(BODY_SHA256_HEADER, signed.body_sha256) + .with_header(SIGNATURE_HEADER, signed.signature); + let (stream, _) = tokio::time::timeout(self.connect_timeout, connect_async(request)) + .await + .map_err(|_| Error::Server("outbound agent stream connect timed out".to_string()))? + .map_err(|error| Error::Server(format!("outbound agent stream failed: {error}")))?; + Ok(stream) + } + + fn stream_uri_for_path(&self, path_and_query: &str) -> Result { + let scheme = match self.endpoint.scheme_str() { + Some("http") => "ws", + _ => "wss", + }; + let authority = self + .endpoint + .authority() + .ok_or_else(|| Error::InvalidRequest("agent endpoint must include authority".into()))?; + format!("{scheme}://{authority}{path_and_query}") + .parse::() + .map_err(|error| Error::InvalidRequest(error.to_string())) + } + + fn path_for(&self, path_and_query: &str) -> String { + let base_path = self.endpoint.path().trim_end_matches('/'); + format!("{base_path}{path_and_query}") + } +} + +impl OutboundStreamClient for WebSocketOutboundStreamClient { + fn receive_commands( + &self, + hello: AgentStreamHello, + timeout: Duration, + ) -> OutboundClientFuture { + let client = self.clone(); + Box::pin(async move { + let mut stream = client.connect(&hello.node_id, hello.cursor.as_deref()).await?; + let hello_message = AgentStreamMessage::Hello(hello); + send_json_message(&mut stream, &hello_message).await?; + + loop { + let next = match tokio::time::timeout(timeout, 
stream.next()).await { + Ok(Some(next)) => next, + Ok(None) => return Err(Error::Server("outbound agent stream closed".into())), + Err(_) => return Ok(AgentStreamCommandBatch::empty()), + }; + match next.map_err(|error| { + Error::Server(format!("outbound agent stream failed: {error}")) + })? { + Message::Text(text) => match serde_json::from_str::(&text)? + { + AgentStreamMessage::Command { command, next_cursor } => { + return Ok(AgentStreamCommandBatch { + commands: vec![command], + next_cursor, + }); + } + AgentStreamMessage::Commands { commands, next_cursor } => { + return Ok(AgentStreamCommandBatch { commands, next_cursor }); + } + AgentStreamMessage::Ping { id } => { + send_json_message(&mut stream, &AgentStreamMessage::Pong { id }) + .await?; + } + AgentStreamMessage::Error { message } => { + return Err(Error::Server(format!( + "control center stream error: {message}" + ))); + } + _ => {} + }, + Message::Ping(data) => { + stream.send(Message::Pong(data)).await.map_err(|error| { + Error::Server(format!("outbound agent stream failed: {error}")) + })? 
+ } + Message::Close(_) => { + return Err(Error::Server("outbound agent stream closed".into())); + } + _ => {} + } + } + }) + } + + fn post_result(&self, result: AgentCommandResult) -> OutboundClientFuture<()> { + let client = self.clone(); + Box::pin(async move { + let mut stream = client.connect(&result.node_id, None).await?; + send_json_message(&mut stream, &AgentStreamMessage::CommandResult { result }).await?; + let _ = stream.close(None).await; + Ok(()) + }) + } +} + +fn stream_path(cursor: Option<&str>) -> String { + let mut path = STREAM_PATH.to_string(); + if let Some(cursor) = cursor { + path.push_str("?after="); + path.push_str(&encode_query_component(cursor)); + } + path +} + +async fn send_json_message( + stream: &mut tokio_tungstenite::WebSocketStream< + tokio_tungstenite::MaybeTlsStream, + >, + message: &AgentStreamMessage, +) -> Result<()> { + stream + .send(Message::Text(serde_json::to_string(message)?.into())) + .await + .map_err(|error| Error::Server(format!("outbound agent stream failed: {error}"))) +} + +#[cfg(test)] +mod tests; diff --git a/crates/rginx-agent/src/outbound/stream/tests.rs b/crates/rginx-agent/src/outbound/stream/tests.rs new file mode 100644 index 00000000..218f23d2 --- /dev/null +++ b/crates/rginx-agent/src/outbound/stream/tests.rs @@ -0,0 +1,9 @@ +use super::stream_path; + +#[test] +fn stream_path_encodes_cursor_query_component() { + assert_eq!( + stream_path(Some("cursor?next=1/2 &x=y")), + "/v1/agents/stream?after=cursor%3Fnext%3D1%2F2%20%26x%3Dy" + ); +} diff --git a/crates/rginx-agent/src/outbound/timing.rs b/crates/rginx-agent/src/outbound/timing.rs new file mode 100644 index 00000000..589cad38 --- /dev/null +++ b/crates/rginx-agent/src/outbound/timing.rs @@ -0,0 +1,39 @@ +use std::time::Duration; + +use tokio::sync::watch; + +pub(super) async fn wait_or_shutdown( + duration: Duration, + shutdown: &mut watch::Receiver, +) -> bool { + tokio::select! 
{ + _ = tokio::time::sleep(duration) => false, + changed = shutdown.changed() => changed.is_err() || *shutdown.borrow(), + } +} + +pub(super) async fn wait_while_locally_disabled( + local_disable: &mut Option>, + shutdown: &mut watch::Receiver, +) -> bool { + let Some(local_disable) = local_disable.as_mut() else { + return false; + }; + + while *local_disable.borrow() && !*shutdown.borrow() { + tokio::select! { + changed = shutdown.changed() => { + if changed.is_err() || *shutdown.borrow() { + return true; + } + } + changed = local_disable.changed() => { + if changed.is_err() { + return false; + } + } + } + } + + *shutdown.borrow() +} diff --git a/crates/rginx-agent/src/rate_limit.rs b/crates/rginx-agent/src/rate_limit.rs index a796b7b2..50bb540d 100644 --- a/crates/rginx-agent/src/rate_limit.rs +++ b/crates/rginx-agent/src/rate_limit.rs @@ -180,76 +180,4 @@ pub enum RateLimitDecision { } #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_token_bucket_basic() { - let mut bucket = TokenBucket::new(10, 1.0); - assert!(bucket.try_acquire(5)); - assert_eq!(bucket.available_tokens(), 5); - assert!(bucket.try_acquire(5)); - assert_eq!(bucket.available_tokens(), 0); - assert!(!bucket.try_acquire(1)); - } - - #[test] - fn test_token_bucket_refill() { - let mut bucket = TokenBucket::new(10, 10.0); // 10 tokens per second - assert!(bucket.try_acquire(10)); - - std::thread::sleep(Duration::from_millis(500)); // Wait 0.5s, should refill 5 tokens - - let available = bucket.available_tokens(); - assert!((4..=6).contains(&available)); // Allow some timing variance - } - - #[tokio::test] - async fn test_rate_limiter_global() { - let config = RateLimitConfig { - global: Some(RateLimit { requests_per_second: 10, burst: 10 }), - per_api_key: None, - per_endpoint: HashMap::new(), - per_ip: None, - }; - - let limiter = RateLimiter::new(config); - - // Should allow first 10 requests - for _ in 0..10 { - let decision = limiter.check_rate_limit(None, "/test", 
"127.0.0.1").await.unwrap(); - assert!(matches!(decision, RateLimitDecision::Allow)); - } - - // 11th request should be rejected - let decision = limiter.check_rate_limit(None, "/test", "127.0.0.1").await.unwrap(); - assert!(matches!(decision, RateLimitDecision::Reject { .. })); - } - - #[tokio::test] - async fn test_rate_limiter_per_api_key() { - let config = RateLimitConfig { - global: None, - per_api_key: Some(RateLimit { requests_per_second: 5, burst: 5 }), - per_endpoint: HashMap::new(), - per_ip: None, - }; - - let limiter = RateLimiter::new(config); - - // Key1 should have its own bucket - for _ in 0..5 { - let decision = - limiter.check_rate_limit(Some("key1"), "/test", "127.0.0.1").await.unwrap(); - assert!(matches!(decision, RateLimitDecision::Allow)); - } - - // Key1 exhausted - let decision = limiter.check_rate_limit(Some("key1"), "/test", "127.0.0.1").await.unwrap(); - assert!(matches!(decision, RateLimitDecision::Reject { .. })); - - // Key2 should still work - let decision = limiter.check_rate_limit(Some("key2"), "/test", "127.0.0.1").await.unwrap(); - assert!(matches!(decision, RateLimitDecision::Allow)); - } -} +mod tests; diff --git a/crates/rginx-agent/src/rate_limit/tests.rs b/crates/rginx-agent/src/rate_limit/tests.rs new file mode 100644 index 00000000..b712c9af --- /dev/null +++ b/crates/rginx-agent/src/rate_limit/tests.rs @@ -0,0 +1,70 @@ +use super::*; + +#[test] +fn test_token_bucket_basic() { + let mut bucket = TokenBucket::new(10, 1.0); + assert!(bucket.try_acquire(5)); + assert_eq!(bucket.available_tokens(), 5); + assert!(bucket.try_acquire(5)); + assert_eq!(bucket.available_tokens(), 0); + assert!(!bucket.try_acquire(1)); +} + +#[test] +fn test_token_bucket_refill() { + let mut bucket = TokenBucket::new(10, 10.0); // 10 tokens per second + assert!(bucket.try_acquire(10)); + + std::thread::sleep(Duration::from_millis(500)); // Wait 0.5s, should refill 5 tokens + + let available = bucket.available_tokens(); + 
assert!((4..=6).contains(&available)); // Allow some timing variance +} + +#[tokio::test] +async fn test_rate_limiter_global() { + let config = RateLimitConfig { + global: Some(RateLimit { requests_per_second: 10, burst: 10 }), + per_api_key: None, + per_endpoint: HashMap::new(), + per_ip: None, + }; + + let limiter = RateLimiter::new(config); + + // Should allow first 10 requests + for _ in 0..10 { + let decision = limiter.check_rate_limit(None, "/test", "127.0.0.1").await.unwrap(); + assert!(matches!(decision, RateLimitDecision::Allow)); + } + + // 11th request should be rejected + let decision = limiter.check_rate_limit(None, "/test", "127.0.0.1").await.unwrap(); + assert!(matches!(decision, RateLimitDecision::Reject { .. })); +} + +#[tokio::test] +async fn test_rate_limiter_per_api_key() { + let config = RateLimitConfig { + global: None, + per_api_key: Some(RateLimit { requests_per_second: 5, burst: 5 }), + per_endpoint: HashMap::new(), + per_ip: None, + }; + + let limiter = RateLimiter::new(config); + + // Key1 should have its own bucket + for _ in 0..5 { + let decision = limiter.check_rate_limit(Some("key1"), "/test", "127.0.0.1").await.unwrap(); + assert!(matches!(decision, RateLimitDecision::Allow)); + } + + // Key1 exhausted + let decision = limiter.check_rate_limit(Some("key1"), "/test", "127.0.0.1").await.unwrap(); + assert!(matches!(decision, RateLimitDecision::Reject { .. 
})); + + // Key2 should still work + let decision = limiter.check_rate_limit(Some("key2"), "/test", "127.0.0.1").await.unwrap(); + assert!(matches!(decision, RateLimitDecision::Allow)); +} diff --git a/crates/rginx-agent/src/registry.rs b/crates/rginx-agent/src/registry.rs index 8ed559b8..0ddf2aa9 100644 --- a/crates/rginx-agent/src/registry.rs +++ b/crates/rginx-agent/src/registry.rs @@ -228,114 +228,4 @@ pub(crate) fn current_timestamp_ms() -> u64 { } #[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn test_node_registration() { - let registry = NodeRegistry::new(Duration::from_secs(60)); - - let registration = NodeRegistration { - node_id: "test-node-1".to_string(), - region: Some("us-west-1".to_string()), - pop: Some("sfo".to_string()), - capabilities: vec!["http3".to_string()], - control_plane_addr: "https://localhost:9443".to_string(), - labels: [("env".to_string(), "test".to_string())].into_iter().collect(), - metadata: HashMap::new(), - }; - - let node_info = registry.register(registration).await.unwrap(); - assert_eq!(node_info.registration.node_id, "test-node-1"); - assert_eq!(node_info.status, NodeStatus::Healthy); - } - - #[tokio::test] - async fn test_heartbeat() { - let registry = NodeRegistry::new(Duration::from_secs(60)); - - let registration = NodeRegistration { - node_id: "test-node-1".to_string(), - region: None, - pop: None, - capabilities: vec![], - control_plane_addr: "https://localhost:9443".to_string(), - labels: HashMap::new(), - metadata: HashMap::new(), - }; - - registry.register(registration).await.unwrap(); - - let health = NodeHealth { - load_avg_1m: 0.5, - load_avg_5m: 0.6, - load_avg_15m: 0.7, - memory_usage_percent: 50.0, - disk_usage_percent: 30.0, - active_connections: 100, - requests_per_second: 50.0, - }; - - let node_info = registry.heartbeat("test-node-1", health).await.unwrap(); - assert_eq!(node_info.health.load_avg_1m, 0.5); - } - - #[tokio::test] - async fn test_node_filter() { - let registry = 
NodeRegistry::new(Duration::from_secs(60)); - - let registration1 = NodeRegistration { - node_id: "node-1".to_string(), - region: Some("us-west-1".to_string()), - pop: Some("sfo".to_string()), - capabilities: vec![], - control_plane_addr: "https://localhost:9443".to_string(), - labels: [("env".to_string(), "prod".to_string())].into_iter().collect(), - metadata: HashMap::new(), - }; - - let registration2 = NodeRegistration { - node_id: "node-2".to_string(), - region: Some("us-east-1".to_string()), - pop: Some("nyc".to_string()), - capabilities: vec![], - control_plane_addr: "https://localhost:9443".to_string(), - labels: [("env".to_string(), "dev".to_string())].into_iter().collect(), - metadata: HashMap::new(), - }; - - registry.register(registration1).await.unwrap(); - registry.register(registration2).await.unwrap(); - - let filter = NodeFilter { region: Some("us-west-1".to_string()), ..Default::default() }; - - let nodes = registry.list_nodes(filter).await; - assert_eq!(nodes.len(), 1); - assert_eq!(nodes[0].registration.node_id, "node-1"); - } - - #[tokio::test] - async fn test_heartbeat_timeout() { - let registry = NodeRegistry::new(Duration::from_millis(100)); - - let registration = NodeRegistration { - node_id: "test-node-1".to_string(), - region: None, - pop: None, - capabilities: vec![], - control_plane_addr: "https://localhost:9443".to_string(), - labels: HashMap::new(), - metadata: HashMap::new(), - }; - - registry.register(registration).await.unwrap(); - - // Wait for timeout - tokio::time::sleep(Duration::from_millis(150)).await; - - registry.check_heartbeat_timeouts().await; - - let node = registry.get_node("test-node-1").await.unwrap(); - assert_eq!(node.status, NodeStatus::Offline); - } -} +mod tests; diff --git a/crates/rginx-agent/src/registry/tests.rs b/crates/rginx-agent/src/registry/tests.rs new file mode 100644 index 00000000..42094598 --- /dev/null +++ b/crates/rginx-agent/src/registry/tests.rs @@ -0,0 +1,109 @@ +use super::*; + +#[tokio::test] 
+async fn test_node_registration() { + let registry = NodeRegistry::new(Duration::from_secs(60)); + + let registration = NodeRegistration { + node_id: "test-node-1".to_string(), + region: Some("us-west-1".to_string()), + pop: Some("sfo".to_string()), + capabilities: vec!["http3".to_string()], + control_plane_addr: "https://localhost:9443".to_string(), + labels: [("env".to_string(), "test".to_string())].into_iter().collect(), + metadata: HashMap::new(), + }; + + let node_info = registry.register(registration).await.unwrap(); + assert_eq!(node_info.registration.node_id, "test-node-1"); + assert_eq!(node_info.status, NodeStatus::Healthy); +} + +#[tokio::test] +async fn test_heartbeat() { + let registry = NodeRegistry::new(Duration::from_secs(60)); + + let registration = NodeRegistration { + node_id: "test-node-1".to_string(), + region: None, + pop: None, + capabilities: vec![], + control_plane_addr: "https://localhost:9443".to_string(), + labels: HashMap::new(), + metadata: HashMap::new(), + }; + + registry.register(registration).await.unwrap(); + + let health = NodeHealth { + load_avg_1m: 0.5, + load_avg_5m: 0.6, + load_avg_15m: 0.7, + memory_usage_percent: 50.0, + disk_usage_percent: 30.0, + active_connections: 100, + requests_per_second: 50.0, + }; + + let node_info = registry.heartbeat("test-node-1", health).await.unwrap(); + assert_eq!(node_info.health.load_avg_1m, 0.5); +} + +#[tokio::test] +async fn test_node_filter() { + let registry = NodeRegistry::new(Duration::from_secs(60)); + + let registration1 = NodeRegistration { + node_id: "node-1".to_string(), + region: Some("us-west-1".to_string()), + pop: Some("sfo".to_string()), + capabilities: vec![], + control_plane_addr: "https://localhost:9443".to_string(), + labels: [("env".to_string(), "prod".to_string())].into_iter().collect(), + metadata: HashMap::new(), + }; + + let registration2 = NodeRegistration { + node_id: "node-2".to_string(), + region: Some("us-east-1".to_string()), + pop: Some("nyc".to_string()), 
+ capabilities: vec![], + control_plane_addr: "https://localhost:9443".to_string(), + labels: [("env".to_string(), "dev".to_string())].into_iter().collect(), + metadata: HashMap::new(), + }; + + registry.register(registration1).await.unwrap(); + registry.register(registration2).await.unwrap(); + + let filter = NodeFilter { region: Some("us-west-1".to_string()), ..Default::default() }; + + let nodes = registry.list_nodes(filter).await; + assert_eq!(nodes.len(), 1); + assert_eq!(nodes[0].registration.node_id, "node-1"); +} + +#[tokio::test] +async fn test_heartbeat_timeout() { + let registry = NodeRegistry::new(Duration::from_millis(100)); + + let registration = NodeRegistration { + node_id: "test-node-1".to_string(), + region: None, + pop: None, + capabilities: vec![], + control_plane_addr: "https://localhost:9443".to_string(), + labels: HashMap::new(), + metadata: HashMap::new(), + }; + + registry.register(registration).await.unwrap(); + + // Wait for timeout + tokio::time::sleep(Duration::from_millis(150)).await; + + registry.check_heartbeat_timeouts().await; + + let node = registry.get_node("test-node-1").await.unwrap(); + assert_eq!(node.status, NodeStatus::Offline); +} diff --git a/crates/rginx-agent/src/server/control.rs b/crates/rginx-agent/src/server/control.rs index 50bbf03f..65a9a431 100644 --- a/crates/rginx-agent/src/server/control.rs +++ b/crates/rginx-agent/src/server/control.rs @@ -1,11 +1,12 @@ use std::future::Future; use std::pin::Pin; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Duration; use rginx_config::managed::ManagedResourceMutation; -use rginx_http::{ApplyResultSnapshot, ReloadOutcomeSnapshot, ReloadResultSnapshot, SharedState}; +use rginx_http::SharedState; +use crate::agent_core::AgentCore; use crate::circuit_breaker::{CircuitBreakerConfig, CircuitBreakerRegistry}; use crate::config_history::ConfigHistory; use crate::config_validator::ConfigValidator; @@ -33,13 +34,9 @@ pub struct ConfigApplyOutcome { pub 
result: ConfigApplyResultView, } -const RELOAD_COMPLETION_TIMEOUT: Duration = Duration::from_secs(30); - #[derive(Clone)] pub struct ControlPlaneContext { - state: SharedState, - reload_executor: Arc, - config_apply_executor: Arc, + agent_core: AgentCore, node_registry: Arc, event_bus: Arc, config_history: Arc, @@ -52,9 +49,7 @@ impl ControlPlaneContext { pub fn new(state: SharedState, reload_executor: Arc) -> Self { let temp_dir = std::env::temp_dir().join("rginx-config-history"); Self { - state, - reload_executor, - config_apply_executor: Arc::new(UnsupportedConfigApplyExecutor), + agent_core: AgentCore::new(state, reload_executor), node_registry: Arc::new(NodeRegistry::new(Duration::from_secs(90))), event_bus: Arc::new(EventBus::new(1000)), config_history: Arc::new(ConfigHistory::new(temp_dir, 100)), @@ -70,7 +65,7 @@ impl ControlPlaneContext { mut self, config_apply_executor: Arc, ) -> Self { - self.config_apply_executor = config_apply_executor; + self.agent_core = self.agent_core.with_config_apply_executor(config_apply_executor); self } @@ -89,8 +84,12 @@ impl ControlPlaneContext { self } + pub fn agent_core(&self) -> &AgentCore { + &self.agent_core + } + pub fn shared_state(&self) -> &SharedState { - &self.state + self.agent_core.shared_state() } pub fn node_registry(&self) -> &Arc { @@ -118,87 +117,22 @@ impl ControlPlaneContext { } pub async fn execute_reload(&self) -> Result { - let initial_status = self.state.status_snapshot().await.reload; - let fallback_revision = self.state.current_revision().await; - self.reload_executor.execute().await?; - self.wait_for_reload_attempt(initial_status.attempts_total).await?; - Ok(self.reload_action_status(fallback_revision).await) + self.agent_core.reload().await } pub async fn action_status(&self, accepted_revision: u64) -> NodeActionStatusView { - NodeActionStatusView { - accepted_revision, - revision: self.state.revision_status_snapshot().await, - last_reload_result: last_reload_result(&self.state).await, - 
last_apply_result: last_apply_result(&self.state), - } + self.agent_core.action_status(accepted_revision).await } pub async fn wrap_result(&self, result: T) -> NodeControlResultView { - let current_revision = self.state.current_revision().await; - NodeControlResultView { status: self.action_status(current_revision).await, result } + self.agent_core.wrap_result(result).await } pub async fn execute_config_apply( &self, request: ManagedResourceMutation, ) -> Result> { - let outcome = self.config_apply_executor.execute(request).await?; - Ok(NodeControlResultView { - status: self.action_status(outcome.accepted_revision).await, - result: outcome.result, - }) - } -} - -async fn last_reload_result(state: &SharedState) -> Option { - state.status_snapshot().await.reload.last_result -} - -fn last_apply_result(state: &SharedState) -> Option { - state.apply_status_snapshot().last_result -} - -impl ControlPlaneContext { - async fn reload_action_status(&self, fallback_revision: u64) -> NodeActionStatusView { - let current_revision = self.state.current_revision().await; - let last_reload_result = last_reload_result(&self.state).await; - let accepted_revision = match last_reload_result.as_ref().map(|result| &result.outcome) { - Some(ReloadOutcomeSnapshot::Success { revision }) => *revision, - _ => current_revision.max(fallback_revision), - }; - NodeActionStatusView { - accepted_revision, - revision: self.state.revision_status_snapshot().await, - last_reload_result, - last_apply_result: last_apply_result(&self.state), - } - } - - async fn wait_for_reload_attempt(&self, attempts_before: u64) -> Result<()> { - let started = Instant::now(); - let mut observed_version = self.state.current_snapshot_version(); - loop { - let reload = self.state.status_snapshot().await.reload; - if reload.attempts_total > attempts_before { - return Ok(()); - } - - let Some(remaining) = RELOAD_COMPLETION_TIMEOUT.checked_sub(started.elapsed()) else { - return Err(Error::Server( - "timed out waiting for 
runtime reload completion".to_string(), - )); - }; - - let changed_version = - self.state.wait_for_snapshot_change(observed_version, Some(remaining)).await; - if changed_version == observed_version { - return Err(Error::Server( - "timed out waiting for runtime reload completion".to_string(), - )); - } - observed_version = changed_version; - } + self.agent_core.apply_config(request).await } } diff --git a/crates/rginx-agent/src/server/maintenance.rs b/crates/rginx-agent/src/server/maintenance.rs new file mode 100644 index 00000000..c2624a31 --- /dev/null +++ b/crates/rginx-agent/src/server/maintenance.rs @@ -0,0 +1,49 @@ +use std::sync::Arc; +use std::time::Duration; + +use tokio::sync::watch; + +use crate::rate_limit::RateLimiter; +use crate::registry::NodeRegistry; + +pub(super) fn spawn_rate_limiter_cleanup( + rate_limiter: Arc, + mut shutdown: watch::Receiver, +) { + tokio::spawn(async move { + let mut interval = tokio::time::interval(Duration::from_secs(300)); + loop { + tokio::select! { + _ = interval.tick() => { + rate_limiter.cleanup_stale_buckets(Duration::from_secs(600)).await; + } + _ = shutdown.changed() => { + if *shutdown.borrow() { + break; + } + } + } + } + }); +} + +pub(super) fn spawn_registry_heartbeat_check( + registry: Arc, + mut shutdown: watch::Receiver, +) { + tokio::spawn(async move { + let mut interval = tokio::time::interval(Duration::from_secs(10)); + loop { + tokio::select! 
{ + _ = interval.tick() => { + registry.check_heartbeat_timeouts().await; + } + _ = shutdown.changed() => { + if *shutdown.borrow() { + break; + } + } + } + } + }); +} diff --git a/crates/rginx-agent/src/server/mod.rs b/crates/rginx-agent/src/server/mod.rs index aa9742b3..5f52f68d 100644 --- a/crates/rginx-agent/src/server/mod.rs +++ b/crates/rginx-agent/src/server/mod.rs @@ -21,6 +21,7 @@ use crate::tls::load_tls_server_config; pub(crate) mod breaker; pub(crate) mod config; pub mod control; +mod maintenance; pub(crate) mod registry; mod request; mod response; @@ -70,45 +71,10 @@ pub async fn run_with_listener( let connection_slots = std::sync::Arc::new(Semaphore::new(MAX_CONCURRENT_CONNECTIONS)); let listen_addr = listener.local_addr()?; - tracing::info!(listen = %listen_addr, tls = true, "control plane listening"); + tracing::warn!(listen = %listen_addr, tls = true, "legacy node-side control plane server listening; prefer outbound agent model"); - // Spawn cleanup task for rate limiter - let rate_limiter_cleanup = rate_limiter.clone(); - let mut shutdown_cleanup = shutdown.clone(); - tokio::spawn(async move { - let mut interval = tokio::time::interval(Duration::from_secs(300)); // Cleanup every 5 minutes - loop { - tokio::select! { - _ = interval.tick() => { - rate_limiter_cleanup.cleanup_stale_buckets(Duration::from_secs(600)).await; - } - _ = shutdown_cleanup.changed() => { - if *shutdown_cleanup.borrow() { - break; - } - } - } - } - }); - - // Spawn heartbeat timeout check task - let registry = context.node_registry().clone(); - let mut shutdown_heartbeat = shutdown.clone(); - tokio::spawn(async move { - let mut interval = tokio::time::interval(Duration::from_secs(10)); - loop { - tokio::select! 
{ - _ = interval.tick() => { - registry.check_heartbeat_timeouts().await; - } - _ = shutdown_heartbeat.changed() => { - if *shutdown_heartbeat.borrow() { - break; - } - } - } - } - }); + maintenance::spawn_rate_limiter_cleanup(rate_limiter.clone(), shutdown.clone()); + maintenance::spawn_registry_heartbeat_check(context.node_registry().clone(), shutdown.clone()); loop { tokio::select! { diff --git a/crates/rginx-agent/src/server/request/read.rs b/crates/rginx-agent/src/server/request/read.rs index e80a6ae2..59776518 100644 --- a/crates/rginx-agent/src/server/request/read.rs +++ b/crates/rginx-agent/src/server/request/read.rs @@ -2,16 +2,10 @@ use bytes::Bytes; use http::{Request, Response}; use http_body_util::Full; use hyper::body::Incoming; -use rginx_http::SnapshotModule; use crate::error::{Error, Result}; -use crate::model::{ - NodeCacheView, NodeDeltaView, NodeRevisionView, NodeSnapshotView, NodeStatusView, - NodeSystemView, NodeTrafficView, NodeUpstreamsView, NodeWaitView, -}; use crate::server::control::ControlPlaneContext; use crate::server::response::json_response; -use crate::system::collect_system_view; use super::query::{parse_delta_query, parse_recent_window_secs, parse_wait_query}; @@ -59,54 +53,33 @@ pub(super) async fn route_get_request( return handle_readiness_check(context).await; } - let state = context.shared_state(); match path { - "/v1/node/status" => json_response(NodeStatusView::from(state.status_snapshot().await)), + "/v1/node/status" => json_response(context.agent_core().status().await?), "/v1/node/snapshot" => { let window_secs = parse_recent_window_secs(request.uri().query())?; - let snapshot = NodeSnapshotView::capture(state, window_secs).await; - json_response(snapshot) + json_response(context.agent_core().snapshot(window_secs).await?) 
} "/v1/node/delta" => { let (since_version, window_secs) = parse_delta_query(request.uri().query())?; - let delta = state.snapshot_delta_since( - since_version, - Some(&SnapshotModule::all()), - window_secs, - ); - json_response(NodeDeltaView::from(delta)) + json_response(context.agent_core().delta_since(since_version, window_secs).await?) } "/v1/node/wait" => { let (since_version, timeout) = parse_wait_query(request.uri().query())?; - let snapshot_version = state.wait_for_snapshot_change(since_version, timeout).await; - json_response(NodeWaitView { snapshot_version }) + json_response( + context.agent_core().wait_for_snapshot_change(since_version, timeout).await?, + ) } "/v1/node/traffic" => { let window_secs = parse_recent_window_secs(request.uri().query())?; - json_response(NodeTrafficView::from( - state.traffic_stats_snapshot_with_window(window_secs), - )) + json_response(context.agent_core().traffic(window_secs).await?) } "/v1/node/upstreams" => { let window_secs = parse_recent_window_secs(request.uri().query())?; - json_response(NodeUpstreamsView { - peer_health: state.peer_health_snapshot().await, - upstreams: state.upstream_stats_snapshot_with_window(window_secs), - }) - } - "/v1/node/cache" => json_response(NodeCacheView::from(state.cache_stats_snapshot().await)), - "/v1/node/system" => { - let config = state.current_config().await; - let cache_zone_paths = - config.cache_zones.values().map(|zone| zone.path.clone()).collect::>(); - let view = tokio::task::spawn_blocking(move || collect_system_view(&cache_zone_paths)) - .await - .map_err(|error| Error::Server(error.to_string()))??; - json_response::(view) - } - "/v1/node/revision" => { - json_response(NodeRevisionView::from(state.revision_status_snapshot().await)) + json_response(context.agent_core().upstreams(window_secs).await?) 
} + "/v1/node/cache" => json_response(context.agent_core().cache().await?), + "/v1/node/system" => json_response(context.agent_core().system().await?), + "/v1/node/revision" => json_response(context.agent_core().revision().await?), _ => Err(Error::InvalidRequest(format!("unknown control plane path `{path}`"))), } } @@ -164,20 +137,6 @@ async fn route_config_history_get_request( Err(Error::InvalidRequest(format!("unknown config history path `{path}`"))) } -impl NodeSnapshotView { - async fn capture(state: &rginx_http::SharedState, window_secs: Option) -> Self { - Self { - snapshot_version: state.current_snapshot_version(), - status: state.status_snapshot().await, - counters: state.counters_snapshot(), - traffic: state.traffic_stats_snapshot_with_window(window_secs), - peer_health: state.peer_health_snapshot().await, - upstreams: state.upstream_stats_snapshot_with_window(window_secs), - cache: state.cache_stats_snapshot().await, - } - } -} - /// Handle /metrics endpoint - export Prometheus metrics fn handle_metrics_request() -> Result>> { use prometheus::Encoder; diff --git a/crates/rginx-agent/src/server/write.rs b/crates/rginx-agent/src/server/write.rs index 7a5f3cc1..45ba2dc7 100644 --- a/crates/rginx-agent/src/server/write.rs +++ b/crates/rginx-agent/src/server/write.rs @@ -5,12 +5,18 @@ use hyper::body::Incoming; use rginx_config::managed::ManagedResourceMutation; use serde::Deserialize; +use crate::agent_core::{ + CacheClearInvalidationsCommand, CacheInvalidateCommand, CacheInvalidateTarget, + CachePurgeCommand, CachePurgeTarget, +}; use crate::error::{Error, Result}; use crate::server::control::ControlPlaneContext; use crate::server::response::json_response; const MAX_CONTROL_PLANE_BODY_BYTES: usize = 1024 * 1024; +mod routing; + #[derive(Debug, Deserialize)] struct CachePurgeRequest { zone_name: String, @@ -60,22 +66,22 @@ pub(super) async fn handle_post( // Check if this is a rollout endpoint if path.starts_with("/v1/rollouts") { - return 
route_rollout_post_request(request, context).await; + return routing::route_rollout_post_request(request, context).await; } // Check if this is a circuit breaker endpoint if path.starts_with("/v1/circuit-breakers") { - return route_circuit_breaker_post_request(request, context).await; + return routing::route_circuit_breaker_post_request(request, context).await; } match path { "/v1/runtime/reload" => { ensure_empty_json_object(request).await?; - json_response(context.execute_reload().await?) + json_response(context.agent_core().reload().await?) } "/v1/config/apply" => { let payload: ManagedResourceMutation = decode_json_body(request).await?; - json_response(context.execute_config_apply(payload).await?) + json_response(context.agent_core().apply_config(payload).await?) } "/v1/cache/purge" => { let payload: CachePurgeRequest = decode_json_body(request).await?; @@ -84,18 +90,16 @@ pub(super) async fn handle_post( let prefix = normalized_optional_field("prefix", payload.prefix)?; ensure_zero_or_one_selector(&[("key", key.is_some()), ("prefix", prefix.is_some())])?; - let result = match (key, prefix) { - (Some(key), None) => context.shared_state().purge_cache_key(&zone_name, &key).await, - (None, Some(prefix)) => { - context.shared_state().purge_cache_prefix(&zone_name, &prefix).await - } - (None, None) => context.shared_state().purge_cache_zone(&zone_name).await, + let target = match (key, prefix) { + (Some(key), None) => CachePurgeTarget::Key(key), + (None, Some(prefix)) => CachePurgeTarget::Prefix(prefix), + (None, None) => CachePurgeTarget::Zone, (Some(_), Some(_)) => { unreachable!("selector validation should reject multiple variants") } - } - .map_err(Error::InvalidRequest)?; - json_response(context.wrap_result(result).await) + }; + let command = CachePurgeCommand { zone_name, target }; + json_response(context.agent_core().purge_cache(command).await?) 
} "/v1/cache/invalidate" => { let payload: CacheInvalidateRequest = decode_json_body(request).await?; @@ -109,38 +113,27 @@ pub(super) async fn handle_post( ("tag", tag.is_some()), ])?; - let result = match (key, prefix, tag) { - (Some(key), None, None) => { - context.shared_state().invalidate_cache_key(&zone_name, &key).await - } - (None, Some(prefix), None) => { - context.shared_state().invalidate_cache_prefix(&zone_name, &prefix).await - } - (None, None, Some(tag)) => { - context.shared_state().invalidate_cache_tag(&zone_name, &tag).await - } - (None, None, None) => { - context.shared_state().invalidate_cache_zone(&zone_name).await - } + let target = match (key, prefix, tag) { + (Some(key), None, None) => CacheInvalidateTarget::Key(key), + (None, Some(prefix), None) => CacheInvalidateTarget::Prefix(prefix), + (None, None, Some(tag)) => CacheInvalidateTarget::Tag(tag), + (None, None, None) => CacheInvalidateTarget::Zone, _ => unreachable!("selector validation should reject multiple variants"), - } - .map_err(Error::InvalidRequest)?; - json_response(context.wrap_result(result).await) + }; + let command = CacheInvalidateCommand { zone_name, target }; + json_response(context.agent_core().invalidate_cache(command).await?) } "/v1/cache/clear-invalidations" => { let payload: CacheZoneRequest = decode_json_body(request).await?; let zone_name = normalized_required_field("zone_name", payload.zone_name)?; - let result = context - .shared_state() - .clear_cache_invalidations(&zone_name) - .await - .map_err(Error::InvalidRequest)?; - json_response(context.wrap_result(result).await) + let command = CacheClearInvalidationsCommand { zone_name }; + json_response(context.agent_core().clear_cache_invalidations(command).await?) 
} "/v1/node/desired-revision" => { let payload: DesiredRevisionRequest = decode_json_body(request).await?; - context.shared_state().set_desired_revision(payload.desired_revision); - json_response(context.action_status(payload.desired_revision).await) + json_response( + context.agent_core().set_desired_revision(payload.desired_revision).await?, + ) } path => Err(Error::InvalidRequest(format!("unknown control plane path `{path}`"))), } @@ -258,75 +251,3 @@ fn ensure_zero_or_one_selector(selectors: &[(&str, bool)]) -> Result<()> { } Ok(()) } - -/// Route rollout POST requests -async fn route_rollout_post_request( - request: Request, - context: &ControlPlaneContext, -) -> Result>> { - let path = request.uri().path(); - let manager = context.rollout_manager().clone(); - - if path == "/v1/rollouts" { - let body_bytes = read_body_bytes(request).await?; - return crate::server::rollout::handle_create_rollout(body_bytes, manager) - .await - .map_err(Error::Server); - } - - if let Some(rollout_id) = path.strip_prefix("/v1/rollouts/") { - if let Some(rest) = rollout_id.strip_suffix("/start") { - return crate::server::rollout::handle_start_rollout(rest, manager) - .await - .map_err(Error::Server); - } - if let Some(rest) = rollout_id.strip_suffix("/pause") { - return crate::server::rollout::handle_pause_rollout(rest, manager) - .await - .map_err(Error::Server); - } - if let Some(rest) = rollout_id.strip_suffix("/resume") { - return crate::server::rollout::handle_resume_rollout(rest, manager) - .await - .map_err(Error::Server); - } - if let Some(rest) = rollout_id.strip_suffix("/advance") { - return crate::server::rollout::handle_advance_stage(rest, manager) - .await - .map_err(Error::Server); - } - if let Some(rest) = rollout_id.strip_suffix("/rollback") { - return crate::server::rollout::handle_rollback(rest, manager, "manual rollback") - .await - .map_err(Error::Server); - } - } - - Err(Error::NotFound("Resource not found".to_string())) -} - -/// Route circuit breaker POST 
requests -async fn route_circuit_breaker_post_request( - request: Request, - context: &ControlPlaneContext, -) -> Result>> { - let path = request.uri().path(); - let registry = context.circuit_breaker_registry().clone(); - - if let Some(name) = path.strip_prefix("/v1/circuit-breakers/") - && let Some(breaker_name) = name.strip_suffix("/reset") - { - return crate::server::breaker::handle_reset_circuit_breaker(breaker_name, registry) - .await - .map_err(Error::Server); - } - - Err(Error::NotFound("Resource not found".to_string())) -} - -async fn read_body_bytes(request: Request) -> Result { - let body = request.into_body(); - let collected = - body.collect().await.map_err(|e| Error::Server(format!("failed to read body: {}", e)))?; - Ok(collected.to_bytes()) -} diff --git a/crates/rginx-agent/src/server/write/routing.rs b/crates/rginx-agent/src/server/write/routing.rs new file mode 100644 index 00000000..a874ac25 --- /dev/null +++ b/crates/rginx-agent/src/server/write/routing.rs @@ -0,0 +1,73 @@ +use super::*; + +pub(super) async fn route_rollout_post_request( + request: Request, + context: &ControlPlaneContext, +) -> Result>> { + let path = request.uri().path(); + let manager = context.rollout_manager().clone(); + + if path == "/v1/rollouts" { + let body_bytes = read_body_bytes(request).await?; + return crate::server::rollout::handle_create_rollout(body_bytes, manager) + .await + .map_err(Error::Server); + } + + if let Some(rollout_id) = path.strip_prefix("/v1/rollouts/") { + if let Some(rest) = rollout_id.strip_suffix("/start") { + return crate::server::rollout::handle_start_rollout(rest, manager) + .await + .map_err(Error::Server); + } + if let Some(rest) = rollout_id.strip_suffix("/pause") { + return crate::server::rollout::handle_pause_rollout(rest, manager) + .await + .map_err(Error::Server); + } + if let Some(rest) = rollout_id.strip_suffix("/resume") { + return crate::server::rollout::handle_resume_rollout(rest, manager) + .await + .map_err(Error::Server); + } 
+ if let Some(rest) = rollout_id.strip_suffix("/advance") { + return crate::server::rollout::handle_advance_stage(rest, manager) + .await + .map_err(Error::Server); + } + if let Some(rest) = rollout_id.strip_suffix("/rollback") { + return crate::server::rollout::handle_rollback(rest, manager, "manual rollback") + .await + .map_err(Error::Server); + } + } + + Err(Error::NotFound("Resource not found".to_string())) +} + +pub(super) async fn route_circuit_breaker_post_request( + request: Request, + context: &ControlPlaneContext, +) -> Result>> { + let path = request.uri().path(); + let registry = context.circuit_breaker_registry().clone(); + + if let Some(name) = path.strip_prefix("/v1/circuit-breakers/") + && let Some(breaker_name) = name.strip_suffix("/reset") + { + return crate::server::breaker::handle_reset_circuit_breaker(breaker_name, registry) + .await + .map_err(Error::Server); + } + + Err(Error::NotFound("Resource not found".to_string())) +} + +async fn read_body_bytes(request: Request) -> Result { + let body = request.into_body(); + let collected = body + .collect() + .await + .map_err(|error| Error::Server(format!("failed to read body: {error}")))?; + Ok(collected.to_bytes()) +} diff --git a/crates/rginx-agent/src/tests.rs b/crates/rginx-agent/src/tests.rs index 4d6f068c..adac70ac 100644 --- a/crates/rginx-agent/src/tests.rs +++ b/crates/rginx-agent/src/tests.rs @@ -25,6 +25,13 @@ use crate::{ ControlPlaneResource, NodeControlAction, NodeObservabilityView, }; +mod agent_core; +mod control_center; +mod outbound; +mod outbound_auth; +mod outbound_persistence; +mod outbound_runtime; +mod outbound_stream; mod read_api; mod support; mod write_api; diff --git a/crates/rginx-agent/src/tests/agent_core.rs b/crates/rginx-agent/src/tests/agent_core.rs new file mode 100644 index 00000000..0ac8a1e6 --- /dev/null +++ b/crates/rginx-agent/src/tests/agent_core.rs @@ -0,0 +1,114 @@ +use super::*; + +#[tokio::test] +async fn agent_core_snapshot_captures_runtime_state() { + 
let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = crate::AgentCore::new(state, Arc::new(TestReloadExecutor::failing())); + + let snapshot = core.snapshot(None).await.expect("snapshot should be captured"); + + assert_eq!(snapshot.snapshot_version, 0); + assert_eq!(snapshot.status.revision, 0); + assert_eq!(snapshot.cache.zones.len(), 0); +} + +#[tokio::test] +async fn agent_core_reload_uses_attached_executor() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state))); + + let status = core.reload().await.expect("reload should succeed"); + + assert_eq!(status.accepted_revision, 1); + assert_eq!(status.revision.current_revision, 1); + assert!(status.last_reload_result.is_some()); +} + +#[tokio::test] +async fn agent_core_config_apply_uses_attached_executor() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))) + .with_config_apply_executor(Arc::new(TestConfigApplyExecutor::success(state))); + let request = serde_json::from_value::( + serde_json::json!({ + "operation": "upsert", + "kind": "virtual_host", + "metadata": { + "id": "managed-app", + "owner": "team-a", + "tenant": "prod" + }, + "spec": { + "server_names": ["managed.example.com"], + "locations": [{ + "matcher": { "Exact": "/" }, + "handler": { + "Return": { + "status": 200, + "location": "", + "body": "managed\n" + } + } + }] + } + }), + ) + .expect("managed mutation should decode"); + + let result = core.apply_config(request).await.expect("apply should succeed"); + + assert_eq!(result.status.accepted_revision, 1); + assert_eq!(result.result.operation, "upsert"); + assert_eq!(result.result.kind, "virtual_host"); + assert_eq!(result.result.resource_id, 
"managed-app"); +} + +#[tokio::test] +async fn agent_core_cache_operations_return_control_status() { + let tempdir = tempfile::tempdir().expect("cache temp dir should exist"); + let state = rginx_http::SharedState::from_config(snapshot_with_cache_zone( + tempdir.path().join("cache"), + )) + .expect("shared state should build"); + let core = crate::AgentCore::new(state, Arc::new(TestReloadExecutor::failing())); + + let purge = core + .purge_cache(crate::CachePurgeCommand { + zone_name: "default".to_string(), + target: crate::CachePurgeTarget::Zone, + }) + .await + .expect("cache purge should succeed"); + assert_eq!(purge.status.accepted_revision, 0); + assert_eq!(purge.result.zone_name, "default"); + assert_eq!(purge.result.scope, "all"); + + let invalidate = core + .invalidate_cache(crate::CacheInvalidateCommand { + zone_name: "default".to_string(), + target: crate::CacheInvalidateTarget::Zone, + }) + .await + .expect("cache invalidate should succeed"); + assert_eq!(invalidate.status.accepted_revision, 0); + assert_eq!(invalidate.result.zone_name, "default"); + assert_eq!(invalidate.result.scope, "all"); +} + +#[tokio::test] +async fn agent_core_desired_revision_updates_convergence_status() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = crate::AgentCore::new(state, Arc::new(TestReloadExecutor::failing())); + + let status = core.set_desired_revision(9).await.expect("desired revision should update"); + + assert_eq!(status.accepted_revision, 9); + assert_eq!(status.revision.current_revision, 0); + assert_eq!(status.revision.desired_revision, 9); + assert!(!status.revision.converged); +} diff --git a/crates/rginx-agent/src/tests/control_center.rs b/crates/rginx-agent/src/tests/control_center.rs new file mode 100644 index 00000000..4cecc610 --- /dev/null +++ b/crates/rginx-agent/src/tests/control_center.rs @@ -0,0 +1,252 @@ +use std::collections::BTreeMap; + +use crate::{ + AgentCommand, 
AgentCommandResult, AgentCommandStatus, AgentCommandType, AgentHeartbeatRequest, + AgentRegisterRequest, ControlCenterCommandCreate, ControlCenterCommandState, + ControlCenterEventCreate, ControlCenterNodeFilter, ControlCenterNodeHealth, + ControlCenterRolloutCreate, ControlCenterStore, RolloutTargetSelector, sign_agent_command, +}; + +#[tokio::test] +async fn control_center_queues_command_for_agent_and_records_result() { + let store = ControlCenterStore::new(); + store.register(registration("edge-sfo-1", &[("env", "prod")])).await.unwrap(); + + let queued = store + .create_command("edge-sfo-1", ControlCenterCommandCreate::set_desired_revision(42)) + .await + .unwrap(); + assert_eq!(queued.state, ControlCenterCommandState::Queued); + assert_eq!(store.get_node("edge-sfo-1").await.unwrap().desired_revision, 42); + + let response = store.poll_commands("edge-sfo-1", None).await.unwrap(); + assert_eq!(response.commands.len(), 1); + assert_eq!(response.commands[0].revision, Some(42)); + assert_eq!(response.next_cursor.as_deref(), Some(queued.command.id.as_str())); + + let delivered = store.get_command(&queued.command.id).await.unwrap(); + assert_eq!(delivered.state, ControlCenterCommandState::Delivered); + + let result = command_result(&response.commands[0], AgentCommandStatus::Succeeded, 1000, None); + store.post_result(result).await.unwrap(); + let completed = store.get_command(&queued.command.id).await.unwrap(); + assert_eq!(completed.state, ControlCenterCommandState::Succeeded); + assert!(completed.result.is_some()); + + let empty = store.poll_commands("edge-sfo-1", response.next_cursor).await.unwrap(); + assert!(empty.commands.is_empty()); +} + +#[tokio::test] +async fn control_center_signs_commands_after_generating_ids() { + let store = ControlCenterStore::new().with_command_signing_key("secret"); + store.register(registration("edge-sfo-1", &[("env", "prod")])).await.unwrap(); + + let queued = + store.create_command("edge-sfo-1", 
ControlCenterCommandCreate::reload()).await.unwrap(); + let expected = sign_agent_command("secret", &queued.command).unwrap(); + assert!(queued.command.expires_at_unix_ms.is_some()); + assert_eq!(queued.command.signature.as_deref(), Some(expected.as_str())); +} + +#[tokio::test] +async fn control_center_replays_delivered_commands_until_result_is_accepted() { + let store = ControlCenterStore::new(); + store.register(registration("edge-sfo-1", &[("env", "prod")])).await.unwrap(); + + let first = + store.create_command("edge-sfo-1", ControlCenterCommandCreate::reload()).await.unwrap(); + let first_poll = store.poll_commands("edge-sfo-1", None).await.unwrap(); + assert_eq!( + first_poll.commands.iter().map(|command| command.id.as_str()).collect::<Vec<_>>(), + vec![first.command.id.as_str()] + ); + assert_eq!( + store.get_command(&first.command.id).await.unwrap().state, + ControlCenterCommandState::Delivered + ); + + let second = store + .create_command("edge-sfo-1", ControlCenterCommandCreate::set_desired_revision(43)) + .await + .unwrap(); + + let replay = store.poll_commands("edge-sfo-1", None).await.unwrap(); + assert_eq!( + replay.commands.iter().map(|command| command.id.as_str()).collect::<Vec<_>>(), + vec![first.command.id.as_str(), second.command.id.as_str()] + ); + assert_eq!( + store.get_command(&second.command.id).await.unwrap().state, + ControlCenterCommandState::Delivered + ); + + store + .post_result(command_result(&replay.commands[0], AgentCommandStatus::Succeeded, 1000, None)) + .await + .unwrap(); + + let next = store.poll_commands("edge-sfo-1", Some(first.command.id.clone())).await.unwrap(); + assert_eq!( + next.commands.iter().map(|command| command.id.as_str()).collect::<Vec<_>>(), + vec![second.command.id.as_str()] + ); +} + +#[tokio::test] +async fn control_center_exposes_node_status_events_and_recent_results() { + let store = ControlCenterStore::new(); + store.register(registration("edge-sfo-1", &[("env", "prod"), ("tier", "edge")])).await.unwrap(); +
store.heartbeat(heartbeat("edge-sfo-1", 3, 3, true)).await.unwrap(); + store + .record_event( + "edge-sfo-1", + ControlCenterEventCreate { + event_type: "config.applied".to_string(), + payload: serde_json::json!({ "revision": 3 }), + }, + ) + .await + .unwrap(); + + store.create_command("edge-sfo-1", ControlCenterCommandCreate::reload()).await.unwrap(); + store + .create_command( + "edge-sfo-1", + ControlCenterCommandCreate { + command_type: AgentCommandType::ApplyConfig, + revision: None, + expires_at_unix_ms: None, + payload: serde_json::json!({ "revision": 4 }), + signature: None, + }, + ) + .await + .unwrap(); + + let response = store.poll_commands("edge-sfo-1", None).await.unwrap(); + store + .post_result(command_result( + &response.commands[0], + AgentCommandStatus::Succeeded, + 1000, + None, + )) + .await + .unwrap(); + store + .post_result(command_result( + &response.commands[1], + AgentCommandStatus::Failed, + 2000, + Some("invalid config"), + )) + .await + .unwrap(); + + let node = store.get_node("edge-sfo-1").await.unwrap(); + assert_eq!(node.current_revision, 3); + assert!(node.converged); + assert_eq!(node.health, ControlCenterNodeHealth::Healthy); + assert_eq!(store.sessions_for_node("edge-sfo-1").await.len(), 1); + assert_eq!(store.heartbeats_for_node("edge-sfo-1").await.len(), 1); + assert_eq!(store.events_for_node("edge-sfo-1").await.len(), 1); + + let nodes = store + .list_nodes(ControlCenterNodeFilter { + labels: labels(&[("env", "prod"), ("tier", "edge")]), + ..ControlCenterNodeFilter::default() + }) + .await; + assert_eq!(nodes.len(), 1); + assert_eq!(nodes[0].node_id, "edge-sfo-1"); + + let recent = store.recent_results("edge-sfo-1", 1).await; + assert_eq!(recent.len(), 1); + assert_eq!(recent[0].status, AgentCommandStatus::Failed); +} + +#[tokio::test] +async fn control_center_rollout_selects_targets_by_labels_health_and_convergence() { + let store = ControlCenterStore::new(); + store.register(registration("edge-sfo-1", &[("env", "prod"), 
("tier", "edge")])).await.unwrap(); + store.register(registration("edge-sfo-2", &[("env", "prod"), ("tier", "edge")])).await.unwrap(); + store.register(registration("edge-lax-1", &[("env", "prod"), ("tier", "edge")])).await.unwrap(); + store + .register(registration("edge-staging-1", &[("env", "staging"), ("tier", "edge")])) + .await + .unwrap(); + store.heartbeat(heartbeat("edge-sfo-1", 10, 10, true)).await.unwrap(); + store.heartbeat(heartbeat("edge-sfo-2", 9, 10, false)).await.unwrap(); + store.heartbeat(heartbeat("edge-lax-1", 10, 10, true)).await.unwrap(); + store.heartbeat(heartbeat("edge-staging-1", 10, 10, true)).await.unwrap(); + store.mark_node_health("edge-lax-1", ControlCenterNodeHealth::Offline).await.unwrap(); + + let selector = RolloutTargetSelector { + region: Some("us-west".to_string()), + labels: labels(&[("env", "prod"), ("tier", "edge")]), + require_healthy: true, + require_converged: true, + desired_revision: Some(10), + ..RolloutTargetSelector::default() + }; + let targets = store.select_rollout_targets(selector.clone()).await; + assert_eq!( + targets.iter().map(|target| target.node_id.as_str()).collect::<Vec<_>>(), + vec!["edge-sfo-1"] + ); + + let rollout = store + .create_rollout(ControlCenterRolloutCreate { name: "rev-10-canary".to_string(), selector }) + .await + .unwrap(); + assert_eq!(rollout.target_node_ids, vec!["edge-sfo-1"]); + assert_eq!(store.get_rollout(&rollout.id).await.unwrap().target_node_ids, vec!["edge-sfo-1"]); +} + +fn registration(node_id: &str, labels: &[(&str, &str)]) -> AgentRegisterRequest { + AgentRegisterRequest { + node_id: node_id.to_string(), + version: "0.1.0".to_string(), + region: Some("us-west".to_string()), + pop: Some("sfo".to_string()), + labels: labels.iter().map(|(key, value)| (key.to_string(), value.to_string())).collect(), + capabilities: vec!["reload".to_string(), "apply_config".to_string()], + } +} + +fn heartbeat( + node_id: &str, + current_revision: u64, + desired_revision: u64, + converged: bool, +) ->
AgentHeartbeatRequest { + AgentHeartbeatRequest { + node_id: node_id.to_string(), + snapshot_version: current_revision, + current_revision, + desired_revision, + converged, + } +} + +fn command_result( + command: &AgentCommand, + status: AgentCommandStatus, + finished_at_unix_ms: u64, + error: Option<&str>, +) -> AgentCommandResult { + AgentCommandResult { + command_id: command.id.clone(), + node_id: command.target_node_id.clone(), + status, + started_at_unix_ms: finished_at_unix_ms.saturating_sub(50), + finished_at_unix_ms, + result: serde_json::json!({ "command": command.id }), + error: error.map(str::to_string), + } +} + +fn labels(items: &[(&str, &str)]) -> BTreeMap<String, String> { + items.iter().map(|(key, value)| (key.to_string(), value.to_string())).collect() +} diff --git a/crates/rginx-agent/src/tests/outbound.rs b/crates/rginx-agent/src/tests/outbound.rs new file mode 100644 index 00000000..c82c2408 --- /dev/null +++ b/crates/rginx-agent/src/tests/outbound.rs @@ -0,0 +1,359 @@ +use std::collections::{BTreeMap, VecDeque}; +use std::future::Future; +use std::path::Path; +use std::pin::Pin; + +use rginx_core::{AgentAuthSettings, AgentSettings}; + +use super::*; + +#[tokio::test] +async fn outbound_agent_cycle_registers_heartbeats_polls_and_posts_results() { + let tempdir = tempfile::tempdir().expect("cache temp dir should exist"); + let state = rginx_http::SharedState::from_config(snapshot_with_cache_zone( + tempdir.path().join("cache"), + )) + .expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))) + .with_config_apply_executor(Arc::new(TestConfigApplyExecutor::success(state))); + let control = MockControlCenter::new(); + control.push_poll(crate::AgentPollResponse { + commands: vec![ + command("cmd-reload", "reload", serde_json::json!({})), + command("cmd-apply", "apply_config", managed_vhost_payload()), + command("cmd-purge", "cache_purge", serde_json::json!({"zone_name":
"default"})), + ], + next_cursor: Some("cursor-3".to_string()), + }); + + let mut agent = crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())); + let outcome = agent.run_cycle().await.expect("outbound cycle should succeed"); + + assert_eq!(outcome.commands_received, 3); + assert_eq!(outcome.results_posted, 3); + assert_eq!( + control.calls(), + vec![ + "register:edge-sfo-1", + "heartbeat:edge-sfo-1", + "poll:edge-sfo-1:None", + "result:cmd-reload:succeeded", + "result:cmd-apply:succeeded", + "result:cmd-purge:succeeded", + ] + ); + let results = control.results(); + assert_eq!(results[1].result["result"]["operation"], "upsert"); + assert_eq!(results[2].result["result"]["zone_name"], "default"); +} + +#[tokio::test] +async fn outbound_agent_does_not_reexecute_duplicate_command_ids() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + let reload = command("cmd-repeat", "reload", serde_json::json!({})); + control + .push_poll(crate::AgentPollResponse { commands: vec![reload.clone()], next_cursor: None }); + control.push_poll(crate::AgentPollResponse { commands: vec![reload], next_cursor: None }); + + let mut agent = crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())); + agent.run_cycle().await.expect("first command should succeed"); + agent.run_cycle().await.expect("duplicate command should be reported"); + + assert_eq!(state.status_snapshot().await.reload.attempts_total, 1); + assert_eq!(control.results().len(), 2); +} + +#[tokio::test] +async fn outbound_agent_requires_valid_command_signature_when_configured() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control 
= MockControlCenter::new(); + let mut signed = command("cmd-signed", "reload", serde_json::json!({})); + sign_command(&mut signed, "secret"); + control.push_poll(crate::AgentPollResponse { commands: vec![signed], next_cursor: None }); + + let mut agent = crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())) + .with_command_signing_key("secret"); + let outcome = agent.run_cycle().await.expect("signed command should be processed"); + + assert_eq!(outcome.commands_received, 1); + assert_eq!(outcome.results_posted, 1); + assert_eq!(state.status_snapshot().await.reload.attempts_total, 1); + assert_eq!(control.results()[0].status, crate::AgentCommandStatus::Succeeded); +} + +#[tokio::test] +async fn outbound_agent_rejects_unsigned_or_badly_signed_commands() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + let unsigned = command("cmd-unsigned", "reload", serde_json::json!({})); + let unsigned = with_future_expiry(unsigned); + let mut wrong_signature = command("cmd-bad-signature", "reload", serde_json::json!({})); + sign_command(&mut wrong_signature, "other-secret"); + control.push_poll(crate::AgentPollResponse { + commands: vec![unsigned, wrong_signature], + next_cursor: None, + }); + + let mut agent = crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())) + .with_command_signing_key("secret"); + let outcome = agent.run_cycle().await.expect("invalid commands should be reported"); + + assert_eq!(outcome.commands_received, 2); + assert_eq!(outcome.results_posted, 2); + assert_eq!(state.status_snapshot().await.reload.attempts_total, 0); + let results = control.results(); + assert_eq!(results[0].status, crate::AgentCommandStatus::Failed); + assert!(results[0].error.as_deref().unwrap_or_default().contains("signature")); + 
assert_eq!(results[1].status, crate::AgentCommandStatus::Failed); + assert!(results[1].error.as_deref().unwrap_or_default().contains("signature")); +} + +#[tokio::test] +async fn outbound_agent_rejects_expired_or_wrong_node_commands() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + let mut expired = command("cmd-expired", "reload", serde_json::json!({})); + expired.expires_at_unix_ms = Some(1); + let mut wrong_node = command("cmd-wrong-node", "reload", serde_json::json!({})); + wrong_node.target_node_id = "edge-lax-1".to_string(); + control.push_poll(crate::AgentPollResponse { + commands: vec![expired, wrong_node], + next_cursor: None, + }); + + let mut agent = crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())); + let outcome = agent.run_cycle().await.expect("invalid commands should be reported"); + + assert_eq!(outcome.commands_received, 2); + assert_eq!(outcome.results_posted, 2); + assert_eq!(state.status_snapshot().await.reload.attempts_total, 0); + let results = control.results(); + assert_eq!(results[0].status, crate::AgentCommandStatus::Failed); + assert!(results[0].error.as_deref().unwrap_or_default().contains("expired")); + assert_eq!(results[1].status, crate::AgentCommandStatus::Failed); + assert!(results[1].error.as_deref().unwrap_or_default().contains("targets")); +} + +#[tokio::test] +async fn outbound_agent_can_recover_after_control_center_error() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = crate::AgentCore::new(state, Arc::new(TestReloadExecutor::failing())); + let control = MockControlCenter::new(); + control.fail_next_register("control center unavailable"); + control.push_poll(crate::AgentPollResponse::empty()); + + let mut agent = 
crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())); + let error = agent.run_cycle().await.expect_err("first register should fail"); + assert!(error.to_string().contains("control center unavailable")); + + agent.run_cycle().await.expect("second cycle should register and poll"); + assert_eq!( + control.calls(), + vec![ + "register:edge-sfo-1", + "register:edge-sfo-1", + "heartbeat:edge-sfo-1", + "poll:edge-sfo-1:None", + ] + ); +} + +#[derive(Clone, Default)] +pub(super) struct MockControlCenter { + state: Arc>, +} + +#[derive(Default)] +struct MockControlCenterState { + calls: Vec, + polls: VecDeque, + register_errors: VecDeque, + poll_errors: VecDeque, + result_errors: VecDeque, + results: Vec, +} + +impl MockControlCenter { + pub(super) fn new() -> Self { + Self::default() + } + + pub(super) fn push_poll(&self, response: crate::AgentPollResponse) { + self.state.lock().unwrap().polls.push_back(response); + } + + pub(super) fn fail_next_register(&self, error: &str) { + self.state.lock().unwrap().register_errors.push_back(error.to_string()); + } + + pub(super) fn fail_next_poll(&self, error: &str) { + self.state.lock().unwrap().poll_errors.push_back(error.to_string()); + } + + pub(super) fn fail_next_result(&self, error: &str) { + self.state.lock().unwrap().result_errors.push_back(error.to_string()); + } + + pub(super) fn calls(&self) -> Vec { + self.state.lock().unwrap().calls.clone() + } + + pub(super) fn results(&self) -> Vec { + self.state.lock().unwrap().results.clone() + } +} + +impl crate::OutboundControlPlaneClient for MockControlCenter { + fn register( + &self, + request: crate::AgentRegisterRequest, + ) -> Pin> + Send + 'static>> { + let state = self.state.clone(); + Box::pin(async move { + let mut state = state.lock().unwrap(); + state.calls.push(format!("register:{}", request.node_id)); + if let Some(error) = state.register_errors.pop_front() { + return Err(crate::Error::Server(error)); + } + Ok(()) + }) + } + + fn heartbeat( + &self, 
+ request: crate::AgentHeartbeatRequest, + ) -> Pin> + Send + 'static>> { + let state = self.state.clone(); + Box::pin(async move { + state.lock().unwrap().calls.push(format!("heartbeat:{}", request.node_id)); + Ok(()) + }) + } + + fn poll_commands( + &self, + node_id: String, + cursor: Option, + _timeout: std::time::Duration, + ) -> Pin> + Send + 'static>> + { + let state = self.state.clone(); + Box::pin(async move { + let mut state = state.lock().unwrap(); + state.calls.push(format!("poll:{node_id}:{cursor:?}")); + if let Some(error) = state.poll_errors.pop_front() { + return Err(crate::Error::Server(error)); + } + Ok(state.polls.pop_front().unwrap_or_else(crate::AgentPollResponse::empty)) + }) + } + + fn post_result( + &self, + result: crate::AgentCommandResult, + ) -> Pin> + Send + 'static>> { + let state = self.state.clone(); + Box::pin(async move { + let mut state = state.lock().unwrap(); + state.calls.push(format!( + "result:{}:{}", + result.command_id, + serde_json::to_value(result.status).unwrap().as_str().unwrap() + )); + if let Some(error) = state.result_errors.pop_front() { + return Err(crate::Error::Server(error)); + } + state.results.push(result); + Ok(()) + }) + } +} + +pub(super) fn settings() -> AgentSettings { + settings_with_state_path("/tmp/rginx-agent-state.json") +} + +pub(super) fn settings_with_state_path(path: impl AsRef) -> AgentSettings { + AgentSettings { + endpoint: "https://control.example.com".parse().unwrap(), + node_id: "edge-sfo-1".to_string(), + auth: AgentAuthSettings { token_path: "/tmp/rginx-agent.token".into() }, + state_path: path.as_ref().to_path_buf(), + region: Some("us-west".to_string()), + pop: Some("sfo".to_string()), + labels: BTreeMap::from([("tier".to_string(), "edge".to_string())]), + heartbeat_interval: std::time::Duration::from_secs(30), + connect_timeout: std::time::Duration::from_secs(10), + request_timeout: std::time::Duration::from_secs(30), + poll_timeout: std::time::Duration::from_secs(30), + 
backoff_initial: std::time::Duration::from_millis(10), + backoff_max: std::time::Duration::from_millis(20), + } +} + +pub(super) fn command( + id: &str, + command_type: &str, + payload: serde_json::Value, +) -> crate::AgentCommand { + serde_json::from_value(serde_json::json!({ + "id": id, + "type": command_type, + "target_node_id": "edge-sfo-1", + "payload": payload, + })) + .expect("command should decode") +} + +fn sign_command(command: &mut crate::AgentCommand, key: &str) { + if command.expires_at_unix_ms.is_none() { + command.expires_at_unix_ms = Some(u64::MAX); + } + command.signature = + Some(crate::outbound::sign_agent_command(key, command).expect("command should sign")); +} + +fn with_future_expiry(mut command: crate::AgentCommand) -> crate::AgentCommand { + command.expires_at_unix_ms = Some(u64::MAX); + command +} + +fn managed_vhost_payload() -> serde_json::Value { + serde_json::json!({ + "operation": "upsert", + "kind": "virtual_host", + "metadata": { + "id": "managed-app", + "owner": "team-a", + "tenant": "prod" + }, + "spec": { + "server_names": ["managed.example.com"], + "locations": [{ + "matcher": { "Exact": "/" }, + "handler": { + "Return": { + "status": 200, + "location": "", + "body": "managed\n" + } + } + }] + } + }) +} diff --git a/crates/rginx-agent/src/tests/outbound_auth.rs b/crates/rginx-agent/src/tests/outbound_auth.rs new file mode 100644 index 00000000..a95abf4f --- /dev/null +++ b/crates/rginx-agent/src/tests/outbound_auth.rs @@ -0,0 +1,119 @@ +use std::time::Duration; + +use http::Method; +use uuid::Uuid; + +#[test] +fn outbound_request_auth_rejects_missing_wrong_expired_and_replayed_tokens() { + assert!(crate::OutboundRequestSigner::new(" ").err().unwrap().to_string().contains("missing")); + assert!( + crate::OutboundAuthVerifier::new(" ", Duration::from_secs(60)) + .err() + .unwrap() + .to_string() + .contains("missing") + ); + + let signer = crate::OutboundRequestSigner::new("secret").unwrap(); + let mut verifier = 
crate::OutboundAuthVerifier::new("secret", Duration::from_secs(60)).unwrap(); + let body = br#"{"node_id":"edge-sfo-1"}"#; + let signed = signer.sign(&Method::POST, "/v1/agents/register", body).unwrap(); + let request_time = signed.timestamp.parse::().unwrap(); + + verifier + .verify("POST", "/v1/agents/register", body, &signed, request_time) + .expect("first request should authenticate"); + assert!( + verifier + .verify("POST", "/v1/agents/register", body, &signed, request_time) + .unwrap_err() + .to_string() + .contains("replay") + ); + + let mut verifier = crate::OutboundAuthVerifier::new("other", Duration::from_secs(60)).unwrap(); + assert!( + verifier + .verify("POST", "/v1/agents/register", body, &signed, request_time) + .unwrap_err() + .to_string() + .contains("token") + ); + + let mut verifier = crate::OutboundAuthVerifier::new("secret", Duration::from_secs(1)).unwrap(); + assert!( + verifier + .verify("POST", "/v1/agents/register", body, &signed, request_time + 10_000) + .unwrap_err() + .to_string() + .contains("expired") + ); +} + +#[test] +fn outbound_request_auth_rejects_body_or_signature_tampering_without_burning_nonce() { + let signer = crate::OutboundRequestSigner::new("secret").unwrap(); + let mut verifier = crate::OutboundAuthVerifier::new("secret", Duration::from_secs(60)).unwrap(); + let body = br#"{"node_id":"edge-sfo-1"}"#; + let signed = signer.sign(&Method::POST, "/v1/agents/register", body).unwrap(); + let request_time = signed.timestamp.parse::().unwrap(); + + assert!( + verifier + .verify( + "POST", + "/v1/agents/register", + br#"{"node_id":"edge-lax-1"}"#, + &signed, + request_time, + ) + .unwrap_err() + .to_string() + .contains("body hash") + ); + verifier + .verify("POST", "/v1/agents/register", body, &signed, request_time) + .expect("valid request should still authenticate after rejected tampering"); + + let mut verifier = crate::OutboundAuthVerifier::new("secret", Duration::from_secs(60)).unwrap(); + let mut tampered = signed; + 
tampered.signature = "bad-signature".to_string(); + assert!( + verifier + .verify("POST", "/v1/agents/register", body, &tampered, request_time) + .unwrap_err() + .to_string() + .contains("signature") + ); +} + +#[test] +fn outbound_request_auth_prunes_nonces_after_clock_skew_window() { + let signer = crate::OutboundRequestSigner::new("secret").unwrap(); + let mut verifier = crate::OutboundAuthVerifier::new("secret", Duration::from_secs(1)).unwrap(); + let body = br#"{"node_id":"edge-sfo-1"}"#; + let path = "/v1/agents/register"; + let nonce = Uuid::from_u128(1); + + let first = signer.sign_with_nonce("POST", path, body, 1_000, nonce).unwrap(); + verifier.verify("POST", path, body, &first, 1_000).expect("first request should authenticate"); + + let reused_within_window = signer.sign_with_nonce("POST", path, body, 1_500, nonce).unwrap(); + assert!( + verifier + .verify("POST", path, body, &reused_within_window, 1_500) + .unwrap_err() + .to_string() + .contains("replay") + ); + + let current = signer.sign_with_nonce("POST", path, body, 2_001, Uuid::from_u128(2)).unwrap(); + verifier + .verify("POST", path, body, &current, 2_001) + .expect("new request should prune old nonce entries"); + + let reused_after_window = signer.sign_with_nonce("POST", path, body, 2_001, nonce).unwrap(); + verifier + .verify("POST", path, body, &reused_after_window, 2_001) + .expect("nonce outside the skew window should no longer be retained"); +} diff --git a/crates/rginx-agent/src/tests/outbound_persistence.rs b/crates/rginx-agent/src/tests/outbound_persistence.rs new file mode 100644 index 00000000..f1e27936 --- /dev/null +++ b/crates/rginx-agent/src/tests/outbound_persistence.rs @@ -0,0 +1,166 @@ +use super::outbound::{MockControlCenter, command, settings, settings_with_state_path}; +use super::*; + +#[tokio::test] +async fn outbound_agent_restores_cursor_after_restart() { + let tempdir = tempfile::tempdir().expect("state temp dir should exist"); + let state_path =
tempdir.path().join("agent-state.json"); + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + control.push_poll(crate::AgentPollResponse { + commands: vec![command("cmd-cursor", "reload", serde_json::json!({}))], + next_cursor: Some("cursor-after-cmd".to_string()), + }); + + let mut agent = crate::OutboundAgent::new( + settings_with_state_path(&state_path), + core, + Arc::new(control.clone()), + ) + .with_state_path(state_path.clone()) + .expect("state store should attach"); + agent.run_cycle().await.expect("first agent should persist cursor"); + + let restarted_state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let restarted_core = + crate::AgentCore::new(restarted_state, Arc::new(TestReloadExecutor::failing())); + let restarted_control = MockControlCenter::new(); + restarted_control.push_poll(crate::AgentPollResponse::empty()); + let mut restarted = crate::OutboundAgent::new( + settings_with_state_path(&state_path), + restarted_core, + Arc::new(restarted_control.clone()), + ) + .with_state_path(state_path) + .expect("state should restore"); + + restarted.run_cycle().await.expect("restarted agent should poll from cursor"); + + assert!( + restarted_control + .calls() + .contains(&"poll:edge-sfo-1:Some(\"cursor-after-cmd\")".to_string()) + ); +} + +#[tokio::test] +async fn outbound_agent_reuses_persisted_result_for_replayed_command_after_restart() { + let tempdir = tempfile::tempdir().expect("state temp dir should exist"); + let state_path = tempdir.path().join("agent-state.json"); + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = 
MockControlCenter::new(); + let replayed = command("cmd-replay", "reload", serde_json::json!({})); + control.push_poll(crate::AgentPollResponse { + commands: vec![replayed.clone()], + next_cursor: None, + }); + + let mut agent = crate::OutboundAgent::new( + settings_with_state_path(&state_path), + core, + Arc::new(control.clone()), + ) + .with_state_path(state_path.clone()) + .expect("state store should attach"); + agent.run_cycle().await.expect("first command should execute"); + assert_eq!(state.status_snapshot().await.reload.attempts_total, 1); + + let restarted_state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let restarted_core = crate::AgentCore::new( + restarted_state.clone(), + Arc::new(TestReloadExecutor::success(restarted_state.clone())), + ); + let restarted_control = MockControlCenter::new(); + restarted_control + .push_poll(crate::AgentPollResponse { commands: vec![replayed], next_cursor: None }); + let mut restarted = crate::OutboundAgent::new( + settings_with_state_path(&state_path), + restarted_core, + Arc::new(restarted_control.clone()), + ) + .with_state_path(state_path) + .expect("state should restore"); + + restarted.run_cycle().await.expect("replayed command should use cached result"); + + assert_eq!(restarted_state.status_snapshot().await.reload.attempts_total, 0); + assert_eq!(restarted_control.results().len(), 1); + assert_eq!(restarted_control.results()[0].status, crate::AgentCommandStatus::Succeeded); +} + +#[tokio::test] +async fn outbound_agent_reports_recovered_in_flight_command_after_restart() { + let tempdir = tempfile::tempdir().expect("state temp dir should exist"); + let state_path = tempdir.path().join("agent-state.json"); + let interrupted = command("cmd-interrupted", "reload", serde_json::json!({})); + let store = crate::AgentStateStore::new(&state_path); + let mut persisted = crate::AgentPersistentState::default_for_node("edge-sfo-1"); + persisted.command_cursor = 
Some("cursor-before".to_string()); + persisted.in_flight_command = Some(crate::AgentInFlightCommand { + command: interrupted, + state: crate::AgentCommandExecutionState::Executing, + received_at_unix_ms: 1_760_000_000_000, + updated_at_unix_ms: 1_760_000_000_100, + }); + store.save(&persisted).expect("state should persist"); + + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + control.push_poll(crate::AgentPollResponse::empty()); + let mut agent = crate::OutboundAgent::new( + settings_with_state_path(&state_path), + core, + Arc::new(control.clone()), + ) + .with_state_path(state_path) + .expect("state should restore"); + + agent.run_cycle().await.expect("recovered in-flight result should post"); + + assert_eq!(state.status_snapshot().await.reload.attempts_total, 0); + let results = control.results(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].command_id, "cmd-interrupted"); + assert_eq!(results[0].status, crate::AgentCommandStatus::Failed); + assert!(results[0].error.as_deref().unwrap_or_default().contains("in flight")); + assert!(control.calls().contains(&"poll:edge-sfo-1:Some(\"cursor-before\")".to_string())); +} + +#[tokio::test] +async fn outbound_agent_keeps_business_state_available_across_network_5xx_and_timeout_errors() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + control.fail_next_register("network unreachable"); + control.fail_next_poll("control center returned HTTP 500"); + control.fail_next_result("outbound agent request timed out"); + let timeout_command = command("cmd-timeout", "reload", serde_json::json!({})); + 
control.push_poll(crate::AgentPollResponse { + commands: vec![timeout_command.clone()], + next_cursor: None, + }); + control + .push_poll(crate::AgentPollResponse { commands: vec![timeout_command], next_cursor: None }); + + let mut agent = crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())); + + assert!(agent.run_cycle().await.unwrap_err().to_string().contains("network")); + assert!(agent.run_cycle().await.unwrap_err().to_string().contains("HTTP 500")); + assert!(agent.run_cycle().await.unwrap_err().to_string().contains("timed out")); + agent.run_cycle().await.expect("agent should recover after transient errors"); + + assert_eq!(state.status_snapshot().await.reload.attempts_total, 1); +} diff --git a/crates/rginx-agent/src/tests/outbound_runtime.rs b/crates/rginx-agent/src/tests/outbound_runtime.rs new file mode 100644 index 00000000..ec1b12c7 --- /dev/null +++ b/crates/rginx-agent/src/tests/outbound_runtime.rs @@ -0,0 +1,65 @@ +use super::outbound::{MockControlCenter, settings}; +use super::*; + +#[tokio::test] +async fn outbound_agent_publishes_runtime_status_to_shared_state() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let settings = settings(); + state.set_agent_configured(&settings); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + control.push_poll(crate::AgentPollResponse::empty()); + + let mut agent = crate::OutboundAgent::new(settings, core, Arc::new(control)); + agent.run_cycle().await.expect("outbound cycle should succeed"); + + let status = state.status_snapshot().await; + assert!(status.agent.configured); + assert!(status.agent.enabled); + assert_eq!(status.agent.connection_state, "connected"); + assert!(status.agent.last_register_success_unix_ms.is_some()); + assert!(status.agent.last_heartbeat_success_unix_ms.is_some()); +} + +#[tokio::test] +async fn 
outbound_agent_run_waits_while_locally_disabled() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + control.push_poll(crate::AgentPollResponse::empty()); + let (shutdown_tx, shutdown_rx) = watch::channel(false); + let (disable_tx, disable_rx) = watch::channel(true); + + let task = tokio::spawn( + crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())) + .with_local_disable(disable_rx) + .run(shutdown_rx), + ); + + tokio::time::sleep(Duration::from_millis(50)).await; + assert!(control.calls().is_empty()); + + disable_tx.send(false).expect("local disable should be cleared"); + wait_for_control_call(&control, "register:edge-sfo-1").await; + + shutdown_tx.send(true).expect("shutdown should be sent"); + tokio::time::timeout(Duration::from_secs(1), task) + .await + .expect("outbound task should stop") + .expect("outbound task should join") + .expect("outbound task should succeed"); +} + +async fn wait_for_control_call(control: &MockControlCenter, expected: &str) { + for _ in 0..20 { + if control.calls().iter().any(|call| call == expected) { + return; + } + tokio::time::sleep(Duration::from_millis(10)).await; + } + panic!("missing control call `{expected}`; calls: {:?}", control.calls()); +} diff --git a/crates/rginx-agent/src/tests/outbound_stream.rs b/crates/rginx-agent/src/tests/outbound_stream.rs new file mode 100644 index 00000000..863f4b15 --- /dev/null +++ b/crates/rginx-agent/src/tests/outbound_stream.rs @@ -0,0 +1,147 @@ +use std::collections::VecDeque; +use std::future::Future; +use std::pin::Pin; + +use super::outbound::{MockControlCenter, command, settings}; +use super::*; + +#[tokio::test] +async fn outbound_agent_uses_stream_commands_without_polling_when_stream_is_online() { + let state = + 
rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + let stream = MockStream::new(); + stream.push_batch(crate::AgentStreamCommandBatch { + commands: vec![command("cmd-stream", "reload", serde_json::json!({}))], + next_cursor: Some("cursor-stream-1".to_string()), + }); + + let mut agent = crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())) + .with_stream_client(Arc::new(stream.clone())); + let outcome = agent.run_cycle().await.expect("stream command should process"); + + assert_eq!(outcome.commands_received, 1); + assert_eq!(outcome.results_posted, 1); + assert_eq!(state.status_snapshot().await.reload.attempts_total, 1); + assert!(!control.calls().iter().any(|call| call.starts_with("poll:"))); + assert_eq!(stream.results()[0].command_id, "cmd-stream"); +} + +#[tokio::test] +async fn outbound_agent_falls_back_to_long_polling_when_stream_disconnects() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + control.push_poll(crate::AgentPollResponse { + commands: vec![command("cmd-poll-fallback", "reload", serde_json::json!({}))], + next_cursor: None, + }); + let stream = MockStream::new(); + stream.fail_next("stream closed"); + + let mut agent = crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())) + .with_stream_client(Arc::new(stream)); + let outcome = agent.run_cycle().await.expect("long polling fallback should process"); + + assert_eq!(outcome.commands_received, 1); + assert_eq!(state.status_snapshot().await.reload.attempts_total, 1); + assert!(control.calls().contains(&"poll:edge-sfo-1:None".to_string())); + assert_eq!(control.results()[0].command_id, 
"cmd-poll-fallback"); +} + +#[tokio::test] +async fn outbound_agent_fallback_polling_continues_from_stream_cursor() { + let state = + rginx_http::SharedState::from_config(snapshot()).expect("shared state should build"); + let core = + crate::AgentCore::new(state.clone(), Arc::new(TestReloadExecutor::success(state.clone()))); + let control = MockControlCenter::new(); + control.push_poll(crate::AgentPollResponse::empty()); + let stream = MockStream::new(); + stream.push_batch(crate::AgentStreamCommandBatch { + commands: vec![command("cmd-stream-cursor", "reload", serde_json::json!({}))], + next_cursor: Some("cursor-from-stream".to_string()), + }); + stream.fail_next("stream reconnect failed"); + + let mut agent = crate::OutboundAgent::new(settings(), core, Arc::new(control.clone())) + .with_stream_client(Arc::new(stream.clone())); + agent.run_cycle().await.expect("first stream command should process"); + agent.run_cycle().await.expect("fallback poll should use stream cursor"); + + assert_eq!(state.status_snapshot().await.reload.attempts_total, 1); + assert!(control.calls().contains(&"poll:edge-sfo-1:Some(\"cursor-from-stream\")".to_string())); + assert_eq!(stream.hellos()[1].cursor.as_deref(), Some("cursor-from-stream")); +} + +#[derive(Clone, Default)] +struct MockStream { + state: Arc>, +} + +#[derive(Default)] +struct MockStreamState { + hellos: Vec, + batches: VecDeque>, + results: Vec, +} + +impl MockStream { + fn new() -> Self { + Self::default() + } + + fn push_batch(&self, batch: crate::AgentStreamCommandBatch) { + self.state.lock().unwrap().batches.push_back(Ok(batch)); + } + + fn fail_next(&self, error: &str) { + self.state.lock().unwrap().batches.push_back(Err(error.to_string())); + } + + fn hellos(&self) -> Vec { + self.state.lock().unwrap().hellos.clone() + } + + fn results(&self) -> Vec { + self.state.lock().unwrap().results.clone() + } +} + +impl crate::OutboundStreamClient for MockStream { + fn receive_commands( + &self, + hello: 
crate::AgentStreamHello, + _timeout: std::time::Duration, + ) -> Pin> + Send + 'static>> + { + let state = self.state.clone(); + Box::pin(async move { + let mut state = state.lock().unwrap(); + state.hellos.push(hello); + match state + .batches + .pop_front() + .unwrap_or_else(|| Ok(crate::AgentStreamCommandBatch::empty())) + { + Ok(batch) => Ok(batch), + Err(error) => Err(crate::Error::Server(error)), + } + }) + } + + fn post_result( + &self, + result: crate::AgentCommandResult, + ) -> Pin> + Send + 'static>> { + let state = self.state.clone(); + Box::pin(async move { + state.lock().unwrap().results.push(result); + Ok(()) + }) + } +} diff --git a/crates/rginx-agent/src/tests/support.rs b/crates/rginx-agent/src/tests/support.rs index bdd6b42d..8f0be0ca 100644 --- a/crates/rginx-agent/src/tests/support.rs +++ b/crates/rginx-agent/src/tests/support.rs @@ -10,6 +10,7 @@ const CONTROL_PLANE_RETRY_DELAY: Duration = Duration::from_millis(50); pub(super) fn snapshot() -> ConfigSnapshot { ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), diff --git a/crates/rginx-agent/src/tls.rs b/crates/rginx-agent/src/tls.rs index 4ac70f32..af09c22d 100644 --- a/crates/rginx-agent/src/tls.rs +++ b/crates/rginx-agent/src/tls.rs @@ -160,19 +160,4 @@ fn parse_certificate(cert_der: &CertificateDer) -> Option { } #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_client_cert_identity() { - let identity = ClientCertIdentity { - common_name: "test-client".to_string(), - organization: Some("Test Org".to_string()), - organizational_unit: Some("Engineering".to_string()), - serial_number: "123456".to_string(), - }; - - assert_eq!(identity.common_name, "test-client"); - assert_eq!(identity.organization, Some("Test Org".to_string())); - } -} +mod tests; diff --git a/crates/rginx-agent/src/tls/tests.rs b/crates/rginx-agent/src/tls/tests.rs new file mode 100644 index 00000000..0e11e909 --- /dev/null +++ 
b/crates/rginx-agent/src/tls/tests.rs @@ -0,0 +1,14 @@ +use super::*; + +#[test] +fn test_client_cert_identity() { + let identity = ClientCertIdentity { + common_name: "test-client".to_string(), + organization: Some("Test Org".to_string()), + organizational_unit: Some("Engineering".to_string()), + serial_number: "123456".to_string(), + }; + + assert_eq!(identity.common_name, "test-client"); + assert_eq!(identity.organization, Some("Test Org".to_string())); +} diff --git a/crates/rginx-app/src/admin_cli/agent.rs b/crates/rginx-app/src/admin_cli/agent.rs new file mode 100644 index 00000000..3157cf38 --- /dev/null +++ b/crates/rginx-app/src/admin_cli/agent.rs @@ -0,0 +1,72 @@ +use std::collections::BTreeMap; + +use super::render::{ + print_fields, print_section, render_bool, render_enabled, render_optional_path, + render_optional_value, +}; +use super::socket::{query_admin_socket, unexpected_admin_response}; +use super::*; + +pub(super) fn print_admin_agent(config_path: &Path, args: &AgentArgs) -> anyhow::Result<()> { + match args.command { + AgentAdminCommand::Status => print_agent_status(config_path), + AgentAdminCommand::Disable => set_agent_disabled(config_path, true), + AgentAdminCommand::Enable => set_agent_disabled(config_path, false), + } +} + +fn print_agent_status(config_path: &Path) -> anyhow::Result<()> { + match query_admin_socket(config_path, AdminRequest::GetAgentStatus)? { + AdminResponse::AgentStatus(status) => { + print_agent_snapshot(&status); + Ok(()) + } + response => Err(unexpected_admin_response("agent status", &response)), + } +} + +fn set_agent_disabled(config_path: &Path, disabled: bool) -> anyhow::Result<()> { + match query_admin_socket(config_path, AdminRequest::SetAgentDisabled { disabled })? 
{ + AdminResponse::AgentStatus(status) => { + print_agent_snapshot(&status); + Ok(()) + } + response => Err(unexpected_admin_response("agent enable/disable", &response)), + } +} + +fn print_agent_snapshot(status: &rginx_http::AgentRuntimeSnapshot) { + print_section("Agent"); + print_fields( + 1, + [ + ("configured", render_bool(status.configured).to_string()), + ("enabled", render_enabled(status.enabled).to_string()), + ("locally_disabled", render_bool(status.locally_disabled).to_string()), + ("endpoint", render_optional_value(status.endpoint.as_deref())), + ("node_id", render_optional_value(status.node_id.as_deref())), + ("state_path", render_optional_path(status.state_path.as_deref())), + ("region", render_optional_value(status.region.as_deref())), + ("pop", render_optional_value(status.pop.as_deref())), + ("labels", render_labels(&status.labels)), + ("connection_state", status.connection_state.clone()), + ("command_cursor", render_optional_value(status.command_cursor.as_deref())), + ("in_flight_command_id", render_optional_value(status.in_flight_command_id.as_deref())), + ( + "last_register_success_unix_ms", + render_optional_value(status.last_register_success_unix_ms), + ), + ( + "last_heartbeat_success_unix_ms", + render_optional_value(status.last_heartbeat_success_unix_ms), + ), + ], + ); +} + +fn render_labels(labels: &BTreeMap) -> String { + if labels.is_empty() { + return "-".to_string(); + } + labels.iter().map(|(key, value)| format!("{key}={value}")).collect::>().join(", ") +} diff --git a/crates/rginx-app/src/admin_cli/mod.rs b/crates/rginx-app/src/admin_cli/mod.rs index 729b1201..abf54359 100644 --- a/crates/rginx-app/src/admin_cli/mod.rs +++ b/crates/rginx-app/src/admin_cli/mod.rs @@ -9,10 +9,11 @@ pub(super) use rginx_runtime::admin::{ use crate::cli::Command; pub(super) use crate::cli::{ - CacheZoneArgs, DeltaArgs, DesiredRevisionArgs, InvalidateCacheArgs, PurgeCacheArgs, - SnapshotArgs, SnapshotModuleArg, WaitArgs, WindowArgs, + AgentAdminCommand, 
AgentArgs, CacheZoneArgs, DeltaArgs, DesiredRevisionArgs, + InvalidateCacheArgs, PurgeCacheArgs, SnapshotArgs, SnapshotModuleArg, WaitArgs, WindowArgs, }; +mod agent; mod cache; mod counters; mod peers; @@ -30,6 +31,10 @@ pub(crate) fn run_admin_command(config_path: &Path, command: &Command) -> anyhow snapshot::print_admin_snapshot(config_path, args)?; Ok(true) } + Command::Agent(args) => { + agent::print_admin_agent(config_path, args)?; + Ok(true) + } Command::SnapshotVersion => { snapshot::print_admin_snapshot_version(config_path)?; Ok(true) diff --git a/crates/rginx-app/src/admin_cli/socket.rs b/crates/rginx-app/src/admin_cli/socket.rs index 40910cc3..e74ced1d 100644 --- a/crates/rginx-app/src/admin_cli/socket.rs +++ b/crates/rginx-app/src/admin_cli/socket.rs @@ -41,6 +41,7 @@ fn admin_response_kind(response: &AdminResponse) -> &'static str { AdminResponse::TrafficStats(_) => "traffic_stats", AdminResponse::PeerHealth(_) => "peer_health", AdminResponse::UpstreamStats(_) => "upstream_stats", + AdminResponse::AgentStatus(_) => "agent_status", AdminResponse::CachePurge(_) => "cache_purge", AdminResponse::CacheInvalidation(_) => "cache_invalidation", AdminResponse::Revision(RevisionSnapshot { .. 
}) => "revision", diff --git a/crates/rginx-app/src/admin_cli/status/runtime.rs b/crates/rginx-app/src/admin_cli/status/runtime.rs index e80b7658..406aa937 100644 --- a/crates/rginx-app/src/admin_cli/status/runtime.rs +++ b/crates/rginx-app/src/admin_cli/status/runtime.rs @@ -129,6 +129,10 @@ pub(super) fn print_status_summary(status: &rginx_http::RuntimeStatusSnapshot) { ), ("acme", render_enabled(status.acme.enabled).to_string()), ("acme_managed_certificates", status.acme.managed_certificates.len().to_string()), + ("agent", render_enabled(status.agent.enabled).to_string()), + ("agent_configured", render_bool(status.agent.configured).to_string()), + ("agent_locally_disabled", render_bool(status.agent.locally_disabled).to_string()), + ("agent_connection_state", status.agent.connection_state.clone()), ( "acme_last_errors", status diff --git a/crates/rginx-app/src/check.rs b/crates/rginx-app/src/check.rs index c8d7dad3..27f868f0 100644 --- a/crates/rginx-app/src/check.rs +++ b/crates/rginx-app/src/check.rs @@ -1,4 +1,5 @@ mod acme; +mod control; mod render; mod routes; mod summary; diff --git a/crates/rginx-app/src/check/control.rs b/crates/rginx-app/src/check/control.rs new file mode 100644 index 00000000..082ee185 --- /dev/null +++ b/crates/rginx-app/src/check/control.rs @@ -0,0 +1,36 @@ +use std::net::SocketAddr; +use std::path::PathBuf; + +pub(super) struct ControlPlaneCheckDetails { + pub(super) mode: &'static str, + pub(super) agent_endpoint: Option, + pub(super) agent_node_id: Option, + pub(super) agent_state_path: Option, + pub(super) legacy_listen: Option, + pub(super) opens_extra_node_port: bool, + pub(super) migration_hint: Option<&'static str>, +} + +pub(super) fn control_plane_check_details( + config: &rginx_config::ConfigSnapshot, +) -> ControlPlaneCheckDetails { + let agent = config.agent.as_ref(); + let legacy = config.control_plane.as_ref(); + + ControlPlaneCheckDetails { + mode: match (agent.is_some(), legacy.is_some()) { + (true, false) => 
"outbound agent", + (true, true) => "outbound agent + legacy node server", + (false, true) => "legacy node server", + (false, false) => "local admin only", + }, + agent_endpoint: agent.map(|agent| agent.endpoint.to_string()), + agent_node_id: agent.map(|agent| agent.node_id.clone()), + agent_state_path: agent.map(|agent| agent.state_path.clone()), + legacy_listen: legacy.map(|control_plane| control_plane.listen), + opens_extra_node_port: legacy.is_some(), + migration_hint: legacy.map(|_| { + "configure agent.endpoint/node_id/token_path, then remove control_plane.listen/tls/api_keys_path" + }), + } +} diff --git a/crates/rginx-app/src/check/render.rs b/crates/rginx-app/src/check/render.rs index ec06def3..0375a358 100644 --- a/crates/rginx-app/src/check/render.rs +++ b/crates/rginx-app/src/check/render.rs @@ -1,4 +1,5 @@ mod acme; +mod control; mod listeners; mod tls; @@ -8,6 +9,7 @@ use std::sync::atomic::{AtomicBool, Ordering}; use super::summary::CheckSummary; use acme::print_acme_details; +use control::print_control_plane_details; static PRINTED_SECTION: AtomicBool = AtomicBool::new(false); @@ -29,6 +31,7 @@ pub(crate) fn print_check_success( CheckOutputMode::Detailed => { print_configuration_summary_detailed(config_path, &summary); listeners::print_listener_details(&summary); + print_control_plane_details(&summary); print_route_transport_details(&summary); print_cache_zone_details(&summary); print_acme_details(&summary); diff --git a/crates/rginx-app/src/check/render/control.rs b/crates/rginx-app/src/check/render/control.rs new file mode 100644 index 00000000..69b8b085 --- /dev/null +++ b/crates/rginx-app/src/check/render/control.rs @@ -0,0 +1,29 @@ +use super::super::summary::CheckSummary; +use super::{print_label_value, print_section, render_bool, render_optional}; + +pub(super) fn print_control_plane_details(summary: &CheckSummary) { + print_section("Control plane"); + print_label_value(1, "Mode", summary.control_plane.mode); + print_label_value( + 1, + 
"Opens extra node port", + render_bool(summary.control_plane.opens_extra_node_port), + ); + print_label_value( + 1, + "Agent endpoint", + render_optional(summary.control_plane.agent_endpoint.as_deref()), + ); + print_label_value( + 1, + "Agent node ID", + render_optional(summary.control_plane.agent_node_id.as_deref()), + ); + print_label_value( + 1, + "Agent state path", + render_optional(summary.control_plane.agent_state_path.as_ref().map(|path| path.display())), + ); + print_label_value(1, "Legacy listen", render_optional(summary.control_plane.legacy_listen)); + print_label_value(1, "Migration hint", render_optional(summary.control_plane.migration_hint)); +} diff --git a/crates/rginx-app/src/check/summary.rs b/crates/rginx-app/src/check/summary.rs index a1740cac..d3ab1e4e 100644 --- a/crates/rginx-app/src/check/summary.rs +++ b/crates/rginx-app/src/check/summary.rs @@ -1,6 +1,7 @@ use std::path::PathBuf; use super::acme::{AcmeCheckDetails, acme_check_details}; +use super::control::{ControlPlaneCheckDetails, control_plane_check_details}; use super::routes::{RouteTransportCheckDetails, route_transport_check_details}; use super::tls::{TlsCheckDetails, tls_check_details}; @@ -21,6 +22,7 @@ pub(crate) struct CheckSummary { pub(super) worker_threads: Option, pub(super) accept_workers: usize, pub(super) route_transport: RouteTransportCheckDetails, + pub(super) control_plane: ControlPlaneCheckDetails, pub(super) acme: AcmeCheckDetails, pub(super) tls: TlsCheckDetails, } @@ -100,6 +102,7 @@ pub(crate) fn build_check_summary(config: &rginx_config::ConfigSnapshot) -> Chec worker_threads: config.runtime.worker_threads, accept_workers: config.runtime.accept_workers, route_transport, + control_plane: control_plane_check_details(config), acme, tls, } diff --git a/crates/rginx-app/src/cli.rs b/crates/rginx-app/src/cli.rs index 525d9ad5..ea7f6310 100644 --- a/crates/rginx-app/src/cli.rs +++ b/crates/rginx-app/src/cli.rs @@ -26,6 +26,7 @@ pub struct Cli { #[derive(Debug, Clone, 
Subcommand)] pub enum Command { Acme(AcmeArgs), + Agent(AgentArgs), Check, Snapshot(SnapshotArgs), SnapshotVersion, @@ -49,11 +50,24 @@ pub struct AcmeArgs { pub command: AcmeCommand, } +#[derive(Debug, Clone, Args)] +pub struct AgentArgs { + #[command(subcommand)] + pub command: AgentAdminCommand, +} + #[derive(Debug, Clone, Subcommand)] pub enum AcmeCommand { Issue(AcmeIssueArgs), } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Subcommand)] +pub enum AgentAdminCommand { + Status, + Disable, + Enable, +} + #[derive(Debug, Clone, Args)] #[command(group = ArgGroup::new("mode").required(true).args(["once"]))] pub struct AcmeIssueArgs { diff --git a/crates/rginx-app/src/cli/tests.rs b/crates/rginx-app/src/cli/tests.rs index 6dffc725..5150c3e9 100644 --- a/crates/rginx-app/src/cli/tests.rs +++ b/crates/rginx-app/src/cli/tests.rs @@ -3,9 +3,10 @@ use std::path::{Path, PathBuf}; use clap::Parser; use super::{ - AcmeArgs, AcmeCommand, AcmeIssueArgs, CacheZoneArgs, Cli, Command, DeltaArgs, - DesiredRevisionArgs, InvalidateCacheArgs, PurgeCacheArgs, SignalCommand, SnapshotArgs, - SnapshotModuleArg, WaitArgs, WindowArgs, installed_config_path, pid_path_for_config, + AcmeArgs, AcmeCommand, AcmeIssueArgs, AgentAdminCommand, AgentArgs, CacheZoneArgs, Cli, + Command, DeltaArgs, DesiredRevisionArgs, InvalidateCacheArgs, PurgeCacheArgs, SignalCommand, + SnapshotArgs, SnapshotModuleArg, WaitArgs, WindowArgs, installed_config_path, + pid_path_for_config, }; #[test] @@ -74,6 +75,31 @@ fn cli_accepts_status_subcommand() { assert!(matches!(cli.command, Some(Command::Status))); } +#[test] +fn cli_accepts_agent_status_subcommand() { + let cli = Cli::try_parse_from(["rginx", "agent", "status"]).expect("cli should parse"); + + assert!(matches!( + cli.command, + Some(Command::Agent(AgentArgs { command: AgentAdminCommand::Status })) + )); +} + +#[test] +fn cli_accepts_agent_enable_disable_subcommands() { + let disable = Cli::try_parse_from(["rginx", "agent", "disable"]).expect("cli should 
parse"); + let enable = Cli::try_parse_from(["rginx", "agent", "enable"]).expect("cli should parse"); + + assert!(matches!( + disable.command, + Some(Command::Agent(AgentArgs { command: AgentAdminCommand::Disable })) + )); + assert!(matches!( + enable.command, + Some(Command::Agent(AgentArgs { command: AgentAdminCommand::Enable })) + )); +} + #[test] fn cli_accepts_one_shot_acme_issue_subcommand() { let cli = Cli::try_parse_from(["rginx", "acme", "issue", "--once"]).expect("cli should parse"); diff --git a/crates/rginx-app/src/main.rs b/crates/rginx-app/src/main.rs index 55d291f5..e414df85 100644 --- a/crates/rginx-app/src/main.rs +++ b/crates/rginx-app/src/main.rs @@ -73,6 +73,7 @@ fn main() -> anyhow::Result<()> { Some(Command::Check) => unreachable!("`check` subcommand and `-t` conflict at clap level"), Some( Command::Status + | Command::Agent(_) | Command::Cache | Command::PurgeCache(_) | Command::InvalidateCache(_) diff --git a/crates/rginx-app/tests/admin/commands.rs b/crates/rginx-app/tests/admin/commands.rs index ab1879db..2fd85ee9 100644 --- a/crates/rginx-app/tests/admin/commands.rs +++ b/crates/rginx-app/tests/admin/commands.rs @@ -1,3 +1,5 @@ +#[path = "commands/agent.rs"] +mod agent; #[path = "commands/cache.rs"] mod cache; #[path = "commands/lifecycle.rs"] diff --git a/crates/rginx-app/tests/admin/commands/agent.rs b/crates/rginx-app/tests/admin/commands/agent.rs new file mode 100644 index 00000000..8bc99293 --- /dev/null +++ b/crates/rginx-app/tests/admin/commands/agent.rs @@ -0,0 +1,51 @@ +use super::super::*; + +#[test] +fn agent_command_reads_local_status_and_toggles_disable_gate() { + let listen_addr = reserve_loopback_addr(); + let mut server = ServerHarness::spawn("rginx-admin-agent", |_| return_config(listen_addr)); + server.wait_for_http_ready(listen_addr, Duration::from_secs(5)); + let socket_path = admin_socket_path_for_config(server.config_path()); + wait_for_admin_socket(&socket_path, Duration::from_secs(5)); + + let output = 
run_rginx(["--config", server.config_path().to_str().unwrap(), "agent", "status"]); + assert!( + output.status.success(), + "agent status command should succeed: {}", + render_output(&output) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(has_exact_line(&stdout, "Agent")); + assert!(has_exact_line(&stdout, "configured: no")); + assert!(has_exact_line(&stdout, "enabled: disabled")); + assert!(has_exact_line(&stdout, "locally_disabled: no")); + assert!(has_exact_line(&stdout, "connection_state: not_configured")); + + let output = + run_rginx(["--config", server.config_path().to_str().unwrap(), "agent", "disable"]); + assert!( + output.status.success(), + "agent disable command should succeed: {}", + render_output(&output) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(has_exact_line(&stdout, "locally_disabled: yes")); + + let response = query_admin_socket(&socket_path, AdminRequest::GetAgentStatus) + .expect("admin socket should return agent status"); + let AdminResponse::AgentStatus(status) = response else { + panic!("admin socket should return agent status"); + }; + assert!(status.locally_disabled); + + let output = run_rginx(["--config", server.config_path().to_str().unwrap(), "agent", "enable"]); + assert!( + output.status.success(), + "agent enable command should succeed: {}", + render_output(&output) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(has_exact_line(&stdout, "locally_disabled: no")); + + server.shutdown_and_wait(Duration::from_secs(5)); +} diff --git a/crates/rginx-app/tests/admin/snapshot/core.rs b/crates/rginx-app/tests/admin/snapshot/core.rs index 172f5897..a36bf6ea 100644 --- a/crates/rginx-app/tests/admin/snapshot/core.rs +++ b/crates/rginx-app/tests/admin/snapshot/core.rs @@ -84,7 +84,7 @@ fn snapshot_command_returns_aggregate_json_snapshot() { let AdminResponse::Snapshot(snapshot) = response else { panic!("admin socket should return aggregate snapshot"); }; - 
assert_eq!(snapshot.schema_version, 15); + assert_eq!(snapshot.schema_version, 16); assert!(snapshot.captured_at_unix_ms > 0); assert!(snapshot.pid > 0); assert_eq!(snapshot.binary_version, env!("CARGO_PKG_VERSION")); @@ -116,7 +116,7 @@ fn snapshot_command_returns_aggregate_json_snapshot() { let stdout = String::from_utf8_lossy(&output.stdout); let snapshot: serde_json::Value = serde_json::from_str(&stdout).expect("snapshot command should print valid JSON"); - assert_eq!(snapshot["schema_version"], serde_json::Value::from(15)); + assert_eq!(snapshot["schema_version"], serde_json::Value::from(16)); assert!(snapshot["captured_at_unix_ms"].as_u64().unwrap_or(0) > 0); assert!(snapshot["pid"].as_u64().unwrap_or(0) > 0); assert_eq!(snapshot["binary_version"], serde_json::Value::from(env!("CARGO_PKG_VERSION"))); diff --git a/crates/rginx-app/tests/check/summary.rs b/crates/rginx-app/tests/check/summary.rs index 4c1a7f6b..88d4a7f4 100644 --- a/crates/rginx-app/tests/check/summary.rs +++ b/crates/rginx-app/tests/check/summary.rs @@ -86,6 +86,41 @@ fn check_reports_route_transport_policy_summary() { let _ = fs::remove_dir_all(temp_dir); } +#[test] +fn check_reports_control_plane_migration_hint() { + let temp_dir = temp_dir("rginx-check-control-plane"); + fs::create_dir_all(&temp_dir).expect("temp test dir should be created"); + let config_path = temp_dir.join("control-plane.ron"); + let listen_addr: SocketAddr = "127.0.0.1:18084".parse().unwrap(); + fs::write(temp_dir.join("agent.token"), "secret").expect("agent token should be written"); + fs::write(temp_dir.join("control.crt"), "test cert").expect("cert should be written"); + fs::write(temp_dir.join("control.key"), "test key").expect("key should be written"); + fs::write(temp_dir.join("keys.json"), "{\"keys\":[]}").expect("api keys should be written"); + + fs::write( + &config_path, + format!( + "Config(\n runtime: RuntimeConfig(\n shutdown_timeout_secs: 2,\n ),\n agent: Some(AgentConfig(\n enabled: Some(true),\n 
endpoint: Some(\"https://control.example.com/rginx\"),\n node_id: Some(\"edge-sfo-1\"),\n token_path: Some(\"agent.token\"),\n state_path: Some(\"agent-state.json\"),\n )),\n control_plane: Some(ControlPlaneConfig(\n enabled: Some(true),\n listen: Some(\"127.0.0.1:19443\"),\n tls: Some(ControlPlaneTlsConfig(\n cert_path: \"control.crt\",\n key_path: \"control.key\",\n )),\n api_keys_path: Some(\"keys.json\"),\n )),\n server: ServerConfig(\n listen: {:?},\n ),\n upstreams: [],\n locations: [\n LocationConfig(\n matcher: Exact(\"/\"),\n handler: Return(\n status: 200,\n location: \"\",\n body: Some(\"checked\\n\"),\n ),\n ),\n ],\n)\n", + listen_addr.to_string() + ), + ) + .expect("control-plane config should be written"); + + let output = run_rginx(["check", "--config", config_path.to_str().unwrap()]); + + assert!(output.status.success(), "check should succeed: {}", render_output(&output)); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(stdout.contains("Control plane")); + assert!(stdout.contains("Mode: outbound agent + legacy node server")); + assert!(stdout.contains("Opens extra node port: yes")); + assert!(stdout.contains("Agent endpoint: https://control.example.com/rginx")); + assert!(stdout.contains("Agent node ID: edge-sfo-1")); + assert!(stdout.contains("Legacy listen: 127.0.0.1:19443")); + assert!(stdout.contains("Migration hint: configure agent.endpoint/node_id/token_path")); + + let _ = fs::remove_dir_all(temp_dir); +} + #[test] fn check_reports_explicit_listener_summary_and_reload_boundary() { let temp_dir = temp_dir("rginx-check-listeners-test"); diff --git a/crates/rginx-config/src/compile/agent.rs b/crates/rginx-config/src/compile/agent.rs new file mode 100644 index 00000000..271d1d23 --- /dev/null +++ b/crates/rginx-config/src/compile/agent.rs @@ -0,0 +1,239 @@ +use std::path::Path; +use std::time::Duration; + +use rginx_core::{AgentAuthSettings, AgentSettings, Error, Result}; + +use crate::compile::path::resolve_path; +use 
crate::model::AgentConfig; + +const DEFAULT_HEARTBEAT_INTERVAL_SECS: u64 = 30; +const DEFAULT_CONNECT_TIMEOUT_SECS: u64 = 10; +const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 30; +const DEFAULT_POLL_TIMEOUT_SECS: u64 = 30; +const DEFAULT_BACKOFF_INITIAL_MS: u64 = 500; +const DEFAULT_BACKOFF_MAX_SECS: u64 = 60; +const DEFAULT_AGENT_STATE_PATH: &str = "/var/lib/rginx/agent/state.json"; + +const MIN_TIMEOUT_SECS: u64 = 1; +const MAX_TIMEOUT_SECS: u64 = 300; +const MIN_HEARTBEAT_SECS: u64 = 1; +const MAX_HEARTBEAT_SECS: u64 = 3600; +const MIN_BACKOFF_INITIAL_MS: u64 = 100; +const MAX_BACKOFF_INITIAL_MS: u64 = 60_000; +const MIN_BACKOFF_MAX_SECS: u64 = 1; +const MAX_BACKOFF_MAX_SECS: u64 = 3600; + +pub(super) fn compile_agent_settings( + agent: Option, + base_dir: &Path, +) -> Result> { + let Some(agent) = agent else { + return Ok(None); + }; + + let enabled = agent.enabled.unwrap_or(false); + if !enabled { + return Ok(None); + } + + let endpoint = parse_https_endpoint(&required_string("agent.endpoint", agent.endpoint)?)?; + let node_id = required_string("agent.node_id", agent.node_id)?; + let token_path = resolve_path(base_dir, required_string("agent.token_path", agent.token_path)?); + let state_path = optional_path( + "agent.state_path", + agent.state_path.as_deref(), + base_dir, + DEFAULT_AGENT_STATE_PATH, + )?; + ensure_regular_file(&token_path, "agent.token_path", "agent token file")?; + + let heartbeat_interval = duration_secs( + "agent.heartbeat_interval_secs", + agent.heartbeat_interval_secs, + DEFAULT_HEARTBEAT_INTERVAL_SECS, + MIN_HEARTBEAT_SECS, + MAX_HEARTBEAT_SECS, + )?; + let connect_timeout = duration_secs( + "agent.connect_timeout_secs", + agent.connect_timeout_secs, + DEFAULT_CONNECT_TIMEOUT_SECS, + MIN_TIMEOUT_SECS, + MAX_TIMEOUT_SECS, + )?; + let request_timeout = duration_secs( + "agent.request_timeout_secs", + agent.request_timeout_secs, + DEFAULT_REQUEST_TIMEOUT_SECS, + MIN_TIMEOUT_SECS, + MAX_TIMEOUT_SECS, + )?; + let poll_timeout = duration_secs( + 
"agent.poll_timeout_secs", + agent.poll_timeout_secs, + DEFAULT_POLL_TIMEOUT_SECS, + MIN_TIMEOUT_SECS, + MAX_TIMEOUT_SECS, + )?; + let backoff_initial = duration_millis( + "agent.backoff_initial_ms", + agent.backoff_initial_ms, + DEFAULT_BACKOFF_INITIAL_MS, + MIN_BACKOFF_INITIAL_MS, + MAX_BACKOFF_INITIAL_MS, + )?; + let backoff_max = duration_secs( + "agent.backoff_max_secs", + agent.backoff_max_secs, + DEFAULT_BACKOFF_MAX_SECS, + MIN_BACKOFF_MAX_SECS, + MAX_BACKOFF_MAX_SECS, + )?; + ensure_backoff_order(backoff_initial, backoff_max)?; + + Ok(Some(AgentSettings { + endpoint, + node_id, + auth: AgentAuthSettings { token_path }, + state_path, + region: normalize_optional_string("agent.region", agent.region.as_deref())?, + pop: normalize_optional_string("agent.pop", agent.pop.as_deref())?, + labels: normalize_labels(agent.labels)?, + heartbeat_interval, + connect_timeout, + request_timeout, + poll_timeout, + backoff_initial, + backoff_max, + })) +} + +fn parse_https_endpoint(value: &str) -> Result { + let uri = value + .parse::() + .map_err(|error| Error::Config(format!("agent.endpoint `{value}` is invalid: {error}")))?; + if uri.scheme_str() != Some("https") || uri.authority().is_none() { + return Err(Error::Config("agent.endpoint must be an HTTPS URL".to_string())); + } + Ok(uri) +} + +fn required_string(field: &str, value: Option) -> Result { + let value = value + .ok_or_else(|| Error::Config(format!("{field} is required when agent.enabled=true")))?; + let trimmed = value.trim(); + if trimmed.is_empty() { + return Err(Error::Config(format!("{field} must not be empty"))); + } + Ok(trimmed.to_string()) +} + +fn normalize_optional_string(field: &str, value: Option<&str>) -> Result> { + value + .map(|value| { + let trimmed = value.trim(); + if trimmed.is_empty() { + Err(Error::Config(format!("{field} must not be empty"))) + } else { + Ok(trimmed.to_string()) + } + }) + .transpose() +} + +fn optional_path( + field: &str, + value: Option<&str>, + base_dir: &Path, + 
default: &str, +) -> Result { + let value = value.unwrap_or(default).trim(); + if value.is_empty() { + return Err(Error::Config(format!("{field} must not be empty"))); + } + Ok(resolve_path(base_dir, value.to_string())) +} + +fn normalize_labels( + labels: std::collections::BTreeMap, +) -> Result> { + labels + .into_iter() + .map(|(key, value)| { + let key = key.trim().to_string(); + let value = value.trim().to_string(); + if key.is_empty() { + return Err(Error::Config("agent.labels keys must not be empty".to_string())); + } + if value.is_empty() { + return Err(Error::Config(format!("agent.labels entry `{key}` must not be empty"))); + } + Ok((key, value)) + }) + .collect() +} + +fn duration_secs( + field: &str, + value: Option, + default: u64, + min: u64, + max: u64, +) -> Result { + let value = value.unwrap_or(default); + ensure_range(field, value, min, max)?; + Ok(Duration::from_secs(value)) +} + +fn duration_millis( + field: &str, + value: Option, + default: u64, + min: u64, + max: u64, +) -> Result { + let value = value.unwrap_or(default); + ensure_range(field, value, min, max)?; + Ok(Duration::from_millis(value)) +} + +fn ensure_range(field: &str, value: u64, min: u64, max: u64) -> Result<()> { + if (min..=max).contains(&value) { + Ok(()) + } else { + Err(Error::Config(format!("{field} must be between {min} and {max}"))) + } +} + +fn ensure_backoff_order(backoff_initial: Duration, backoff_max: Duration) -> Result<()> { + if backoff_initial <= backoff_max { + Ok(()) + } else { + Err(Error::Config( + "agent.backoff_initial_ms must not exceed agent.backoff_max_secs".to_string(), + )) + } +} + +fn ensure_regular_file(path: &Path, field: &str, label: &str) -> Result<()> { + let file = std::fs::File::open(path).map_err(|error| { + Error::Config(format!( + "{field} `{}` could not be opened ({label}): {error}", + path.display() + )) + })?; + let metadata = file.metadata().map_err(|error| { + Error::Config(format!( + "{field} `{}` metadata could not be read ({label}): 
{error}", + path.display() + )) + })?; + if metadata.is_file() { + Ok(()) + } else { + Err(Error::Config(format!( + "{field} `{}` does not resolve to a regular file ({label})", + path.display() + ))) + } +} diff --git a/crates/rginx-config/src/compile/control_plane.rs b/crates/rginx-config/src/compile/control_plane.rs index 33ee3a77..e2a37c8d 100644 --- a/crates/rginx-config/src/compile/control_plane.rs +++ b/crates/rginx-config/src/compile/control_plane.rs @@ -7,6 +7,12 @@ use rginx_core::{ControlPlaneSettings, ControlPlaneTlsSettings, Error, Result}; use crate::compile::path::resolve_path; use crate::model::ControlPlaneConfig; +const LEGACY_CONTROL_PLANE_WARNING: &str = concat!( + "control_plane.enabled=true starts the legacy node-side control-plane server and opens an ", + "extra node management port; prefer agent.enabled=true for outbound control without node-side ", + "control ports, domains, or server certificates" +); + pub(super) fn compile_control_plane_settings( control_plane: Option, base_dir: &Path, @@ -20,6 +26,8 @@ pub(super) fn compile_control_plane_settings( return Ok(None); } + tracing::warn!("{LEGACY_CONTROL_PLANE_WARNING}"); + let listen = control_plane .listen .ok_or_else(|| { diff --git a/crates/rginx-config/src/compile/mod.rs b/crates/rginx-config/src/compile/mod.rs index 1f151f58..2edcbf6b 100644 --- a/crates/rginx-config/src/compile/mod.rs +++ b/crates/rginx-config/src/compile/mod.rs @@ -8,6 +8,7 @@ use rginx_core::{ConfigSnapshot, Result, VirtualHost}; use crate::validate::validate; mod acme; +mod agent; mod cache; mod control_plane; mod path; @@ -64,6 +65,7 @@ pub fn compile_with_base_and_options( let Config { runtime, + agent, control_plane, acme: raw_acme, listeners: raw_listeners, @@ -74,6 +76,7 @@ pub fn compile_with_base_and_options( servers: raw_servers, } = raw; let runtime = runtime::compile_runtime_settings(runtime)?; + let agent = agent::compile_agent_settings(agent, base_dir)?; let control_plane = 
control_plane::compile_control_plane_settings(control_plane, base_dir)?; let acme = acme::compile_global_acme(raw_acme, base_dir); let cache_zones = cache::compile_cache_zones(raw_cache_zones, base_dir)?; @@ -146,6 +149,7 @@ pub fn compile_with_base_and_options( let mut snapshot = ConfigSnapshot { runtime, + agent, control_plane, acme, managed_certificates, diff --git a/crates/rginx-config/src/compile/route.rs b/crates/rginx-config/src/compile/route.rs index e6240d97..c70acacf 100644 --- a/crates/rginx-config/src/compile/route.rs +++ b/crates/rginx-config/src/compile/route.rs @@ -6,22 +6,21 @@ use std::time::Duration; use http::StatusCode; use ipnet::IpNet; use rginx_core::{ - Error, ErrorPageTarget, GrpcRouteMatch, ProxyHeaderTemplate, ProxyHeaderValue, - ProxyRedirectMode, ProxyTarget, ProxyUriMode, Result, ReturnAction, Route, RouteAccessControl, - RouteAction, RouteBufferingPolicy, RouteCompressionPolicy, RouteErrorPage, RouteMatcher, - RouteRateLimit, RouteRegexMatcher, RouteRewrite, RouteRewriteStop, TryFileStep, Upstream, + Error, ErrorPageTarget, GrpcRouteMatch, Result, Route, RouteAccessControl, + RouteBufferingPolicy, RouteCompressionPolicy, RouteErrorPage, RouteMatcher, RouteRateLimit, + RouteRegexMatcher, RouteRewrite, RouteRewriteStop, TryFileStep, Upstream, }; use crate::model::{ - ErrorPageTargetConfig, HandlerConfig, LocationConfig, MatcherConfig, - ProxyHeaderDynamicValueConfig, ProxyHeaderValueConfig, ProxyHttpVersionConfig, - ProxyRedirectModeConfig, RewriteStopConfig, RouteBufferingPolicyConfig, - RouteCompressionPolicyConfig, TryFileStepConfig, + ErrorPageTargetConfig, LocationConfig, MatcherConfig, RewriteStopConfig, + RouteBufferingPolicyConfig, RouteCompressionPolicyConfig, TryFileStepConfig, }; +mod action; mod file; -use file::{compile_file_route_action, file_route_prefix_from_config}; +use action::compile_route_action; +use file::file_route_prefix_from_config; pub(super) fn compile_routes( locations: Vec, @@ -154,87 +153,6 @@ fn 
compile_route( }) } -fn compile_route_action( - handler: HandlerConfig, - route_prefix: Option, - upstreams: &HashMap>, - local_upstream_names: &HashMap, - base_dir: &Path, -) -> Result { - match handler { - HandlerConfig::Proxy { - upstream, - preserve_host, - strip_prefix, - proxy_pass_uri, - proxy_http_version, - proxy_redirect, - proxy_set_headers, - } => { - let resolved_upstream = - local_upstream_names.get(&upstream).cloned().unwrap_or_else(|| upstream.clone()); - let compiled = upstreams.get(&resolved_upstream).cloned().ok_or_else(|| { - Error::Config(format!("proxy upstream `{upstream}` is not defined")) - })?; - - let proxy_set_headers = proxy_set_headers - .into_iter() - .map(|(name, value)| { - let header_name = name - .parse::() - .map_err(|e| Error::Config(format!("invalid header name `{name}`: {e}")))?; - let header_value = compile_proxy_header_value(&name, value)?; - Ok((header_name, header_value)) - }) - .collect::>>()?; - - Ok(RouteAction::Proxy(ProxyTarget { - upstream_name: resolved_upstream, - upstream: compiled, - preserve_host: preserve_host.unwrap_or(false), - uri_mode: compile_proxy_uri_mode(strip_prefix, proxy_pass_uri)?, - request_version: match proxy_http_version.unwrap_or(ProxyHttpVersionConfig::Http11) - { - ProxyHttpVersionConfig::Http11 => http::Version::HTTP_11, - ProxyHttpVersionConfig::Http2 => http::Version::HTTP_2, - }, - redirect: match proxy_redirect.unwrap_or(ProxyRedirectModeConfig::Default) { - ProxyRedirectModeConfig::Default => ProxyRedirectMode::Default, - ProxyRedirectModeConfig::Off => ProxyRedirectMode::Off, - }, - proxy_set_headers, - })) - } - file_handler @ HandlerConfig::File { .. 
} => { - compile_file_route_action(file_handler, route_prefix, base_dir) - } - HandlerConfig::Return { status, location, body } => Ok(RouteAction::Return(ReturnAction { - status: StatusCode::from_u16(status)?, - location, - body, - })), - } -} - -fn compile_proxy_uri_mode( - strip_prefix: Option, - proxy_pass_uri: Option, -) -> Result { - let strip_prefix = strip_prefix.and_then(|value| (!value.is_empty()).then_some(value)); - let proxy_pass_uri = proxy_pass_uri.and_then(|value| (!value.is_empty()).then_some(value)); - - match (strip_prefix, proxy_pass_uri) { - (Some(prefix), Some(replacement)) => { - Ok(ProxyUriMode::ReplacePrefix { matched_prefix: prefix, replacement }) - } - (Some(prefix), None) => Ok(ProxyUriMode::StripPrefix(prefix)), - (None, Some(replacement)) => { - Ok(ProxyUriMode::ReplacePrefix { matched_prefix: "/".to_string(), replacement }) - } - (None, None) => Ok(ProxyUriMode::PassOriginal), - } -} - fn compile_rewrite_rules( rewrite_rules: Vec, ) -> Result> { @@ -290,44 +208,6 @@ fn compile_error_pages(configs: Vec) -> Result Result { - match value { - ProxyHeaderValueConfig::Static(value) => { - let value = value.parse::().map_err(|error| { - Error::Config(format!("invalid header value for `{name}`: {error}")) - })?; - Ok(ProxyHeaderValue::Static(value)) - } - ProxyHeaderValueConfig::Dynamic(dynamic) => match dynamic { - ProxyHeaderDynamicValueConfig::Host => Ok(ProxyHeaderValue::Host), - ProxyHeaderDynamicValueConfig::Scheme => Ok(ProxyHeaderValue::Scheme), - ProxyHeaderDynamicValueConfig::ClientIp => Ok(ProxyHeaderValue::ClientIp), - ProxyHeaderDynamicValueConfig::RemoteAddr => Ok(ProxyHeaderValue::RemoteAddr), - ProxyHeaderDynamicValueConfig::PeerAddr => Ok(ProxyHeaderValue::PeerAddr), - ProxyHeaderDynamicValueConfig::ForwardedFor => Ok(ProxyHeaderValue::ForwardedFor), - ProxyHeaderDynamicValueConfig::RequestHeader(header_name) => { - let header_name = - header_name.parse::().map_err(|error| { - Error::Config(format!( - "invalid request header 
source `{header_name}` for proxy header `{name}`: {error}" - )) - })?; - Ok(ProxyHeaderValue::RequestHeader(header_name)) - } - ProxyHeaderDynamicValueConfig::Template(template) => { - let template = ProxyHeaderTemplate::parse(template).map_err(|error| { - Error::Config(format!("invalid template for proxy header `{name}`: {error}")) - })?; - Ok(ProxyHeaderValue::Template(template)) - } - ProxyHeaderDynamicValueConfig::Remove => Ok(ProxyHeaderValue::Remove), - }, - } -} - fn compile_route_access_control( matcher: &RouteMatcher, allow_cidrs: Vec, diff --git a/crates/rginx-config/src/compile/route/action.rs b/crates/rginx-config/src/compile/route/action.rs new file mode 100644 index 00000000..591bb46a --- /dev/null +++ b/crates/rginx-config/src/compile/route/action.rs @@ -0,0 +1,135 @@ +use std::collections::HashMap; +use std::path::Path; +use std::sync::Arc; + +use http::StatusCode; +use rginx_core::{ + Error, ProxyHeaderTemplate, ProxyHeaderValue, ProxyRedirectMode, ProxyTarget, ProxyUriMode, + Result, ReturnAction, RouteAction, Upstream, +}; + +use crate::model::{ + HandlerConfig, ProxyHeaderDynamicValueConfig, ProxyHeaderValueConfig, ProxyHttpVersionConfig, + ProxyRedirectModeConfig, +}; + +use super::file::compile_file_route_action; + +pub(super) fn compile_route_action( + handler: HandlerConfig, + route_prefix: Option, + upstreams: &HashMap>, + local_upstream_names: &HashMap, + base_dir: &Path, +) -> Result { + match handler { + HandlerConfig::Proxy { + upstream, + preserve_host, + strip_prefix, + proxy_pass_uri, + proxy_http_version, + proxy_redirect, + proxy_set_headers, + } => { + let resolved_upstream = + local_upstream_names.get(&upstream).cloned().unwrap_or_else(|| upstream.clone()); + let compiled = upstreams.get(&resolved_upstream).cloned().ok_or_else(|| { + Error::Config(format!("proxy upstream `{upstream}` is not defined")) + })?; + + let proxy_set_headers = proxy_set_headers + .into_iter() + .map(|(name, value)| { + let header_name = name + .parse::() 
+ .map_err(|e| Error::Config(format!("invalid header name `{name}`: {e}")))?; + let header_value = compile_proxy_header_value(&name, value)?; + Ok((header_name, header_value)) + }) + .collect::>>()?; + + Ok(RouteAction::Proxy(ProxyTarget { + upstream_name: resolved_upstream, + upstream: compiled, + preserve_host: preserve_host.unwrap_or(false), + uri_mode: compile_proxy_uri_mode(strip_prefix, proxy_pass_uri)?, + request_version: match proxy_http_version.unwrap_or(ProxyHttpVersionConfig::Http11) + { + ProxyHttpVersionConfig::Http11 => http::Version::HTTP_11, + ProxyHttpVersionConfig::Http2 => http::Version::HTTP_2, + }, + redirect: match proxy_redirect.unwrap_or(ProxyRedirectModeConfig::Default) { + ProxyRedirectModeConfig::Default => ProxyRedirectMode::Default, + ProxyRedirectModeConfig::Off => ProxyRedirectMode::Off, + }, + proxy_set_headers, + })) + } + file_handler @ HandlerConfig::File { .. } => { + compile_file_route_action(file_handler, route_prefix, base_dir) + } + HandlerConfig::Return { status, location, body } => Ok(RouteAction::Return(ReturnAction { + status: StatusCode::from_u16(status)?, + location, + body, + })), + } +} + +fn compile_proxy_uri_mode( + strip_prefix: Option, + proxy_pass_uri: Option, +) -> Result { + let strip_prefix = strip_prefix.and_then(|value| (!value.is_empty()).then_some(value)); + let proxy_pass_uri = proxy_pass_uri.and_then(|value| (!value.is_empty()).then_some(value)); + + match (strip_prefix, proxy_pass_uri) { + (Some(prefix), Some(replacement)) => { + Ok(ProxyUriMode::ReplacePrefix { matched_prefix: prefix, replacement }) + } + (Some(prefix), None) => Ok(ProxyUriMode::StripPrefix(prefix)), + (None, Some(replacement)) => { + Ok(ProxyUriMode::ReplacePrefix { matched_prefix: "/".to_string(), replacement }) + } + (None, None) => Ok(ProxyUriMode::PassOriginal), + } +} + +fn compile_proxy_header_value( + name: &str, + value: ProxyHeaderValueConfig, +) -> Result { + match value { + ProxyHeaderValueConfig::Static(value) => { + let 
value = value.parse::().map_err(|error| { + Error::Config(format!("invalid header value for `{name}`: {error}")) + })?; + Ok(ProxyHeaderValue::Static(value)) + } + ProxyHeaderValueConfig::Dynamic(dynamic) => match dynamic { + ProxyHeaderDynamicValueConfig::Host => Ok(ProxyHeaderValue::Host), + ProxyHeaderDynamicValueConfig::Scheme => Ok(ProxyHeaderValue::Scheme), + ProxyHeaderDynamicValueConfig::ClientIp => Ok(ProxyHeaderValue::ClientIp), + ProxyHeaderDynamicValueConfig::RemoteAddr => Ok(ProxyHeaderValue::RemoteAddr), + ProxyHeaderDynamicValueConfig::PeerAddr => Ok(ProxyHeaderValue::PeerAddr), + ProxyHeaderDynamicValueConfig::ForwardedFor => Ok(ProxyHeaderValue::ForwardedFor), + ProxyHeaderDynamicValueConfig::RequestHeader(header_name) => { + let header_name = + header_name.parse::().map_err(|error| { + Error::Config(format!( + "invalid request header source `{header_name}` for proxy header `{name}`: {error}" + )) + })?; + Ok(ProxyHeaderValue::RequestHeader(header_name)) + } + ProxyHeaderDynamicValueConfig::Template(template) => { + let template = ProxyHeaderTemplate::parse(template).map_err(|error| { + Error::Config(format!("invalid template for proxy header `{name}`: {error}")) + })?; + Ok(ProxyHeaderValue::Template(template)) + } + ProxyHeaderDynamicValueConfig::Remove => Ok(ProxyHeaderValue::Remove), + }, + } +} diff --git a/crates/rginx-config/src/compile/tests.rs b/crates/rginx-config/src/compile/tests.rs index 1943264e..6e40d817 100644 --- a/crates/rginx-config/src/compile/tests.rs +++ b/crates/rginx-config/src/compile/tests.rs @@ -2,12 +2,13 @@ use std::fs; use std::time::Duration; use crate::model::{ - CacheRouteConfig, CacheZoneConfig, Config, ControlPlaneConfig, ControlPlaneTlsConfig, - HandlerConfig, Http3Config, ListenerConfig, LocationConfig, MatcherConfig, - ProxyHeaderDynamicValueConfig, ProxyHeaderValueConfig, RouteBufferingPolicyConfig, - RouteCompressionPolicyConfig, RuntimeConfig, ServerConfig, ServerTlsConfig, - TlsCipherSuiteConfig, 
TlsKeyExchangeGroupConfig, UpstreamConfig, UpstreamLoadBalanceConfig, - UpstreamPeerConfig, UpstreamProtocolConfig, UpstreamTlsConfig, VirtualHostConfig, + AgentConfig, CacheRouteConfig, CacheZoneConfig, Config, ControlPlaneConfig, + ControlPlaneTlsConfig, HandlerConfig, Http3Config, ListenerConfig, LocationConfig, + MatcherConfig, ProxyHeaderDynamicValueConfig, ProxyHeaderValueConfig, + RouteBufferingPolicyConfig, RouteCompressionPolicyConfig, RuntimeConfig, ServerConfig, + ServerTlsConfig, TlsCipherSuiteConfig, TlsKeyExchangeGroupConfig, UpstreamConfig, + UpstreamLoadBalanceConfig, UpstreamPeerConfig, UpstreamProtocolConfig, UpstreamTlsConfig, + VirtualHostConfig, }; use tempfile::TempDir; @@ -59,6 +60,7 @@ fn test_location(matcher: MatcherConfig, handler: HandlerConfig) -> LocationConf fn base_config() -> Config { Config { + agent: None, control_plane: None, acme: None, cache_zones: Vec::new(), @@ -140,6 +142,7 @@ fn base_config() -> Config { } mod acme; +mod agent; mod cache; mod cache_p1; mod cache_p2; diff --git a/crates/rginx-config/src/compile/tests/acme.rs b/crates/rginx-config/src/compile/tests/acme.rs index 819b9f65..ee3b1b62 100644 --- a/crates/rginx-config/src/compile/tests/acme.rs +++ b/crates/rginx-config/src/compile/tests/acme.rs @@ -113,6 +113,7 @@ fn managed_acme_config( Config { control_plane: None, + agent: None, acme: Some(crate::model::AcmeConfig { directory_url: directory_url.to_string(), contacts: vec![ diff --git a/crates/rginx-config/src/compile/tests/agent.rs b/crates/rginx-config/src/compile/tests/agent.rs new file mode 100644 index 00000000..22e4ae93 --- /dev/null +++ b/crates/rginx-config/src/compile/tests/agent.rs @@ -0,0 +1,145 @@ +use super::*; + +#[test] +fn compile_emits_no_agent_when_disabled() { + let config = Config { + agent: Some(AgentConfig { + enabled: Some(false), + endpoint: Some("https://control.example.com".to_string()), + node_id: Some("edge-sfo-1".to_string()), + token_path: Some("agent.token".to_string()), + 
state_path: None, + region: None, + pop: None, + labels: Default::default(), + heartbeat_interval_secs: None, + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: None, + backoff_max_secs: None, + }), + ..base_config() + }; + + let snapshot = compile(config).expect("disabled agent should compile"); + assert!(snapshot.agent.is_none()); +} + +#[test] +fn compile_resolves_enabled_agent_settings() { + let base_dir = temp_base_dir("rginx-agent-"); + std::fs::write(base_dir.path().join("agent.token"), "secret") + .expect("agent token should exist"); + let config = Config { + agent: Some(AgentConfig { + enabled: Some(true), + endpoint: Some("https://control.example.com/rginx".to_string()), + node_id: Some(" edge-sfo-1 ".to_string()), + token_path: Some("agent.token".to_string()), + state_path: Some("agent-state.json".to_string()), + region: Some(" us-west ".to_string()), + pop: Some(" sfo ".to_string()), + labels: [(" tier ".to_string(), " edge ".to_string())].into_iter().collect(), + heartbeat_interval_secs: Some(15), + connect_timeout_secs: Some(3), + request_timeout_secs: Some(12), + poll_timeout_secs: Some(25), + backoff_initial_ms: Some(250), + backoff_max_secs: Some(30), + }), + ..base_config() + }; + + let snapshot = + compile_with_base(config, base_dir.path()).expect("enabled agent should compile"); + let agent = snapshot.agent.expect("agent should compile"); + + assert_eq!(agent.endpoint.to_string(), "https://control.example.com/rginx"); + assert_eq!(agent.node_id, "edge-sfo-1"); + assert_eq!(agent.auth.token_path, base_dir.path().join("agent.token")); + assert_eq!(agent.state_path, base_dir.path().join("agent-state.json")); + assert_eq!(agent.region.as_deref(), Some("us-west")); + assert_eq!(agent.pop.as_deref(), Some("sfo")); + assert_eq!(agent.labels.get("tier").map(String::as_str), Some("edge")); + assert_eq!(agent.heartbeat_interval, Duration::from_secs(15)); + assert_eq!(agent.connect_timeout, 
Duration::from_secs(3)); + assert_eq!(agent.request_timeout, Duration::from_secs(12)); + assert_eq!(agent.poll_timeout, Duration::from_secs(25)); + assert_eq!(agent.backoff_initial, Duration::from_millis(250)); + assert_eq!(agent.backoff_max, Duration::from_secs(30)); +} + +#[test] +fn compile_rejects_enabled_agent_without_endpoint() { + let mut config = base_config(); + config.agent = Some(AgentConfig { + enabled: Some(true), + endpoint: None, + node_id: Some("edge-sfo-1".to_string()), + token_path: Some("agent.token".to_string()), + state_path: None, + region: None, + pop: None, + labels: Default::default(), + heartbeat_interval_secs: None, + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: None, + backoff_max_secs: None, + }); + + let error = compile(config).expect_err("enabled agent should require endpoint"); + assert!(error.to_string().contains("agent.endpoint is required")); +} + +#[test] +fn compile_rejects_agent_endpoint_without_https() { + let mut config = base_config(); + config.agent = Some(AgentConfig { + enabled: Some(true), + endpoint: Some("http://control.example.com".to_string()), + node_id: Some("edge-sfo-1".to_string()), + token_path: Some("agent.token".to_string()), + state_path: None, + region: None, + pop: None, + labels: Default::default(), + heartbeat_interval_secs: None, + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: None, + backoff_max_secs: None, + }); + + let error = compile(config).expect_err("agent endpoint should require https"); + assert!(error.to_string().contains("agent.endpoint must be an HTTPS URL")); +} + +#[test] +fn compile_rejects_agent_token_path_that_is_not_file() { + let mut config = base_config(); + config.agent = Some(AgentConfig { + enabled: Some(true), + endpoint: Some("https://control.example.com".to_string()), + node_id: Some("edge-sfo-1".to_string()), + token_path: 
Some("missing-agent.token".to_string()), + state_path: None, + region: None, + pop: None, + labels: Default::default(), + heartbeat_interval_secs: None, + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: None, + backoff_max_secs: None, + }); + + let base_dir = temp_base_dir("rginx-agent-missing-token-"); + let error = compile_with_base(config, base_dir.path()) + .expect_err("enabled agent should require readable token file"); + assert!(error.to_string().contains("agent.token_path")); +} diff --git a/crates/rginx-config/src/compile/tests/cache.rs b/crates/rginx-config/src/compile/tests/cache.rs index d2d499d6..f5241c8e 100644 --- a/crates/rginx-config/src/compile/tests/cache.rs +++ b/crates/rginx-config/src/compile/tests/cache.rs @@ -5,6 +5,7 @@ fn compile_attaches_cache_zones_and_route_policy() { let base_dir = temp_base_dir("rginx-cache-compile"); let mut config = Config { control_plane: None, + agent: None, acme: None, cache_zones: vec![CacheZoneConfig { name: "default".to_string(), @@ -212,6 +213,7 @@ fn compile_cache_policy_supports_p0_controls() { let base_dir = temp_base_dir("rginx-cache-compile-p0"); let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: vec![CacheZoneConfig { name: "default".to_string(), diff --git a/crates/rginx-config/src/compile/tests/cache_p1.rs b/crates/rginx-config/src/compile/tests/cache_p1.rs index d4ef7ddf..95f785b1 100644 --- a/crates/rginx-config/src/compile/tests/cache_p1.rs +++ b/crates/rginx-config/src/compile/tests/cache_p1.rs @@ -5,6 +5,7 @@ fn compile_cache_policy_supports_p1_controls() { let base_dir = temp_base_dir("rginx-cache-compile-p1"); let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: vec![CacheZoneConfig { name: "default".to_string(), diff --git a/crates/rginx-config/src/compile/tests/cache_p2.rs b/crates/rginx-config/src/compile/tests/cache_p2.rs index 6f1d145c..fecb8b7f 100644 --- 
a/crates/rginx-config/src/compile/tests/cache_p2.rs +++ b/crates/rginx-config/src/compile/tests/cache_p2.rs @@ -5,6 +5,7 @@ fn compile_cache_policy_supports_disabling_p2_defaults() { let base_dir = temp_base_dir("rginx-cache-compile-p2-disable"); let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: vec![CacheZoneConfig { name: "default".to_string(), diff --git a/crates/rginx-config/src/compile/tests/cache_p3.rs b/crates/rginx-config/src/compile/tests/cache_p3.rs index d28209a7..ed7c225c 100644 --- a/crates/rginx-config/src/compile/tests/cache_p3.rs +++ b/crates/rginx-config/src/compile/tests/cache_p3.rs @@ -5,6 +5,7 @@ fn compile_cache_policy_supports_p3_slice_controls() { let base_dir = temp_base_dir("rginx-cache-compile-p3"); let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: vec![CacheZoneConfig { name: "default".to_string(), diff --git a/crates/rginx-config/src/compile/tests/http3.rs b/crates/rginx-config/src/compile/tests/http3.rs index 8318c918..571b8f28 100644 --- a/crates/rginx-config/src/compile/tests/http3.rs +++ b/crates/rginx-config/src/compile/tests/http3.rs @@ -10,6 +10,7 @@ fn compile_http3_listener_defaults_to_tcp_listen_addr_and_default_alt_svc_policy let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -122,6 +123,7 @@ fn compile_http3_applies_transport_settings_and_resolves_host_key_path() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/listeners.rs b/crates/rginx-config/src/compile/tests/listeners.rs index b02f2e3d..7170a33f 100644 --- a/crates/rginx-config/src/compile/tests/listeners.rs +++ b/crates/rginx-config/src/compile/tests/listeners.rs @@ -4,6 +4,7 @@ use super::*; fn compile_supports_explicit_multi_listener_configs() { let config = Config { control_plane: None, + agent: None, acme: 
None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/route.rs b/crates/rginx-config/src/compile/tests/route.rs index 28823f83..b2e37175 100644 --- a/crates/rginx-config/src/compile/tests/route.rs +++ b/crates/rginx-config/src/compile/tests/route.rs @@ -9,6 +9,7 @@ mod regex; fn compile_attaches_route_access_control() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -51,9 +52,7 @@ fn compile_attaches_route_access_control() { body: Some("ok\n".to_string()), }, grpc_service: None, - grpc_method: None, - allow_cidrs: vec!["127.0.0.1/32".to_string(), "::1/128".to_string()], deny_cidrs: vec!["127.0.0.2/32".to_string()], requests_per_sec: None, @@ -78,6 +77,7 @@ fn compile_attaches_route_access_control() { fn compile_attaches_route_rate_limit() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -149,6 +149,7 @@ fn compile_attaches_route_rate_limit() { fn compile_applies_route_transport_policy_defaults_and_overrides() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -255,6 +256,7 @@ fn compile_applies_route_transport_policy_defaults_and_overrides() { fn compile_generates_distinct_route_and_vhost_ids() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -310,7 +312,6 @@ fn compile_generates_distinct_route_and_vhost_ids() { }; let snapshot = compile(config).expect("vhost config should compile"); - assert_eq!(snapshot.default_vhost.id, "server"); assert_eq!(snapshot.vhosts[0].id, "servers[0]"); assert_eq!(snapshot.default_vhost.routes[0].id, "server/routes[0]|exact:/"); @@ -323,6 +324,7 @@ fn compile_generates_distinct_route_and_vhost_ids() { fn compile_preserves_grpc_route_constraints_without_reordering_routes() { let config = 
Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/route/file_handler.rs b/crates/rginx-config/src/compile/tests/route/file_handler.rs index e872252f..3744ba5a 100644 --- a/crates/rginx-config/src/compile/tests/route/file_handler.rs +++ b/crates/rginx-config/src/compile/tests/route/file_handler.rs @@ -4,6 +4,7 @@ use super::*; fn compile_attaches_extended_file_handler_settings() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/route/preferred_prefix.rs b/crates/rginx-config/src/compile/tests/route/preferred_prefix.rs index e2ea41f4..13426036 100644 --- a/crates/rginx-config/src/compile/tests/route/preferred_prefix.rs +++ b/crates/rginx-config/src/compile/tests/route/preferred_prefix.rs @@ -4,6 +4,7 @@ use super::*; fn compile_attaches_preferred_prefix_matcher_without_reordering_routes() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/route/proxy_rewrite.rs b/crates/rginx-config/src/compile/tests/route/proxy_rewrite.rs index c2b8ffbf..a3290413 100644 --- a/crates/rginx-config/src/compile/tests/route/proxy_rewrite.rs +++ b/crates/rginx-config/src/compile/tests/route/proxy_rewrite.rs @@ -4,6 +4,7 @@ use super::*; fn compile_attaches_rewrites_and_proxy_pass_uri_mode() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/route/regex.rs b/crates/rginx-config/src/compile/tests/route/regex.rs index 75d0efd2..5123d822 100644 --- a/crates/rginx-config/src/compile/tests/route/regex.rs +++ b/crates/rginx-config/src/compile/tests/route/regex.rs @@ -4,6 +4,7 @@ use super::*; fn 
compile_attaches_regex_matcher_and_dynamic_proxy_headers() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -127,6 +128,7 @@ fn compile_attaches_regex_matcher_and_dynamic_proxy_headers() { fn compile_preserves_declaration_order_for_overlapping_regex_routes() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/server_settings.rs b/crates/rginx-config/src/compile/tests/server_settings.rs index c904ddc0..2f619ecf 100644 --- a/crates/rginx-config/src/compile/tests/server_settings.rs +++ b/crates/rginx-config/src/compile/tests/server_settings.rs @@ -4,6 +4,7 @@ use super::*; fn compile_applies_custom_server_header() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -52,6 +53,7 @@ fn compile_applies_custom_server_header() { fn compile_normalizes_trusted_proxy_ips_and_cidrs() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -126,6 +128,7 @@ fn compile_normalizes_trusted_proxy_ips_and_cidrs() { fn compile_attaches_server_hardening_settings() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -229,6 +232,7 @@ fn compile_attaches_server_hardening_settings() { fn compile_rejects_invalid_server_access_log_format() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/server_tls.rs b/crates/rginx-config/src/compile/tests/server_tls.rs index dd4ae128..d01f7cb5 100644 --- a/crates/rginx-config/src/compile/tests/server_tls.rs +++ b/crates/rginx-config/src/compile/tests/server_tls.rs @@ -10,6 +10,7 @@ fn 
compile_resolves_server_tls_paths_relative_to_config_base() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -102,6 +103,7 @@ fn compile_preserves_server_tls_policy_fields() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -200,6 +202,7 @@ fn compile_preserves_server_tls_ocsp_policy_fields() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/upstream_defaults.rs b/crates/rginx-config/src/compile/tests/upstream_defaults.rs index e66f0463..088752bf 100644 --- a/crates/rginx-config/src/compile/tests/upstream_defaults.rs +++ b/crates/rginx-config/src/compile/tests/upstream_defaults.rs @@ -4,6 +4,7 @@ use super::*; fn compile_accepts_https_upstreams() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -124,6 +125,7 @@ fn compile_accepts_https_upstreams() { fn compile_defaults_grpc_health_check_path_when_service_is_set() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/upstream_fallbacks.rs b/crates/rginx-config/src/compile/tests/upstream_fallbacks.rs index 3874efc0..8571a8a7 100644 --- a/crates/rginx-config/src/compile/tests/upstream_fallbacks.rs +++ b/crates/rginx-config/src/compile/tests/upstream_fallbacks.rs @@ -4,6 +4,7 @@ use super::*; fn compile_accepts_backup_peers() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -138,6 +139,7 @@ fn compile_accepts_backup_peers() { fn compile_uses_legacy_request_timeout_fallbacks_and_disables_pool_idle_timeout() { let config = Config { control_plane: None, + agent: None, acme: None, 
cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -258,6 +260,7 @@ fn compile_uses_legacy_request_timeout_fallbacks_and_disables_pool_idle_timeout( fn compile_uses_default_pool_idle_timeout() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/upstream_server_name.rs b/crates/rginx-config/src/compile/tests/upstream_server_name.rs index dc07b5fe..b5ed0580 100644 --- a/crates/rginx-config/src/compile/tests/upstream_server_name.rs +++ b/crates/rginx-config/src/compile/tests/upstream_server_name.rs @@ -4,6 +4,7 @@ use super::*; fn compile_normalizes_server_name_override() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -115,6 +116,7 @@ fn compile_normalizes_server_name_override() { fn compile_preserves_upstream_server_name_toggle() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -232,6 +234,7 @@ fn compile_preserves_upstream_server_name_toggle() { fn compile_rejects_invalid_server_name_override() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/upstream_tls.rs b/crates/rginx-config/src/compile/tests/upstream_tls.rs index c7586405..d340f6ef 100644 --- a/crates/rginx-config/src/compile/tests/upstream_tls.rs +++ b/crates/rginx-config/src/compile/tests/upstream_tls.rs @@ -8,6 +8,7 @@ fn compile_resolves_custom_ca_relative_to_config_base() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -148,6 +149,7 @@ fn compile_resolves_custom_ca_relative_to_config_base() { fn compile_accepts_https_http3_upstreams() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), 
runtime: RuntimeConfig { @@ -265,6 +267,7 @@ fn compile_resolves_upstream_mtls_identity_and_tls_versions_relative_to_config_b let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/upstream_transport.rs b/crates/rginx-config/src/compile/tests/upstream_transport.rs index 8e5c714e..3c037ce9 100644 --- a/crates/rginx-config/src/compile/tests/upstream_transport.rs +++ b/crates/rginx-config/src/compile/tests/upstream_transport.rs @@ -4,6 +4,7 @@ use super::*; fn compile_applies_granular_upstream_transport_settings() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -106,6 +107,7 @@ fn compile_applies_granular_upstream_transport_settings() { fn compile_accepts_least_conn_load_balance() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -225,6 +227,7 @@ fn compile_accepts_least_conn_load_balance() { fn compile_applies_peer_weights() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/vhosts.rs b/crates/rginx-config/src/compile/tests/vhosts.rs index 91e85926..9cb6e428 100644 --- a/crates/rginx-config/src/compile/tests/vhosts.rs +++ b/crates/rginx-config/src/compile/tests/vhosts.rs @@ -10,6 +10,7 @@ fn compile_generates_deduplicated_listeners_from_vhost_listen() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -81,6 +82,7 @@ fn compile_generates_deduplicated_listeners_from_vhost_listen() { fn compile_uses_vhost_local_upstream_before_global_upstream() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -141,6 +143,7 @@ fn 
compile_applies_server_tls_defaults_only_to_vhost_ssl_listeners() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -196,6 +199,7 @@ fn compile_uses_first_tls_vhost_as_implicit_default_certificate() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { @@ -227,6 +231,7 @@ fn compile_uses_first_tls_vhost_as_implicit_default_certificate() { fn compile_preserves_ipv6_vhost_listener_ids() { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/compile/tests/vhosts/listener_conflicts.rs b/crates/rginx-config/src/compile/tests/vhosts/listener_conflicts.rs index 26fed49e..bda22528 100644 --- a/crates/rginx-config/src/compile/tests/vhosts/listener_conflicts.rs +++ b/crates/rginx-config/src/compile/tests/vhosts/listener_conflicts.rs @@ -45,6 +45,7 @@ fn compile_rejects_conflicting_shared_vhost_listener_flags() { for (servers, expected) in cases { let config = Config { control_plane: None, + agent: None, acme: None, cache_zones: Vec::new(), runtime: RuntimeConfig { diff --git a/crates/rginx-config/src/model.rs b/crates/rginx-config/src/model.rs index f2200de0..4921558d 100644 --- a/crates/rginx-config/src/model.rs +++ b/crates/rginx-config/src/model.rs @@ -1,6 +1,7 @@ use serde::Deserialize; mod acme; +mod agent; mod cache; mod control_plane; mod listener; @@ -12,6 +13,7 @@ mod upstream; mod vhost; pub use acme::{AcmeChallengeConfig, AcmeConfig, VirtualHostAcmeConfig}; +pub use agent::AgentConfig; pub use cache::{ CacheIgnoreHeaderConfig, CachePredicateConfig, CacheRangeRequestPolicyConfig, CacheRouteConfig, CacheStatusTtlConfig, CacheUseStaleConditionConfig, CacheZoneConfig, @@ -41,6 +43,8 @@ pub use vhost::VirtualHostConfig; pub struct Config { pub runtime: RuntimeConfig, #[serde(default)] + pub agent: Option, + #[serde(default)] 
pub control_plane: Option, #[serde(default)] pub acme: Option, diff --git a/crates/rginx-config/src/model/agent.rs b/crates/rginx-config/src/model/agent.rs new file mode 100644 index 00000000..b886ffc3 --- /dev/null +++ b/crates/rginx-config/src/model/agent.rs @@ -0,0 +1,40 @@ +use std::collections::BTreeMap; + +use serde::Deserialize; + +/// Outbound node agent configuration. +/// +/// When enabled, the rginx node keeps its public surface to normal site +/// listeners and connects out to the central control plane instead of opening a +/// node-side management port. +#[derive(Debug, Clone, Deserialize, Default)] +pub struct AgentConfig { + #[serde(default)] + pub enabled: Option, + #[serde(default)] + pub endpoint: Option, + #[serde(default)] + pub node_id: Option, + #[serde(default)] + pub token_path: Option, + #[serde(default)] + pub state_path: Option, + #[serde(default)] + pub region: Option, + #[serde(default)] + pub pop: Option, + #[serde(default)] + pub labels: BTreeMap, + #[serde(default)] + pub heartbeat_interval_secs: Option, + #[serde(default)] + pub connect_timeout_secs: Option, + #[serde(default)] + pub request_timeout_secs: Option, + #[serde(default)] + pub poll_timeout_secs: Option, + #[serde(default)] + pub backoff_initial_ms: Option, + #[serde(default)] + pub backoff_max_secs: Option, +} diff --git a/crates/rginx-config/src/model/control_plane.rs b/crates/rginx-config/src/model/control_plane.rs index 9fa5ccd9..7fba222a 100644 --- a/crates/rginx-config/src/model/control_plane.rs +++ b/crates/rginx-config/src/model/control_plane.rs @@ -2,6 +2,11 @@ use std::collections::BTreeMap; use serde::Deserialize; +/// Legacy node-side control-plane server configuration. +/// +/// New control-plane work should target the outbound `agent` model described in +/// `docs/AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md`. This model is kept so existing +/// deployments can continue to boot while the outbound agent is introduced. 
#[derive(Debug, Clone, Deserialize, Default)] pub struct ControlPlaneConfig { #[serde(default)] diff --git a/crates/rginx-config/src/validate.rs b/crates/rginx-config/src/validate.rs index 87fa56ea..11930304 100644 --- a/crates/rginx-config/src/validate.rs +++ b/crates/rginx-config/src/validate.rs @@ -5,6 +5,7 @@ use rginx_core::{Error, Result}; use crate::model::{Config, LocationConfig, RouteBufferingPolicyConfig}; mod acme; +mod agent; mod cache; mod control_plane; mod route; @@ -17,6 +18,7 @@ const DEFAULT_GRPC_HEALTH_CHECK_PATH: &str = "/grpc.health.v1.Health/Check"; pub fn validate(config: &Config) -> Result<()> { runtime::validate_runtime(&config.runtime)?; + agent::validate_agent(config.agent.as_ref())?; control_plane::validate_control_plane(config.control_plane.as_ref())?; if config.locations.is_empty() && config.servers.is_empty() { diff --git a/crates/rginx-config/src/validate/agent.rs b/crates/rginx-config/src/validate/agent.rs new file mode 100644 index 00000000..882bca61 --- /dev/null +++ b/crates/rginx-config/src/validate/agent.rs @@ -0,0 +1,141 @@ +use rginx_core::{Error, Result}; + +use crate::model::AgentConfig; + +const DEFAULT_BACKOFF_INITIAL_MS: u64 = 500; +const DEFAULT_BACKOFF_MAX_SECS: u64 = 60; + +const MIN_TIMEOUT_SECS: u64 = 1; +const MAX_TIMEOUT_SECS: u64 = 300; +const MIN_HEARTBEAT_SECS: u64 = 1; +const MAX_HEARTBEAT_SECS: u64 = 3600; +const MIN_BACKOFF_INITIAL_MS: u64 = 100; +const MAX_BACKOFF_INITIAL_MS: u64 = 60_000; +const MIN_BACKOFF_MAX_SECS: u64 = 1; +const MAX_BACKOFF_MAX_SECS: u64 = 3600; + +pub(super) fn validate_agent(agent: Option<&AgentConfig>) -> Result<()> { + let Some(agent) = agent else { + return Ok(()); + }; + + let enabled = agent.enabled.unwrap_or(false); + + if enabled && agent.endpoint.is_none() { + return Err(Error::Config( + "agent.endpoint is required when agent.enabled=true".to_string(), + )); + } + if let Some(endpoint) = agent.endpoint.as_deref() { + validate_https_endpoint(endpoint)?; + } + + if enabled && 
agent.node_id.is_none() { + return Err(Error::Config("agent.node_id is required when agent.enabled=true".to_string())); + } + validate_optional_identity_field("agent.node_id", agent.node_id.as_deref())?; + validate_optional_identity_field("agent.region", agent.region.as_deref())?; + validate_optional_identity_field("agent.pop", agent.pop.as_deref())?; + + if enabled && agent.token_path.is_none() { + return Err(Error::Config( + "agent.token_path is required when agent.enabled=true".to_string(), + )); + } + validate_optional_identity_field("agent.token_path", agent.token_path.as_deref())?; + validate_optional_identity_field("agent.state_path", agent.state_path.as_deref())?; + + for (key, value) in &agent.labels { + let key = key.trim(); + if key.is_empty() { + return Err(Error::Config("agent.labels keys must not be empty".to_string())); + } + if value.trim().is_empty() { + return Err(Error::Config(format!("agent.labels entry `{key}` must not be empty"))); + } + } + + validate_range( + "agent.heartbeat_interval_secs", + agent.heartbeat_interval_secs, + MIN_HEARTBEAT_SECS, + MAX_HEARTBEAT_SECS, + )?; + validate_range( + "agent.connect_timeout_secs", + agent.connect_timeout_secs, + MIN_TIMEOUT_SECS, + MAX_TIMEOUT_SECS, + )?; + validate_range( + "agent.request_timeout_secs", + agent.request_timeout_secs, + MIN_TIMEOUT_SECS, + MAX_TIMEOUT_SECS, + )?; + validate_range( + "agent.poll_timeout_secs", + agent.poll_timeout_secs, + MIN_TIMEOUT_SECS, + MAX_TIMEOUT_SECS, + )?; + validate_range( + "agent.backoff_initial_ms", + agent.backoff_initial_ms, + MIN_BACKOFF_INITIAL_MS, + MAX_BACKOFF_INITIAL_MS, + )?; + validate_range( + "agent.backoff_max_secs", + agent.backoff_max_secs, + MIN_BACKOFF_MAX_SECS, + MAX_BACKOFF_MAX_SECS, + )?; + validate_backoff_order(agent)?; + + Ok(()) +} + +fn validate_https_endpoint(endpoint: &str) -> Result<()> { + let endpoint = endpoint.trim(); + if endpoint.is_empty() { + return Err(Error::Config("agent.endpoint must not be empty".to_string())); + } + 
let uri = endpoint.parse::().map_err(|error| { + Error::Config(format!("agent.endpoint `{endpoint}` is invalid: {error}")) + })?; + if uri.scheme_str() != Some("https") || uri.authority().is_none() { + return Err(Error::Config("agent.endpoint must be an HTTPS URL".to_string())); + } + Ok(()) +} + +fn validate_optional_identity_field(field: &str, value: Option<&str>) -> Result<()> { + if let Some(value) = value + && value.trim().is_empty() + { + return Err(Error::Config(format!("{field} must not be empty"))); + } + Ok(()) +} + +fn validate_range(field: &str, value: Option, min: u64, max: u64) -> Result<()> { + if let Some(value) = value + && !(min..=max).contains(&value) + { + return Err(Error::Config(format!("{field} must be between {min} and {max}"))); + } + Ok(()) +} + +fn validate_backoff_order(agent: &AgentConfig) -> Result<()> { + let initial_ms = agent.backoff_initial_ms.unwrap_or(DEFAULT_BACKOFF_INITIAL_MS); + let max_ms = agent.backoff_max_secs.unwrap_or(DEFAULT_BACKOFF_MAX_SECS).saturating_mul(1000); + if initial_ms <= max_ms { + Ok(()) + } else { + Err(Error::Config( + "agent.backoff_initial_ms must not exceed agent.backoff_max_secs".to_string(), + )) + } +} diff --git a/crates/rginx-config/src/validate/tests.rs b/crates/rginx-config/src/validate/tests.rs index 907dc36a..2a20f613 100644 --- a/crates/rginx-config/src/validate/tests.rs +++ b/crates/rginx-config/src/validate/tests.rs @@ -1,16 +1,17 @@ use crate::model::{ - CacheRouteConfig, CacheZoneConfig, Config, ControlPlaneConfig, ControlPlaneTlsConfig, - HandlerConfig, Http1Config, Http3Config, ListenerConfig, LocationConfig, MatcherConfig, - ProxyHeaderDynamicValueConfig, ProxyHeaderValueConfig, RouteBufferingPolicyConfig, - RouteCompressionPolicyConfig, RuntimeConfig, ServerConfig, ServerTlsConfig, - TlsCipherSuiteConfig, TlsKeyExchangeGroupConfig, TlsVersionConfig, UpstreamConfig, - UpstreamLoadBalanceConfig, UpstreamPeerConfig, UpstreamProtocolConfig, VirtualHostConfig, - VirtualHostTlsConfig, + 
AgentConfig, CacheRouteConfig, CacheZoneConfig, Config, ControlPlaneConfig, + ControlPlaneTlsConfig, HandlerConfig, Http1Config, Http3Config, ListenerConfig, LocationConfig, + MatcherConfig, ProxyHeaderDynamicValueConfig, ProxyHeaderValueConfig, + RouteBufferingPolicyConfig, RouteCompressionPolicyConfig, RuntimeConfig, ServerConfig, + ServerTlsConfig, TlsCipherSuiteConfig, TlsKeyExchangeGroupConfig, TlsVersionConfig, + UpstreamConfig, UpstreamLoadBalanceConfig, UpstreamPeerConfig, UpstreamProtocolConfig, + VirtualHostConfig, VirtualHostTlsConfig, }; use super::{DEFAULT_GRPC_HEALTH_CHECK_PATH, validate}; mod acme; +mod agent; mod cache; mod control_plane; mod listeners; @@ -27,6 +28,7 @@ mod vhosts; fn base_config() -> Config { Config { + agent: None, control_plane: None, acme: None, cache_zones: Vec::new(), diff --git a/crates/rginx-config/src/validate/tests/agent.rs b/crates/rginx-config/src/validate/tests/agent.rs new file mode 100644 index 00000000..1d6f7ab5 --- /dev/null +++ b/crates/rginx-config/src/validate/tests/agent.rs @@ -0,0 +1,144 @@ +use super::*; + +#[test] +fn validate_accepts_minimal_enabled_agent() { + let mut config = base_config(); + config.agent = Some(AgentConfig { + enabled: Some(true), + endpoint: Some("https://control.example.com".to_string()), + node_id: Some("edge-sha-1".to_string()), + token_path: Some("/etc/rginx/agent.token".to_string()), + state_path: Some("/var/lib/rginx/agent/state.json".to_string()), + region: Some("ap-east".to_string()), + pop: Some("sha".to_string()), + labels: [("tier".to_string(), "edge".to_string())].into_iter().collect(), + heartbeat_interval_secs: None, + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: None, + backoff_max_secs: None, + }); + + validate(&config).expect("minimal enabled agent should validate"); +} + +#[test] +fn validate_rejects_enabled_agent_without_required_fields() { + let mut config = base_config(); + config.agent = Some(AgentConfig 
{ + enabled: Some(true), + endpoint: None, + node_id: Some("edge-sha-1".to_string()), + token_path: Some("/etc/rginx/agent.token".to_string()), + state_path: None, + region: None, + pop: None, + labels: Default::default(), + heartbeat_interval_secs: None, + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: None, + backoff_max_secs: None, + }); + + let error = validate(&config).expect_err("enabled agent should require endpoint"); + assert!(error.to_string().contains("agent.endpoint is required")); +} + +#[test] +fn validate_rejects_agent_endpoint_without_https() { + let mut config = base_config(); + config.agent = Some(AgentConfig { + enabled: Some(true), + endpoint: Some("http://control.example.com".to_string()), + node_id: Some("edge-sha-1".to_string()), + token_path: Some("/etc/rginx/agent.token".to_string()), + state_path: None, + region: None, + pop: None, + labels: Default::default(), + heartbeat_interval_secs: None, + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: None, + backoff_max_secs: None, + }); + + let error = validate(&config).expect_err("agent endpoint should require https"); + assert!(error.to_string().contains("agent.endpoint must be an HTTPS URL")); +} + +#[test] +fn validate_rejects_blank_agent_identity_and_labels() { + let mut config = base_config(); + config.agent = Some(AgentConfig { + enabled: Some(true), + endpoint: Some("https://control.example.com".to_string()), + node_id: Some(" ".to_string()), + token_path: Some("/etc/rginx/agent.token".to_string()), + state_path: None, + region: None, + pop: None, + labels: [("tier".to_string(), " ".to_string())].into_iter().collect(), + heartbeat_interval_secs: None, + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: None, + backoff_max_secs: None, + }); + + let error = validate(&config).expect_err("blank agent node_id should be 
rejected"); + assert!(error.to_string().contains("agent.node_id must not be empty")); +} + +#[test] +fn validate_rejects_agent_timeout_outside_range() { + let mut config = base_config(); + config.agent = Some(AgentConfig { + enabled: Some(true), + endpoint: Some("https://control.example.com".to_string()), + node_id: Some("edge-sha-1".to_string()), + token_path: Some("/etc/rginx/agent.token".to_string()), + state_path: None, + region: None, + pop: None, + labels: Default::default(), + heartbeat_interval_secs: Some(0), + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: None, + backoff_max_secs: None, + }); + + let error = validate(&config).expect_err("heartbeat interval should have lower bound"); + assert!(error.to_string().contains("agent.heartbeat_interval_secs must be between")); +} + +#[test] +fn validate_rejects_agent_backoff_initial_above_max() { + let mut config = base_config(); + config.agent = Some(AgentConfig { + enabled: Some(true), + endpoint: Some("https://control.example.com".to_string()), + node_id: Some("edge-sha-1".to_string()), + token_path: Some("/etc/rginx/agent.token".to_string()), + state_path: None, + region: None, + pop: None, + labels: Default::default(), + heartbeat_interval_secs: None, + connect_timeout_secs: None, + request_timeout_secs: None, + poll_timeout_secs: None, + backoff_initial_ms: Some(10_000), + backoff_max_secs: Some(1), + }); + + let error = validate(&config).expect_err("initial backoff should not exceed max"); + assert!(error.to_string().contains("agent.backoff_initial_ms must not exceed")); +} diff --git a/crates/rginx-config/src/validate/tests/vhosts.rs b/crates/rginx-config/src/validate/tests/vhosts.rs index 89dbaa10..e1f13be1 100644 --- a/crates/rginx-config/src/validate/tests/vhosts.rs +++ b/crates/rginx-config/src/validate/tests/vhosts.rs @@ -1,5 +1,7 @@ use super::*; +mod listen; + #[test] fn validate_rejects_empty_default_server_name() { let mut config = 
base_config(); @@ -219,206 +221,3 @@ fn validate_rejects_vhost_tls_without_any_tls_listener() { let error = validate(&config).expect_err("vhost tls should require a tls listener"); assert!(error.to_string().contains("requires at least one listener with tls")); } - -#[test] -fn validate_accepts_vhost_listen_without_main_server_listen() { - let mut config = base_config(); - config.server.listen = None; - config.locations.clear(); - let mut vhost = sample_vhost(vec!["api.example.com"]); - vhost.listen = vec!["127.0.0.1:8080".to_string()]; - config.servers = vec![vhost]; - - validate(&config).expect("servers[].listen should provide the listener binding"); -} - -#[test] -fn validate_accepts_server_tls_defaults_with_vhost_listen() { - let mut config = base_config(); - config.server.listen = None; - config.server.default_certificate = Some("api.example.com".to_string()); - config.server.tls = Some(valid_server_tls()); - config.locations.clear(); - let mut vhost = sample_vhost(vec!["api.example.com"]); - vhost.listen = vec!["127.0.0.1:8443 ssl http2".to_string()]; - vhost.tls = Some(VirtualHostTlsConfig { - acme: None, - cert_path: "api.crt".to_string(), - key_path: "api.key".to_string(), - additional_certificates: None, - ocsp_staple_path: None, - ocsp: None, - }); - config.servers = vec![vhost]; - - validate(&config).expect("server TLS defaults should be allowed with servers[].listen"); -} - -#[test] -fn validate_rejects_mixing_main_server_listen_with_vhost_listen() { - let mut config = base_config(); - let mut vhost = sample_vhost(vec!["api.example.com"]); - vhost.listen = vec!["127.0.0.1:8080".to_string()]; - config.servers = vec![vhost]; - - let error = validate(&config).expect_err("mixed listener architecture should fail"); - assert!(error.to_string().contains("cannot be used together with servers[].listen")); -} - -#[test] -fn validate_rejects_server_http3_with_vhost_listen() { - let mut config = base_config(); - config.server.listen = None; - config.server.tls = 
Some(valid_server_tls()); - config.server.http3 = Some(Http3Config::default()); - config.locations.clear(); - let mut vhost = sample_vhost(vec!["api.example.com"]); - vhost.listen = vec!["127.0.0.1:8080".to_string()]; - config.servers = vec![vhost]; - - let error = validate(&config).expect_err("server http3 should stay listener-scoped"); - assert!(error.to_string().contains("server listen, proxy_protocol, and http3")); -} - -#[test] -fn validate_rejects_missing_listen_when_any_vhost_uses_listen() { - let mut config = base_config(); - config.server.listen = None; - config.locations.clear(); - let mut first = sample_vhost(vec!["api.example.com"]); - first.listen = vec!["127.0.0.1:8080".to_string()]; - let second = sample_vhost(vec!["www.example.com"]); - config.servers = vec![first, second]; - - let error = validate(&config).expect_err("all vhosts should declare listen in vhost model"); - assert!(error.to_string().contains("every vhost must declare listen explicitly")); -} - -#[test] -fn validate_accepts_vhost_listen_default_server_and_reuseport() { - let mut config = base_config(); - config.server.listen = None; - config.locations.clear(); - let mut vhost = sample_vhost(vec!["api.example.com"]); - vhost.listen = vec!["127.0.0.1:8080 ssl default_server reuseport".to_string()]; - vhost.tls = Some(VirtualHostTlsConfig { - acme: None, - cert_path: "server.crt".to_string(), - key_path: "server.key".to_string(), - additional_certificates: None, - ocsp_staple_path: None, - ocsp: None, - }); - config.servers = vec![vhost]; - - validate(&config).expect("default_server and reuseport should validate"); -} - -#[test] -fn validate_rejects_inconsistent_http3_on_shared_vhost_listen() { - let mut config = base_config(); - config.server.listen = None; - config.server.tls = Some(valid_server_tls()); - config.locations.clear(); - let mut first = sample_vhost(vec!["api.example.com"]); - first.listen = vec!["127.0.0.1:8443 ssl http2 http3".to_string()]; - first.tls = 
Some(VirtualHostTlsConfig { - acme: None, - cert_path: "api.crt".to_string(), - key_path: "api.key".to_string(), - additional_certificates: None, - ocsp_staple_path: None, - ocsp: None, - }); - first.http3 = Some(Http3Config { alt_svc_max_age_secs: Some(7200), ..Http3Config::default() }); - let mut second = sample_vhost(vec!["www.example.com"]); - second.listen = vec!["127.0.0.1:8443 ssl http2 http3".to_string()]; - second.tls = Some(VirtualHostTlsConfig { - acme: None, - cert_path: "www.crt".to_string(), - key_path: "www.key".to_string(), - additional_certificates: None, - ocsp_staple_path: None, - ocsp: None, - }); - second.http3 = Some(Http3Config { alt_svc_max_age_secs: Some(3600), ..Http3Config::default() }); - config.servers = vec![first, second]; - - let error = validate(&config).expect_err("shared http3 listener settings should match"); - assert!(error.to_string().contains("must use consistent http3 settings")); -} - -#[test] -fn validate_rejects_vhost_http3_when_server_tls_policy_disables_tls13() { - let mut config = base_config(); - config.server.listen = None; - let mut tls = valid_server_tls(); - tls.versions = Some(vec![TlsVersionConfig::Tls12]); - config.server.tls = Some(tls); - config.locations.clear(); - let mut vhost = sample_vhost(vec!["api.example.com"]); - vhost.listen = vec!["127.0.0.1:8443 ssl http2 http3".to_string()]; - vhost.tls = Some(VirtualHostTlsConfig { - acme: None, - cert_path: "api.crt".to_string(), - key_path: "api.key".to_string(), - additional_certificates: None, - ocsp_staple_path: None, - ocsp: None, - }); - vhost.http3 = Some(Http3Config::default()); - config.servers = vec![vhost]; - - let error = validate(&config).expect_err("http3 should honor server TLS policy defaults"); - assert!(error.to_string().contains("http3 requires TLS1.3")); -} - -#[test] -fn validate_rejects_vhost_ssl_listen_without_vhost_tls() { - let mut config = base_config(); - config.server.listen = None; - config.locations.clear(); - let mut vhost = 
sample_vhost(vec!["api.example.com"]); - vhost.listen = vec!["127.0.0.1:8443 ssl http2".to_string()]; - config.servers = vec![vhost]; - - let error = validate(&config).expect_err("ssl listen without vhost tls should fail"); - assert!(error.to_string().contains("servers[0] ssl listen requires tls")); -} - -#[test] -fn validate_accepts_vhost_local_upstream_scope() { - let mut config = base_config(); - config.server.listen = None; - config.upstreams.clear(); - config.locations.clear(); - let mut vhost = sample_vhost(vec!["api.example.com"]); - vhost.listen = vec!["127.0.0.1:8080".to_string()]; - vhost.upstreams = vec![local_upstream("backend")]; - vhost.locations[0].handler = HandlerConfig::Proxy { - upstream: "backend".to_string(), - preserve_host: None, - strip_prefix: None, - proxy_pass_uri: None, - proxy_http_version: None, - proxy_redirect: None, - proxy_set_headers: std::collections::HashMap::new(), - }; - config.servers = vec![vhost]; - - validate(&config).expect("vhost route should see vhost-local upstream"); -} - -#[test] -fn validate_keeps_vhost_local_upstream_hidden_from_default_routes() { - let mut config = base_config(); - config.upstreams.clear(); - config.servers = vec![{ - let mut vhost = sample_vhost(vec!["api.example.com"]); - vhost.upstreams = vec![local_upstream("backend")]; - vhost - }]; - - let error = validate(&config).expect_err("default route must not see vhost-local upstream"); - assert!(error.to_string().contains("proxy upstream `backend` is not defined")); -} diff --git a/crates/rginx-config/src/validate/tests/vhosts/listen.rs b/crates/rginx-config/src/validate/tests/vhosts/listen.rs new file mode 100644 index 00000000..8dfdaf28 --- /dev/null +++ b/crates/rginx-config/src/validate/tests/vhosts/listen.rs @@ -0,0 +1,204 @@ +use super::*; + +#[test] +fn validate_accepts_vhost_listen_without_main_server_listen() { + let mut config = base_config(); + config.server.listen = None; + config.locations.clear(); + let mut vhost = 
sample_vhost(vec!["api.example.com"]); + vhost.listen = vec!["127.0.0.1:8080".to_string()]; + config.servers = vec![vhost]; + + validate(&config).expect("servers[].listen should provide the listener binding"); +} + +#[test] +fn validate_accepts_server_tls_defaults_with_vhost_listen() { + let mut config = base_config(); + config.server.listen = None; + config.server.default_certificate = Some("api.example.com".to_string()); + config.server.tls = Some(valid_server_tls()); + config.locations.clear(); + let mut vhost = sample_vhost(vec!["api.example.com"]); + vhost.listen = vec!["127.0.0.1:8443 ssl http2".to_string()]; + vhost.tls = Some(VirtualHostTlsConfig { + acme: None, + cert_path: "api.crt".to_string(), + key_path: "api.key".to_string(), + additional_certificates: None, + ocsp_staple_path: None, + ocsp: None, + }); + config.servers = vec![vhost]; + + validate(&config).expect("server TLS defaults should be allowed with servers[].listen"); +} + +#[test] +fn validate_rejects_mixing_main_server_listen_with_vhost_listen() { + let mut config = base_config(); + let mut vhost = sample_vhost(vec!["api.example.com"]); + vhost.listen = vec!["127.0.0.1:8080".to_string()]; + config.servers = vec![vhost]; + + let error = validate(&config).expect_err("mixed listener architecture should fail"); + assert!(error.to_string().contains("cannot be used together with servers[].listen")); +} + +#[test] +fn validate_rejects_server_http3_with_vhost_listen() { + let mut config = base_config(); + config.server.listen = None; + config.server.tls = Some(valid_server_tls()); + config.server.http3 = Some(Http3Config::default()); + config.locations.clear(); + let mut vhost = sample_vhost(vec!["api.example.com"]); + vhost.listen = vec!["127.0.0.1:8080".to_string()]; + config.servers = vec![vhost]; + + let error = validate(&config).expect_err("server http3 should stay listener-scoped"); + assert!(error.to_string().contains("server listen, proxy_protocol, and http3")); +} + +#[test] +fn 
validate_rejects_missing_listen_when_any_vhost_uses_listen() { + let mut config = base_config(); + config.server.listen = None; + config.locations.clear(); + let mut first = sample_vhost(vec!["api.example.com"]); + first.listen = vec!["127.0.0.1:8080".to_string()]; + let second = sample_vhost(vec!["www.example.com"]); + config.servers = vec![first, second]; + + let error = validate(&config).expect_err("all vhosts should declare listen in vhost model"); + assert!(error.to_string().contains("every vhost must declare listen explicitly")); +} + +#[test] +fn validate_accepts_vhost_listen_default_server_and_reuseport() { + let mut config = base_config(); + config.server.listen = None; + config.locations.clear(); + let mut vhost = sample_vhost(vec!["api.example.com"]); + vhost.listen = vec!["127.0.0.1:8080 ssl default_server reuseport".to_string()]; + vhost.tls = Some(VirtualHostTlsConfig { + acme: None, + cert_path: "server.crt".to_string(), + key_path: "server.key".to_string(), + additional_certificates: None, + ocsp_staple_path: None, + ocsp: None, + }); + config.servers = vec![vhost]; + + validate(&config).expect("default_server and reuseport should validate"); +} + +#[test] +fn validate_rejects_inconsistent_http3_on_shared_vhost_listen() { + let mut config = base_config(); + config.server.listen = None; + config.server.tls = Some(valid_server_tls()); + config.locations.clear(); + let mut first = sample_vhost(vec!["api.example.com"]); + first.listen = vec!["127.0.0.1:8443 ssl http2 http3".to_string()]; + first.tls = Some(VirtualHostTlsConfig { + acme: None, + cert_path: "api.crt".to_string(), + key_path: "api.key".to_string(), + additional_certificates: None, + ocsp_staple_path: None, + ocsp: None, + }); + first.http3 = Some(Http3Config { alt_svc_max_age_secs: Some(7200), ..Http3Config::default() }); + let mut second = sample_vhost(vec!["www.example.com"]); + second.listen = vec!["127.0.0.1:8443 ssl http2 http3".to_string()]; + second.tls = Some(VirtualHostTlsConfig { 
+ acme: None, + cert_path: "www.crt".to_string(), + key_path: "www.key".to_string(), + additional_certificates: None, + ocsp_staple_path: None, + ocsp: None, + }); + second.http3 = Some(Http3Config { alt_svc_max_age_secs: Some(3600), ..Http3Config::default() }); + config.servers = vec![first, second]; + + let error = validate(&config).expect_err("shared http3 listener settings should match"); + assert!(error.to_string().contains("must use consistent http3 settings")); +} + +#[test] +fn validate_rejects_vhost_http3_when_server_tls_policy_disables_tls13() { + let mut config = base_config(); + config.server.listen = None; + let mut tls = valid_server_tls(); + tls.versions = Some(vec![TlsVersionConfig::Tls12]); + config.server.tls = Some(tls); + config.locations.clear(); + let mut vhost = sample_vhost(vec!["api.example.com"]); + vhost.listen = vec!["127.0.0.1:8443 ssl http2 http3".to_string()]; + vhost.tls = Some(VirtualHostTlsConfig { + acme: None, + cert_path: "api.crt".to_string(), + key_path: "api.key".to_string(), + additional_certificates: None, + ocsp_staple_path: None, + ocsp: None, + }); + vhost.http3 = Some(Http3Config::default()); + config.servers = vec![vhost]; + + let error = validate(&config).expect_err("http3 should honor server TLS policy defaults"); + assert!(error.to_string().contains("http3 requires TLS1.3")); +} + +#[test] +fn validate_rejects_vhost_ssl_listen_without_vhost_tls() { + let mut config = base_config(); + config.server.listen = None; + config.locations.clear(); + let mut vhost = sample_vhost(vec!["api.example.com"]); + vhost.listen = vec!["127.0.0.1:8443 ssl http2".to_string()]; + config.servers = vec![vhost]; + + let error = validate(&config).expect_err("ssl listen without vhost tls should fail"); + assert!(error.to_string().contains("servers[0] ssl listen requires tls")); +} + +#[test] +fn validate_accepts_vhost_local_upstream_scope() { + let mut config = base_config(); + config.server.listen = None; + config.upstreams.clear(); + 
config.locations.clear(); + let mut vhost = sample_vhost(vec!["api.example.com"]); + vhost.listen = vec!["127.0.0.1:8080".to_string()]; + vhost.upstreams = vec![local_upstream("backend")]; + vhost.locations[0].handler = HandlerConfig::Proxy { + upstream: "backend".to_string(), + preserve_host: None, + strip_prefix: None, + proxy_pass_uri: None, + proxy_http_version: None, + proxy_redirect: None, + proxy_set_headers: std::collections::HashMap::new(), + }; + config.servers = vec![vhost]; + + validate(&config).expect("vhost route should see vhost-local upstream"); +} + +#[test] +fn validate_keeps_vhost_local_upstream_hidden_from_default_routes() { + let mut config = base_config(); + config.upstreams.clear(); + config.servers = vec![{ + let mut vhost = sample_vhost(vec!["api.example.com"]); + vhost.upstreams = vec![local_upstream("backend")]; + vhost + }]; + + let error = validate(&config).expect_err("default route must not see vhost-local upstream"); + assert!(error.to_string().contains("proxy upstream `backend` is not defined")); +} diff --git a/crates/rginx-core/src/config.rs b/crates/rginx-core/src/config.rs index b0fdb631..86b476ae 100644 --- a/crates/rginx-core/src/config.rs +++ b/crates/rginx-core/src/config.rs @@ -1,5 +1,6 @@ mod access_log; mod acme; +mod agent; mod cache; mod control_plane; mod listener; @@ -13,6 +14,7 @@ mod virtual_host; pub use access_log::{AccessLogFormat, AccessLogValues}; pub use acme::{AcmeChallengeType, AcmeSettings, ManagedCertificateSpec}; +pub use agent::{AgentAuthSettings, AgentSettings}; pub use cache::{ CacheIgnoreHeader, CacheKeyRenderContext, CacheKeyTemplate, CacheKeyTemplateError, CachePredicate, CachePredicateRequestContext, CacheRangeRequestPolicy, CacheStatusTtlRule, diff --git a/crates/rginx-core/src/config/agent.rs b/crates/rginx-core/src/config/agent.rs new file mode 100644 index 00000000..282ac51f --- /dev/null +++ b/crates/rginx-core/src/config/agent.rs @@ -0,0 +1,27 @@ +use std::collections::BTreeMap; +use 
std::path::PathBuf; +use std::time::Duration; + +use http::Uri; + +#[derive(Debug, Clone)] +pub struct AgentAuthSettings { + pub token_path: PathBuf, +} + +#[derive(Debug, Clone)] +pub struct AgentSettings { + pub endpoint: Uri, + pub node_id: String, + pub auth: AgentAuthSettings, + pub state_path: PathBuf, + pub region: Option, + pub pop: Option, + pub labels: BTreeMap, + pub heartbeat_interval: Duration, + pub connect_timeout: Duration, + pub request_timeout: Duration, + pub poll_timeout: Duration, + pub backoff_initial: Duration, + pub backoff_max: Duration, +} diff --git a/crates/rginx-core/src/config/control_plane.rs b/crates/rginx-core/src/config/control_plane.rs index b484c874..ab6d3b48 100644 --- a/crates/rginx-core/src/config/control_plane.rs +++ b/crates/rginx-core/src/config/control_plane.rs @@ -12,6 +12,12 @@ pub struct ControlPlaneTlsSettings { pub require_client_cert: bool, } +/// Legacy node-side control-plane server settings. +/// +/// These settings make an rginx node listen for control-plane requests. The +/// preferred direction is an outbound agent connection to a central control +/// plane, so new features should not extend this server-oriented model unless +/// they are required for compatibility. 
#[derive(Debug, Clone)] pub struct ControlPlaneSettings { pub listen: SocketAddr, diff --git a/crates/rginx-core/src/config/snapshot/mod.rs b/crates/rginx-core/src/config/snapshot/mod.rs index a02fdb4d..3aba1b16 100644 --- a/crates/rginx-core/src/config/snapshot/mod.rs +++ b/crates/rginx-core/src/config/snapshot/mod.rs @@ -5,7 +5,7 @@ use super::{ AcmeSettings, CacheZone, Listener, ManagedCertificateSpec, Route, RouteMatcher, RuntimeSettings, Upstream, VirtualHost, }; -use crate::config::ControlPlaneSettings; +use crate::config::{AgentSettings, ControlPlaneSettings}; mod linear; mod lookup; @@ -17,6 +17,7 @@ pub use lookup::ConfigLookup; #[derive(Debug, Clone)] pub struct ConfigSnapshot { pub runtime: RuntimeSettings, + pub agent: Option, pub control_plane: Option, pub acme: Option, pub managed_certificates: Vec, diff --git a/crates/rginx-core/src/config/tests/core.rs b/crates/rginx-core/src/config/tests/core.rs index 2e366f5d..d49d1d23 100644 --- a/crates/rginx-core/src/config/tests/core.rs +++ b/crates/rginx-core/src/config/tests/core.rs @@ -76,6 +76,7 @@ fn config_snapshot_counts_routes_across_all_vhosts() { }; let snapshot = ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), runtime: RuntimeSettings { diff --git a/crates/rginx-core/src/config/tests/snapshot_lookup.rs b/crates/rginx-core/src/config/tests/snapshot_lookup.rs index f9716e21..9a61dbeb 100644 --- a/crates/rginx-core/src/config/tests/snapshot_lookup.rs +++ b/crates/rginx-core/src/config/tests/snapshot_lookup.rs @@ -4,6 +4,7 @@ use super::*; fn config_snapshot_route_selection_falls_back_when_lookup_misses_vhost() { let mut snapshot = ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), runtime: RuntimeSettings { @@ -135,6 +136,7 @@ fn snapshot_with_path_routes() -> ConfigSnapshot { fn snapshot_with_vhost_routes(routes: Vec) -> ConfigSnapshot { ConfigSnapshot { control_plane: None, + agent: None, acme: None, 
managed_certificates: Vec::new(), runtime: RuntimeSettings { diff --git a/crates/rginx-core/src/lib.rs b/crates/rginx-core/src/lib.rs index 7d3c3cb9..9ec95e6a 100644 --- a/crates/rginx-core/src/lib.rs +++ b/crates/rginx-core/src/lib.rs @@ -6,21 +6,22 @@ pub mod types; pub use config::{ AccessLogFormat, AccessLogValues, AcmeChallengeType, AcmeSettings, ActiveHealthCheck, - CacheIgnoreHeader, CacheKeyRenderContext, CacheKeyTemplate, CacheKeyTemplateError, - CachePredicate, CachePredicateRequestContext, CacheRangeRequestPolicy, CacheStatusTtlRule, - CacheUseStaleCondition, CacheZone, ClientIdentity, CompiledServerNamePattern, ConfigLookup, - ConfigSnapshot, ControlPlaneSettings, ControlPlaneTlsSettings, DEFAULT_SERVER_HEADER, - ErrorPageTarget, FilePathStrategy, FileRoute, GrpcRouteMatch, Http1Settings, Listener, - ListenerApplicationProtocol, ListenerHttp3, ListenerTransportBinding, ListenerTransportKind, - ManagedCertificateSpec, OcspConfig, OcspNonceMode, OcspResponderPolicy, - ProxyHeaderRenderContext, ProxyHeaderTemplate, ProxyHeaderTemplateError, ProxyHeaderValue, - ProxyRedirectMode, ProxyTarget, ProxyUriMode, ReturnAction, Route, RouteAccessControl, - RouteAction, RouteBufferingPolicy, RouteCachePolicy, RouteCompressionPolicy, RouteErrorPage, - RouteMatcher, RouteRateLimit, RouteRegexError, RouteRegexMatcher, RouteRewrite, - RouteRewriteStop, RuntimeSettings, Server, ServerCertificateBundle, ServerClientAuthMode, - ServerClientAuthPolicy, ServerNameMatch, ServerTls, TlsCipherSuite, TlsKeyExchangeGroup, - TlsVersion, TryFileStep, Upstream, UpstreamDnsPolicy, UpstreamLoadBalance, UpstreamPeer, - UpstreamProtocol, UpstreamSettings, UpstreamTls, VirtualHost, VirtualHostTls, - best_matching_server_name_pattern, default_server_header, match_server_name, + AgentAuthSettings, AgentSettings, CacheIgnoreHeader, CacheKeyRenderContext, CacheKeyTemplate, + CacheKeyTemplateError, CachePredicate, CachePredicateRequestContext, CacheRangeRequestPolicy, + CacheStatusTtlRule, 
CacheUseStaleCondition, CacheZone, ClientIdentity, + CompiledServerNamePattern, ConfigLookup, ConfigSnapshot, ControlPlaneSettings, + ControlPlaneTlsSettings, DEFAULT_SERVER_HEADER, ErrorPageTarget, FilePathStrategy, FileRoute, + GrpcRouteMatch, Http1Settings, Listener, ListenerApplicationProtocol, ListenerHttp3, + ListenerTransportBinding, ListenerTransportKind, ManagedCertificateSpec, OcspConfig, + OcspNonceMode, OcspResponderPolicy, ProxyHeaderRenderContext, ProxyHeaderTemplate, + ProxyHeaderTemplateError, ProxyHeaderValue, ProxyRedirectMode, ProxyTarget, ProxyUriMode, + ReturnAction, Route, RouteAccessControl, RouteAction, RouteBufferingPolicy, RouteCachePolicy, + RouteCompressionPolicy, RouteErrorPage, RouteMatcher, RouteRateLimit, RouteRegexError, + RouteRegexMatcher, RouteRewrite, RouteRewriteStop, RuntimeSettings, Server, + ServerCertificateBundle, ServerClientAuthMode, ServerClientAuthPolicy, ServerNameMatch, + ServerTls, TlsCipherSuite, TlsKeyExchangeGroup, TlsVersion, TryFileStep, Upstream, + UpstreamDnsPolicy, UpstreamLoadBalance, UpstreamPeer, UpstreamProtocol, UpstreamSettings, + UpstreamTls, VirtualHost, VirtualHostTls, best_matching_server_name_pattern, + default_server_header, match_server_name, }; pub use error::{Error, Result}; diff --git a/crates/rginx-http/src/cache/invalidation.rs b/crates/rginx-http/src/cache/invalidation.rs index ba73c4a7..0b487966 100644 --- a/crates/rginx-http/src/cache/invalidation.rs +++ b/crates/rginx-http/src/cache/invalidation.rs @@ -46,68 +46,3 @@ fn selector_matches_entry( CacheInvalidationSelector::Tag(tag) => entry.tags.iter().any(|candidate| candidate == tag), } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::cache::CacheIndexEntryKind; - - #[test] - fn entry_is_logically_invalid_uses_latest_matching_rule_per_selector_family() { - let key = "https:example.com:/news/article"; - let mut index = CacheIndex::default(); - index.add_invalidation_rule(CacheInvalidationRule { - selector: 
CacheInvalidationSelector::Prefix("https:example.com:/news/".to_string()), - created_at_unix_ms: 1_000, - }); - index.add_invalidation_rule(CacheInvalidationRule { - selector: CacheInvalidationSelector::Tag("sports".to_string()), - created_at_unix_ms: 1_500, - }); - index.add_invalidation_rule(CacheInvalidationRule { - selector: CacheInvalidationSelector::Exact(key.to_string()), - created_at_unix_ms: 2_000, - }); - - let mut entry = test_entry(key, 1_999); - entry.tags = vec!["sports".to_string()]; - assert!(entry_is_logically_invalid(&index, key, &entry)); - - let mut entry = test_entry(key, 2_000); - entry.tags = vec!["sports".to_string()]; - assert!(!entry_is_logically_invalid(&index, key, &entry)); - } - - #[test] - fn entry_is_logically_invalid_matches_unicode_prefixes_without_scanning_all_rules() { - let key = "https:example.com:/新闻/头条"; - let mut index = CacheIndex::default(); - index.add_invalidation_rule(CacheInvalidationRule { - selector: CacheInvalidationSelector::Prefix("https:example.com:/新闻/".to_string()), - created_at_unix_ms: 5_000, - }); - - assert!(entry_is_logically_invalid(&index, key, &test_entry(key, 4_999))); - assert!(!entry_is_logically_invalid(&index, key, &test_entry(key, 5_000))); - } - - fn test_entry(key: &str, stored_at_unix_ms: u64) -> CacheIndexEntry { - CacheIndexEntry { - kind: CacheIndexEntryKind::Response, - hash: "hash".to_string(), - base_key: key.to_string(), - stored_at_unix_ms, - vary: Vec::new(), - tags: Vec::new(), - body_size_bytes: 1, - expires_at_unix_ms: 60_000, - grace_until_unix_ms: None, - keep_until_unix_ms: None, - stale_if_error_until_unix_ms: None, - stale_while_revalidate_until_unix_ms: None, - requires_revalidation: false, - must_revalidate: false, - last_access_unix_ms: stored_at_unix_ms, - } - } -} diff --git a/crates/rginx-http/src/handler/tests/routing/handle.rs b/crates/rginx-http/src/handler/tests/routing/handle.rs index 3e4626e5..8ebb845b 100644 --- 
a/crates/rginx-http/src/handler/tests/routing/handle.rs +++ b/crates/rginx-http/src/handler/tests/routing/handle.rs @@ -210,296 +210,5 @@ async fn handle_short_circuits_acme_http01_requests() { assert_eq!(body.as_ref(), b"demo-key-authorization"); } -#[tokio::test] -async fn handle_rejects_internal_redirect_cycles_after_budget_is_exhausted() { - let route = Route { - cache: None, - id: "server/routes[0]|exact:/loop".to_string(), - matcher: RouteMatcher::Exact("/loop".to_string()), - internal: false, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: RouteAction::Return(ReturnAction { - status: internal_redirect_status(), - location: internal_redirect_location("/loop"), - body: None, - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Off, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }; - let config = test_config(test_vhost("server", Vec::new(), vec![route]), Vec::new()); - let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); - - let request = Request::builder() - .uri("/loop") - .body(crate::handler::full_body("")) - .expect("request should build"); - let connection = std::sync::Arc::new(ConnectionPeerAddrs { - socket_peer_addr: "192.0.2.10:44324".parse().unwrap(), - proxy_protocol_source_addr: None, - tls_client_identity: None, - tls_version: None, - tls_alpn: None, - early_data: false, - }); - - let response = crate::handler::handle(request, shared, connection, "default").await; - assert_eq!(response.status(), StatusCode::INTERNAL_SERVER_ERROR); - let body = response.into_body().collect().await.expect("body should collect").to_bytes(); - assert_eq!(body.as_ref(), b"internal 
redirect cycle detected\n"); -} - -#[tokio::test] -async fn handle_rejects_external_requests_to_internal_named_routes() { - let route = Route { - cache: None, - id: "server/routes[0]|named:@hidden".to_string(), - matcher: RouteMatcher::Named("@hidden".to_string()), - internal: true, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("hidden\n".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Off, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }; - let config = test_config(test_vhost("server", Vec::new(), vec![route]), Vec::new()); - let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); - - let request = Request::builder() - .uri("/hidden") - .body(crate::handler::full_body("")) - .expect("request should build"); - let connection = std::sync::Arc::new(ConnectionPeerAddrs { - socket_peer_addr: "192.0.2.10:44325".parse().unwrap(), - proxy_protocol_source_addr: None, - tls_client_identity: None, - tls_version: None, - tls_alpn: None, - early_data: false, - }); - - let response = crate::handler::handle(request, shared, connection, "default").await; - assert_eq!(response.status(), StatusCode::NOT_FOUND); -} - -#[tokio::test] -async fn handle_can_internally_redirect_to_named_routes() { - let redirect = Route { - cache: None, - id: "server/routes[0]|exact:/start".to_string(), - matcher: RouteMatcher::Exact("/start".to_string()), - internal: false, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: 
RouteAction::Return(ReturnAction { - status: internal_redirect_status(), - location: internal_redirect_location("@fallback"), - body: None, - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Off, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }; - let named = Route { - cache: None, - id: "server/routes[1]|named:@fallback".to_string(), - matcher: RouteMatcher::Named("@fallback".to_string()), - internal: true, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("named ok\n".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Off, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }; - let config = test_config(test_vhost("server", Vec::new(), vec![redirect, named]), Vec::new()); - let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); - - let request = Request::builder() - .uri("/start") - .body(crate::handler::full_body("")) - .expect("request should build"); - let connection = std::sync::Arc::new(ConnectionPeerAddrs { - socket_peer_addr: "192.0.2.10:44326".parse().unwrap(), - proxy_protocol_source_addr: None, - tls_client_identity: None, - tls_version: None, - tls_alpn: None, - early_data: false, - }); - - let response = crate::handler::handle(request, shared, connection, "default").await; 
- assert_eq!(response.status(), StatusCode::OK); - let body = response.into_body().collect().await.expect("body should collect").to_bytes(); - assert_eq!(body.as_ref(), b"named ok\n"); -} - -#[tokio::test] -async fn handle_rewrite_last_restarts_location_selection() { - let start = Route { - cache: None, - id: "server/routes[0]|exact:/legacy".to_string(), - matcher: RouteMatcher::Exact("/legacy".to_string()), - internal: false, - rewrites: vec![rginx_core::RouteRewrite { - pattern: rginx_core::RouteRegexMatcher::new("^/legacy$".to_string(), false).unwrap(), - replacement: "/modern".to_string(), - stop: rginx_core::RouteRewriteStop::RestartLocationSelection, - }], - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("legacy\n".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Off, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }; - let modern = Route { - cache: None, - id: "server/routes[1]|exact:/modern".to_string(), - matcher: RouteMatcher::Exact("/modern".to_string()), - internal: false, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("modern\n".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Off, - compression_min_bytes: None, - 
compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }; - let config = test_config(test_vhost("server", Vec::new(), vec![start, modern]), Vec::new()); - let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); - - let request = Request::builder() - .uri("/legacy") - .body(crate::handler::full_body("")) - .expect("request should build"); - let connection = std::sync::Arc::new(ConnectionPeerAddrs { - socket_peer_addr: "192.0.2.10:44327".parse().unwrap(), - proxy_protocol_source_addr: None, - tls_client_identity: None, - tls_version: None, - tls_alpn: None, - early_data: false, - }); - - let response = crate::handler::handle(request, shared, connection, "default").await; - let body = response.into_body().collect().await.expect("body should collect").to_bytes(); - assert_eq!(body.as_ref(), b"modern\n"); -} - -#[tokio::test] -async fn handle_rewrite_break_stays_on_current_route() { - let route = Route { - cache: None, - id: "server/routes[0]|exact:/legacy".to_string(), - matcher: RouteMatcher::Exact("/legacy".to_string()), - internal: false, - rewrites: vec![rginx_core::RouteRewrite { - pattern: rginx_core::RouteRegexMatcher::new("^/legacy$".to_string(), false).unwrap(), - replacement: "/modern".to_string(), - stop: rginx_core::RouteRewriteStop::Stop, - }], - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("current\n".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Off, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }; - let config = test_config(test_vhost("server", 
Vec::new(), vec![route]), Vec::new()); - let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); - - let request = Request::builder() - .uri("/legacy") - .body(crate::handler::full_body("")) - .expect("request should build"); - let connection = std::sync::Arc::new(ConnectionPeerAddrs { - socket_peer_addr: "192.0.2.10:44328".parse().unwrap(), - proxy_protocol_source_addr: None, - tls_client_identity: None, - tls_version: None, - tls_alpn: None, - early_data: false, - }); - - let response = crate::handler::handle(request, shared, connection, "default").await; - let body = response.into_body().collect().await.expect("body should collect").to_bytes(); - assert_eq!(body.as_ref(), b"current\n"); -} - mod file; +mod redirects; diff --git a/crates/rginx-http/src/handler/tests/routing/handle/redirects.rs b/crates/rginx-http/src/handler/tests/routing/handle/redirects.rs new file mode 100644 index 00000000..60ee1db6 --- /dev/null +++ b/crates/rginx-http/src/handler/tests/routing/handle/redirects.rs @@ -0,0 +1,293 @@ +use super::*; + +#[tokio::test] +async fn handle_rejects_internal_redirect_cycles_after_budget_is_exhausted() { + let route = Route { + cache: None, + id: "server/routes[0]|exact:/loop".to_string(), + matcher: RouteMatcher::Exact("/loop".to_string()), + internal: false, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: internal_redirect_status(), + location: internal_redirect_location("/loop"), + body: None, + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Off, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }; + let config = 
test_config(test_vhost("server", Vec::new(), vec![route]), Vec::new()); + let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); + + let request = Request::builder() + .uri("/loop") + .body(crate::handler::full_body("")) + .expect("request should build"); + let connection = std::sync::Arc::new(ConnectionPeerAddrs { + socket_peer_addr: "192.0.2.10:44324".parse().unwrap(), + proxy_protocol_source_addr: None, + tls_client_identity: None, + tls_version: None, + tls_alpn: None, + early_data: false, + }); + + let response = crate::handler::handle(request, shared, connection, "default").await; + assert_eq!(response.status(), StatusCode::INTERNAL_SERVER_ERROR); + let body = response.into_body().collect().await.expect("body should collect").to_bytes(); + assert_eq!(body.as_ref(), b"internal redirect cycle detected\n"); +} + +#[tokio::test] +async fn handle_rejects_external_requests_to_internal_named_routes() { + let route = Route { + cache: None, + id: "server/routes[0]|named:@hidden".to_string(), + matcher: RouteMatcher::Named("@hidden".to_string()), + internal: true, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("hidden\n".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Off, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }; + let config = test_config(test_vhost("server", Vec::new(), vec![route]), Vec::new()); + let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); + + let request = Request::builder() + .uri("/hidden") + 
.body(crate::handler::full_body("")) + .expect("request should build"); + let connection = std::sync::Arc::new(ConnectionPeerAddrs { + socket_peer_addr: "192.0.2.10:44325".parse().unwrap(), + proxy_protocol_source_addr: None, + tls_client_identity: None, + tls_version: None, + tls_alpn: None, + early_data: false, + }); + + let response = crate::handler::handle(request, shared, connection, "default").await; + assert_eq!(response.status(), StatusCode::NOT_FOUND); +} + +#[tokio::test] +async fn handle_can_internally_redirect_to_named_routes() { + let redirect = Route { + cache: None, + id: "server/routes[0]|exact:/start".to_string(), + matcher: RouteMatcher::Exact("/start".to_string()), + internal: false, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: internal_redirect_status(), + location: internal_redirect_location("@fallback"), + body: None, + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Off, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }; + let named = Route { + cache: None, + id: "server/routes[1]|named:@fallback".to_string(), + matcher: RouteMatcher::Named("@fallback".to_string()), + internal: true, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("named ok\n".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: 
rginx_core::RouteCompressionPolicy::Off, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }; + let config = test_config(test_vhost("server", Vec::new(), vec![redirect, named]), Vec::new()); + let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); + + let request = Request::builder() + .uri("/start") + .body(crate::handler::full_body("")) + .expect("request should build"); + let connection = std::sync::Arc::new(ConnectionPeerAddrs { + socket_peer_addr: "192.0.2.10:44326".parse().unwrap(), + proxy_protocol_source_addr: None, + tls_client_identity: None, + tls_version: None, + tls_alpn: None, + early_data: false, + }); + + let response = crate::handler::handle(request, shared, connection, "default").await; + assert_eq!(response.status(), StatusCode::OK); + let body = response.into_body().collect().await.expect("body should collect").to_bytes(); + assert_eq!(body.as_ref(), b"named ok\n"); +} + +#[tokio::test] +async fn handle_rewrite_last_restarts_location_selection() { + let start = Route { + cache: None, + id: "server/routes[0]|exact:/legacy".to_string(), + matcher: RouteMatcher::Exact("/legacy".to_string()), + internal: false, + rewrites: vec![rginx_core::RouteRewrite { + pattern: rginx_core::RouteRegexMatcher::new("^/legacy$".to_string(), false).unwrap(), + replacement: "/modern".to_string(), + stop: rginx_core::RouteRewriteStop::RestartLocationSelection, + }], + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("legacy\n".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Off, + 
compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }; + let modern = Route { + cache: None, + id: "server/routes[1]|exact:/modern".to_string(), + matcher: RouteMatcher::Exact("/modern".to_string()), + internal: false, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("modern\n".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Off, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }; + let config = test_config(test_vhost("server", Vec::new(), vec![start, modern]), Vec::new()); + let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); + + let request = Request::builder() + .uri("/legacy") + .body(crate::handler::full_body("")) + .expect("request should build"); + let connection = std::sync::Arc::new(ConnectionPeerAddrs { + socket_peer_addr: "192.0.2.10:44327".parse().unwrap(), + proxy_protocol_source_addr: None, + tls_client_identity: None, + tls_version: None, + tls_alpn: None, + early_data: false, + }); + + let response = crate::handler::handle(request, shared, connection, "default").await; + let body = response.into_body().collect().await.expect("body should collect").to_bytes(); + assert_eq!(body.as_ref(), b"modern\n"); +} + +#[tokio::test] +async fn handle_rewrite_break_stays_on_current_route() { + let route = Route { + cache: None, + id: "server/routes[0]|exact:/legacy".to_string(), + matcher: RouteMatcher::Exact("/legacy".to_string()), + internal: false, + rewrites: vec![rginx_core::RouteRewrite { + 
pattern: rginx_core::RouteRegexMatcher::new("^/legacy$".to_string(), false).unwrap(), + replacement: "/modern".to_string(), + stop: rginx_core::RouteRewriteStop::Stop, + }], + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("current\n".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Off, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }; + let config = test_config(test_vhost("server", Vec::new(), vec![route]), Vec::new()); + let shared = crate::state::SharedState::from_config(config).expect("shared state should build"); + + let request = Request::builder() + .uri("/legacy") + .body(crate::handler::full_body("")) + .expect("request should build"); + let connection = std::sync::Arc::new(ConnectionPeerAddrs { + socket_peer_addr: "192.0.2.10:44328".parse().unwrap(), + proxy_protocol_source_addr: None, + tls_client_identity: None, + tls_version: None, + tls_alpn: None, + early_data: false, + }); + + let response = crate::handler::handle(request, shared, connection, "default").await; + let body = response.into_body().collect().await.expect("body should collect").to_bytes(); + assert_eq!(body.as_ref(), b"current\n"); +} diff --git a/crates/rginx-http/src/handler/tests/support.rs b/crates/rginx-http/src/handler/tests/support.rs index 79d18a17..a7e673aa 100644 --- a/crates/rginx-http/src/handler/tests/support.rs +++ b/crates/rginx-http/src/handler/tests/support.rs @@ -32,6 +32,7 @@ pub(crate) fn test_config(default_vhost: VirtualHost, vhosts: Vec) }; ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: 
Vec::new(), cache_zones: HashMap::new(), diff --git a/crates/rginx-http/src/lib.rs b/crates/rginx-http/src/lib.rs index 6e3e83e3..b3b5fc9d 100644 --- a/crates/rginx-http/src/lib.rs +++ b/crates/rginx-http/src/lib.rs @@ -25,19 +25,19 @@ pub use client_ip::TlsClientIdentity; pub use proxy::{PeerHealthSnapshot, UpstreamHealthSnapshot}; pub use server::serve; pub use state::{ - AcmeManagedCertificateSnapshot, AcmeRuntimeSnapshot, ApplyOutcomeSnapshot, ApplyResultSnapshot, - ApplyStatusSnapshot, CacheInvalidationResult, CachePurgeResult, CacheStatsSnapshot, - CacheZoneRuntimeSnapshot, ConfigFailureStageSnapshot, GrpcTrafficSnapshot, - HttpCountersSnapshot, ListenerStatsSnapshot, MtlsStatusSnapshot, NodeIdentitySnapshot, - ReloadOutcomeSnapshot, ReloadResultSnapshot, ReloadStatusSnapshot, RevisionStatusSnapshot, - RouteStatsSnapshot, RuntimeListenerBindingSnapshot, RuntimeListenerSnapshot, - RuntimeStatusSnapshot, SharedState, SnapshotDeltaSnapshot, SnapshotModule, - TlsCertificateStatusSnapshot, TlsDefaultCertificateBindingSnapshot, TlsListenerStatusSnapshot, - TlsOcspRefreshSpec, TlsOcspStatusSnapshot, TlsReloadBoundarySnapshot, TlsRuntimeSnapshot, - TlsSniBindingSnapshot, TlsVhostBindingSnapshot, TrafficStatsSnapshot, - UpstreamPeerStatsSnapshot, UpstreamStatsSnapshot, UpstreamTlsStatusSnapshot, - VhostStatsSnapshot, tls_ocsp_refresh_specs_for_config, tls_reloadable_fields, - tls_restart_required_fields, tls_runtime_snapshot_for_config, + AcmeManagedCertificateSnapshot, AcmeRuntimeSnapshot, AgentRuntimeSnapshot, AgentRuntimeUpdate, + ApplyOutcomeSnapshot, ApplyResultSnapshot, ApplyStatusSnapshot, CacheInvalidationResult, + CachePurgeResult, CacheStatsSnapshot, CacheZoneRuntimeSnapshot, ConfigFailureStageSnapshot, + GrpcTrafficSnapshot, HttpCountersSnapshot, ListenerStatsSnapshot, MtlsStatusSnapshot, + NodeIdentitySnapshot, ReloadOutcomeSnapshot, ReloadResultSnapshot, ReloadStatusSnapshot, + RevisionStatusSnapshot, RouteStatsSnapshot, RuntimeListenerBindingSnapshot, + 
RuntimeListenerSnapshot, RuntimeStatusSnapshot, SharedState, SnapshotDeltaSnapshot, + SnapshotModule, TlsCertificateStatusSnapshot, TlsDefaultCertificateBindingSnapshot, + TlsListenerStatusSnapshot, TlsOcspRefreshSpec, TlsOcspStatusSnapshot, + TlsReloadBoundarySnapshot, TlsRuntimeSnapshot, TlsSniBindingSnapshot, TlsVhostBindingSnapshot, + TrafficStatsSnapshot, UpstreamPeerStatsSnapshot, UpstreamStatsSnapshot, + UpstreamTlsStatusSnapshot, VhostStatsSnapshot, tls_ocsp_refresh_specs_for_config, + tls_reloadable_fields, tls_restart_required_fields, tls_runtime_snapshot_for_config, }; pub use tls::{ build_ocsp_request_for_certificate, build_ocsp_request_for_certificate_with_options, diff --git a/crates/rginx-http/src/proxy/clients/tests.rs b/crates/rginx-http/src/proxy/clients/tests.rs index a33a61e3..68d3755b 100644 --- a/crates/rginx-http/src/proxy/clients/tests.rs +++ b/crates/rginx-http/src/proxy/clients/tests.rs @@ -107,6 +107,7 @@ async fn peer_health_snapshot_delegates_to_registry() { }; let snapshot = ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), diff --git a/crates/rginx-http/src/proxy/common.rs b/crates/rginx-http/src/proxy/common.rs index 6b601bde..2e2aaaab 100644 --- a/crates/rginx-http/src/proxy/common.rs +++ b/crates/rginx-http/src/proxy/common.rs @@ -1,71 +1,8 @@ use super::*; -use rginx_core::ProxyUriMode; -pub(super) fn build_proxy_uri( - peer: &ResolvedUpstreamPeer, - original_uri: &Uri, - uri_mode: &ProxyUriMode, -) -> Result { - let normalized_target = crate::request_target::normalize_request_target(original_uri); - let original_path = normalized_target.path_and_query.as_str(); - - let path_and_query = match uri_mode { - ProxyUriMode::PassOriginal => original_path.to_string(), - ProxyUriMode::StripPrefix(prefix) => strip_prefix_from_path(original_path, prefix), - ProxyUriMode::ReplacePrefix { matched_prefix, replacement } => { - replace_prefix_in_path(original_path, 
matched_prefix, replacement) - } - }; - - Uri::builder() - .scheme(peer.scheme.as_str()) - .authority(peer.upstream_authority.as_str()) - .path_and_query(path_and_query) - .build() -} - -fn strip_prefix_from_path(original_path: &str, prefix: &str) -> String { - if let Some(stripped) = original_path.strip_prefix(prefix) { - if stripped.is_empty() || stripped.starts_with('?') { - if stripped.is_empty() { "/".to_string() } else { stripped.to_string() } - } else if stripped.starts_with('/') { - stripped.to_string() - } else { - original_path.to_string() - } - } else { - original_path.to_string() - } -} +mod uri; -fn replace_prefix_in_path(original_path: &str, matched_prefix: &str, replacement: &str) -> String { - let suffix = if matched_prefix == "/" { - original_path - } else if let Some(stripped) = original_path.strip_prefix(matched_prefix) { - stripped - } else { - return original_path.to_string(); - }; - - let mut rewritten = replacement.trim_end_matches('/').to_string(); - if rewritten.is_empty() { - rewritten.push('/'); - } - - if suffix.is_empty() { - return rewritten; - } - if suffix.starts_with('?') { - return format!("{rewritten}{suffix}"); - } - if rewritten.ends_with('/') { - format!("{rewritten}{}", suffix.trim_start_matches('/')) - } else if suffix.starts_with('/') { - format!("{rewritten}{suffix}") - } else { - format!("{rewritten}/{suffix}") - } -} +pub(super) use uri::build_proxy_uri; pub(super) fn split_content_type(content_type: &str) -> (&str, &str) { let mut parts = content_type.splitn(2, ';'); diff --git a/crates/rginx-http/src/proxy/common/uri.rs b/crates/rginx-http/src/proxy/common/uri.rs new file mode 100644 index 00000000..7353384d --- /dev/null +++ b/crates/rginx-http/src/proxy/common/uri.rs @@ -0,0 +1,68 @@ +use super::*; +use rginx_core::ProxyUriMode; + +pub(in crate::proxy) fn build_proxy_uri( + peer: &ResolvedUpstreamPeer, + original_uri: &Uri, + uri_mode: &ProxyUriMode, +) -> Result { + let normalized_target = 
crate::request_target::normalize_request_target(original_uri); + let original_path = normalized_target.path_and_query.as_str(); + + let path_and_query = match uri_mode { + ProxyUriMode::PassOriginal => original_path.to_string(), + ProxyUriMode::StripPrefix(prefix) => strip_prefix_from_path(original_path, prefix), + ProxyUriMode::ReplacePrefix { matched_prefix, replacement } => { + replace_prefix_in_path(original_path, matched_prefix, replacement) + } + }; + + Uri::builder() + .scheme(peer.scheme.as_str()) + .authority(peer.upstream_authority.as_str()) + .path_and_query(path_and_query) + .build() +} + +fn strip_prefix_from_path(original_path: &str, prefix: &str) -> String { + if let Some(stripped) = original_path.strip_prefix(prefix) { + if stripped.is_empty() || stripped.starts_with('?') { + if stripped.is_empty() { "/".to_string() } else { stripped.to_string() } + } else if stripped.starts_with('/') { + stripped.to_string() + } else { + original_path.to_string() + } + } else { + original_path.to_string() + } +} + +fn replace_prefix_in_path(original_path: &str, matched_prefix: &str, replacement: &str) -> String { + let suffix = if matched_prefix == "/" { + original_path + } else if let Some(stripped) = original_path.strip_prefix(matched_prefix) { + stripped + } else { + return original_path.to_string(); + }; + + let mut rewritten = replacement.trim_end_matches('/').to_string(); + if rewritten.is_empty() { + rewritten.push('/'); + } + + if suffix.is_empty() { + return rewritten; + } + if suffix.starts_with('?') { + return format!("{rewritten}{suffix}"); + } + if rewritten.ends_with('/') { + format!("{rewritten}{}", suffix.trim_start_matches('/')) + } else if suffix.starts_with('/') { + format!("{rewritten}{suffix}") + } else { + format!("{rewritten}/{suffix}") + } +} diff --git a/crates/rginx-http/src/proxy/forward/response.rs b/crates/rginx-http/src/proxy/forward/response.rs index 366e321a..1f0be2b2 100644 --- a/crates/rginx-http/src/proxy/forward/response.rs +++ 
b/crates/rginx-http/src/proxy/forward/response.rs @@ -164,213 +164,4 @@ where } #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn proxy_redirect_default_rewrites_upstream_absolute_location() { - let upstream = std::sync::Arc::new(rginx_core::Upstream::new( - "backend".to_string(), - vec![rginx_core::UpstreamPeer { - url: "http://127.0.0.1:9000".to_string(), - scheme: "http".to_string(), - authority: "127.0.0.1:9000".to_string(), - weight: 1, - backup: false, - max_conns: None, - }], - rginx_core::UpstreamTls::Insecure, - rginx_core::UpstreamSettings { - protocol: rginx_core::UpstreamProtocol::Http1, - load_balance: rginx_core::UpstreamLoadBalance::RoundRobin, - dns: Default::default(), - server_name: true, - server_name_override: None, - tls_versions: None, - server_verify_depth: None, - server_crl_path: None, - client_identity: None, - request_timeout: Duration::from_secs(5), - connect_timeout: Duration::from_secs(5), - write_timeout: Duration::from_secs(5), - idle_timeout: Duration::from_secs(5), - pool_idle_timeout: None, - pool_max_idle_per_host: usize::MAX, - tcp_keepalive: None, - tcp_nodelay: false, - http2_keep_alive_interval: None, - http2_keep_alive_timeout: Duration::from_secs(20), - http2_keep_alive_while_idle: false, - max_replayable_request_body_bytes: 65536, - unhealthy_after_failures: 2, - unhealthy_cooldown: Duration::from_secs(10), - active_health_check: None, - }, - )); - let target = rginx_core::ProxyTarget { - upstream_name: "backend".to_string(), - upstream, - preserve_host: false, - uri_mode: rginx_core::ProxyUriMode::PassOriginal, - request_version: http::Version::HTTP_11, - redirect: rginx_core::ProxyRedirectMode::Default, - proxy_set_headers: Vec::new(), - }; - let mut headers = HeaderMap::new(); - headers.insert( - http::header::LOCATION, - HeaderValue::from_static("http://127.0.0.1:9000/login"), - ); - - rewrite_proxy_redirect_headers( - &mut headers, - &target, - "127.0.0.1:9000", - "https", - 
Some(&HeaderValue::from_static("edge.example.com")), - ); - - assert_eq!( - headers.get(http::header::LOCATION).and_then(|value| value.to_str().ok()), - Some("https://edge.example.com/login") - ); - } - - #[test] - fn proxy_redirect_off_preserves_upstream_absolute_location() { - let upstream = std::sync::Arc::new(rginx_core::Upstream::new( - "backend".to_string(), - vec![rginx_core::UpstreamPeer { - url: "http://127.0.0.1:9000".to_string(), - scheme: "http".to_string(), - authority: "127.0.0.1:9000".to_string(), - weight: 1, - backup: false, - max_conns: None, - }], - rginx_core::UpstreamTls::Insecure, - rginx_core::UpstreamSettings { - protocol: rginx_core::UpstreamProtocol::Http1, - load_balance: rginx_core::UpstreamLoadBalance::RoundRobin, - dns: Default::default(), - server_name: true, - server_name_override: None, - tls_versions: None, - server_verify_depth: None, - server_crl_path: None, - client_identity: None, - request_timeout: Duration::from_secs(5), - connect_timeout: Duration::from_secs(5), - write_timeout: Duration::from_secs(5), - idle_timeout: Duration::from_secs(5), - pool_idle_timeout: None, - pool_max_idle_per_host: usize::MAX, - tcp_keepalive: None, - tcp_nodelay: false, - http2_keep_alive_interval: None, - http2_keep_alive_timeout: Duration::from_secs(20), - http2_keep_alive_while_idle: false, - max_replayable_request_body_bytes: 65536, - unhealthy_after_failures: 2, - unhealthy_cooldown: Duration::from_secs(10), - active_health_check: None, - }, - )); - let target = rginx_core::ProxyTarget { - upstream_name: "backend".to_string(), - upstream, - preserve_host: false, - uri_mode: rginx_core::ProxyUriMode::PassOriginal, - request_version: http::Version::HTTP_11, - redirect: rginx_core::ProxyRedirectMode::Off, - proxy_set_headers: Vec::new(), - }; - let mut headers = HeaderMap::new(); - headers.insert( - http::header::LOCATION, - HeaderValue::from_static("http://127.0.0.1:9000/login"), - ); - - rewrite_proxy_redirect_headers( - &mut headers, - 
&target, - "127.0.0.1:9000", - "https", - Some(&HeaderValue::from_static("edge.example.com")), - ); - - assert_eq!( - headers.get(http::header::LOCATION).and_then(|value| value.to_str().ok()), - Some("http://127.0.0.1:9000/login") - ); - } - - #[test] - fn proxy_redirect_ignores_invalid_downstream_host() { - let upstream = std::sync::Arc::new(rginx_core::Upstream::new( - "backend".to_string(), - vec![rginx_core::UpstreamPeer { - url: "http://127.0.0.1:9000".to_string(), - scheme: "http".to_string(), - authority: "127.0.0.1:9000".to_string(), - weight: 1, - backup: false, - max_conns: None, - }], - rginx_core::UpstreamTls::Insecure, - rginx_core::UpstreamSettings { - protocol: rginx_core::UpstreamProtocol::Http1, - load_balance: rginx_core::UpstreamLoadBalance::RoundRobin, - dns: Default::default(), - server_name: true, - server_name_override: None, - tls_versions: None, - server_verify_depth: None, - server_crl_path: None, - client_identity: None, - request_timeout: Duration::from_secs(5), - connect_timeout: Duration::from_secs(5), - write_timeout: Duration::from_secs(5), - idle_timeout: Duration::from_secs(5), - pool_idle_timeout: None, - pool_max_idle_per_host: usize::MAX, - tcp_keepalive: None, - tcp_nodelay: false, - http2_keep_alive_interval: None, - http2_keep_alive_timeout: Duration::from_secs(20), - http2_keep_alive_while_idle: false, - max_replayable_request_body_bytes: 65536, - unhealthy_after_failures: 2, - unhealthy_cooldown: Duration::from_secs(10), - active_health_check: None, - }, - )); - let target = rginx_core::ProxyTarget { - upstream_name: "backend".to_string(), - upstream, - preserve_host: false, - uri_mode: rginx_core::ProxyUriMode::PassOriginal, - request_version: http::Version::HTTP_11, - redirect: rginx_core::ProxyRedirectMode::Default, - proxy_set_headers: Vec::new(), - }; - let mut headers = HeaderMap::new(); - headers.insert( - http::header::LOCATION, - HeaderValue::from_static("http://127.0.0.1:9000/login"), - ); - - 
rewrite_proxy_redirect_headers( - &mut headers, - &target, - "127.0.0.1:9000", - "https", - Some(&HeaderValue::from_static("bad host value")), - ); - - assert_eq!( - headers.get(http::header::LOCATION).and_then(|value| value.to_str().ok()), - Some("http://127.0.0.1:9000/login") - ); - } -} +mod tests; diff --git a/crates/rginx-http/src/proxy/forward/response/tests.rs b/crates/rginx-http/src/proxy/forward/response/tests.rs new file mode 100644 index 00000000..0fde05e3 --- /dev/null +++ b/crates/rginx-http/src/proxy/forward/response/tests.rs @@ -0,0 +1,199 @@ +use super::*; + +#[test] +fn proxy_redirect_default_rewrites_upstream_absolute_location() { + let upstream = std::sync::Arc::new(rginx_core::Upstream::new( + "backend".to_string(), + vec![rginx_core::UpstreamPeer { + url: "http://127.0.0.1:9000".to_string(), + scheme: "http".to_string(), + authority: "127.0.0.1:9000".to_string(), + weight: 1, + backup: false, + max_conns: None, + }], + rginx_core::UpstreamTls::Insecure, + rginx_core::UpstreamSettings { + protocol: rginx_core::UpstreamProtocol::Http1, + load_balance: rginx_core::UpstreamLoadBalance::RoundRobin, + dns: Default::default(), + server_name: true, + server_name_override: None, + tls_versions: None, + server_verify_depth: None, + server_crl_path: None, + client_identity: None, + request_timeout: Duration::from_secs(5), + connect_timeout: Duration::from_secs(5), + write_timeout: Duration::from_secs(5), + idle_timeout: Duration::from_secs(5), + pool_idle_timeout: None, + pool_max_idle_per_host: usize::MAX, + tcp_keepalive: None, + tcp_nodelay: false, + http2_keep_alive_interval: None, + http2_keep_alive_timeout: Duration::from_secs(20), + http2_keep_alive_while_idle: false, + max_replayable_request_body_bytes: 65536, + unhealthy_after_failures: 2, + unhealthy_cooldown: Duration::from_secs(10), + active_health_check: None, + }, + )); + let target = rginx_core::ProxyTarget { + upstream_name: "backend".to_string(), + upstream, + preserve_host: false, + 
uri_mode: rginx_core::ProxyUriMode::PassOriginal, + request_version: http::Version::HTTP_11, + redirect: rginx_core::ProxyRedirectMode::Default, + proxy_set_headers: Vec::new(), + }; + let mut headers = HeaderMap::new(); + headers.insert(http::header::LOCATION, HeaderValue::from_static("http://127.0.0.1:9000/login")); + + rewrite_proxy_redirect_headers( + &mut headers, + &target, + "127.0.0.1:9000", + "https", + Some(&HeaderValue::from_static("edge.example.com")), + ); + + assert_eq!( + headers.get(http::header::LOCATION).and_then(|value| value.to_str().ok()), + Some("https://edge.example.com/login") + ); +} + +#[test] +fn proxy_redirect_off_preserves_upstream_absolute_location() { + let upstream = std::sync::Arc::new(rginx_core::Upstream::new( + "backend".to_string(), + vec![rginx_core::UpstreamPeer { + url: "http://127.0.0.1:9000".to_string(), + scheme: "http".to_string(), + authority: "127.0.0.1:9000".to_string(), + weight: 1, + backup: false, + max_conns: None, + }], + rginx_core::UpstreamTls::Insecure, + rginx_core::UpstreamSettings { + protocol: rginx_core::UpstreamProtocol::Http1, + load_balance: rginx_core::UpstreamLoadBalance::RoundRobin, + dns: Default::default(), + server_name: true, + server_name_override: None, + tls_versions: None, + server_verify_depth: None, + server_crl_path: None, + client_identity: None, + request_timeout: Duration::from_secs(5), + connect_timeout: Duration::from_secs(5), + write_timeout: Duration::from_secs(5), + idle_timeout: Duration::from_secs(5), + pool_idle_timeout: None, + pool_max_idle_per_host: usize::MAX, + tcp_keepalive: None, + tcp_nodelay: false, + http2_keep_alive_interval: None, + http2_keep_alive_timeout: Duration::from_secs(20), + http2_keep_alive_while_idle: false, + max_replayable_request_body_bytes: 65536, + unhealthy_after_failures: 2, + unhealthy_cooldown: Duration::from_secs(10), + active_health_check: None, + }, + )); + let target = rginx_core::ProxyTarget { + upstream_name: "backend".to_string(), + 
upstream, + preserve_host: false, + uri_mode: rginx_core::ProxyUriMode::PassOriginal, + request_version: http::Version::HTTP_11, + redirect: rginx_core::ProxyRedirectMode::Off, + proxy_set_headers: Vec::new(), + }; + let mut headers = HeaderMap::new(); + headers.insert(http::header::LOCATION, HeaderValue::from_static("http://127.0.0.1:9000/login")); + + rewrite_proxy_redirect_headers( + &mut headers, + &target, + "127.0.0.1:9000", + "https", + Some(&HeaderValue::from_static("edge.example.com")), + ); + + assert_eq!( + headers.get(http::header::LOCATION).and_then(|value| value.to_str().ok()), + Some("http://127.0.0.1:9000/login") + ); +} + +#[test] +fn proxy_redirect_ignores_invalid_downstream_host() { + let upstream = std::sync::Arc::new(rginx_core::Upstream::new( + "backend".to_string(), + vec![rginx_core::UpstreamPeer { + url: "http://127.0.0.1:9000".to_string(), + scheme: "http".to_string(), + authority: "127.0.0.1:9000".to_string(), + weight: 1, + backup: false, + max_conns: None, + }], + rginx_core::UpstreamTls::Insecure, + rginx_core::UpstreamSettings { + protocol: rginx_core::UpstreamProtocol::Http1, + load_balance: rginx_core::UpstreamLoadBalance::RoundRobin, + dns: Default::default(), + server_name: true, + server_name_override: None, + tls_versions: None, + server_verify_depth: None, + server_crl_path: None, + client_identity: None, + request_timeout: Duration::from_secs(5), + connect_timeout: Duration::from_secs(5), + write_timeout: Duration::from_secs(5), + idle_timeout: Duration::from_secs(5), + pool_idle_timeout: None, + pool_max_idle_per_host: usize::MAX, + tcp_keepalive: None, + tcp_nodelay: false, + http2_keep_alive_interval: None, + http2_keep_alive_timeout: Duration::from_secs(20), + http2_keep_alive_while_idle: false, + max_replayable_request_body_bytes: 65536, + unhealthy_after_failures: 2, + unhealthy_cooldown: Duration::from_secs(10), + active_health_check: None, + }, + )); + let target = rginx_core::ProxyTarget { + upstream_name: 
"backend".to_string(), + upstream, + preserve_host: false, + uri_mode: rginx_core::ProxyUriMode::PassOriginal, + request_version: http::Version::HTTP_11, + redirect: rginx_core::ProxyRedirectMode::Default, + proxy_set_headers: Vec::new(), + }; + let mut headers = HeaderMap::new(); + headers.insert(http::header::LOCATION, HeaderValue::from_static("http://127.0.0.1:9000/login")); + + rewrite_proxy_redirect_headers( + &mut headers, + &target, + "127.0.0.1:9000", + "https", + Some(&HeaderValue::from_static("bad host value")), + ); + + assert_eq!( + headers.get(http::header::LOCATION).and_then(|value| value.to_str().ok()), + Some("http://127.0.0.1:9000/login") + ); +} diff --git a/crates/rginx-http/src/proxy/health/registry/tests.rs b/crates/rginx-http/src/proxy/health/registry/tests.rs index e0b11ee9..1be96f1b 100644 --- a/crates/rginx-http/src/proxy/health/registry/tests.rs +++ b/crates/rginx-http/src/proxy/health/registry/tests.rs @@ -126,6 +126,7 @@ fn snapshot_reports_passive_and_active_health_state() { }; let config = rginx_core::ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), diff --git a/crates/rginx-http/src/proxy/tests/mod.rs b/crates/rginx-http/src/proxy/tests/mod.rs index 3e9d25d9..d497445c 100644 --- a/crates/rginx-http/src/proxy/tests/mod.rs +++ b/crates/rginx-http/src/proxy/tests/mod.rs @@ -212,6 +212,7 @@ fn snapshot_with_upstreams_map( let server = default_server(); rginx_core::ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), diff --git a/crates/rginx-http/src/router/tests.rs b/crates/rginx-http/src/router/tests.rs index 32688101..b28f72e0 100644 --- a/crates/rginx-http/src/router/tests.rs +++ b/crates/rginx-http/src/router/tests.rs @@ -1,12 +1,9 @@ use http::StatusCode; -use rginx_core::{ - GrpcRouteMatch, ReturnAction, Route, RouteAccessControl, RouteAction, RouteMatcher, VirtualHost, -}; +use 
rginx_core::{ReturnAction, Route, RouteAccessControl, RouteAction, RouteMatcher, VirtualHost}; -use super::{ - RouteMatchContext, select_named_route_in_vhost, select_route, select_route_by_host, - select_route_by_host_with_context, select_route_with_context, select_vhost, -}; +use super::{select_route, select_route_by_host, select_vhost}; + +mod context; fn make_route(path: &str, body: &str) -> Route { Route { @@ -208,258 +205,3 @@ fn select_route_by_host_combines_host_and_path() { let (vhost, _) = result.unwrap(); assert!(vhost.server_names.is_empty()); } - -#[test] -fn grpc_specific_routes_beat_generic_routes_for_same_path() { - let routes = vec![ - Route { - cache: None, - id: "test|prefix:/|grpc:service=grpc.health.v1.Health,method=Check".to_string(), - matcher: RouteMatcher::Prefix("/".to_string()), - internal: false, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: Some(GrpcRouteMatch { - service: Some("grpc.health.v1.Health".to_string()), - method: Some("Check".to_string()), - }), - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("grpc".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Auto, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }, - Route { - cache: None, - id: "test|prefix:/".to_string(), - matcher: RouteMatcher::Prefix("/".to_string()), - internal: false, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("generic".to_string()), - }), - access_control: RouteAccessControl::default(), - 
rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Auto, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }, - ]; - - let route = select_route_with_context( - &routes, - &RouteMatchContext::with_grpc("/", "grpc.health.v1.Health", "Check"), - ) - .expect("gRPC route should match"); - assert_eq!(route.id, "test|prefix:/|grpc:service=grpc.health.v1.Health,method=Check"); -} - -#[test] -fn grpc_specific_routes_require_grpc_request_context() { - let routes = vec![Route { - cache: None, - id: "test|prefix:/|grpc:service=grpc.health.v1.Health".to_string(), - matcher: RouteMatcher::Prefix("/".to_string()), - internal: false, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: Some(GrpcRouteMatch { - service: Some("grpc.health.v1.Health".to_string()), - method: None, - }), - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("grpc".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Auto, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }]; - - assert!(select_route(&routes, "/").is_none()); - assert!( - select_route_with_context( - &routes, - &RouteMatchContext::with_grpc("/", "grpc.health.v1.Health", "Check"), - ) - .is_some() - ); -} - -#[test] -fn named_routes_are_not_selected_by_path_matching() { - let routes = vec![Route { - cache: None, - id: "test|named:@fallback".to_string(), - matcher: RouteMatcher::Named("@fallback".to_string()), - internal: 
true, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("named".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Auto, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }]; - - assert!(select_route(&routes, "/fallback").is_none()); -} - -#[test] -fn named_routes_can_be_selected_explicitly() { - let vhost = make_vhost( - vec!["example.com"], - vec![Route { - cache: None, - id: "test|named:@fallback".to_string(), - matcher: RouteMatcher::Named("@fallback".to_string()), - internal: true, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: None, - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("named".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, - request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Auto, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }], - ); - - let route = - select_named_route_in_vhost(&vhost, "@fallback").expect("named route should resolve"); - assert_eq!(route.id, "test|named:@fallback"); -} - -#[test] -fn select_vhost_falls_back_to_listener_default_server() { - let default = make_vhost(vec![], vec![make_route("/", "default")]); - let mut listener_default = - make_vhost(vec!["fallback.example.com"], vec![make_route("/", "listener-default")]); 
- listener_default.listener_ids = vec!["listener:https".to_string()]; - listener_default.default_listener_ids = vec!["listener:https".to_string()]; - let vhosts = vec![listener_default]; - - let selected = super::select_vhost_for_listener( - &vhosts, - &default, - "unknown.example.com", - Some("listener:https"), - ); - assert_eq!(selected.server_names, vec!["fallback.example.com"]); -} - -#[test] -fn select_vhost_for_listener_keeps_first_listener_default() { - let default = make_vhost(vec![], vec![make_route("/", "default")]); - let mut first = make_vhost(vec!["first.example.com"], vec![make_route("/", "first")]); - first.listener_ids = vec!["listener:https".to_string()]; - first.default_listener_ids = vec!["listener:https".to_string()]; - let mut second = make_vhost(vec!["second.example.com"], vec![make_route("/", "second")]); - second.listener_ids = vec!["listener:https".to_string()]; - second.default_listener_ids = vec!["listener:https".to_string()]; - - let vhosts = [first.clone(), second]; - let selected = super::select_vhost_for_listener( - &vhosts, - &default, - "unknown.example.com", - Some("listener:https"), - ); - assert_eq!(selected.server_names, first.server_names); -} - -#[test] -fn select_route_by_host_with_context_respects_grpc_constraints() { - let default = make_vhost(vec![], vec![make_route("/", "default")]); - let api_vhost = make_vhost( - vec!["api.example.com"], - vec![ - Route { - cache: None, - id: "test|prefix:/|grpc:service=grpc.health.v1.Health".to_string(), - matcher: RouteMatcher::Prefix("/".to_string()), - internal: false, - rewrites: Vec::new(), - try_files: Vec::new(), - error_pages: Vec::new(), - grpc_match: Some(GrpcRouteMatch { - service: Some("grpc.health.v1.Health".to_string()), - method: None, - }), - action: RouteAction::Return(ReturnAction { - status: StatusCode::OK, - location: String::new(), - body: Some("grpc".to_string()), - }), - access_control: RouteAccessControl::default(), - rate_limit: None, - allow_early_data: false, 
- request_buffering: rginx_core::RouteBufferingPolicy::Auto, - response_buffering: rginx_core::RouteBufferingPolicy::Auto, - compression: rginx_core::RouteCompressionPolicy::Auto, - compression_min_bytes: None, - compression_content_types: Vec::new(), - streaming_response_idle_timeout: None, - }, - make_route("/", "fallback"), - ], - ); - let vhosts = vec![api_vhost]; - - let result = select_route_by_host_with_context( - &default, - &vhosts, - "api.example.com", - &RouteMatchContext::with_grpc("/", "grpc.health.v1.Health", "Check"), - ) - .expect("gRPC route should match"); - assert_eq!(result.1.id, "test|prefix:/|grpc:service=grpc.health.v1.Health"); -} diff --git a/crates/rginx-http/src/router/tests/context.rs b/crates/rginx-http/src/router/tests/context.rs new file mode 100644 index 00000000..0f1b70b4 --- /dev/null +++ b/crates/rginx-http/src/router/tests/context.rs @@ -0,0 +1,257 @@ +use http::StatusCode; +use rginx_core::{ + GrpcRouteMatch, ReturnAction, Route, RouteAccessControl, RouteAction, RouteMatcher, +}; + +use super::{make_route, make_vhost}; +use crate::router::{ + RouteMatchContext, select_named_route_in_vhost, select_route, + select_route_by_host_with_context, select_route_with_context, select_vhost_for_listener, +}; + +#[test] +fn grpc_specific_routes_beat_generic_routes_for_same_path() { + let routes = vec![ + Route { + cache: None, + id: "test|prefix:/|grpc:service=grpc.health.v1.Health,method=Check".to_string(), + matcher: RouteMatcher::Prefix("/".to_string()), + internal: false, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: Some(GrpcRouteMatch { + service: Some("grpc.health.v1.Health".to_string()), + method: Some("Check".to_string()), + }), + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("grpc".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: 
rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Auto, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }, + Route { + cache: None, + id: "test|prefix:/".to_string(), + matcher: RouteMatcher::Prefix("/".to_string()), + internal: false, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("generic".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Auto, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }, + ]; + + let route = select_route_with_context( + &routes, + &RouteMatchContext::with_grpc("/", "grpc.health.v1.Health", "Check"), + ) + .expect("gRPC route should match"); + assert_eq!(route.id, "test|prefix:/|grpc:service=grpc.health.v1.Health,method=Check"); +} + +#[test] +fn grpc_specific_routes_require_grpc_request_context() { + let routes = vec![Route { + cache: None, + id: "test|prefix:/|grpc:service=grpc.health.v1.Health".to_string(), + matcher: RouteMatcher::Prefix("/".to_string()), + internal: false, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: Some(GrpcRouteMatch { + service: Some("grpc.health.v1.Health".to_string()), + method: None, + }), + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("grpc".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: 
rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Auto, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }]; + + assert!(select_route(&routes, "/").is_none()); + assert!( + select_route_with_context( + &routes, + &RouteMatchContext::with_grpc("/", "grpc.health.v1.Health", "Check"), + ) + .is_some() + ); +} + +#[test] +fn named_routes_are_not_selected_by_path_matching() { + let routes = vec![Route { + cache: None, + id: "test|named:@fallback".to_string(), + matcher: RouteMatcher::Named("@fallback".to_string()), + internal: true, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("named".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Auto, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }]; + + assert!(select_route(&routes, "/fallback").is_none()); +} + +#[test] +fn named_routes_can_be_selected_explicitly() { + let vhost = make_vhost( + vec!["example.com"], + vec![Route { + cache: None, + id: "test|named:@fallback".to_string(), + matcher: RouteMatcher::Named("@fallback".to_string()), + internal: true, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: None, + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("named".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + 
request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Auto, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }], + ); + + let route = + select_named_route_in_vhost(&vhost, "@fallback").expect("named route should resolve"); + assert_eq!(route.id, "test|named:@fallback"); +} + +#[test] +fn select_vhost_falls_back_to_listener_default_server() { + let default = make_vhost(vec![], vec![make_route("/", "default")]); + let mut listener_default = + make_vhost(vec!["fallback.example.com"], vec![make_route("/", "listener-default")]); + listener_default.listener_ids = vec!["listener:https".to_string()]; + listener_default.default_listener_ids = vec!["listener:https".to_string()]; + let vhosts = vec![listener_default]; + + let selected = + select_vhost_for_listener(&vhosts, &default, "unknown.example.com", Some("listener:https")); + assert_eq!(selected.server_names, vec!["fallback.example.com"]); +} + +#[test] +fn select_vhost_for_listener_keeps_first_listener_default() { + let default = make_vhost(vec![], vec![make_route("/", "default")]); + let mut first = make_vhost(vec!["first.example.com"], vec![make_route("/", "first")]); + first.listener_ids = vec!["listener:https".to_string()]; + first.default_listener_ids = vec!["listener:https".to_string()]; + let mut second = make_vhost(vec!["second.example.com"], vec![make_route("/", "second")]); + second.listener_ids = vec!["listener:https".to_string()]; + second.default_listener_ids = vec!["listener:https".to_string()]; + + let vhosts = [first.clone(), second]; + let selected = + select_vhost_for_listener(&vhosts, &default, "unknown.example.com", Some("listener:https")); + assert_eq!(selected.server_names, first.server_names); +} + +#[test] +fn select_route_by_host_with_context_respects_grpc_constraints() { + let default = make_vhost(vec![], 
vec![make_route("/", "default")]); + let api_vhost = make_vhost( + vec!["api.example.com"], + vec![ + Route { + cache: None, + id: "test|prefix:/|grpc:service=grpc.health.v1.Health".to_string(), + matcher: RouteMatcher::Prefix("/".to_string()), + internal: false, + rewrites: Vec::new(), + try_files: Vec::new(), + error_pages: Vec::new(), + grpc_match: Some(GrpcRouteMatch { + service: Some("grpc.health.v1.Health".to_string()), + method: None, + }), + action: RouteAction::Return(ReturnAction { + status: StatusCode::OK, + location: String::new(), + body: Some("grpc".to_string()), + }), + access_control: RouteAccessControl::default(), + rate_limit: None, + allow_early_data: false, + request_buffering: rginx_core::RouteBufferingPolicy::Auto, + response_buffering: rginx_core::RouteBufferingPolicy::Auto, + compression: rginx_core::RouteCompressionPolicy::Auto, + compression_min_bytes: None, + compression_content_types: Vec::new(), + streaming_response_idle_timeout: None, + }, + make_route("/", "fallback"), + ], + ); + let vhosts = vec![api_vhost]; + + let result = select_route_by_host_with_context( + &default, + &vhosts, + "api.example.com", + &RouteMatchContext::with_grpc("/", "grpc.health.v1.Health", "Check"), + ) + .expect("gRPC route should match"); + assert_eq!(result.1.id, "test|prefix:/|grpc:service=grpc.health.v1.Health"); +} diff --git a/crates/rginx-http/src/state/agent.rs b/crates/rginx-http/src/state/agent.rs new file mode 100644 index 00000000..5764a35b --- /dev/null +++ b/crates/rginx-http/src/state/agent.rs @@ -0,0 +1,200 @@ +use super::*; + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct AgentRuntimeUpdate { + pub connection_state: String, + pub command_cursor: Option, + pub in_flight_command_id: Option, + pub last_register_success_unix_ms: Option, + pub last_heartbeat_success_unix_ms: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(super) struct AgentRuntimeState { + configured: bool, + endpoint: Option, + node_id: Option, + 
state_path: Option, + region: Option, + pop: Option, + labels: BTreeMap, + connection_state: String, + command_cursor: Option, + in_flight_command_id: Option, + last_register_success_unix_ms: Option, + last_heartbeat_success_unix_ms: Option, +} + +impl AgentRuntimeState { + pub(super) fn from_config(agent: Option<&rginx_core::AgentSettings>) -> Self { + let Some(agent) = agent else { + return Self::default(); + }; + + Self { + configured: true, + endpoint: Some(agent.endpoint.to_string()), + node_id: Some(agent.node_id.clone()), + state_path: Some(agent.state_path.clone()), + region: agent.region.clone(), + pop: agent.pop.clone(), + labels: agent.labels.clone(), + connection_state: "starting".to_string(), + command_cursor: None, + in_flight_command_id: None, + last_register_success_unix_ms: None, + last_heartbeat_success_unix_ms: None, + } + } + + pub(super) fn sync_config(&mut self, agent: Option<&rginx_core::AgentSettings>) -> bool { + let mut next = Self::from_config(agent); + if self.same_config_identity(&next) { + next.connection_state.clone_from(&self.connection_state); + next.command_cursor.clone_from(&self.command_cursor); + next.in_flight_command_id.clone_from(&self.in_flight_command_id); + next.last_register_success_unix_ms = self.last_register_success_unix_ms; + next.last_heartbeat_success_unix_ms = self.last_heartbeat_success_unix_ms; + } + if *self == next { + return false; + } + *self = next; + true + } + + pub(super) fn apply_update(&mut self, update: AgentRuntimeUpdate) -> bool { + let mut changed = false; + update_if_changed(&mut self.connection_state, update.connection_state, &mut changed); + update_if_changed(&mut self.command_cursor, update.command_cursor, &mut changed); + update_if_changed( + &mut self.in_flight_command_id, + update.in_flight_command_id, + &mut changed, + ); + update_if_changed( + &mut self.last_register_success_unix_ms, + update.last_register_success_unix_ms, + &mut changed, + ); + update_if_changed( + &mut 
self.last_heartbeat_success_unix_ms, + update.last_heartbeat_success_unix_ms, + &mut changed, + ); + changed + } + + pub(super) fn snapshot(&self, locally_disabled: bool) -> AgentRuntimeSnapshot { + AgentRuntimeSnapshot { + configured: self.configured, + enabled: self.configured && !locally_disabled, + locally_disabled, + endpoint: self.endpoint.clone(), + node_id: self.node_id.clone(), + state_path: self.state_path.clone(), + region: self.region.clone(), + pop: self.pop.clone(), + labels: self.labels.clone(), + connection_state: if self.configured && locally_disabled { + "locally_disabled".to_string() + } else { + self.connection_state.clone() + }, + command_cursor: self.command_cursor.clone(), + in_flight_command_id: self.in_flight_command_id.clone(), + last_register_success_unix_ms: self.last_register_success_unix_ms, + last_heartbeat_success_unix_ms: self.last_heartbeat_success_unix_ms, + } + } + + fn same_config_identity(&self, other: &Self) -> bool { + self.configured + && other.configured + && self.endpoint == other.endpoint + && self.node_id == other.node_id + && self.state_path == other.state_path + } +} + +impl Default for AgentRuntimeState { + fn default() -> Self { + Self { + configured: false, + endpoint: None, + node_id: None, + state_path: None, + region: None, + pop: None, + labels: BTreeMap::new(), + connection_state: "not_configured".to_string(), + command_cursor: None, + in_flight_command_id: None, + last_register_success_unix_ms: None, + last_heartbeat_success_unix_ms: None, + } + } +} + +impl SharedState { + pub fn set_agent_configured(&self, agent: &rginx_core::AgentSettings) { + if self.sync_agent_runtime_config(Some(agent)) { + self.mark_status_snapshot_changed(); + } + } + + pub fn update_agent_runtime(&self, update: AgentRuntimeUpdate) { + let changed = self + .lifecycle + .agent_runtime + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .apply_update(update); + if changed { + self.mark_status_snapshot_changed(); + } + } + + 
pub fn agent_status_snapshot(&self) -> AgentRuntimeSnapshot { + self.lifecycle + .agent_runtime + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .snapshot(self.agent_locally_disabled()) + } + + pub fn agent_disabled_receiver(&self) -> watch::Receiver { + self.lifecycle.agent_disabled.subscribe() + } + + pub fn set_agent_locally_disabled(&self, disabled: bool) -> AgentRuntimeSnapshot { + let previous = self.lifecycle.agent_disabled_value.swap(disabled, Ordering::AcqRel); + if previous != disabled { + self.lifecycle.agent_disabled.send_replace(disabled); + self.mark_status_snapshot_changed(); + } + self.agent_status_snapshot() + } + + pub(crate) fn sync_agent_runtime_config( + &self, + agent: Option<&rginx_core::AgentSettings>, + ) -> bool { + self.lifecycle + .agent_runtime + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .sync_config(agent) + } + + fn agent_locally_disabled(&self) -> bool { + self.lifecycle.agent_disabled_value.load(Ordering::Acquire) + } +} + +fn update_if_changed(current: &mut T, next: T, changed: &mut bool) { + if *current != next { + *current = next; + *changed = true; + } +} diff --git a/crates/rginx-http/src/state/lifecycle/reload.rs b/crates/rginx-http/src/state/lifecycle/reload.rs index a9ca085f..04220352 100644 --- a/crates/rginx-http/src/state/lifecycle/reload.rs +++ b/crates/rginx-http/src/state/lifecycle/reload.rs @@ -176,7 +176,11 @@ impl SharedState { }; *self.lifecycle.node_identity.write().unwrap_or_else(|poisoned| poisoned.into_inner()) = - self.resolved_node_identity(prepared.config.control_plane.as_ref()); + self.resolved_node_identity( + prepared.config.agent.as_ref(), + prepared.config.control_plane.as_ref(), + ); + self.sync_agent_runtime_config(prepared.config.agent.as_ref()); *self.listener_runtime.tls_acceptors.write().await = merged_acceptors; let _ = self.revisions.send(next_revision); diff --git a/crates/rginx-http/src/state/lifecycle/status.rs 
b/crates/rginx-http/src/state/lifecycle/status.rs index 1b1f8538..12a7f222 100644 --- a/crates/rginx-http/src/state/lifecycle/status.rs +++ b/crates/rginx-http/src/state/lifecycle/status.rs @@ -84,6 +84,7 @@ impl SharedState { binary_version: env!("CARGO_PKG_VERSION").to_string(), desired_revision, converged: desired_revision == revision, + agent: self.agent_status_snapshot(), config_path: self.config_path.as_deref().cloned(), listeners: config .listeners diff --git a/crates/rginx-http/src/state/mod.rs b/crates/rginx-http/src/state/mod.rs index c0023315..f2177be6 100644 --- a/crates/rginx-http/src/state/mod.rs +++ b/crates/rginx-http/src/state/mod.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeMap, HashMap, VecDeque}; use std::future::Future; use std::path::PathBuf; -use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; use std::sync::{Arc, Mutex, RwLock as StdRwLock}; use std::time::{SystemTime, UNIX_EPOCH}; @@ -17,6 +17,7 @@ use crate::rate_limit::RateLimiters; use crate::tls::build_tls_acceptor; use crate::tls::ocsp::ocsp_responder_urls_for_certificate; +mod agent; mod cache; mod connections; mod counters; @@ -43,10 +44,12 @@ pub(super) struct PreparedState { retired_listeners: Vec, } +use self::agent::AgentRuntimeState; pub(crate) use self::counters::http::TlsHandshakeFailureReason; pub use crate::cache::{CacheInvalidationResult, CachePurgeResult, CacheZoneRuntimeSnapshot}; #[cfg(test)] pub(crate) use crate::validate_config_transition; +pub use agent::AgentRuntimeUpdate; pub use connections::ActiveConnectionGuard; use counters::{ ApplyHistory, HttpCounters, ListenerTrafficCounters, ReloadHistory, RequestTrafficCounters, @@ -60,13 +63,14 @@ use helpers::{ }; pub use snapshots::ActiveState; pub use snapshots::{ - AcmeManagedCertificateSnapshot, AcmeRuntimeSnapshot, ApplyOutcomeSnapshot, ApplyResultSnapshot, - ApplyStatusSnapshot, CacheStatsSnapshot, ConfigFailureStageSnapshot, 
GrpcTrafficSnapshot, - Http3ListenerRuntimeSnapshot, HttpCountersSnapshot, ListenerStatsSnapshot, MtlsStatusSnapshot, - NodeIdentitySnapshot, RecentTrafficStatsSnapshot, RecentUpstreamStatsSnapshot, - ReloadOutcomeSnapshot, ReloadResultSnapshot, ReloadStatusSnapshot, RevisionStatusSnapshot, - RouteStatsSnapshot, RuntimeListenerBindingSnapshot, RuntimeListenerSnapshot, - RuntimeStatusSnapshot, SnapshotDeltaSnapshot, SnapshotModule, TlsCertificateStatusSnapshot, + AcmeManagedCertificateSnapshot, AcmeRuntimeSnapshot, AgentRuntimeSnapshot, + ApplyOutcomeSnapshot, ApplyResultSnapshot, ApplyStatusSnapshot, CacheStatsSnapshot, + ConfigFailureStageSnapshot, GrpcTrafficSnapshot, Http3ListenerRuntimeSnapshot, + HttpCountersSnapshot, ListenerStatsSnapshot, MtlsStatusSnapshot, NodeIdentitySnapshot, + RecentTrafficStatsSnapshot, RecentUpstreamStatsSnapshot, ReloadOutcomeSnapshot, + ReloadResultSnapshot, ReloadStatusSnapshot, RevisionStatusSnapshot, RouteStatsSnapshot, + RuntimeListenerBindingSnapshot, RuntimeListenerSnapshot, RuntimeStatusSnapshot, + SnapshotDeltaSnapshot, SnapshotModule, TlsCertificateStatusSnapshot, TlsDefaultCertificateBindingSnapshot, TlsListenerStatusSnapshot, TlsOcspRefreshSpec, TlsOcspStatusSnapshot, TlsReloadBoundarySnapshot, TlsRuntimeSnapshot, TlsSniBindingSnapshot, TlsVhostBindingSnapshot, TrafficStatsSnapshot, UpstreamPeerStatsSnapshot, @@ -152,6 +156,7 @@ impl SharedState { let acme_statuses = Arc::new(StdRwLock::new(HashMap::new())); let acme_http01_challenges = Arc::new(StdRwLock::new(HashMap::new())); let (revisions, _rx) = watch::channel(revision); + let (agent_disabled, _agent_disabled_rx) = watch::channel(false); let request_runtime = RequestRuntimeState::new(config_path.as_deref()); let listener_runtime = ListenerRuntimeState::new( prepared.listener_tls_acceptors.clone(), @@ -163,10 +168,15 @@ impl SharedState { cache_component_versions, ); let lifecycle = LifecycleState::new( - 
NodeIdentityState::from_control_plane(prepared.config.control_plane.as_ref()), + NodeIdentityState::from_config( + prepared.config.agent.as_ref(), + prepared.config.control_plane.as_ref(), + ), ocsp_statuses, acme_statuses, acme_http01_challenges, + AgentRuntimeState::from_config(prepared.config.agent.as_ref()), + agent_disabled, ); Ok(Self { @@ -188,6 +198,25 @@ impl SharedState { } impl NodeIdentityState { + fn from_config( + agent: Option<&rginx_core::AgentSettings>, + control_plane: Option<&rginx_core::ControlPlaneSettings>, + ) -> Self { + if let Some(agent) = agent { + return Self::from_agent(agent); + } + Self::from_control_plane(control_plane) + } + + fn from_agent(agent: &rginx_core::AgentSettings) -> Self { + Self { + node_id: Some(agent.node_id.clone()), + region: agent.region.clone(), + pop: agent.pop.clone(), + labels: agent.labels.clone(), + } + } + fn from_control_plane(control_plane: Option<&rginx_core::ControlPlaneSettings>) -> Self { let Some(control_plane) = control_plane else { return Self::default(); diff --git a/crates/rginx-http/src/state/snapshot_bus/identity.rs b/crates/rginx-http/src/state/snapshot_bus/identity.rs index 183ad3a4..5d1b8c92 100644 --- a/crates/rginx-http/src/state/snapshot_bus/identity.rs +++ b/crates/rginx-http/src/state/snapshot_bus/identity.rs @@ -1,8 +1,15 @@ use super::*; impl SharedState { + pub fn set_agent_identity(&self, agent: &rginx_core::AgentSettings) { + self.set_node_identity_override(NodeIdentityState::from_agent(agent)); + } + pub fn set_control_plane_identity(&self, control_plane: &rginx_core::ControlPlaneSettings) { - let next = NodeIdentityState::from_control_plane(Some(control_plane)); + self.set_node_identity_override(NodeIdentityState::from_control_plane(Some(control_plane))); + } + + fn set_node_identity_override(&self, next: NodeIdentityState) { let mut changed = false; { @@ -36,6 +43,7 @@ impl SharedState { pub(in crate::state) fn resolved_node_identity( &self, + agent: 
Option<&rginx_core::AgentSettings>, control_plane: Option<&rginx_core::ControlPlaneSettings>, ) -> NodeIdentityState { self.lifecycle @@ -43,6 +51,6 @@ impl SharedState { .read() .unwrap_or_else(|poisoned| poisoned.into_inner()) .clone() - .unwrap_or_else(|| NodeIdentityState::from_control_plane(control_plane)) + .unwrap_or_else(|| NodeIdentityState::from_config(agent, control_plane)) } } diff --git a/crates/rginx-http/src/state/snapshots/mod.rs b/crates/rginx-http/src/state/snapshots/mod.rs index 158456c3..3f79315e 100644 --- a/crates/rginx-http/src/state/snapshots/mod.rs +++ b/crates/rginx-http/src/state/snapshots/mod.rs @@ -20,8 +20,9 @@ pub use delta::{SnapshotDeltaSnapshot, SnapshotModule}; pub use http::{HttpCountersSnapshot, MtlsStatusSnapshot}; pub use reload::{ReloadOutcomeSnapshot, ReloadResultSnapshot, ReloadStatusSnapshot}; pub use runtime::{ - Http3ListenerRuntimeSnapshot, NodeIdentitySnapshot, RevisionStatusSnapshot, - RuntimeListenerBindingSnapshot, RuntimeListenerSnapshot, RuntimeStatusSnapshot, + AgentRuntimeSnapshot, Http3ListenerRuntimeSnapshot, NodeIdentitySnapshot, + RevisionStatusSnapshot, RuntimeListenerBindingSnapshot, RuntimeListenerSnapshot, + RuntimeStatusSnapshot, }; pub use tls::{ TlsCertificateStatusSnapshot, TlsDefaultCertificateBindingSnapshot, TlsListenerStatusSnapshot, diff --git a/crates/rginx-http/src/state/snapshots/runtime.rs b/crates/rginx-http/src/state/snapshots/runtime.rs index ca16bde2..4847b37c 100644 --- a/crates/rginx-http/src/state/snapshots/runtime.rs +++ b/crates/rginx-http/src/state/snapshots/runtime.rs @@ -20,6 +20,59 @@ pub struct NodeIdentitySnapshot { pub labels: BTreeMap, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct AgentRuntimeSnapshot { + #[serde(default)] + pub configured: bool, + #[serde(default)] + pub enabled: bool, + #[serde(default)] + pub locally_disabled: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub endpoint: Option, + #[serde(default, 
skip_serializing_if = "Option::is_none")] + pub node_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub state_path: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub region: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub pop: Option, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub labels: BTreeMap, + #[serde(default)] + pub connection_state: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub command_cursor: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub in_flight_command_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub last_register_success_unix_ms: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub last_heartbeat_success_unix_ms: Option, +} + +impl Default for AgentRuntimeSnapshot { + fn default() -> Self { + Self { + configured: false, + enabled: false, + locally_disabled: false, + endpoint: None, + node_id: None, + state_path: None, + region: None, + pop: None, + labels: BTreeMap::new(), + connection_state: "not_configured".to_string(), + command_cursor: None, + in_flight_command_id: None, + last_register_success_unix_ms: None, + last_heartbeat_success_unix_ms: None, + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] pub struct RevisionStatusSnapshot { #[serde(default)] @@ -41,6 +94,8 @@ pub struct RuntimeStatusSnapshot { pub desired_revision: u64, #[serde(default)] pub converged: bool, + #[serde(default)] + pub agent: AgentRuntimeSnapshot, pub config_path: Option, pub listeners: Vec, pub worker_threads: Option, diff --git a/crates/rginx-http/src/state/structure.rs b/crates/rginx-http/src/state/structure.rs index 9f33b54c..2617206e 100644 --- a/crates/rginx-http/src/state/structure.rs +++ b/crates/rginx-http/src/state/structure.rs @@ -37,6 +37,9 @@ pub(super) struct LifecycleState { pub(super) background_tasks: Arc>>>, 
pub(super) node_identity: Arc>, pub(super) node_identity_override: Arc>>, + pub(super) agent_runtime: Arc>, + pub(super) agent_disabled: watch::Sender, + pub(super) agent_disabled_value: Arc, pub(super) reload_history: Arc>, pub(super) apply_history: Arc>, pub(super) ocsp_statuses: Arc>>, @@ -171,11 +174,16 @@ impl LifecycleState { ocsp_statuses: Arc>>, acme_statuses: Arc>>, acme_http01_challenges: Arc>>, + agent_runtime: AgentRuntimeState, + agent_disabled: watch::Sender, ) -> Self { Self { background_tasks: Arc::new(Mutex::new(Vec::new())), node_identity: Arc::new(StdRwLock::new(node_identity)), node_identity_override: Arc::new(StdRwLock::new(None)), + agent_runtime: Arc::new(StdRwLock::new(agent_runtime)), + agent_disabled, + agent_disabled_value: Arc::new(AtomicBool::new(false)), reload_history: Arc::new(Mutex::new(ReloadHistory::default())), apply_history: Arc::new(Mutex::new(ApplyHistory::default())), ocsp_statuses, diff --git a/crates/rginx-http/src/state/tests/status.rs b/crates/rginx-http/src/state/tests/status.rs index d0639d30..ad94ec42 100644 --- a/crates/rginx-http/src/state/tests/status.rs +++ b/crates/rginx-http/src/state/tests/status.rs @@ -128,6 +128,47 @@ async fn status_snapshot_reports_node_identity_and_convergence() { assert!(!status.converged); } +#[tokio::test] +async fn status_snapshot_reports_agent_runtime_and_local_disable() { + let mut config = snapshot("127.0.0.1:8080"); + config.agent = Some(agent_settings()); + let shared = SharedState::from_config(config).expect("shared state should build"); + + let status = shared.status_snapshot().await; + assert!(status.agent.configured); + assert!(status.agent.enabled); + assert!(!status.agent.locally_disabled); + assert_eq!(status.agent.endpoint.as_deref(), Some("https://control.example.com/")); + assert_eq!(status.agent.node_id.as_deref(), Some("edge-sfo-1")); + assert_eq!(status.agent.connection_state, "starting"); + + shared.update_agent_runtime(crate::state::AgentRuntimeUpdate { + 
connection_state: "connected".to_string(), + command_cursor: Some("cursor-1".to_string()), + in_flight_command_id: Some("cmd-1".to_string()), + last_register_success_unix_ms: Some(1000), + last_heartbeat_success_unix_ms: Some(2000), + }); + + let status = shared.status_snapshot().await; + assert_eq!(status.agent.connection_state, "connected"); + assert_eq!(status.agent.command_cursor.as_deref(), Some("cursor-1")); + assert_eq!(status.agent.in_flight_command_id.as_deref(), Some("cmd-1")); + assert_eq!(status.agent.last_register_success_unix_ms, Some(1000)); + assert_eq!(status.agent.last_heartbeat_success_unix_ms, Some(2000)); + + let disabled = shared.set_agent_locally_disabled(true); + assert!(!disabled.enabled); + assert!(disabled.locally_disabled); + assert_eq!(disabled.connection_state, "locally_disabled"); + assert!(*shared.agent_disabled_receiver().borrow()); + + let enabled = shared.set_agent_locally_disabled(false); + assert!(enabled.enabled); + assert!(!enabled.locally_disabled); + assert_eq!(enabled.connection_state, "connected"); +} + #[tokio::test] async fn status_snapshot_preserves_explicit_control_plane_identity_override() { let shared = @@ -161,6 +202,24 @@ async fn status_snapshot_preserves_explicit_control_plane_identity_override() { assert_eq!(status.revision, 1); } +fn agent_settings() -> rginx_core::AgentSettings { + rginx_core::AgentSettings { + endpoint: "https://control.example.com".parse().unwrap(), + node_id: "edge-sfo-1".to_string(), + auth: rginx_core::AgentAuthSettings { token_path: "/etc/rginx/agent.token".into() }, + state_path: "/var/lib/rginx/agent/state.json".into(), + region: Some("us-west".to_string()), + pop: Some("sfo".to_string()), + labels: [("tier".to_string(), "edge".to_string())].into_iter().collect(), + heartbeat_interval: Duration::from_secs(30), + connect_timeout: Duration::from_secs(10), + request_timeout: Duration::from_secs(30), + poll_timeout: Duration::from_secs(30), + backoff_initial: Duration::from_millis(500), + 
backoff_max: Duration::from_secs(60), + } +} + #[tokio::test] async fn status_snapshot_reports_acme_runtime_state() { let temp = tempdir().expect("tempdir should build"); diff --git a/crates/rginx-http/src/state/tests/support.rs b/crates/rginx-http/src/state/tests/support.rs index 3f8e3ee9..530805c5 100644 --- a/crates/rginx-http/src/state/tests/support.rs +++ b/crates/rginx-http/src/state/tests/support.rs @@ -20,6 +20,7 @@ pub(crate) fn snapshot(listen: &str) -> ConfigSnapshot { }; ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), diff --git a/crates/rginx-http/src/state/tls_runtime/bindings/tests.rs b/crates/rginx-http/src/state/tls_runtime/bindings/tests.rs index 1ff9783f..67b9c39f 100644 --- a/crates/rginx-http/src/state/tls_runtime/bindings/tests.rs +++ b/crates/rginx-http/src/state/tls_runtime/bindings/tests.rs @@ -104,6 +104,7 @@ fn certificates(scopes: &[&str]) -> Vec { fn listener_certificate_is_default_when_no_explicit_default_is_configured() { let config = ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), @@ -147,6 +148,7 @@ fn listener_certificate_is_default_when_no_explicit_default_is_configured() { fn single_named_vhost_certificate_becomes_implicit_default_without_listener_tls() { let config = ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), diff --git a/crates/rginx-http/src/transition.rs b/crates/rginx-http/src/transition.rs index b35fcca1..cab3c2b9 100644 --- a/crates/rginx-http/src/transition.rs +++ b/crates/rginx-http/src/transition.rs @@ -16,13 +16,14 @@ const RELOADABLE_FIELDS: [&str; 13] = [ "servers[].upstreams[].server_name_override", ]; -const RESTART_REQUIRED_FIELDS: [&str; 8] = [ +const RESTART_REQUIRED_FIELDS: [&str; 9] = [ "listen", "server.http3.listen", "listeners[].listen", "listeners[].http3.listen", 
"servers[].listen", "control_plane", + "agent", "runtime.worker_threads", "runtime.accept_workers", ]; @@ -142,6 +143,14 @@ pub fn plan_config_transition( )); } + if current.agent.as_ref().map(agent_fingerprint) != next.agent.as_ref().map(agent_fingerprint) { + changes.push(format!( + "agent {:?} -> {:?}", + current.agent.as_ref().map(agent_fingerprint), + next.agent.as_ref().map(agent_fingerprint) + )); + } + let boundary = config_transition_boundary(); let kind = if changes.is_empty() { ConfigTransitionKind::HotReload @@ -167,6 +176,25 @@ fn control_plane_fingerprint(control_plane: &rginx_core::ControlPlaneSettings) - ) } +fn agent_fingerprint(agent: &rginx_core::AgentSettings) -> String { + format!( + "endpoint={} node_id={} token_path={} state_path={} region={:?} pop={:?} labels={:?} heartbeat_interval={:?} connect_timeout={:?} request_timeout={:?} poll_timeout={:?} backoff_initial={:?} backoff_max={:?}", + agent.endpoint, + agent.node_id, + agent.auth.token_path.display(), + agent.state_path.display(), + agent.region, + agent.pop, + agent.labels, + agent.heartbeat_interval, + agent.connect_timeout, + agent.request_timeout, + agent.poll_timeout, + agent.backoff_initial, + agent.backoff_max + ) +} + pub fn validate_config_transition(current: &ConfigSnapshot, next: &ConfigSnapshot) -> Result<()> { let plan = plan_config_transition(current, next); if let Some(message) = plan.restart_required_message() { diff --git a/crates/rginx-http/src/transition/tests.rs b/crates/rginx-http/src/transition/tests.rs index edd8b680..5bbfdfea 100644 --- a/crates/rginx-http/src/transition/tests.rs +++ b/crates/rginx-http/src/transition/tests.rs @@ -1,7 +1,10 @@ use std::collections::HashMap; use std::time::Duration; -use rginx_core::{ConfigSnapshot, Listener, RuntimeSettings, Server, VirtualHost}; +use rginx_core::{ + AgentAuthSettings, AgentSettings, ConfigSnapshot, Listener, RuntimeSettings, Server, + VirtualHost, +}; use super::{ ConfigTransitionKind, config_transition_boundary, 
plan_config_transition, @@ -28,6 +31,7 @@ fn snapshot(listen: &str) -> ConfigSnapshot { }; ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), @@ -78,6 +82,24 @@ fn control_plane_settings(listen: &str) -> rginx_core::ControlPlaneSettings { } } +fn agent_settings(endpoint: &str) -> AgentSettings { + AgentSettings { + endpoint: endpoint.parse().unwrap(), + node_id: "edge-sfo-1".to_string(), + auth: AgentAuthSettings { token_path: "/etc/rginx/agent.token".into() }, + state_path: "/var/lib/rginx/agent/state.json".into(), + region: Some("us-west".to_string()), + pop: Some("sfo".to_string()), + labels: [("tier".to_string(), "edge".to_string())].into_iter().collect(), + heartbeat_interval: Duration::from_secs(15), + connect_timeout: Duration::from_secs(10), + request_timeout: Duration::from_secs(20), + poll_timeout: Duration::from_secs(30), + backoff_initial: Duration::from_millis(250), + backoff_max: Duration::from_secs(10), + } +} + #[test] fn boundary_lists_are_stable() { let boundary = config_transition_boundary(); @@ -108,6 +130,7 @@ fn boundary_lists_are_stable() { "listeners[].http3.listen".to_string(), "servers[].listen".to_string(), "control_plane".to_string(), + "agent".to_string(), "runtime.worker_threads".to_string(), "runtime.accept_workers".to_string(), ] @@ -267,6 +290,19 @@ fn planner_reports_restart_required_control_plane_removed() { ); } +#[test] +fn planner_reports_restart_required_agent_changes() { + let current = snapshot("127.0.0.1:8080"); + let mut next = snapshot("127.0.0.1:8080"); + next.agent = Some(agent_settings("https://control.example.com/rginx")); + + let plan = plan_config_transition(&current, &next); + assert_eq!(plan.kind, ConfigTransitionKind::RestartRequired); + assert!(plan.changed_restart_required_fields.iter().any(|change| { + change.contains("agent None -> Some(\"endpoint=https://control.example.com/rginx") + })); +} + #[test] fn 
planner_reports_control_plane_no_change_as_hot_reload() { let mut current = snapshot("127.0.0.1:8080"); diff --git a/crates/rginx-runtime/src/acme/tests.rs b/crates/rginx-runtime/src/acme/tests.rs index 67fc0c50..04f2f879 100644 --- a/crates/rginx-runtime/src/acme/tests.rs +++ b/crates/rginx-runtime/src/acme/tests.rs @@ -20,6 +20,7 @@ use super::types::{certificate_status_index, http01_listener_addrs, plan_reconci fn test_config(listeners: Vec) -> ConfigSnapshot { ConfigSnapshot { control_plane: None, + agent: None, runtime: RuntimeSettings { shutdown_timeout: Duration::from_secs(1), worker_threads: None, diff --git a/crates/rginx-runtime/src/admin/mod.rs b/crates/rginx-runtime/src/admin/mod.rs index c34e8eba..e326c246 100644 --- a/crates/rginx-runtime/src/admin/mod.rs +++ b/crates/rginx-runtime/src/admin/mod.rs @@ -14,7 +14,7 @@ pub use socket::admin_socket_path_for_config; const INSTALLED_CONFIG_PATH: &str = "/etc/rginx/rginx.ron"; const INSTALLED_ADMIN_SOCKET_PATH: &str = "/run/rginx/admin.sock"; -const ADMIN_SNAPSHOT_SCHEMA_VERSION: u32 = 15; +const ADMIN_SNAPSHOT_SCHEMA_VERSION: u32 = 16; const DEFAULT_RECENT_WINDOW_SECS: u64 = 60; const EXTENDED_RECENT_WINDOW_SECS: u64 = 300; diff --git a/crates/rginx-runtime/src/admin/model.rs b/crates/rginx-runtime/src/admin/model.rs index f04d8834..60ac38fc 100644 --- a/crates/rginx-runtime/src/admin/model.rs +++ b/crates/rginx-runtime/src/admin/model.rs @@ -1,7 +1,7 @@ use rginx_http::{ - CacheInvalidationResult, CachePurgeResult, CacheStatsSnapshot, HttpCountersSnapshot, - RevisionStatusSnapshot, RuntimeStatusSnapshot, SnapshotDeltaSnapshot, SnapshotModule, - TrafficStatsSnapshot, UpstreamHealthSnapshot, UpstreamStatsSnapshot, + AgentRuntimeSnapshot, CacheInvalidationResult, CachePurgeResult, CacheStatsSnapshot, + HttpCountersSnapshot, RevisionStatusSnapshot, RuntimeStatusSnapshot, SnapshotDeltaSnapshot, + SnapshotModule, TrafficStatsSnapshot, UpstreamHealthSnapshot, UpstreamStatsSnapshot, }; use serde::{Deserialize, 
Deserializer, Serialize, Serializer}; @@ -17,6 +17,8 @@ pub enum AdminRequest { GetTrafficStats { window_secs: Option }, GetPeerHealth, GetUpstreamStats { window_secs: Option }, + GetAgentStatus, + SetAgentDisabled { disabled: bool }, PurgeCacheZone { zone_name: String }, PurgeCacheKey { zone_name: String, key: String }, PurgeCachePrefix { zone_name: String, prefix: String }, @@ -117,6 +119,7 @@ pub enum AdminResponse { TrafficStats(TrafficStatsSnapshot), PeerHealth(Vec), UpstreamStats(Vec), + AgentStatus(AgentRuntimeSnapshot), CachePurge(CachePurgeResult), CacheInvalidation(CacheInvalidationResult), Revision(RevisionSnapshot), diff --git a/crates/rginx-runtime/src/admin/service.rs b/crates/rginx-runtime/src/admin/service.rs index 74520b95..726c951d 100644 --- a/crates/rginx-runtime/src/admin/service.rs +++ b/crates/rginx-runtime/src/admin/service.rs @@ -176,6 +176,10 @@ async fn dispatch_request(request: AdminRequest, state: &SharedState) -> io::Res .map_err(|message| io::Error::new(io::ErrorKind::InvalidInput, message))?; AdminResponse::UpstreamStats(state.upstream_stats_snapshot_with_window(window_secs)) } + AdminRequest::GetAgentStatus => AdminResponse::AgentStatus(state.agent_status_snapshot()), + AdminRequest::SetAgentDisabled { disabled } => { + AdminResponse::AgentStatus(state.set_agent_locally_disabled(disabled)) + } AdminRequest::PurgeCacheZone { zone_name } => { match state.purge_cache_zone(&zone_name).await { Ok(result) => AdminResponse::CachePurge(result), diff --git a/crates/rginx-runtime/src/admin/tests.rs b/crates/rginx-runtime/src/admin/tests.rs index 4100b408..23c07c47 100644 --- a/crates/rginx-runtime/src/admin/tests.rs +++ b/crates/rginx-runtime/src/admin/tests.rs @@ -99,6 +99,8 @@ fn status_response_accepts_older_payload_without_acme_snapshot() { assert!(!status.acme.enabled); assert!(status.acme.directory_url.is_none()); assert!(status.acme.managed_certificates.is_empty()); + assert!(!status.agent.configured); + 
assert_eq!(status.agent.connection_state, "not_configured"); } #[test] diff --git a/crates/rginx-runtime/src/agent.rs b/crates/rginx-runtime/src/agent.rs index e2dedfbe..92895870 100644 --- a/crates/rginx-runtime/src/agent.rs +++ b/crates/rginx-runtime/src/agent.rs @@ -1,13 +1,19 @@ -use rginx_core::ControlPlaneSettings; +use std::sync::Arc; + +use rginx_core::{AgentSettings, ControlPlaneSettings}; use tokio::sync::watch; use crate::state::RuntimeState; -pub async fn run( +pub async fn run_legacy_control_plane( control_plane: ControlPlaneSettings, state: RuntimeState, shutdown: watch::Receiver, ) -> rginx_agent::Result<()> { + tracing::warn!( + listen = %control_plane.listen, + "legacy node-side control-plane server enabled; prefer outbound agent to avoid opening an extra management port on the node" + ); let context = rginx_agent::ControlPlaneContext::new( state.http.clone(), std::sync::Arc::new(rginx_agent::ProcessSignalReloadExecutor::current_process()), @@ -17,3 +23,72 @@ pub async fn run( )); rginx_agent::run_with_context(control_plane, context, shutdown).await } + +pub async fn run_outbound( + agent: AgentSettings, + state: RuntimeState, + shutdown: watch::Receiver, +) -> rginx_agent::Result<()> { + let token = tokio::fs::read_to_string(&agent.auth.token_path).await?; + let token = token.trim().to_string(); + if token.is_empty() { + return Err(rginx_agent::Error::Server("agent token file is empty".to_string())); + } + run_outbound_with_client( + agent.clone(), + state, + shutdown, + Arc::new(rginx_agent::HttpOutboundControlPlaneClient::with_request_timeout( + agent.endpoint.clone(), + token.clone(), + agent.request_timeout, + )), + Some(Arc::new(rginx_agent::WebSocketOutboundStreamClient::with_connect_timeout( + agent.endpoint.clone(), + token.clone(), + agent.connect_timeout, + ))), + token, + ) + .await +} + +async fn run_outbound_with_client( + agent: AgentSettings, + state: RuntimeState, + shutdown: watch::Receiver, + client: Arc, + stream_client: 
Option>, + command_signing_key: String, +) -> rginx_agent::Result<()> { + let http_state = state.http.clone(); + http_state.set_agent_identity(&agent); + http_state.set_agent_configured(&agent); + let core = rginx_agent::AgentCore::new( + http_state.clone(), + Arc::new(rginx_agent::ProcessSignalReloadExecutor::current_process()), + ) + .with_config_apply_executor(Arc::new(crate::apply::RuntimeConfigApplyExecutor::new(state))); + + tracing::info!( + endpoint = %agent.endpoint, + node_id = %agent.node_id, + region = agent.region.as_deref().unwrap_or(""), + pop = agent.pop.as_deref().unwrap_or(""), + "outbound agent task started" + ); + + let mut outbound = rginx_agent::OutboundAgent::new(agent.clone(), core, client) + .with_command_signing_key(command_signing_key) + .with_local_disable(http_state.agent_disabled_receiver()); + if let Some(stream_client) = stream_client { + outbound = outbound.with_stream_client(stream_client); + } + outbound.with_state_path(agent.state_path.clone())?.run(shutdown).await?; + + tracing::info!(node_id = %agent.node_id, "outbound agent task stopped"); + Ok(()) +} + +#[cfg(test)] +mod tests; diff --git a/crates/rginx-runtime/src/agent/tests.rs b/crates/rginx-runtime/src/agent/tests.rs new file mode 100644 index 00000000..86104267 --- /dev/null +++ b/crates/rginx-runtime/src/agent/tests.rs @@ -0,0 +1,180 @@ +use std::collections::{BTreeMap, HashMap}; +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; +use std::time::Duration; + +use rginx_core::{ + AgentAuthSettings, AgentSettings, ConfigSnapshot, Listener, RuntimeSettings, Server, + VirtualHost, +}; +use tokio::sync::watch; + +use super::*; + +#[tokio::test] +async fn outbound_agent_task_starts_and_stops_on_shutdown() { + let tempdir = tempfile::tempdir().expect("tempdir should be created"); + let token_path = tempdir.path().join("agent.token"); + std::fs::write(&token_path, "secret").expect("agent token should be written"); + let state = 
RuntimeState::new(tempdir.path().join("rginx.ron"), snapshot()) + .expect("runtime state should build"); + let settings = AgentSettings { + endpoint: "https://control.example.com".parse().unwrap(), + node_id: "edge-sfo-1".to_string(), + auth: AgentAuthSettings { token_path }, + state_path: tempdir.path().join("agent-state.json"), + region: Some("us-west".to_string()), + pop: Some("sfo".to_string()), + labels: BTreeMap::from([("tier".to_string(), "edge".to_string())]), + heartbeat_interval: Duration::from_secs(30), + connect_timeout: Duration::from_secs(10), + request_timeout: Duration::from_secs(30), + poll_timeout: Duration::from_secs(30), + backoff_initial: Duration::from_millis(500), + backoff_max: Duration::from_secs(60), + }; + let (shutdown_tx, shutdown_rx) = watch::channel(false); + + let task = tokio::spawn(run_outbound_with_client( + settings, + state.clone(), + shutdown_rx, + Arc::new(NoopControlCenter), + None, + "secret".to_string(), + )); + wait_for_agent_identity(&state).await; + wait_for_agent_connection(&state).await; + + shutdown_tx.send(true).expect("shutdown should be sent"); + tokio::time::timeout(Duration::from_secs(1), task) + .await + .expect("outbound agent task should stop") + .expect("outbound agent task should join") + .expect("outbound agent task should succeed"); +} + +struct NoopControlCenter; + +impl rginx_agent::OutboundControlPlaneClient for NoopControlCenter { + fn register( + &self, + _request: rginx_agent::AgentRegisterRequest, + ) -> Pin> + Send + 'static>> { + Box::pin(async { Ok(()) }) + } + + fn heartbeat( + &self, + _request: rginx_agent::AgentHeartbeatRequest, + ) -> Pin> + Send + 'static>> { + Box::pin(async { Ok(()) }) + } + + fn poll_commands( + &self, + _node_id: String, + _cursor: Option, + _timeout: Duration, + ) -> Pin< + Box< + dyn Future> + + Send + + 'static, + >, + > { + Box::pin(async { + tokio::time::sleep(Duration::from_millis(10)).await; + Ok(rginx_agent::AgentPollResponse::empty()) + }) + } + + fn 
post_result( + &self, + _result: rginx_agent::AgentCommandResult, + ) -> Pin> + Send + 'static>> { + Box::pin(async { Ok(()) }) + } +} + +async fn wait_for_agent_identity(state: &RuntimeState) { + for _ in 0..20 { + let status = state.http.status_snapshot().await; + if status.node.node_id == "edge-sfo-1" { + assert_eq!(status.node.region.as_deref(), Some("us-west")); + assert_eq!(status.node.pop.as_deref(), Some("sfo")); + assert_eq!(status.node.labels.get("tier").map(String::as_str), Some("edge")); + return; + } + tokio::time::sleep(Duration::from_millis(10)).await; + } + panic!("outbound agent identity was not published"); +} + +async fn wait_for_agent_connection(state: &RuntimeState) { + for _ in 0..20 { + let status = state.http.status_snapshot().await; + if status.agent.connection_state == "connected" { + assert!(status.agent.configured); + assert!(status.agent.enabled); + assert_eq!(status.agent.node_id.as_deref(), Some("edge-sfo-1")); + assert_eq!(status.agent.endpoint.as_deref(), Some("https://control.example.com/")); + return; + } + tokio::time::sleep(Duration::from_millis(10)).await; + } + panic!("outbound agent connection state was not published"); +} + +fn snapshot() -> ConfigSnapshot { + ConfigSnapshot { + agent: None, + control_plane: None, + acme: None, + managed_certificates: Vec::new(), + cache_zones: HashMap::new(), + runtime: RuntimeSettings { + shutdown_timeout: Duration::from_secs(1), + worker_threads: None, + accept_workers: 1, + }, + listeners: vec![Listener { + id: "default".to_string(), + name: "default".to_string(), + server: Server { + listen_addr: "127.0.0.1:0".parse().unwrap(), + server_header: rginx_core::default_server_header(), + default_certificate: None, + trusted_proxies: Vec::new(), + client_ip_header: None, + keep_alive: true, + max_headers: None, + max_request_body_bytes: None, + max_connections: None, + header_read_timeout: None, + request_body_read_timeout: None, + response_write_timeout: None, + http1: 
rginx_core::Http1Settings::default(), + access_log_format: None, + tls: None, + }, + default_server: true, + reuse_port_enabled: false, + tls_termination_enabled: false, + proxy_protocol_enabled: false, + http3: None, + }], + default_vhost: VirtualHost { + id: "server".to_string(), + listener_ids: Vec::new(), + default_listener_ids: Vec::new(), + server_names: Vec::new(), + routes: Vec::new(), + tls: None, + }, + vhosts: Vec::new(), + upstreams: HashMap::new(), + lookup: Default::default(), + } +} diff --git a/crates/rginx-runtime/src/bootstrap/listeners/tests.rs b/crates/rginx-runtime/src/bootstrap/listeners/tests.rs index e9b83588..5d9f6feb 100644 --- a/crates/rginx-runtime/src/bootstrap/listeners/tests.rs +++ b/crates/rginx-runtime/src/bootstrap/listeners/tests.rs @@ -40,6 +40,7 @@ fn listener(id: &str, name: &str, listen_addr: SocketAddr) -> Listener { fn config_with_listeners(listeners: Vec) -> ConfigSnapshot { ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), diff --git a/crates/rginx-runtime/src/bootstrap/mod.rs b/crates/rginx-runtime/src/bootstrap/mod.rs index 93c1d64a..aafe7447 100644 --- a/crates/rginx-runtime/src/bootstrap/mod.rs +++ b/crates/rginx-runtime/src/bootstrap/mod.rs @@ -52,8 +52,15 @@ pub async fn run(config_path: PathBuf, config: ConfigSnapshot) -> Result<()> { state.http.clone(), shutdown_tx.subscribe(), ))); + let mut agent_task = current_config.agent.clone().map(|agent| { + tokio::spawn(agent::run_outbound(agent, state.clone(), shutdown_tx.subscribe())) + }); let mut control_plane_task = current_config.control_plane.clone().map(|control_plane| { - tokio::spawn(agent::run(control_plane, state.clone(), shutdown_tx.subscribe())) + tokio::spawn(agent::run_legacy_control_plane( + control_plane, + state.clone(), + shutdown_tx.subscribe(), + )) }); let mut cache_task = Some(tokio::spawn(cache::run(state.http.clone(), shutdown_tx.subscribe()))); @@ -113,6 +120,7 @@ pub 
async fn run(config_path: PathBuf, config: ConfigSnapshot) -> Result<()> { &mut draining_listener_groups, shutdown::ShutdownTasks { admin_task: &mut admin_task, + agent_task: &mut agent_task, control_plane_task: &mut control_plane_task, cache_task: &mut cache_task, health_task: &mut health_task, diff --git a/crates/rginx-runtime/src/bootstrap/shutdown.rs b/crates/rginx-runtime/src/bootstrap/shutdown.rs index 4c11b663..170b9d75 100644 --- a/crates/rginx-runtime/src/bootstrap/shutdown.rs +++ b/crates/rginx-runtime/src/bootstrap/shutdown.rs @@ -13,6 +13,7 @@ use super::listeners::{ pub(super) struct ShutdownTasks<'a> { pub(super) admin_task: &'a mut Option>>, + pub(super) agent_task: &'a mut Option>>, pub(super) control_plane_task: &'a mut Option>>, pub(super) cache_task: &'a mut Option>, pub(super) health_task: &'a mut Option>, @@ -30,6 +31,7 @@ pub(super) async fn graceful_shutdown( ) -> Result<()> { let ShutdownTasks { admin_task, + agent_task, control_plane_task, cache_task, health_task, @@ -44,7 +46,8 @@ pub(super) async fn graceful_shutdown( join_listener_worker_groups(&state.http, active_listener_groups, draining_listener_groups) .await?; join_admin_task(admin_task).await?; - join_control_plane_task(control_plane_task).await?; + join_agent_result_task(agent_task, "outbound agent").await?; + join_agent_result_task(control_plane_task, "control plane").await?; join_unit_task(cache_task, "cache cleanup").await?; join_unit_task(health_task, "active health").await?; join_unit_task(acme_task, "managed ACME").await?; @@ -62,6 +65,7 @@ pub(super) async fn graceful_shutdown( abort_listener_worker_groups(active_listener_groups.values()); abort_listener_worker_groups(draining_listener_groups.iter()); abort_task(admin_task.as_ref()); + abort_task(agent_task.as_ref()); abort_task(control_plane_task.as_ref()); abort_task(cache_task.as_ref()); abort_task(health_task.as_ref()); @@ -76,7 +80,8 @@ pub(super) async fn graceful_shutdown( .await; 
join_admin_task_after_abort(admin_task).await; - join_control_plane_task_after_abort(control_plane_task).await; + join_agent_result_task_after_abort(agent_task, "outbound agent").await; + join_agent_result_task_after_abort(control_plane_task, "control plane").await; join_unit_task_after_abort(cache_task, "cache cleanup").await; join_unit_task_after_abort(health_task, "active health").await; join_unit_task_after_abort(acme_task, "managed ACME").await; @@ -103,15 +108,14 @@ async fn join_admin_task(task: &mut Option>>) -> Ok(()) } -async fn join_control_plane_task( +async fn join_agent_result_task( task: &mut Option>>, + name: &str, ) -> Result<()> { if let Some(task) = task.take() { task.await - .map_err(|error| Error::Server(format!("control plane task failed to join: {error}")))? - .map_err(|error| { - Error::Server(format!("control plane task returned error: {error}")) - })?; + .map_err(|error| Error::Server(format!("{name} task failed to join: {error}")))? + .map_err(|error| Error::Server(format!("{name} task returned error: {error}")))?; } Ok(()) } @@ -138,16 +142,17 @@ async fn join_admin_task_after_abort(task: &mut Option>>, + name: &str, ) { if let Some(task) = task.take() { match task.await { Err(error) if !error.is_cancelled() => { - tracing::warn!(%error, "control plane task failed after abort"); + tracing::warn!(%error, "{name} task failed after abort"); } Ok(Err(error)) => { - tracing::warn!(%error, "control plane task returned error after abort"); + tracing::warn!(%error, "{name} task returned error after abort"); } _ => {} } diff --git a/crates/rginx-runtime/src/bootstrap/shutdown/tests.rs b/crates/rginx-runtime/src/bootstrap/shutdown/tests.rs index 8be2ad24..d2450cb3 100644 --- a/crates/rginx-runtime/src/bootstrap/shutdown/tests.rs +++ b/crates/rginx-runtime/src/bootstrap/shutdown/tests.rs @@ -16,6 +16,7 @@ use super::*; fn snapshot() -> ConfigSnapshot { ConfigSnapshot { + agent: None, control_plane: None, acme: None, managed_certificates: 
Vec::new(), @@ -103,6 +104,7 @@ async fn graceful_shutdown_waits_for_background_tasks_and_signals_shutdown() { let mut active_listener_groups = ListenerGroupMap::new(); let mut draining_listener_groups = Vec::new(); let mut admin_task = Some(tokio::spawn(async { Ok::<(), std::io::Error>(()) })); + let mut agent_task = Some(tokio::spawn(async { Ok::<(), rginx_agent::Error>(()) })); let mut control_plane_task = Some(tokio::spawn(async { Ok::<(), rginx_agent::Error>(()) })); let mut cache_task = Some(tokio::spawn(async {})); let mut health_task = Some(tokio::spawn(async {})); @@ -117,6 +119,7 @@ async fn graceful_shutdown_waits_for_background_tasks_and_signals_shutdown() { &mut draining_listener_groups, ShutdownTasks { admin_task: &mut admin_task, + agent_task: &mut agent_task, control_plane_task: &mut control_plane_task, cache_task: &mut cache_task, health_task: &mut health_task, @@ -130,6 +133,7 @@ async fn graceful_shutdown_waits_for_background_tasks_and_signals_shutdown() { assert!(*shutdown_rx.borrow()); assert!(background_task_drained.load(Ordering::Relaxed)); assert!(admin_task.is_none()); + assert!(agent_task.is_none()); assert!(control_plane_task.is_none()); assert!(cache_task.is_none()); assert!(health_task.is_none()); @@ -154,6 +158,7 @@ async fn graceful_shutdown_aborts_pending_tasks_after_timeout() { let mut active_listener_groups = ListenerGroupMap::new(); let mut draining_listener_groups = Vec::new(); let mut admin_task = Some(tokio::spawn(async { pending::>().await })); + let mut agent_task = Some(tokio::spawn(async { pending::>().await })); let mut control_plane_task = Some(tokio::spawn(async { pending::>().await })); let mut cache_task = Some(tokio::spawn(async { pending::<()>().await })); @@ -170,6 +175,7 @@ async fn graceful_shutdown_aborts_pending_tasks_after_timeout() { &mut draining_listener_groups, ShutdownTasks { admin_task: &mut admin_task, + agent_task: &mut agent_task, control_plane_task: &mut control_plane_task, cache_task: &mut 
cache_task, health_task: &mut health_task, @@ -183,6 +189,7 @@ async fn graceful_shutdown_aborts_pending_tasks_after_timeout() { assert!(*shutdown_rx.borrow()); assert!(background_task_started.load(Ordering::Relaxed)); assert!(admin_task.is_none()); + assert!(agent_task.is_none()); assert!(control_plane_task.is_none()); assert!(cache_task.is_none()); assert!(health_task.is_none()); diff --git a/crates/rginx-runtime/src/health/tests.rs b/crates/rginx-runtime/src/health/tests.rs index e2a0f7a0..e250b9ea 100644 --- a/crates/rginx-runtime/src/health/tests.rs +++ b/crates/rginx-runtime/src/health/tests.rs @@ -54,6 +54,7 @@ async fn collect_probe_targets_only_includes_enabled_upstreams() { }; let snapshot = ConfigSnapshot { control_plane: None, + agent: None, acme: None, managed_certificates: Vec::new(), cache_zones: HashMap::new(), diff --git a/crates/rginx-sdk/src/lib.rs b/crates/rginx-sdk/src/lib.rs index bd82899d..2ed0a1b5 100644 --- a/crates/rginx-sdk/src/lib.rs +++ b/crates/rginx-sdk/src/lib.rs @@ -18,7 +18,7 @@ //! //! #[tokio::main] //! async fn main() -> Result<(), Box> { -//! let config = ClientConfig::new("https://control-plane.example.com") +//! let config = ClientConfig::new("https://control-plane.example.com")? //! .with_api_key("your-api-key"); //! //! 
let client = ControlPlaneClient::new(config)?; diff --git a/crates/rginx-sdk/src/websocket.rs b/crates/rginx-sdk/src/websocket.rs index 00b1fe71..55013d93 100644 --- a/crates/rginx-sdk/src/websocket.rs +++ b/crates/rginx-sdk/src/websocket.rs @@ -122,24 +122,4 @@ impl EventSubscriber { } #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_build_websocket_url() { - let config = ClientConfig::new("https://example.com:8080").unwrap(); - let subscriber = EventSubscriber::new(config); - - let ws_url = subscriber.build_websocket_url().unwrap(); - assert_eq!(ws_url, "wss://example.com:8080/v1/events"); - } - - #[test] - fn test_build_websocket_url_http() { - let config = ClientConfig::new("http://localhost:8080").unwrap(); - let subscriber = EventSubscriber::new(config); - - let ws_url = subscriber.build_websocket_url().unwrap(); - assert_eq!(ws_url, "ws://localhost:8080/v1/events"); - } -} +mod tests; diff --git a/crates/rginx-sdk/src/websocket/tests.rs b/crates/rginx-sdk/src/websocket/tests.rs new file mode 100644 index 00000000..9c409aab --- /dev/null +++ b/crates/rginx-sdk/src/websocket/tests.rs @@ -0,0 +1,19 @@ +use super::*; + +#[test] +fn test_build_websocket_url() { + let config = ClientConfig::new("https://example.com:8080").unwrap(); + let subscriber = EventSubscriber::new(config); + + let ws_url = subscriber.build_websocket_url().unwrap(); + assert_eq!(ws_url, "wss://example.com:8080/v1/events"); +} + +#[test] +fn test_build_websocket_url_http() { + let config = ClientConfig::new("http://localhost:8080").unwrap(); + let subscriber = EventSubscriber::new(config); + + let ws_url = subscriber.build_websocket_url().unwrap(); + assert_eq!(ws_url, "ws://localhost:8080/v1/events"); +} diff --git a/docs/AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md b/docs/AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md new file mode 100644 index 00000000..b048025a --- /dev/null +++ b/docs/AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md @@ -0,0 +1,711 @@ +# rginx-agent 出站控制面改造计划 + +## 背景 + +当前 
`rginx-agent` 更像是节点侧内嵌的控制面 HTTP 服务:每个 rginx 节点需要额外监听控制端口,配置控制面 TLS 证书,并允许控制中心或运维客户端主动访问该节点。 + +这个模型会把每个边缘节点都变成一个需要单独暴露、单独加固、单独维护证书和防火墙规则的控制服务器。对于 NAT、多云、私网、动态 IP、边缘机房和大量节点场景,这个运维成本过高,也扩大了节点侧暴露面。 + +新的方向是:rginx 节点本地只负责业务网站的 80/443 端口,控制通讯全部由节点主动向控制中心发起出站连接。 + +## 目标 + +- rginx 节点不再为了控制面新开公网端口。 +- rginx 节点不再需要控制面专用域名。 +- rginx 节点不再需要作为控制面服务端维护 TLS 证书。 +- 控制中心只暴露统一的 HTTPS 入口。 +- 节点通过出站 HTTPS、WebSocket 或未来的 gRPC stream 与控制中心通讯。 +- 现有 reload、config apply、cache purge、status snapshot 等控制能力继续复用。 +- 业务流量和控制流量边界清晰,业务 80/443 不被控制面 API 污染。 + +## 非目标 + +- 不把控制 API 隐藏在业务站点路径下作为主方案。 +- 不要求控制中心能反向访问每个节点。 +- 不在本阶段引入强依赖的消息队列或 broker。 +- 不把旧节点侧 control-plane server 继续作为长期推荐路径。 + +## 目标架构 + +```text +业务流量: +User -> rginx:80/443 -> upstream + +控制流量: +rginx agent -> control center:443 +``` + +节点侧只需要能出站访问控制中心: + +```text +rginx node + ├─ HTTP/HTTPS listener: 80/443 + ├─ local runtime state + ├─ agent core + └─ outbound agent client -> https://control.example.com +``` + +控制中心负责: + +- 节点注册与认证 +- desired state 存储 +- command 分发 +- command result 收集 +- 节点心跳和健康状态 +- rollout 编排 +- 审计和可观测性 + +## 推荐配置形态 + +新的配置块建议命名为 `agent`: + +```ron +agent: Some(AgentConfig( + enabled: true, + endpoint: "https://control.example.com", + node_id: "edge-sfo-1", + token_path: "/etc/rginx/agent.token", + state_path: Some("/var/lib/rginx/agent/state.json"), + region: Some("us-west"), + pop: Some("sfo"), + labels: { + "tier": "edge", + }, + heartbeat_interval_secs: Some(30), +)) +``` + +旧的 `control_plane` 配置进入 legacy 状态: + +```ron +control_plane: Some(ControlPlaneConfig( + enabled: true, + listen: "0.0.0.0:9443", + ... 
+)) +``` + +旧配置先兼容并打印 deprecation warning,后续阶段逐步降级为 feature flag 或删除。 + +## 阶段 0:冻结现状与边界 + +状态:已完成。 + +目标:明确旧模型的问题、保留能力和废弃边界,防止继续沿着节点侧 HTTP server 方向堆功能。 + +保留能力: + +- runtime reload +- managed config apply +- cache purge、invalidate、clear invalidations +- node status、snapshot、delta、wait +- traffic、upstreams、cache、system、revision 观测数据 +- revision convergence 状态 +- 可复用的 audit、metrics、rate limit、event bus 组件 + +废弃或降级能力: + +- 节点侧 `control_plane.listen` +- 节点侧控制面 TLS server +- 节点侧控制面 API key 服务端认证 +- 控制中心主动访问节点的通信模型 + +代码范围: + +- `crates/rginx-agent` +- `crates/rginx-runtime/src/agent.rs` +- `crates/rginx-runtime/src/bootstrap/mod.rs` +- `crates/rginx-config/src/model/control_plane.rs` +- `crates/rginx-config/src/compile/control_plane.rs` +- `crates/rginx-core/src/config/control_plane.rs` + +验收标准: + +- 本文档成为 outbound agent 改造的阶段性依据。 +- 当前节点侧 control-plane server 被明确标注为 legacy。 +- 后续功能默认按 outbound agent 模型设计。 + +已落地内容: + +- `ControlPlaneConfig` 和 `ControlPlaneSettings` 已标注为 legacy 节点侧控制面 server 模型。 +- 启用 `control_plane.enabled=true` 时会记录 deprecation warning。 +- `rginx-agent` 当前 HTTP server adapter 已标注为 legacy。 +- `docs/CONTROL_PLANE.md` 和 `docs/MTLS_SETUP_GUIDE.md` 已说明它们描述的是旧节点侧 server 方案。 + +## 阶段 1:抽出 Agent Core + +状态:已完成。 + +目标:把控制动作执行能力从 HTTP handler 中抽出来,形成可被 REST、long polling、WebSocket 或 gRPC 共同调用的内部服务层。 + +建议接口: + +```rust +pub struct AgentCore { + // shared runtime state and executors +} + +impl AgentCore { + pub async fn status(&self) -> Result; + pub async fn snapshot(&self, window_secs: Option) -> Result; + pub async fn delta_since(&self, since_version: u64, window_secs: Option) -> Result; + pub async fn reload(&self) -> Result; + pub async fn apply_config(&self, request: ManagedResourceMutation) -> Result>; + pub async fn purge_cache(&self, request: CachePurgeCommand) -> Result>; + pub async fn invalidate_cache(&self, request: CacheInvalidateCommand) -> Result>; + pub async fn set_desired_revision(&self, desired_revision: u64) -> Result; +} +``` + +改动重点: + +- 
从 `server/request/read.rs` 中抽出读模型。 +- 从 `server/write.rs` 中抽出写模型。 +- `ControlPlaneContext` 逐步演进为可复用 runtime control context。 +- 旧 REST handler 只作为 adapter,负责解析 HTTP 并调用 `AgentCore`。 + +验收标准: + +- `cargo test -p rginx-agent` 通过。 +- 旧 REST control-plane 行为保持不变。 +- 新增 core 级测试覆盖 reload、config apply、cache 操作和 snapshot。 + +已落地内容: + +- 新增 `AgentCore`,集中承载 node status、snapshot、delta、traffic、upstreams、cache、system、revision 读取能力。 +- `AgentCore` 集中承载 reload、managed config apply、cache purge/invalidate/clear invalidations、desired revision 更新能力。 +- `ControlPlaneContext` 已改为持有 `AgentCore`,旧兼容方法委托给 core。 +- 旧 REST GET/POST handler 已改成 adapter:负责解析 HTTP 请求并调用 `AgentCore`。 +- 新增 core 级测试覆盖 snapshot、reload、config apply、cache 操作和 desired revision。 + +## 阶段 2:新增 Outbound Agent 配置模型 + +状态:已完成。 + +目标:让 runtime 能基于 `agent` 配置启动出站客户端,而不是基于 `control_plane.listen` 启动节点侧 HTTP server。 + +新增模型: + +- `rginx-config/src/model/agent.rs` +- `rginx-core/src/config/agent.rs` +- `AgentConfig` +- `AgentSettings` +- `AgentAuthSettings` + +建议字段: + +- `enabled` +- `endpoint` +- `node_id` +- `token_path` +- `state_path` +- `region` +- `pop` +- `labels` +- `heartbeat_interval_secs` +- `connect_timeout_secs` +- `request_timeout_secs` +- `poll_timeout_secs` +- `backoff_initial_ms` +- `backoff_max_secs` + +编译和校验要求: + +- `endpoint` 必须是 HTTPS URL。 +- `node_id` 不能为空。 +- `token_path` 必须指向普通文件。 +- `state_path` 不能为空;未配置时使用 `/var/lib/rginx/agent/state.json`。 +- label key/value 必须非空。 +- timeout 和 backoff 必须在合理范围。 + +runtime 启动策略: + +- `agent.enabled=true` 时启动 outbound agent client。 +- `control_plane.enabled=true` 时继续启动 legacy server,但打印 warning。 +- 两者同时启用时允许短期共存,但文档标记不推荐。 + +验收标准: + +- 配置编译测试覆盖 enabled、disabled、缺字段、非法 URL、非法 token path。 +- runtime 启动测试覆盖 agent task 启动路径。 +- 旧 `control_plane` 兼容但有 deprecation warning。 + +已落地内容: + +- 新增 `AgentConfig`,支持 `enabled`、`endpoint`、`node_id`、`token_path`、`state_path`、地域、POP、labels、heartbeat、timeout 和 backoff 字段。 +- 新增 `AgentSettings` 与 `AgentAuthSettings`,编译后进入 
`ConfigSnapshot.agent`。 +- `agent.enabled=true` 时要求 `endpoint` 是 HTTPS URL,`node_id` 和 `token_path` 非空,`token_path` 编译时必须指向普通文件。 +- agent labels、timeout 和 backoff 已纳入 validate/compile 校验。 +- runtime 已能在 `agent.enabled=true` 时启动 outbound agent task;旧 `control_plane` task 仍可并行启动用于短期兼容。 +- outbound agent task 当前作为阶段 2 启动骨架:读取 token、发布 agent 身份到运行时状态并等待 shutdown;真实 long polling 放到阶段 3。 +- 新增配置层 agent 测试和 runtime agent 启动测试。 + +## 阶段 3:实现 HTTPS Long Polling MVP + +状态:已完成。 + +目标:先用最稳、最容易测试的出站 HTTP 模型跑通完整控制链路。 + +节点主动调用控制中心: + +```text +POST /v1/agents/register +POST /v1/agents/{node_id}/heartbeat +GET /v1/agents/{node_id}/commands?after=<cursor>&timeout=30s +POST /v1/agents/{node_id}/commands/{command_id}/result +POST /v1/agents/{node_id}/events +``` + +注册请求: + +```json +{ + "node_id": "edge-sfo-1", + "version": "0.1.7", + "region": "us-west", + "pop": "sfo", + "labels": { + "tier": "edge" + }, + "capabilities": [ + "reload", + "config.apply", + "cache.purge", + "snapshot.delta" + ] +} +``` + +命令模型: + +```json +{ + "id": "cmd_123", + "type": "apply_config", + "target_node_id": "edge-sfo-1", + "revision": 42, + "expires_at_unix_ms": 1760000000000, + "payload": {} +} +``` + +首批命令类型: + +- `reload` +- `apply_config` +- `set_desired_revision` +- `cache_purge` +- `cache_invalidate` +- `collect_snapshot` + +结果模型: + +```json +{ + "command_id": "cmd_123", + "node_id": "edge-sfo-1", + "status": "succeeded", + "started_at_unix_ms": 1760000000100, + "finished_at_unix_ms": 1760000000200, + "result": {}, + "error": null +} +``` + +可靠性要求: + +- 控制中心不可达时业务 80/443 不受影响。 +- 命令必须有 idempotency key。 +- 重复命令不能重复造成破坏性操作。 +- agent 使用指数退避重连。 +- long polling 超时是正常路径,不应记录为 error。 + +验收标准: + +- mock 控制中心测试 register、heartbeat、poll commands、post result。 +- 控制中心断开后 agent 能恢复。 +- reload、apply_config、cache_purge 命令能端到端执行并回报。 +- `cargo test -p rginx-agent` 通过。 + +已落地内容: + +- 新增 outbound long polling client/runner/model,节点侧通过出站 HTTP(S) 主动访问控制中心。 +- 已实现注册、heartbeat、poll commands、post command result 的最小协议闭环。 +- MVP 
请求包含 `Authorization: Bearer <token>` 和 `X-Rginx-Node-Id`,后续阶段继续强化签名模型。 +- 已支持首批命令:`reload`、`apply_config`、`set_desired_revision`、`cache_purge`、`cache_invalidate`、`collect_snapshot`。 +- 命令执行统一走 `AgentCore`,复用阶段 1 抽出的 reload/config/cache/snapshot 能力。 +- 命令 `id` 作为内存级 idempotency key;重复命令会复用已生成结果并重新上报,不重复执行破坏性动作。 +- 控制中心请求失败时 agent 不影响业务监听;runner 会按配置的指数退避重试,long polling `204 No Content` 被视为空命令正常路径。 +- runtime 的 outbound agent task 已切换到真实 runner,而不是阶段 2 的占位循环。 +- 新增 mock 控制中心测试覆盖 register、heartbeat、poll commands、post result、断连恢复、重复命令幂等,以及 reload/apply_config/cache_purge 端到端执行。 + +## 阶段 4:认证与安全模型 + +状态:已完成。 + +目标:节点不作为服务端暴露证书,只证明自己是合法节点。 + +推荐演进顺序: + +1. MVP:`Authorization: Bearer <token>` +2. 强化版:HMAC request signing +3. 高安全版:Ed25519 节点密钥签名 +4. 可选:mTLS client certificate,只作为出站客户端证书 + +MVP 请求头: + +```text +Authorization: Bearer <token> +X-Rginx-Node-Id: edge-sfo-1 +X-Rginx-Timestamp: 1760000000000 +X-Rginx-Nonce: <nonce> +``` + +强化版签名材料: + +```text +method + "\n" + +path + "\n" + +timestamp + "\n" + +nonce + "\n" + +sha256(body) +``` + +安全要求: + +- token 或私钥只从文件读取。 +- token 和签名密钥不能写入日志。 +- 每次请求带 timestamp 和 nonce,控制中心可防重放。 +- 控制中心可签发短期 session token。 +- command 带 `target_node_id`、`expires_at_unix_ms` 和签名。 +- 节点只执行发给自己的命令。 + +验收标准: + +- token 缺失、错误、过期均被拒绝。 +- replay request 被拒绝。 +- 过期 command 不执行。 +- 发给其他 node_id 的 command 不执行。 +- 日志中不出现 token 或签名密钥。 + +已落地内容: + +- outbound HTTP 客户端在每次 register、heartbeat、poll、post result 请求中带 `Authorization`、`X-Rginx-Node-Id`、`X-Rginx-Timestamp`、`X-Rginx-Nonce`、`X-Rginx-Body-Sha256`、`X-Rginx-Signature`。 +- 请求签名材料按 `method + path + timestamp + nonce + sha256(body)` 生成,endpoint 带 base path 时签名路径包含完整 path。 +- 新增 `OutboundAuthVerifier`,控制中心侧可复用同一套 Bearer token、timestamp、nonce、body hash、HMAC signature 校验逻辑;验签成功后才记录 nonce,避免畸形请求占用 nonce。 +- runtime 仍只从 `agent.auth.token_path` 读取 token;日志只记录 endpoint、node_id、region、pop 等非敏感字段,不记录 token 或签名密钥。 +- command 执行前先校验 `target_node_id`、`expires_at_unix_ms`;启用命令签名密钥时,缺失有效期、缺失签名或签名错误的命令都会被拒绝并作为失败结果回传。 +- 测试覆盖 token 缺失/错误/过期、replay 
request、body/signature 篡改、过期 command、错 node_id command、缺签名/错签名 command,以及有效签名命令执行成功。 + +## 阶段 5:状态机与持久化 + +状态:已完成。 + +目标:让 agent 具备可恢复状态,而不是无状态 HTTP 客户端。 + +节点连接状态: + +```text +Starting -> Registering -> Connected -> Degraded -> OfflineRetrying +``` + +命令状态: + +```text +Received -> Accepted -> Executing -> Succeeded + ├-> Failed + ├-> Rejected + └-> TimedOut +``` + +节点侧需要持久化: + +- command cursor +- in-flight command +- 最近 N 条 command result +- 最近一次 register 成功信息 +- 最近一次 heartbeat 成功时间 + +建议本地状态文件: + +```text +/var/lib/rginx/agent/state.json +``` + +可靠性要求: + +- agent 重启后不会重复执行已完成命令。 +- 控制中心重放命令时,节点可返回已有结果。 +- apply_config 成功后再 ack。 +- reload 失败必须回传错误摘要。 +- 控制中心不可达不会阻塞业务流量。 + +验收标准: + +- agent 重启恢复 cursor。 +- in-flight command 在重启后按规则恢复或失败上报。 +- 重复 command 返回 cached result。 +- 断网、控制中心 5xx、响应超时都有测试覆盖。 + +已落地内容: + +- `AgentConfig`/`AgentSettings` 新增 `state_path`,默认 `/var/lib/rginx/agent/state.json`,可在配置中覆盖。 +- 新增 outbound JSON state store,持久化 `connection_state`、`command_cursor`、`in_flight_command`、最近 command results、最近 register/heartbeat 成功时间。 +- runner 启动时从 state store 恢复 cursor 和最近结果;控制中心重放已完成 command 时直接返回 cached result,不重复执行。 +- command 执行前先写入 in-flight;结果生成后先落盘,再上报 result;result 上报成功后才推进 cursor,保证 `apply_config`/`reload` 不会提前 ack。 +- 如果 agent 重启时发现旧的 in-flight command,会生成失败结果并在恢复连接后上报,同时保留原 cursor 继续 long polling。 +- 测试覆盖 cursor 恢复、重启后 cached result、in-flight 恢复失败上报、断网、控制中心 5xx、result 上报超时以及后续恢复。 + +## 阶段 6:实时 Stream 升级 + +状态:已完成。 + +目标:long polling 稳定后,再增加实时通道,降低命令延迟。 + +首选方案: + +- WebSocket stream +- 节点主动连接 `wss://control.example.com/v1/agents/stream` +- 复用已有 `tokio-tungstenite` 依赖 + +可选后续方案: + +- gRPC bidirectional stream +- 适合未来 SDK、强 schema 和多语言控制中心 +- 需要引入 tonic/prost 生态,放在 WebSocket 稳定之后评估 + +stream 消息类型: + +- `hello` +- `registered` +- `heartbeat` +- `command` +- `command_result` +- `snapshot_delta` +- `event` +- `ping` +- `pong` +- `error` + +降级策略: + +- stream 在线时走实时命令下发。 +- stream 断开后自动回到 long polling。 +- 重连后用 cursor 补齐断线期间命令。 +- stream 是优化路径,不是唯一生命线。 + 
+验收标准: + +- stream 在线时命令实时下发。 +- stream 断开后 long polling 接管。 +- reconnect 后从 cursor 继续。 +- 不影响业务监听和请求处理。 + +已落地内容: + +- 新增 `OutboundStreamClient` 抽象和 `WebSocketOutboundStreamClient`,节点主动连接 `wss://<control-center-host>/v1/agents/stream`。 +- WebSocket 握手复用阶段 4 的 Bearer token、timestamp、nonce、body hash、HMAC signature 头;`wss` 支持通过 `tokio-tungstenite` 的 rustls native roots 功能启用。 +- stream 协议支持 `hello`、`registered`、`heartbeat`、`command`、`commands`、`command_result`、`snapshot_delta`、`event`、`ping`、`pong`、`error` 消息。 +- runner 在 stream client 配置存在时优先等待 stream command;stream 返回命令时不走 long polling,命令结果通过 stream `command_result` 上报。 +- stream 断开或报错时自动 fallback 到现有 long polling;cursor、in-flight、recent results 继续复用阶段 5 的持久化状态。 +- reconnect/fallback 时带上当前 cursor,保证 stream 期间推进的 cursor 可用于断线补齐。 +- 测试覆盖 stream 在线实时命令、stream 断开后 long polling 接管、fallback/reconnect 后从 stream cursor 继续。 + +## 阶段 7:控制中心侧 API 与状态模型 + +目标:定义控制中心需要提供的最小服务能力,避免 agent 协议只实现了节点侧。 + +控制中心最小状态: + +- nodes +- node_sessions +- commands +- command_results +- desired_revisions +- heartbeats +- events +- rollouts + +建议服务端 API: + +```text +POST /v1/agents/register +POST /v1/agents/{node_id}/heartbeat +GET /v1/agents/{node_id}/commands +POST /v1/agents/{node_id}/commands/{command_id}/result +POST /v1/agents/{node_id}/events +GET /v1/control/nodes +POST /v1/control/nodes/{node_id}/commands +GET /v1/control/commands/{command_id} +``` + +持久化建议: + +- MVP 可以用 SQLite。 +- 多控制中心实例或生产集群使用 Postgres。 +- command queue 先用数据库行锁和状态机实现,避免过早引入 broker。 + +验收标准: + +- 控制中心可创建命令。 +- 节点可拉取命令并上报结果。 +- 控制中心可查询节点当前状态和最近结果。 +- rollout 能基于节点 labels 和健康状态选择目标。 + +状态:已完成。 + +已落地内容: + +- 新增 `rginx-agent::control_center` 模块,提供控制中心侧的最小内存状态模型。 +- 覆盖 `nodes`、`node_sessions`、`commands`、`command_results`、`desired_revisions`、`heartbeats`、`events`、`rollouts` 这些阶段 7 最小状态。 +- 新增 `ControlCenterStore`,支持 agent 注册、心跳、命令创建、命令拉取、结果上报、事件记录、节点查询、最近结果查询。 +- `ControlCenterStore` 实现 `OutboundControlPlaneClient`,因此阶段 3-6 的 outbound agent 可以直接复用同一套 register、heartbeat、poll、post result 协议。 +- 
新增 `ControlCenterCommandCreate`、`ControlCenterCommandRecord`、`ControlCenterNodeFilter`、`RolloutTargetSelector`、`ControlCenterRollout` 等控制中心侧 API 模型。 +- rollout 选择支持按 region、pop、labels、节点健康状态、收敛状态、desired revision 过滤目标节点。 +- 当前落地为内存 MVP;SQLite/Postgres 持久化和多控制中心实例下的行锁状态机仍保留为后续生产化增强。 +- 测试覆盖命令创建到节点拉取再到结果回写、节点状态与最近结果查询、rollout 按 labels/健康/收敛状态选目标。 + +## 阶段 8:迁移旧控制面 + +目标:平滑淘汰节点侧控制端口。 + +迁移节奏: + +- v1:新增 `agent`,旧 `control_plane` 可用但 warning。 +- v2:文档默认推荐 `agent`,旧方案移入 legacy 章节。 +- v3:旧节点侧 control-plane server 变 feature flag。 +- v4:删除旧 server,或只保留本地 debug/test adapter。 + +需要更新的文档: + +- `docs/CONTROL_PLANE.md` +- `docs/MTLS_SETUP_GUIDE.md` +- `docs/openapi.yaml` +- `README.md` +- 包安装和部署示例 + +兼容策略: + +- 旧 REST handler 在阶段 1 后作为 adapter 保留一段时间。 +- 新 outbound agent 与旧 server 可短期共存。 +- 配置迁移工具或 `rginx check` 给出迁移提示。 + +验收标准: + +- 用户可从 `control_plane` 配置迁移到 `agent` 配置。 +- 迁移后节点不再打开额外控制端口。 +- `ss -ltnp` 中只看到业务监听和本地管理入口。 +- 文档不再把节点侧控制面 server 作为推荐部署方式。 + +状态:已完成。 + +已落地内容: + +- 启动 legacy `control_plane` 时明确记录 warning,说明它会打开额外节点管理端口,并推荐 `agent.enabled=true` 出站控制模式。 +- `rginx check` 详细输出新增 Control plane 小节,显示当前模式、是否打开额外节点端口、agent endpoint/node_id/state path、legacy listen 和迁移提示。 +- `docs/CONTROL_PLANE.md` 默认推荐 outbound `agent`,旧节点侧 server 移入 legacy 兼容说明,并补充从 `control_plane` 到 `agent` 的迁移步骤。 +- `docs/MTLS_SETUP_GUIDE.md` 明确只适用于 legacy 节点侧 server,新部署推荐 outbound agent。 +- `docs/openapi.yaml` 新增 `/v1/agents/*` 和 `/v1/control/*` 协议说明与 schema,旧 `/v1/node/*`/`/v1/cache/*`/`/v1/runtime/*` 保留为兼容接口。 +- `README.md` 与 `docs/README.md` 已同步把出站 agent 描述为推荐控制面方向。 +- `configs/control-plane-mtls.example.ron` 已标注为 legacy 示例,避免误导新部署继续暴露节点控制端口。 + +## 阶段 9:本地管理入口 + +状态:已完成。 + +目标:保留机器本地调试和救援能力,但不暴露公网。 + +推荐入口: + +- Unix domain socket +- 本地 CLI +- systemd notify/status + +不推荐: + +- 默认监听 TCP 管理端口 +- 复用业务域名隐藏公网控制 API + +用途: + +- 本机查看状态 +- 本机触发 reload +- 本机导出 snapshot +- 本机查看 agent 连接状态 +- 故障时临时禁用 agent + +验收标准: + +- 本地 CLI 可读 agent 状态。 +- 本地 CLI 可触发安全的本机操作。 +- 默认不打开公网管理端口。 + +已落地内容: + +- 本地管理入口继续使用 Unix 
domain socket,默认不新增公网 TCP 管理端口。 +- `RuntimeStatusSnapshot` 新增 agent 运行快照,包含 configured/enabled、endpoint、node_id、state path、labels、连接状态、cursor、in-flight command 和最近 register/heartbeat 成功时间。 +- outbound agent 每次状态持久化时同步更新本地运行快照,本机状态查询可以看到连接状态。 +- admin socket 新增 `GetAgentStatus` 和 `SetAgentDisabled` 请求,作为本地救援入口。 +- CLI 新增 `rginx agent status`、`rginx agent disable`、`rginx agent enable`,可查看状态并临时暂停/恢复 outbound agent 对控制中心的请求。 +- `rginx status` 摘要中同步展示 agent configured/enabled、local disable 和 connection state。 + +## 实施顺序建议 + +优先顺序: + +1. 阶段 1:抽出 Agent Core。 +2. 阶段 2:新增 `agent` 配置模型。 +3. 阶段 3:实现 HTTPS long polling MVP。 +4. 阶段 4:补齐认证和防重放。 +5. 阶段 5:补齐状态机和持久化。 +6. 阶段 6:增加 WebSocket stream。 +7. 阶段 7:完善控制中心侧 API。 +8. 阶段 8:迁移旧控制面。 +9. 阶段 9:完善本地管理入口。 + +最小可交付切片: + +- `AgentCore` +- `agent` 配置 +- register +- heartbeat +- command polling +- command result +- `reload` 命令 +- `status snapshot` 上报 + +这个切片完成后,rginx 节点就可以在不新增控制端口、不新增节点域名、不配置节点服务端证书的前提下被控制中心管理。 + +## 验证矩阵 + +每个阶段至少运行: + +```bash +cargo fmt --check +cargo test -p rginx-agent +``` + +涉及配置模型时运行: + +```bash +cargo test -p rginx-config control_plane +cargo test -p rginx-core control_plane +``` + +涉及 runtime 启动时运行: + +```bash +cargo test -p rginx-runtime agent +``` + +涉及全局行为或文档示例时运行: + +```bash +cargo test --workspace +``` + +## 最终判断 + +`rginx-agent` 不应该继续作为节点侧暴露的控制 API server 演进。更适合 rginx 的模型是:节点只处理自己的业务监听,agent 作为出站客户端连接控制中心。 + +这样能显著降低部署复杂度和安全暴露面,也更符合边缘节点、私网节点和大规模节点管理场景。 diff --git a/docs/CONTROL_PLANE.md b/docs/CONTROL_PLANE.md index 79e27989..e07bce11 100644 --- a/docs/CONTROL_PLANE.md +++ b/docs/CONTROL_PLANE.md @@ -1,8 +1,52 @@ # rginx Control Plane +> Recommended path: use the outbound `agent` model from +> `AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md`, where nodes only expose their normal +> 80/443 website listeners and initiate control traffic to the control center. +> Legacy node-side server sections are kept only for compatibility and migration. 
+ The rginx Control Plane provides centralized management and orchestration for distributed edge nodes. +The recommended node integration is now the outbound `agent` configuration: each rginx node keeps +serving only its website listeners, then initiates HTTPS/WebSocket traffic to the control center. + +## Recommended Deployment: Outbound Agent + +Use `agent` for new deployments. In this model the node does not listen on a public control-plane +port and does not need a node-side control-plane domain or server certificate. + +```ron +Config( + agent: Some(AgentConfig( + enabled: Some(true), + endpoint: Some("https://control.example.com/rginx"), + node_id: Some("edge-sfo-1"), + token_path: Some("/etc/rginx/agent.token"), + state_path: Some("/var/lib/rginx/agent/state.json"), + region: Some("us-west"), + pop: Some("sfo"), + labels: { + "tier": "edge", + "env": "prod", + }, + )), + // business listeners stay in server/listeners or conf.d/*.ron +) +``` -## Overview +Migration from legacy `control_plane`: + +1. Create a shared agent token file on the node, for example `/etc/rginx/agent.token`. +2. Add `agent.enabled=true`, `endpoint`, `node_id`, `token_path`, and any labels currently used for targeting. +3. Run `rginx check`; the detailed output shows whether a legacy node-side control port is still enabled. +4. Once the control center manages this node through `/v1/agents/*`, remove `control_plane.listen`, + `control_plane.tls`, and `control_plane.api_keys_path`. +5. Restart or reload rginx and verify `ss -ltnp` only shows website listeners plus local management sockets. + +The legacy server endpoints below remain documented for compatibility and local migration windows, but +new features should target the outbound protocol in [openapi.yaml](openapi.yaml) and +[AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md](AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md). 
+ +## Legacy Node-Side Server Overview The Control Plane is a secure, high-performance API service that enables: @@ -179,31 +223,34 @@ Comprehensive monitoring and metrics. - `GET /v1/node/cache` - Cache statistics - `GET /v1/node/system` - System information -## Configuration +## Legacy Node-Side Server Configuration -### Basic Configuration +### Basic Legacy Configuration ```ron -ControlPlane( - listen: "0.0.0.0:8443", - - // TLS configuration - tls: ( - cert_path: "/etc/rginx/certs/server.crt", - key_path: "/etc/rginx/certs/server.key", - ca_cert_path: Some("/etc/rginx/certs/ca.crt"), - require_client_cert: true, - ), - - // API key authentication - api_keys_path: Some("/etc/rginx/api-keys.json"), - - // Network access control - allowed_cidrs: [ - "10.0.0.0/8", - "172.16.0.0/12", - "192.168.0.0/16", - ], +Config( + control_plane: Some(ControlPlaneConfig( + enabled: Some(true), + listen: Some("0.0.0.0:8443"), + + // TLS configuration + tls: Some(ControlPlaneTlsConfig( + cert_path: "/etc/rginx/certs/server.crt", + key_path: "/etc/rginx/certs/server.key", + client_ca_path: Some("/etc/rginx/certs/ca.crt"), + require_client_cert: Some(true), + )), + + // API key authentication + api_keys_path: Some("/etc/rginx/api-keys.json"), + + // Network access control + allowed_cidrs: [ + "10.0.0.0/8", + "172.16.0.0/12", + "192.168.0.0/16", + ], + )), ) ``` diff --git a/docs/MTLS_SETUP_GUIDE.md b/docs/MTLS_SETUP_GUIDE.md index 27518aab..fe46f0e1 100644 --- a/docs/MTLS_SETUP_GUIDE.md +++ b/docs/MTLS_SETUP_GUIDE.md @@ -1,8 +1,20 @@ # mTLS Client Certificate Authentication Setup Guide +> Legacy status: this guide applies to the current node-side control-plane +> server model. The outbound agent direction in +> `AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md` does not require each rginx node to +> expose a control port, own a control-plane domain, or maintain a server +> certificate for control-plane traffic. 
+ ## Overview -rginx-agent now supports **mutual TLS (mTLS)** authentication for the control plane. This provides stronger security than API keys alone by requiring clients to present valid X.509 certificates signed by a trusted Certificate Authority. +rginx-agent supports **mutual TLS (mTLS)** authentication for the legacy node-side control-plane server. This provides stronger security than API keys alone by requiring clients to present valid X.509 certificates signed by a trusted Certificate Authority. + +For new deployments, prefer the outbound `agent` model instead: + +- Node-side legacy server: node listens on `control_plane.listen`, needs server certificate/key, and accepts inbound management traffic. +- Outbound agent: node connects to `agent.endpoint`, uses `agent.token_path`, and does not open an extra control port. +- Migration path: enable `agent`, verify the control center can register/heartbeat/poll commands, then remove `control_plane.*`. ## Authentication Modes @@ -61,11 +73,11 @@ Update your `rginx.ron` configuration: ```ron Config( - control_plane: Some(ControlPlane( + control_plane: Some(ControlPlaneConfig( enabled: Some(true), listen: Some("0.0.0.0:9443"), - tls: Some(ControlPlaneTls( + tls: Some(ControlPlaneTlsConfig( cert_path: "/etc/rginx/control-plane.crt", key_path: "/etc/rginx/control-plane.key", diff --git a/docs/README.md b/docs/README.md index 26235b0b..6bb4b69c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,23 +7,28 @@ ### Control Plane - **`CONTROL_PLANE.md`** - - Complete guide to the rginx Control Plane - - Node management, configuration, gradual rollout, circuit breaker - - API reference, usage examples, best practices + - Control-plane overview with outbound `agent` as the recommended node integration + - Legacy node-side server reference kept for compatibility and migration windows + - Migration guidance for removing node-side control ports - **`openapi.yaml`** - - OpenAPI 3.0 specification for Control Plane API - - 40+ 
endpoints with complete request/response schemas + - OpenAPI 3.0 specification for outbound agent protocol plus legacy Control Plane API + - Agent register/heartbeat/command/result endpoints and legacy API schemas - Use with Swagger UI or Redoc for interactive documentation - **`MTLS_SETUP_GUIDE.md`** - - Mutual TLS setup guide for Control Plane + - Mutual TLS setup guide for the legacy node-side Control Plane server - Certificate generation, configuration, testing - **`CONTROL_PLANE_ENHANCEMENT_ROADMAP.md`** - Long-term roadmap for Control Plane features - Future phases and planned enhancements +- **`AGENT_OUTBOUND_CONTROL_PLANE_PLAN.md`** + - Plan to move rginx-agent from node-side control server to outbound agent + - No extra node control port, node domain, or node-side control-plane certificate + - Phased migration path, protocol shape, security model, and validation matrix + ### HTTP & Caching - **`CACHE_ARCHITECTURE_GAPS.md`** diff --git a/docs/openapi.yaml b/docs/openapi.yaml index 1822ae48..53e3c515 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -4,6 +4,13 @@ info: description: | Control Plane API for managing rginx edge nodes, configurations, gradual rollouts, and circuit breakers. + The recommended node integration is the outbound agent protocol under `/v1/agents/*`. + In that model rginx nodes initiate HTTPS/WebSocket control traffic to the control center and + do not expose an extra node-side control port, domain, or server certificate. + + Legacy node-side server endpoints such as `/v1/node/*`, `/v1/cache/*`, and `/v1/runtime/*` + remain documented for compatibility during migration windows. + ## Authentication The API supports two authentication methods: @@ -13,7 +20,7 @@ info: ## Rate Limiting API requests are rate-limited per actor. When rate limit is exceeded, the API returns HTTP 429 with a `Retry-After` header. 
- version: 0.1.6 + version: 0.1.7 contact: name: rginx url: https://github.com/rginx/rginx @@ -31,8 +38,12 @@ security: - MutualTLS: [] tags: + - name: Agent Outbound + description: Preferred outbound protocol used by rginx nodes to talk to the control center + - name: Control Center + description: Operator-facing control-center resources for nodes, commands, and rollout targeting - name: Node Management - description: Register and manage edge nodes + description: Legacy node-side server registry endpoints - name: Configuration description: Apply and manage configurations - name: Gradual Rollout @@ -49,6 +60,187 @@ tags: description: Prometheus metrics paths: + # Preferred outbound agent protocol + /v1/agents/register: + post: + tags: + - Agent Outbound + summary: Register an outbound agent node + operationId: registerOutboundAgent + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/AgentRegisterRequest' + responses: + '204': + description: Node registered + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + + /v1/agents/{nodeId}/heartbeat: + post: + tags: + - Agent Outbound + summary: Receive an outbound agent heartbeat + operationId: heartbeatOutboundAgent + parameters: + - $ref: '#/components/parameters/NodeId' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/AgentHeartbeatRequest' + responses: + '204': + description: Heartbeat accepted + '404': + $ref: '#/components/responses/NotFound' + + /v1/agents/{nodeId}/commands: + get: + tags: + - Agent Outbound + summary: Poll commands for an outbound agent + operationId: pollOutboundAgentCommands + parameters: + - $ref: '#/components/parameters/NodeId' + - name: after + in: query + schema: + type: string + description: Last command cursor acknowledged by the node + - name: timeout + in: query + schema: + type: string + example: 30s + description: Long-poll 
timeout requested by the node + responses: + '200': + description: Commands for the node + content: + application/json: + schema: + $ref: '#/components/schemas/AgentPollResponse' + '404': + $ref: '#/components/responses/NotFound' + + /v1/agents/{nodeId}/commands/{commandId}/result: + post: + tags: + - Agent Outbound + summary: Report an outbound agent command result + operationId: postOutboundAgentCommandResult + parameters: + - $ref: '#/components/parameters/NodeId' + - $ref: '#/components/parameters/CommandId' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/AgentCommandResult' + responses: + '204': + description: Result accepted + '404': + $ref: '#/components/responses/NotFound' + + /v1/agents/{nodeId}/events: + post: + tags: + - Agent Outbound + summary: Publish an outbound agent event + operationId: postOutboundAgentEvent + parameters: + - $ref: '#/components/parameters/NodeId' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/AgentEvent' + responses: + '204': + description: Event accepted + + /v1/control/nodes: + get: + tags: + - Control Center + summary: List control-center node state + operationId: listControlCenterNodes + parameters: + - name: region + in: query + schema: + type: string + - name: pop + in: query + schema: + type: string + - name: health + in: query + schema: + type: string + enum: [healthy, degraded, offline] + responses: + '200': + description: Control-center node summaries + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/ControlCenterNode' + + /v1/control/nodes/{nodeId}/commands: + post: + tags: + - Control Center + summary: Queue a command for a node + operationId: queueControlCenterCommand + parameters: + - $ref: '#/components/parameters/NodeId' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ControlCommandCreate' + responses: + 
'200': + description: Queued command + content: + application/json: + schema: + $ref: '#/components/schemas/ControlCommandRecord' + '404': + $ref: '#/components/responses/NotFound' + + /v1/control/commands/{commandId}: + get: + tags: + - Control Center + summary: Get control-center command state + operationId: getControlCenterCommand + parameters: + - $ref: '#/components/parameters/CommandId' + responses: + '200': + description: Command state + content: + application/json: + schema: + $ref: '#/components/schemas/ControlCommandRecord' + '404': + $ref: '#/components/responses/NotFound' + # Node Management /v1/nodes/register: post: @@ -851,6 +1043,14 @@ components: type: string description: Rollout identifier + CommandId: + name: commandId + in: path + required: true + schema: + type: string + description: Command identifier + WindowSecs: name: window_secs in: query @@ -859,6 +1059,235 @@ components: description: Time window in seconds for statistics schemas: + JsonValue: + description: Arbitrary JSON value, including null. 
+ nullable: true + anyOf: + - type: object + additionalProperties: true + - type: array + items: {} + - type: string + - type: number + - type: integer + - type: boolean + + AgentRegisterRequest: + type: object + required: + - node_id + - version + properties: + node_id: + type: string + version: + type: string + region: + type: string + nullable: true + pop: + type: string + nullable: true + labels: + type: object + additionalProperties: + type: string + capabilities: + type: array + items: + type: string + + AgentHeartbeatRequest: + type: object + required: + - node_id + - snapshot_version + - current_revision + - desired_revision + - converged + properties: + node_id: + type: string + snapshot_version: + type: integer + format: int64 + current_revision: + type: integer + format: int64 + desired_revision: + type: integer + format: int64 + converged: + type: boolean + + AgentPollResponse: + type: object + properties: + commands: + type: array + items: + $ref: '#/components/schemas/AgentCommand' + next_cursor: + type: string + nullable: true + + AgentCommand: + type: object + required: + - id + - type + - target_node_id + properties: + id: + type: string + type: + $ref: '#/components/schemas/AgentCommandType' + target_node_id: + type: string + revision: + type: integer + format: int64 + nullable: true + expires_at_unix_ms: + type: integer + format: int64 + nullable: true + payload: + $ref: '#/components/schemas/JsonValue' + signature: + type: string + nullable: true + + AgentCommandType: + type: string + enum: + - reload + - apply_config + - set_desired_revision + - cache_purge + - cache_invalidate + - collect_snapshot + + AgentCommandResult: + type: object + required: + - command_id + - node_id + - status + - started_at_unix_ms + - finished_at_unix_ms + properties: + command_id: + type: string + node_id: + type: string + status: + $ref: '#/components/schemas/AgentCommandStatus' + started_at_unix_ms: + type: integer + format: int64 + finished_at_unix_ms: + type: 
integer + format: int64 + result: + $ref: '#/components/schemas/JsonValue' + error: + type: string + nullable: true + + AgentCommandStatus: + type: string + enum: + - succeeded + - failed + + AgentEvent: + type: object + required: + - event_type + properties: + event_type: + type: string + payload: + $ref: '#/components/schemas/JsonValue' + + ControlCenterNode: + type: object + properties: + node_id: + type: string + version: + type: string + region: + type: string + nullable: true + pop: + type: string + nullable: true + labels: + type: object + additionalProperties: + type: string + health: + type: string + enum: [healthy, degraded, offline] + current_revision: + type: integer + format: int64 + desired_revision: + type: integer + format: int64 + converged: + type: boolean + last_heartbeat_at_unix_ms: + type: integer + format: int64 + + ControlCommandCreate: + type: object + required: + - type + properties: + type: + $ref: '#/components/schemas/AgentCommandType' + revision: + type: integer + format: int64 + nullable: true + expires_at_unix_ms: + type: integer + format: int64 + nullable: true + payload: + $ref: '#/components/schemas/JsonValue' + signature: + type: string + nullable: true + + ControlCommandRecord: + type: object + properties: + command: + $ref: '#/components/schemas/AgentCommand' + state: + type: string + enum: [queued, delivered, succeeded, failed, expired] + created_at_unix_ms: + type: integer + format: int64 + delivered_at_unix_ms: + type: integer + format: int64 + nullable: true + completed_at_unix_ms: + type: integer + format: int64 + nullable: true + result: + nullable: true + allOf: + - $ref: '#/components/schemas/AgentCommandResult' + NodeRegistration: type: object required: diff --git a/release-notes/v0.1.7.md b/release-notes/v0.1.7.md new file mode 100644 index 00000000..4d64f3f0 --- /dev/null +++ b/release-notes/v0.1.7.md @@ -0,0 +1,35 @@ +# v0.1.7 + +## 新增 + +- 新增出站 agent 控制面路径,节点主动向控制中心建立 HTTPS / WebSocket 通讯,不再需要额外的节点控制端口、控制域名或节点侧控制证书。 
+- 新增本地 `agent status`、`agent disable`、`agent enable` 运维命令,与 `status`、`snapshot`、`delta`、`wait`、`traffic`、`upstreams`、`cache`、`system` 等命令形成统一的节点操作面。 +- 新增控制中心与节点之间的结构化命令、结果与快照轨道,覆盖注册、心跳、轮询、结果回报以及日常运维动作。 + +## 更新与改进 + +- 继续收口 legacy `control_plane` 兼容路径,并把 README、OpenAPI、发布说明和计划文档同步到 outbound agent 模式。 +- 继续拆分大文件和测试模块,让 modularization gate 保持无 warning,也让后续版本维护和回归定位更轻。 +- release prep、packaging 和版本号文档同步到 `0.1.7`,发布前检查链路现在能直接对齐当前 workspace 版本。 + +## 问题修复 + +- 修复 agent、control plane、CLI、snapshot 和路由视图上的若干回归,让状态导出和本地控制语义保持一致。 +- 修复 release prep 所需的 changelog、release notes 和 workspace version 一致性问题。 + +## New + +- Added the outbound-agent control plane path so nodes connect to the control center directly and keep only business 80/443 ports open. +- Added local `agent status`, `agent disable`, and `agent enable` commands alongside the existing node operations. +- Added structured command, result, and snapshot flows between the control center and nodes for registration, heartbeat, polling, result reporting, and day-to-day operations. + +## Update & Improvement + +- Continued narrowing the legacy `control_plane` compatibility path and synced the README, OpenAPI, release notes, and planning docs to the outbound-agent model. +- Continued splitting large files and test modules so the modularization gate stays warning-free and future releases remain easier to maintain. +- Synced release-prep, packaging, and version-reference docs to `0.1.7`. + +## Bug Fixes + +- Fixed several agent, control-plane, CLI, snapshot, and routing-view regressions so state export and local control semantics stay aligned. +- Fixed the consistency between changelog, release notes, and workspace version required by release prep.