From bc5a5b4626fd02aa5da5382beb3e779806e1355d Mon Sep 17 00:00:00 2001 From: Amin Vakil Date: Fri, 20 Mar 2026 22:28:25 +0330 Subject: [PATCH 1/8] feat(relay): Exit container on healthcheck failure --- relay/src/healthcheck.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/relay/src/healthcheck.rs b/relay/src/healthcheck.rs index f6f0e732407..b1face91b79 100644 --- a/relay/src/healthcheck.rs +++ b/relay/src/healthcheck.rs @@ -6,6 +6,9 @@ use clap::ArgMatches; use relay_config::Config; use reqwest::blocking::Client; +use nix::sys::signal::{self, Signal}; +use nix::unistd::Pid; + pub fn healthcheck(config: &Config, matches: &ArgMatches) -> Result<()> { let mode = matches .get_one::("mode") @@ -36,6 +39,7 @@ pub fn healthcheck(config: &Config, matches: &ArgMatches) -> Result<()> { Ok(()) } else { relay_log::error!("Relay is unhealthy. Status code: {}", response.status()); + signal::kill(Pid::from_raw(1), Signal::SIGTERM).ok(); Err(format_err!( "Relay is unhealthy. Status code: {}", response.status() @@ -44,6 +48,7 @@ pub fn healthcheck(config: &Config, matches: &ArgMatches) -> Result<()> { } Err(err) => { relay_log::error!("Relay is unhealthy. 
Error: {err}"); + signal::kill(Pid::from_raw(1), Signal::SIGTERM).ok(); Err(err.into()) } } From c72c966234f7bc74397d757247cd3ab5e61400f8 Mon Sep 17 00:00:00 2001 From: Amin Vakil Date: Fri, 20 Mar 2026 22:49:19 +0330 Subject: [PATCH 2/8] Add kill-on-fail arg to healthcheck --- relay/src/cliapp.rs | 6 ++++++ relay/src/healthcheck.rs | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/relay/src/cliapp.rs b/relay/src/cliapp.rs index 750356058d0..45591d6fc25 100644 --- a/relay/src/cliapp.rs +++ b/relay/src/cliapp.rs @@ -344,5 +344,11 @@ pub fn make_app() -> Command { .value_parser(clap::value_parser!(SocketAddr)) .required(false), ) + .arg( + Arg::new("kill-on-fail") + .long("kill-on-fail") + .action(ArgAction::SetTrue) + .help("Send SIGTERM to PID 1 if the healthcheck fails."), + ) ) } diff --git a/relay/src/healthcheck.rs b/relay/src/healthcheck.rs index b1face91b79..85e1e25d3dd 100644 --- a/relay/src/healthcheck.rs +++ b/relay/src/healthcheck.rs @@ -33,13 +33,17 @@ pub fn healthcheck(config: &Config, matches: &ArgMatches) -> Result<()> { .get(format!("http://{addr}/api/relay/healthcheck/{mode}/")) .send(); + let kill_on_fail = matches.get_flag("kill-on-fail"); + match response { Ok(response) => { if response.status().is_success() { Ok(()) } else { relay_log::error!("Relay is unhealthy. Status code: {}", response.status()); - signal::kill(Pid::from_raw(1), Signal::SIGTERM).ok(); + if kill_on_fail { + signal::kill(Pid::from_raw(1), Signal::SIGTERM).ok(); + } Err(format_err!( "Relay is unhealthy. Status code: {}", response.status() @@ -48,7 +52,9 @@ pub fn healthcheck(config: &Config, matches: &ArgMatches) -> Result<()> { } Err(err) => { relay_log::error!("Relay is unhealthy. 
Error: {err}"); - signal::kill(Pid::from_raw(1), Signal::SIGTERM).ok(); + if kill_on_fail { + signal::kill(Pid::from_raw(1), Signal::SIGTERM).ok(); + } Err(err.into()) } } From 9fa7de3c5da3e55563291dadb4b287d4d0ca89d4 Mon Sep 17 00:00:00 2001 From: Amin Vakil Date: Fri, 20 Mar 2026 22:57:48 +0330 Subject: [PATCH 3/8] Add required false to kill-on-fail argument --- relay/src/cliapp.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/relay/src/cliapp.rs b/relay/src/cliapp.rs index 45591d6fc25..ae3da602737 100644 --- a/relay/src/cliapp.rs +++ b/relay/src/cliapp.rs @@ -349,6 +349,7 @@ pub fn make_app() -> Command { .long("kill-on-fail") .action(ArgAction::SetTrue) .help("Send SIGTERM to PID 1 if the healthcheck fails."), + .required(false), ) ) } From 73d74dea1a6472a234bbbc4738f680bc3f055132 Mon Sep 17 00:00:00 2001 From: Amin Vakil Date: Fri, 20 Mar 2026 23:03:12 +0330 Subject: [PATCH 4/8] Typo --- relay/src/cliapp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay/src/cliapp.rs b/relay/src/cliapp.rs index ae3da602737..3ecc54fd52a 100644 --- a/relay/src/cliapp.rs +++ b/relay/src/cliapp.rs @@ -348,7 +348,7 @@ pub fn make_app() -> Command { Arg::new("kill-on-fail") .long("kill-on-fail") .action(ArgAction::SetTrue) - .help("Send SIGTERM to PID 1 if the healthcheck fails."), + .help("Send SIGTERM to PID 1 if the healthcheck fails.") .required(false), ) ) From 0a6921fe06194238eb2a0133b272297f379a38e0 Mon Sep 17 00:00:00 2001 From: Amin Vakil Date: Fri, 20 Mar 2026 23:09:54 +0330 Subject: [PATCH 5/8] Add nix to relay deps --- relay/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/relay/Cargo.toml b/relay/Cargo.toml index a4397e4b5c1..4b97a612142 100644 --- a/relay/Cargo.toml +++ b/relay/Cargo.toml @@ -32,4 +32,5 @@ relay-statsd = { workspace = true } relay-kafka = { workspace = true, optional = true } uuid = { workspace = true } reqwest = { workspace = true, features = ["gzip", "native-tls-vendored"] } +nix = { version = "0.31.2", 
features = ["signal"] } mimalloc = { workspace = true, features = ["v3", "override", "debug_in_debug"] } From d4146c6ba0991e1735dc56d3855f7198f1ef20aa Mon Sep 17 00:00:00 2001 From: Amin Vakil Date: Fri, 20 Mar 2026 23:21:44 +0330 Subject: [PATCH 6/8] Downgrade nix, add it to Cargo.lock This was necessary to keep the changes minimal --- Cargo.lock | 1 + relay/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index b40f40e8565..9581e38e94e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4168,6 +4168,7 @@ dependencies = [ "hostname 0.4.1", "http", "mimalloc", + "nix", "relay-config", "relay-kafka", "relay-log", diff --git a/relay/Cargo.toml b/relay/Cargo.toml index 4b97a612142..a5c493144e7 100644 --- a/relay/Cargo.toml +++ b/relay/Cargo.toml @@ -32,5 +32,5 @@ relay-statsd = { workspace = true } relay-kafka = { workspace = true, optional = true } uuid = { workspace = true } reqwest = { workspace = true, features = ["gzip", "native-tls-vendored"] } -nix = { version = "0.31.2", features = ["signal"] } +nix = { version = "0.29.0", features = ["signal"] } mimalloc = { workspace = true, features = ["v3", "override", "debug_in_debug"] } From 5c1e8c74869550b8aba9f7986d45f003fc09c3bd Mon Sep 17 00:00:00 2001 From: Amin Vakil Date: Tue, 24 Mar 2026 12:43:07 +0330 Subject: [PATCH 7/8] Add PR to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63339810f06..3e6afc9c3e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - Set `sentry.segment.id` and `sentry.segment.name` attributes on OTLP segment spans. ([#5748](https://github.com/getsentry/relay/pull/5748)) - Envelope buffer: Add option to disable flush-to-disk on shutdown. ([#5751](https://github.com/getsentry/relay/pull/5751)) +- Healthcheck: Provide a flag to exit container on healthcheck failure.
([#5754](https://github.com/getsentry/relay/pull/5754)) **Internal**: From 982bcb81d5fad81ac8d078793c727f1870774a1f Mon Sep 17 00:00:00 2001 From: Amin Vakil Date: Wed, 25 Mar 2026 20:25:41 +0330 Subject: [PATCH 8/8] Log prior to sending SIGTERM --- relay/src/healthcheck.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/relay/src/healthcheck.rs b/relay/src/healthcheck.rs index 85e1e25d3dd..e08809d692b 100644 --- a/relay/src/healthcheck.rs +++ b/relay/src/healthcheck.rs @@ -42,7 +42,10 @@ pub fn healthcheck(config: &Config, matches: &ArgMatches) -> Result<()> { } else { relay_log::error!("Relay is unhealthy. Status code: {}", response.status()); if kill_on_fail { - signal::kill(Pid::from_raw(1), Signal::SIGTERM).ok(); + relay_log::error!("Sending SIGTERM to PID 1 to exit container."); + if let Err(err) = signal::kill(Pid::from_raw(1), Signal::SIGTERM) { + relay_log::error!("Failed to send SIGTERM to PID 1: {err}"); + } } Err(format_err!( "Relay is unhealthy. Status code: {}", @@ -53,7 +56,10 @@ pub fn healthcheck(config: &Config, matches: &ArgMatches) -> Result<()> { Err(err) => { relay_log::error!("Relay is unhealthy. Error: {err}"); if kill_on_fail { - signal::kill(Pid::from_raw(1), Signal::SIGTERM).ok(); + relay_log::error!("Sending SIGTERM to PID 1 to exit container."); + if let Err(err) = signal::kill(Pid::from_raw(1), Signal::SIGTERM) { + relay_log::error!("Failed to send SIGTERM to PID 1: {err}"); + } } Err(err.into()) }