diff --git a/foundations/src/panic/hook.rs b/foundations/src/panic/hook.rs index 94726ea..7281e85 100644 --- a/foundations/src/panic/hook.rs +++ b/foundations/src/panic/hook.rs @@ -39,11 +39,12 @@ fn log_panic(panic_info: &PanicHookInfo<'_>) { // Use foundations logging if telemetry is initialized #[cfg(feature = "logging")] if crate::telemetry::is_initialized() { - crate::telemetry::log::error!( - "panic occurred"; - "payload" => payload, - "location" => ?location, - ); + let _ = crate::telemetry::log::internal::log_to_drain(&slog::record!( + slog::Level::Error, + "", // tag + &format_args!("panic occurred"), + slog::b!("payload" => payload, "location" => ?location), + )); return; } @@ -75,3 +76,57 @@ fn panic_payload_as_str<'a>(panic_info: &'a PanicHookInfo<'_>) -> &'a str { "" } } + +#[cfg(all(test, feature = "logging"))] +mod logging_tests { + use crate::service_info; + use crate::telemetry::TelemetryConfig; + use crate::telemetry::log::init::{LogHarness, build_log_with_drain, wrap_root_drain}; + use crate::telemetry::log::internal::LoggerWithKvNestingTracking; + use crate::telemetry::settings::LoggingSettings; + use slog::{Drain, OwnedKVList, Record}; + use std::sync::Arc; + + struct FailingDrain; + impl Drain for FailingDrain { + type Ok = (); + type Err = &'static str; + + fn log(&self, _record: &Record, _values: &OwnedKVList) -> Result { + Err("drain failed") + } + } + + #[tokio::test] + async fn hook_swallows_drain_error() { + let settings = LoggingSettings::default(); + let root_drain = wrap_root_drain(&settings, FailingDrain); + let root_log = LoggerWithKvNestingTracking::new(build_log_with_drain( + settings.verbosity, + slog::o!(), + Arc::clone(&root_drain), + )); + + LogHarness::override_for_testing(LogHarness { + root_log: Arc::new(parking_lot::RwLock::new(root_log)), + root_drain, + settings, + log_scope_stack: Default::default(), + }) + .unwrap(); + + crate::telemetry::init(TelemetryConfig { + service_info: &service_info!(), + settings: &Default::default(), + custom_server_routes: Default::default(), + }) + .expect("telemetry is already initialized"); + + super::install_hook(); + let _ = std::panic::catch_unwind(|| panic!("oh no! 😱")); + // If we just used `log::error!(...)` in the hook above, the `Fuse` from + // `build_log_with_drain` would convert the error from `FailingDrain` into + // a panic inside the panic hook. That's a double panic and aborts the process, + // causing the test to fail. + } +} diff --git a/foundations/src/telemetry/log/init.rs b/foundations/src/telemetry/log/init.rs index fee510b..2e13656 100644 --- a/foundations/src/telemetry/log/init.rs +++ b/foundations/src/telemetry/log/init.rs @@ -13,27 +13,28 @@ use crate::telemetry::settings::{LogFormat, LogOutput, LogVerbosity, LoggingSett use crate::{BootstrapResult, ServiceInfo}; use crossbeam_utils::CachePadded; use slog::{ - Discard, Drain, FnValue, Fuse, Logger, Never, OwnedKV, SendSyncRefUnwindSafeDrain, + Discard, Drain, FnValue, Fuse, Logger, OwnedKV, SendSyncRefUnwindSafeDrain, SendSyncRefUnwindSafeKV, }; use slog_async::{Async as AsyncDrain, AsyncGuard}; use slog_json::{Json as JsonDrain, Json}; use slog_term::{FullFormat as TextDrain, PlainDecorator, TermDecorator}; +use std::fmt::Debug; use std::fs::File; use std::io; use std::io::BufWriter; use std::sync::{Arc, LazyLock, OnceLock}; -type SharedDrain = Arc>; +type BoxedDebug = Box; +type SharedDrain = Arc>; // These singletons are accessed _very often_, and each access requires an atomic load to // ensure initialization. Make sure nobody else invalidates our cache lines. static HARNESS: CachePadded> = CachePadded::new(OnceLock::new()); static NOOP_HARNESS: CachePadded> = CachePadded::new(LazyLock::new(|| { - let root_drain = Arc::new(Discard) as Arc<_>; - let noop_log = - LoggerWithKvNestingTracking::new(Logger::root_typed(Arc::clone(&root_drain), slog::o!())); + let root_drain = Discard.shared(); + let noop_log = LoggerWithKvNestingTracking::new(Logger::root(Discard, slog::o!())); LogHarness { root_drain, @@ -126,7 +127,7 @@ pub(crate) fn init( .build_with_guard(), }; - let root_drain = wrap_root_drain(settings, async_drain.fuse()); + let root_drain = wrap_root_drain(settings, async_drain); let root_kv = slog::o!( "module" => FnValue(|record| { format!("{}:{}", record.module(), record.line()) @@ -196,7 +197,8 @@ fn stderr_writer_without_line_buffering() -> BufWriter { pub(crate) fn wrap_root_drain(settings: &LoggingSettings, drain: D) -> SharedDrain where - D: SendSyncRefUnwindSafeDrain + 'static, + D: SendSyncRefUnwindSafeDrain + 'static, + D::Err: Debug + 'static, { let drain = drain .field_filter(FieldDedupFilterFactory) @@ -218,7 +220,7 @@ pub(crate) fn build_log_with_drain( where K: SendSyncRefUnwindSafeKV + 'static, { - let drain = drain.filter_level(verbosity.into()).ignore_res(); + let drain = drain.filter_level(verbosity.into()).fuse(); Logger::root(drain, kv) } @@ -252,13 +254,15 @@ trait DrainExt: Drain + Sized { RateLimitingDrain::new(self, &settings.rate_limit) } - /// Converts the current drain into an `Arc` for sharing between + /// Converts the current drain into a [`SharedDrain`] for sharing between /// multiple loggers. - fn shared(self) -> Arc> + fn shared(self) -> SharedDrain where - Self: SendSyncRefUnwindSafeDrain + 'static, + Self: SendSyncRefUnwindSafeDrain + 'static, + Self::Err: Debug + 'static, { - Arc::new(self) + let boxed_err = self.map_err(|e| Box::new(e) as BoxedDebug); + Arc::new(boxed_err) } } diff --git a/foundations/src/telemetry/log/internal.rs b/foundations/src/telemetry/log/internal.rs index ecdb21a..5d7b72a 100644 --- a/foundations/src/telemetry/log/internal.rs +++ b/foundations/src/telemetry/log/internal.rs @@ -1,6 +1,7 @@ use super::init::LogHarness; use crate::telemetry::scope::Scope; -use slog::{Logger, OwnedKV, SendSyncRefUnwindSafeKV}; +use slog::{Logger, OwnedKV, Record, SendSyncRefUnwindSafeKV}; +use std::fmt::Debug; use std::ops::Deref; use std::sync::Arc; @@ -170,6 +171,18 @@ pub(crate) fn fork_log() -> SharedLog { Arc::new(parking_lot::RwLock::new(log)) } +/// Logs a [`Record`] directly to the root drain. Unlike our regular logger, this function +/// does not filter by level and does not panic on errors (it returns them instead.) +#[allow(dead_code, reason = "only used by some features")] +pub(crate) fn log_to_drain(record: &Record) -> Result<(), Box> { + let harness = LogHarness::get(); + let root_drain = &harness.root_drain; + let shared_logger = current_log(); + + let logger = shared_logger.read(); + root_drain.log(record, logger.list()) +} + /// Freezes the current logger. Any subsequent call to [`add_log_fields`] or `set_verbosity` /// will be rejected: by default a panic is raised; if the `panic_on_frozen_logger` feature is /// disabled, an error is logged with a backtrace instead. diff --git a/foundations/tests/panic_hook.rs b/foundations/tests/panic_hook.rs index 1401bf7..9f69d86 100644 --- a/foundations/tests/panic_hook.rs +++ b/foundations/tests/panic_hook.rs @@ -150,7 +150,10 @@ async fn error_log_is_emitted(ctx: TestTelemetryContext) { settings: &Default::default(), custom_server_routes: Default::default(), }) - .expect("telemetry is not already initialized"); + .expect("telemetry is already initialized"); + + let _scope = ctx.scope(); + foundations::telemetry::log::add_fields!("context_key" => "context_value"); simulate_panic(); assert_eq!(metrics::panics::total().get(), 1); @@ -167,6 +170,12 @@ async fn error_log_is_emitted(ctx: TestTelemetryContext) { .iter() .any(|(key, value)| key == "payload" && value == "oh no! 😱"); assert!(has_panic_payload); + + let has_context_field = panic_log + .fields + .iter() + .any(|(key, value)| key == "context_key" && value == "context_value"); + assert!(has_context_field); } #[tokio::test]