diff --git a/libs/accounts/errors/src/app-error.ts b/libs/accounts/errors/src/app-error.ts index 29d7a4cbdc1..822c50c22f9 100644 --- a/libs/accounts/errors/src/app-error.ts +++ b/libs/accounts/errors/src/app-error.ts @@ -9,7 +9,6 @@ import { TOO_LARGE, DEFAULT_ERRROR, IGNORED_ERROR_NUMBERS, - DEBUGGABLE_PAYLOAD_KEYS, } from './constants'; import { OauthError } from './oauth-error'; import type { Request as HapiRequest } from 'hapi'; @@ -1787,8 +1786,6 @@ export class AppError extends Error { method: request.method, path: request.path, query: request.query, - payload: scrubPii(request.payload), - headers: scrubHeaders(request.headers), }; } } @@ -1841,42 +1838,6 @@ export class AppError extends Error { } } -/** - * Tries to remove PII from payload data. - * @param payload - * @returns - */ -function scrubPii(payload: any) { - if (!payload) { - return; - } - - return Object.entries(payload).reduce((scrubbed: any, [key, value]) => { - if (DEBUGGABLE_PAYLOAD_KEYS.has(key)) { - scrubbed[key] = value; - } - - return scrubbed; - }, {}); -} - -/** - * Deletes feilds with senstive data from headers. - * @param headers - * @returns - */ -function scrubHeaders( - headers?: Record -): Record { - if (headers == null) { - return {}; - } - - const scrubbed = { ...headers }; - delete scrubbed['x-forwarded-for']; - return scrubbed; -} - /** * Prevents errors from being captured by Sentry. * diff --git a/libs/accounts/errors/src/constants.ts b/libs/accounts/errors/src/constants.ts index 6698305cc04..5bfbbb46aac 100644 --- a/libs/accounts/errors/src/constants.ts +++ b/libs/accounts/errors/src/constants.ts @@ -248,37 +248,3 @@ export const BAD_SIGNATURE_ERRORS = [ 'Missing required payload hash', 'Payload is invalid', ]; - -/** - * Payload properties that might help us debug unexpected errors - * when they show up in production. Obviously we don't want to - * accidentally send any sensitive data or PII to a 3rd-party, - * so the set is opt-in rather than opt-out. - */ -export const DEBUGGABLE_PAYLOAD_KEYS = new Set([ - 'availableCommands', - 'capabilities', - 'client_id', - 'code', - 'command', - 'duration', - 'excluded', - 'features', - 'messageId', - 'metricsContext', - 'name', - 'preVerified', - 'publicKey', - 'reason', - 'redirectTo', - 'reminder', - 'scope', - 'service', - 'target', - 'to', - 'TTL', - 'ttl', - 'type', - 'unblockCode', - 'verificationMethod', -]); diff --git a/libs/shared/otel/src/lib/config.ts b/libs/shared/otel/src/lib/config.ts index bf1769541fc..60531cb7449 100644 --- a/libs/shared/otel/src/lib/config.ts +++ b/libs/shared/otel/src/lib/config.ts @@ -25,6 +25,9 @@ export type TracingOpts = { url: string; concurrencyLimit: number; }; + sentry?: { + enabled: boolean; + }; }; /** Default convict config for node tracing */ diff --git a/libs/shared/otel/src/lib/node-tracing.ts b/libs/shared/otel/src/lib/node-tracing.ts index 1756a68c41f..bfbe4f7b696 100644 --- a/libs/shared/otel/src/lib/node-tracing.ts +++ b/libs/shared/otel/src/lib/node-tracing.ts @@ -11,6 +11,7 @@ import { checkServiceName, logType, } from './config'; +import { SentryContextManager } from '@sentry/node'; import { NodeTracerProvider, TraceIdRatioBasedSampler, @@ -109,10 +110,26 @@ export class NodeTracingInitializer { '@opentelemetry/instrumentation-aws-lambda': { enabled: false, }, + '@opentelemetry/instrumentation-http': { + // Important! If Sentry is enabled, we want to rely on Sentry's http instrumentation! + enabled: this.opts.sentry?.enabled ? false : true, + }, }), ], }); - this.provider.register(); + // Use SentryContextManager so that Sentry's per-request isolation scopes + // are properly forked for every api.context.with() call. Without this, + // NodeTracerProvider.register() would install a plain + // AsyncLocalStorageContextManager that doesn't understand Sentry's scope + // forking, and Sentry.init()'s own setupOtel() would overwrite our + // NodeTracerProvider (discarding the custom exporters). + if (this.opts.sentry?.enabled) { + this.provider.register({ + contextManager: new SentryContextManager(), + }); + } else { + this.provider.register(); + } } public startSpan(name: string, action: () => void) { diff --git a/libs/shared/sentry/src/lib/models/SentryConfigOpts.ts b/libs/shared/sentry/src/lib/models/SentryConfigOpts.ts index 6f60886b8aa..d9c57c3c98b 100644 --- a/libs/shared/sentry/src/lib/models/SentryConfigOpts.ts +++ b/libs/shared/sentry/src/lib/models/SentryConfigOpts.ts @@ -34,6 +34,9 @@ export type SentryConfigOpts = { * When using strings, partial matches will be filtered out. If you need to filter by exact match, use regex patterns instead */ ignoreErrors?: (string | RegExp)[]; + /** Let Sentry know that otel setup should be handled by the app. */ + skipOpenTelemetrySetup?: boolean; + /** When set to true, building a configuration will throw an error critical fields are missing. */ strict?: boolean; diff --git a/packages/fxa-auth-server/lib/log.js b/packages/fxa-auth-server/lib/log.js index e11818fb026..4dca3b65400 100644 --- a/packages/fxa-auth-server/lib/log.js +++ b/packages/fxa-auth-server/lib/log.js @@ -64,16 +64,30 @@ util.inherits(Lug, EventEmitter); Lug.prototype.close = function () {}; Lug.prototype.getTraceId = function (data) { - let result = {}; + const result = {}; - if (this.nodeTracer) { - try { - result = { traceId: this.nodeTracer.getTraceId() }; - } catch { - // don't let a tracing issue break logging - this.debug('log', { msg: 'could not get trace id' }); + // Try decorating otel traceId + try { + const otelTraceId = this.nodeTracer?.getTraceId(); + if (otelTraceId) { + result.otelTraceId = otelTraceId; } + } catch (err) { + // Don't let an otel hiccup break logging + result.otelTraceIdErr = err; } + + // Try decorating currenty sentry trace id + try { + const spanContext = Sentry.getActiveSpan()?.spanContext(); + if (spanContext) { + result.sentryTraceId = spanContext.traceId; + } + } catch (err) { + // Don't let a sentry hicup break logging + result.sentryTraceIdError = err; + } + return result; }; diff --git a/packages/fxa-auth-server/lib/monitoring.js b/packages/fxa-auth-server/lib/monitoring.js index d03dbb7be3b..31dce78ccb4 100644 --- a/packages/fxa-auth-server/lib/monitoring.js +++ b/packages/fxa-auth-server/lib/monitoring.js @@ -21,7 +21,11 @@ initMonitoring({ ...config.getProperties(), release: version, eventFilters: [filterSentryEvent], - integrations: [Sentry.linkedErrorsIntegration({ key: 'jse_cause' })], + integrations: [ + // Important! This fixes a ton of problems with our previous integration. + Sentry.hapiIntegration(), + Sentry.linkedErrorsIntegration({ key: 'jse_cause' }), + ], }, }); diff --git a/packages/fxa-auth-server/lib/routes/util.js b/packages/fxa-auth-server/lib/routes/util.js index 38054e67702..607fa913014 100644 --- a/packages/fxa-auth-server/lib/routes/util.js +++ b/packages/fxa-auth-server/lib/routes/util.js @@ -4,10 +4,12 @@ 'use strict'; +const Sentry = require('@sentry/node'); const isA = require('joi'); const random = require('../crypto/random'); const validators = require('./validators'); const UTIL_DOCS = require('../../docs/swagger/util-api').default; +const { AppError } = require('../../../../libs/accounts/errors/src'); const HEX_STRING = validators.HEX_STRING; @@ -67,5 +69,39 @@ module.exports = (log, config, redirectDomain) => { return h.redirect(config.contentServer.url + request.raw.req.url); }, }, + { + method: 'GET', + path: '/boom', + options: {}, + handler: async function (request, h) { + if (config.sentry.env === 'local') { + const traceId = Sentry.getActiveSpan().spanContext().traceId; + console.log(`${traceId}: crumb ${new Date().toString()}`); + await (async () => { + await new Promise((r) => { + setTimeout(() => { + console.log(`${traceId}: crumb ${new Date().toString()}`); + r(); + }, 100); + }); + + // Throw fabricated error + console.log(`${traceId}: crumb ${new Date().toString()}`); + throw new AppError( + { + code: 500, + error: 'BOOM', + errno: AppError.ERRNO.UNEXPECTED_ERROR, + message: 'Testing error capture', + }, + { + op: { foo: 'bar' }, + data: { foo: 'baz' }, + } + ); + })(); + } + }, + }, ]; }; diff --git a/packages/fxa-auth-server/lib/sentry.js b/packages/fxa-auth-server/lib/sentry.js index 87d444f321f..23a3975e255 100644 --- a/packages/fxa-auth-server/lib/sentry.js +++ b/packages/fxa-auth-server/lib/sentry.js @@ -4,10 +4,7 @@ 'use strict'; -const Hoek = require('@hapi/hoek'); const Sentry = require('@sentry/node'); -const verror = require('verror'); -const { ignoreErrors } = require('@fxa/accounts/errors'); const { formatMetadataValidationErrorMessage, @@ -15,144 +12,43 @@ const { } = require('fxa-shared/sentry/report-validation-error'); function reportSentryMessage(message, captureContext) { - Sentry.withScope((scope) => { - scope.setExtra('report', true); - - if (captureContext && typeof captureContext === 'object') { - Hoek.merge(scope, captureContext); - } - - Sentry.captureMessage(message, captureContext); - }); + Sentry.captureMessage(message, captureContext); } function reportSentryError(err, request) { - let exception = ''; - if (err && err.stack) { - try { - exception = err.stack.split('\n')[0]; - } catch (e) { - // ignore bad stack frames - } - } - - if (ignoreErrors(err)) { - return; - } - - Sentry.withScope((scope) => { - if (request) { - scope.addEventProcessor((sentryEvent) => { - // As of sentry v9, this should automatically happen by adding, Sentry.requestDataIntegration() - // Leaving note here for historical context. - // event.request = Sentry.extractRequestData(request); - sentryEvent.level = 'error'; - return sentryEvent; - }); - } - - // Important! Set a flag so that we know this is an error captured - // and reported by our code. Once we added tracing, we started seeing errors - // propagate in other ways. By setting a breakpoint or adding a console.trace - // in beforeSend, we can see that Sentry's internal libraries are picking up - // and reporting errors too. This causes problems because: - // - It means errors are double captured - // - It means that extra error info added below won't be there are errors - // where captured by this function. - // - It means the duplicate error might have a different shape, and fool - // our ignoreErrors() check. - // - // See the filterSentryEvent function to see how this flag is used. - // - scope.setExtra('report', true); - scope.setExtra('exception', exception); - // If additional data was added to the error, extract it. - if (err.output && typeof err.output.payload === 'object') { - const payload = err.output.payload; - if (typeof payload.data === 'object') { - scope.setContext('payload.data', payload.data); - delete payload.data; - } - scope.setContext('payload', payload); - } - const cause = verror.cause(err); - if (cause && cause.message) { - const causeContext = { - errorName: cause.name, - reason: cause.reason, - errorMessage: cause.message, - }; - - // Poolee EndpointError's have a few other things and oddly don't include - // a stack at all. We try and extract a bit more to reflect what actually - // happened as 'socket hang up' is somewhat inaccurate when the remote server - // throws a 500. - const output = cause.output; - if (output && output.payload) { - for (const key of ['error', 'message', 'statusCode']) { - causeContext[key] = output.payload[key]; - } - } - const attempt = cause.attempt; - if (attempt) { - causeContext.method = attempt.method; - causeContext.path = attempt.path; - } - scope.setContext('cause', causeContext); - } - - if (request) { - // Merge the request scope into the temp scope - Hoek.merge(scope, request.sentryScope); - } - Sentry.captureException(err); - }); + Sentry.captureException(err); } async function configureSentry(server, config, processName = 'key_server') { if (config.sentry.dsn) { - Sentry.getCurrentScope().setTag('process', processName); + Sentry.getGlobalScope().setTag('process', processName); if (!server) { return; } - - // Attach a new Sentry scope to the request for breadcrumbs/tags/extras server.ext({ - type: 'onRequest', + type: 'onPreHandler', method(request, h) { - request.sentryScope = new Sentry.Scope(); - - /** - // Make a transaction per request so we can get performance monitoring. There are - // some limitations to this approach, and distributed tracing will be off due to - // hapi's architecture. - // - // See https://github.com/getsentry/sentry-javascript/issues/2172 for more into. It - // looks like there might be some other solutions that are more complex, but would work - // with hapi and distributed tracing. - // - const transaction = Sentry.startInactiveSpan({ - op: 'auth-server', - name: `${request.method.toUpperCase()} ${request.path}`, - forceTransaction: true, - // As of sentry v9, this should automatically happen by adding, Sentry.requestDataIntegration() - // Leaving note here for historical context. - // request: Sentry.extractRequestData(request.raw.req), - }); - - request.app.sentry = { - transaction, - }; - //*/ - + // hapiIntegration() manages per-request isolation scopes via async context. + // Set tags/extras directly on the current scope — no withIsolationScope + // wrapper here, which would create a synchronous child scope that is + // discarded before the async handler runs, causing breadcrumbs to leak + // onto the global scope and accumulate across requests. + Sentry.setTag('route', request.route.path); + Sentry.setTag('method', request.method); + Sentry.setExtra('request_payload', request.payload || {}); + Sentry.setExtra('request_headers', request.headers || {}); + Sentry.setExtra('request_params', request.params || {}); return h.continue; }, }); server.events.on('request', (request, event, tags) => { - if (event?.error && tags?.handler && tags?.error) { - reportSentryError(event.error, request); + if (event?.error) { + Sentry.withScope((scope) => { + scope.setExtra('hapi_event', event); + Sentry.captureException(event.error); + }); } }); } diff --git a/packages/fxa-auth-server/test/local/log.js b/packages/fxa-auth-server/test/local/log.js index 1a8298a04c7..8ddb83f1568 100644 --- a/packages/fxa-auth-server/test/local/log.js +++ b/packages/fxa-auth-server/test/local/log.js @@ -46,6 +46,7 @@ describe('log', () => { sentryScope = { setContext: sinon.stub() }; cb(sentryScope); }), + getActiveSpan: sinon.stub().returns(undefined), }; sinon.stub(sentryModule, 'reportSentryMessage').returns({}); @@ -795,7 +796,7 @@ describe('log', () => { assert.calledOnceWithExactly(logger.info, 'op', { uid: 'bloop' }); }); - it('should set trace id', () => { + it('should set otel trace id', () => { log = proxyquire( '../../lib/log', mocks @@ -813,7 +814,7 @@ describe('log', () => { }); assert.calledOnceWithExactly(logger.info, 'op', { uid: 'bloop', - traceId: 'fake trace id', + otelTraceId: 'fake trace id', }); log.debug('op', { @@ -821,7 +822,7 @@ describe('log', () => { }); assert.calledOnceWithExactly(logger.debug, 'op', { uid: 'bloop', - traceId: 'fake trace id', + otelTraceId: 'fake trace id', }); log.error('op', { @@ -829,7 +830,67 @@ describe('log', () => { }); assert.calledOnceWithExactly(logger.error, 'op', { uid: 'bloop', - traceId: 'fake trace id', + otelTraceId: 'fake trace id', + }); + }); + + it('should set sentry trace id', () => { + mockSentry.getActiveSpan = sinon.stub().returns({ + spanContext: sinon.stub().returns({ traceId: 'fake-sentry-trace-id' }), + }); + log = proxyquire( + '../../lib/log', + mocks + )({ + level: 'debug', + name: 'test', + stdout: { on: sinon.spy() }, + }); + + log.info('op', { uid: 'bloop' }); + assert.calledOnceWithExactly(logger.info, 'op', { + uid: 'bloop', + sentryTraceId: 'fake-sentry-trace-id', + }); + }); + + it('should store otel error in result on failure', () => { + const otelErr = new Error('otel error'); + log = proxyquire( + '../../lib/log', + mocks + )({ + level: 'debug', + name: 'test', + stdout: { on: sinon.spy() }, + nodeTracer: { + getTraceId: sinon.stub().throws(otelErr), + }, + }); + + log.info('op', { uid: 'bloop' }); + assert.calledOnceWithExactly(logger.info, 'op', { + uid: 'bloop', + otelTraceIdErr: otelErr, + }); + }); + + it('should store sentry error in result on failure', () => { + const sentryErr = new Error('sentry error'); + mockSentry.getActiveSpan = sinon.stub().throws(sentryErr); + log = proxyquire( + '../../lib/log', + mocks + )({ + level: 'debug', + name: 'test', + stdout: { on: sinon.spy() }, + }); + + log.info('op', { uid: 'bloop' }); + assert.calledOnceWithExactly(logger.info, 'op', { + uid: 'bloop', + sentryTraceIdError: sentryErr, }); }); }); diff --git a/packages/fxa-shared/monitoring/index.ts b/packages/fxa-shared/monitoring/index.ts index 56e0c9d76d7..253a32bea08 100644 --- a/packages/fxa-shared/monitoring/index.ts +++ b/packages/fxa-shared/monitoring/index.ts @@ -14,24 +14,68 @@ export type MonitoringConfig = { let initialized = false; -// IMPORTANT! This initialization function must be called first thing when a server starts.If it's called after server -// frameworks initialized instrumentation might not work properly. +/** + * IMPORTANT! This initialization function must be called first thing when a server starts. If it's called after server + * frameworks initialized instrumentation might not work properly. + */ + /** * Initializes modules related to error monitoring, performance monitoring, and tracing. * @param opts - Initialization options. See underlying implementations for more details. */ export function initMonitoring(opts: MonitoringConfig) { - const { log, config } = opts; + const { log: logger, config } = opts; + const log = logger || console; + if (initialized) { - opts.log?.warn('monitoring', 'Monitoring can only be initialized once'); + log.warn('monitoring', 'Monitoring can only be initialized once'); return; } initialized = true; + /** + * IMPORTANT! + * + * Sentry also uses OTEL under the hood. Which means: + * - Mind the order of initialization. Otel should be first, if configured! + * - Mind the config.tracing.sentry.enabled flag + * - Mind the config.sentry.skipOpenTelemetrySetup flag + * + * If the order or flags aren't correct the following could happen: + * - Traces disappear + * - Sentry errors don't get recorded + * - Sentry context bleeds between requests (ie breadcrumbs, stack traces, etc. seem off) + */ + + let nodeTracingInitialized = false; if (config.tracing) { - initTracing(config.tracing, log || console); + // Important! Sentry also uses OTEL under the hood. Flip this flag so the two can co-exist! + // If you start seeing funny stack traces, or cross pollinating bread crumbs, something went + // sideways here. + if (config.sentry?.dsn) { + config.tracing.sentry = { enabled: true }; + } + + log.info('monitoring', { + msg: `Initialize Tracing with: ${JSON.stringify(config.tracing)}`, + }); + nodeTracingInitialized = !!initTracing(config.tracing, log); } + + // Important! Order matters here. If otel is configured, this shoudl be done after OTEL initialization! if (config && config.sentry) { - initSentry(config, log || console); + if (nodeTracingInitialized) { + config.sentry.skipOpenTelemetrySetup = true; + } + + log.info('monitoring', { + msg: `Initializing Sentry: ${JSON.stringify({ + env: config.sentry?.env, + clientName: config.sentry.clientName, + serverName: config.sentry.serverName, + hasDsn: !!config.sentry?.dsn, + })}`, + }); + initSentry(config, log); } } diff --git a/packages/fxa-shared/sentry/models/SentryConfigOpts.ts b/packages/fxa-shared/sentry/models/SentryConfigOpts.ts index 5534a38491e..ffcd99f6c62 100644 --- a/packages/fxa-shared/sentry/models/SentryConfigOpts.ts +++ b/packages/fxa-shared/sentry/models/SentryConfigOpts.ts @@ -23,6 +23,9 @@ export type SentryConfigOpts = { /** The name of the active server. */ serverName?: string; + /** Let Sentry know that otel setup should be handled by the app. */ + skipOpenTelemetrySetup?: boolean; + /** When set to true, building a configuration will throw an error critical fields are missing. */ strict?: boolean; diff --git a/packages/fxa-shared/tracing/config.ts b/packages/fxa-shared/tracing/config.ts index a57e5ac2e0c..ab3938b626f 100644 --- a/packages/fxa-shared/tracing/config.ts +++ b/packages/fxa-shared/tracing/config.ts @@ -103,14 +103,6 @@ export const tracingConfig = { format: Number, }, }, - sentry: { - enabled: { - default: true, - doc: 'Allows traces to reported to sentry. Note that sentry must be initialized separately and first!.', - env: 'TRACING_SENTRY_EXPORTER_ENABLED', - format: Boolean, - }, - }, }; export function checkServiceName(opts: Pick) { diff --git a/packages/fxa-shared/tracing/node-tracing.ts b/packages/fxa-shared/tracing/node-tracing.ts index 4bcc7b8ba89..2ddd2af56e8 100644 --- a/packages/fxa-shared/tracing/node-tracing.ts +++ b/packages/fxa-shared/tracing/node-tracing.ts @@ -7,6 +7,7 @@ import { suppressTracing } from '@opentelemetry/core'; import { ILogger } from '../log'; import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'; import { registerInstrumentations } from '@opentelemetry/instrumentation'; +import { SentryContextManager } from '@sentry/node'; import { BatchSpanProcessor, NodeTracerProvider, @@ -98,10 +99,29 @@ export class NodeTracingInitializer { '@opentelemetry/instrumentation-fs': { enabled: false, }, + '@opentelemetry/instrumentation-aws-lambda': { + enabled: false, + }, + '@opentelemetry/instrumentation-http': { + // Important! If Sentry is enabled, we want to rely on Sentry's http instrumentation! + enabled: this.opts.sentry?.enabled ? false : true, + }, }), ], }); - this.provider.register(); + // Use SentryContextManager so that Sentry's per-request isolation scopes + // are properly forked for every api.context.with() call. Without this, + // NodeTracerProvider.register() would install a plain + // AsyncLocalStorageContextManager that doesn't understand Sentry's scope + // forking, and Sentry.init()'s own setupOtel() would overwrite our + // NodeTracerProvider (discarding the custom exporters). + if (this.opts.sentry?.enabled) { + this.provider.register({ + contextManager: new SentryContextManager(), + }); + } else { + this.provider.register(); + } } public startSpan(name: string, action: () => void) { @@ -156,12 +176,7 @@ export function initTracing(opts: TracingOpts, logger: ILogger) { return nodeTracing; } - if ( - !opts.otel?.enabled && - !opts.gcp?.enabled && - !opts.console?.enabled && - !opts.sentry?.enabled - ) { + if (!opts.otel?.enabled && !opts.gcp?.enabled && !opts.console?.enabled) { logger.debug(log_type, { msg: 'Trace initialization skipped. No exporters configured. Enable gcp, otel or console to activate tracing.', });