From 5048dabb7b1f02145eadfbe55bb27b5bc5e6eb14 Mon Sep 17 00:00:00 2001 From: Santiago Gimeno Date: Mon, 2 Mar 2026 15:46:21 +0100 Subject: [PATCH] agents,lib,src,test: add traceSampleRate support Add end-to-end traceSampleRate handling across config, runtime propagation, tracing decisions, and regression tests. Why: - Enable configurable probabilistic trace sampling with predictable behavior. - Ensure consistent semantics across all config entry points. - Prevent invalid updates from corrupting current sampling behavior. - Keep transaction consistency by deciding sampling at the root span only. What changed: - Added traceSampleRate parsing and normalization in JS config paths with explicit default fallback and finite/range validation in [0, 1]. - Added native config sanitization for traceSampleRate to reject invalid values before merge, preserving previous valid configuration. - Ensured runtime sampling state is synchronized from effective current config after updates to avoid stale shared-memory sample rates. - Added gRPC reconfigure support for traceSampleRate in proto and agent mapping, including generated protobuf updates. - Updated tracing logic so root spans perform the sampling decision and child spans inherit parent traceFlags. - Extended tests for: - invalid value handling (including NaN/Infinity) - env/package bootstrap behavior - partial updates preserving existing traceSampleRate - gRPC invalid-update fallback behavior - sampling behavior at 0%, 50% (tolerance), and 100% - worker-thread sampling behavior - explicit parent/child trace consistency assertions --- agents/grpc/proto/reconfigure.proto | 1 + agents/grpc/src/grpc_agent.cc | 4 + agents/grpc/src/proto/reconfigure.pb.cc | 83 +++++++----- agents/grpc/src/proto/reconfigure.pb.h | 56 +++++++-- lib/internal/otel/trace.js | 11 +- lib/nsolid.js | 35 ++++++ src/nsolid/nsolid_api.cc | 69 +++++++++- src/nsolid/nsolid_api.h | 10 ++ .../addons/nsolid-tracing/test-otel-basic2.js | 5 + test/agents/test-grpc-reconfigure.mjs | 41 +++++- .../nsolid-trace-sample-rate-package.json | 7 ++ ...lid-config-trace-sample-rate-env-script.js | 10 ++ ...est-nsolid-config-trace-sample-rate-env.js | 49 ++++++++ .../test-nsolid-config-trace-sample-rate.js | 52 ++++++++ .../test-nsolid-trace-sample-rate-sampling.js | 118 ++++++++++++++++++ 15 files changed, 508 insertions(+), 43 deletions(-) create mode 100644 test/fixtures/nsolid-trace-sample-rate-package.json create mode 100644 test/fixtures/test-nsolid-config-trace-sample-rate-env-script.js create mode 100644 test/parallel/test-nsolid-config-trace-sample-rate-env.js create mode 100644 test/parallel/test-nsolid-config-trace-sample-rate.js create mode 100644 test/parallel/test-nsolid-trace-sample-rate-sampling.js diff --git a/agents/grpc/proto/reconfigure.proto b/agents/grpc/proto/reconfigure.proto index 52b0eb2e02f..3a177762f27 100644 --- a/agents/grpc/proto/reconfigure.proto +++ b/agents/grpc/proto/reconfigure.proto @@ -18,6 +18,7 @@ message ReconfigureBody { optional uint32 tracingModulesBlacklist = 11; optional bool contCpuProfile = 12; optional bool assetsEnabled = 13; + optional double traceSampleRate = 14; } message ReconfigureEvent { diff --git a/agents/grpc/src/grpc_agent.cc b/agents/grpc/src/grpc_agent.cc index e6381c9988b..dd2efcd5246 100644 --- a/agents/grpc/src/grpc_agent.cc +++ b/agents/grpc/src/grpc_agent.cc @@ -1758,6 +1758,10 @@ void GrpcAgent::reconfigure(const grpcagent::CommandRequest& request) { out["assetsEnabled"] = body.assetsenabled(); } + if (body.has_tracesamplerate()) { + out["traceSampleRate"] = body.tracesamplerate(); + } + DebugJSON("Reconfigure out: \n%s\n", out); UpdateConfig(out.dump()); diff --git a/agents/grpc/src/proto/reconfigure.pb.cc b/agents/grpc/src/proto/reconfigure.pb.cc index d115cf06945..d5cecf0d71a 100644 --- a/agents/grpc/src/proto/reconfigure.pb.cc +++ b/agents/grpc/src/proto/reconfigure.pb.cc @@ -46,6 +46,7 @@ inline constexpr ReconfigureBody::Impl_::Impl_( redactsnapshots_{false}, tracingenabled_{false}, tracingmodulesblacklist_{0u}, + tracesamplerate_{0}, contcpuprofile_{false}, assetsenabled_{false} {} @@ -104,7 +105,7 @@ const ::uint32_t protodesc_cold) = { 0x081, // bitmap PROTOBUF_FIELD_OFFSET(::grpcagent::ReconfigureBody, _impl_._has_bits_), - 16, // hasbit index offset + 17, // hasbit index offset PROTOBUF_FIELD_OFFSET(::grpcagent::ReconfigureBody, _impl_.blockedloopthreshold_), PROTOBUF_FIELD_OFFSET(::grpcagent::ReconfigureBody, _impl_.interval_), PROTOBUF_FIELD_OFFSET(::grpcagent::ReconfigureBody, _impl_.pausemetrics_), @@ -118,6 +119,7 @@ const ::uint32_t PROTOBUF_FIELD_OFFSET(::grpcagent::ReconfigureBody, _impl_.tracingmodulesblacklist_), PROTOBUF_FIELD_OFFSET(::grpcagent::ReconfigureBody, _impl_.contcpuprofile_), PROTOBUF_FIELD_OFFSET(::grpcagent::ReconfigureBody, _impl_.assetsenabled_), + PROTOBUF_FIELD_OFFSET(::grpcagent::ReconfigureBody, _impl_.tracesamplerate_), 4, 5, 6, @@ -129,8 +131,9 @@ const ::uint32_t 0, 9, 10, - 11, 12, + 13, + 11, 0x081, // bitmap PROTOBUF_FIELD_OFFSET(::grpcagent::ReconfigureEvent, _impl_._has_bits_), 5, // hasbit index offset @@ -143,7 +146,7 @@ const ::uint32_t static const ::_pbi::MigrationSchema schemas[] ABSL_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = { {0, sizeof(::grpcagent::ReconfigureBody)}, - {29, sizeof(::grpcagent::ReconfigureEvent)}, + {31, sizeof(::grpcagent::ReconfigureEvent)}, }; static const ::_pb::Message* PROTOBUF_NONNULL const file_default_instances[] = { &::grpcagent::_ReconfigureBody_default_instance_._instance, @@ -152,7 +155,7 @@ static const ::_pb::Message* PROTOBUF_NONNULL const file_default_instances[] = { const char descriptor_table_protodef_reconfigure_2eproto[] ABSL_ATTRIBUTE_SECTION_VARIABLE( protodesc_cold) = { "\n\021reconfigure.proto\022\tgrpcagent\032\014common.p" - "roto\"\323\004\n\017ReconfigureBody\022!\n\024blockedLoopT" + "roto\"\205\005\n\017ReconfigureBody\022!\n\024blockedLoopT" "hreshold\030\001 \001(\004H\000\210\001\001\022\025\n\010interval\030\002 \001(\004H\001\210" "\001\001\022\031\n\014pauseMetrics\030\003 \001(\010H\002\210\001\001\022\034\n\017promise" "Tracking\030\004 \001(\010H\003\210\001\001\022\034\n\017redactSnapshots\030\005" @@ -161,15 +164,17 @@ const char descriptor_table_protodef_reconfigure_2eproto[] ABSL_ATTRIBUTE_SECTIO "\001\001\022\014\n\004tags\030\t \003(\t\022\033\n\016tracingEnabled\030\n \001(\010" "H\010\210\001\001\022$\n\027tracingModulesBlacklist\030\013 \001(\rH\t" "\210\001\001\022\033\n\016contCpuProfile\030\014 \001(\010H\n\210\001\001\022\032\n\rasse" - "tsEnabled\030\r \001(\010H\013\210\001\001B\027\n\025_blockedLoopThre" - "sholdB\013\n\t_intervalB\017\n\r_pauseMetricsB\022\n\020_" - "promiseTrackingB\022\n\020_redactSnapshotsB\t\n\007_" - "statsdB\017\n\r_statsdBucketB\r\n\013_statsdTagsB\021" - "\n\017_tracingEnabledB\032\n\030_tracingModulesBlac" - "klistB\021\n\017_contCpuProfileB\020\n\016_assetsEnabl" - "ed\"g\n\020ReconfigureEvent\022)\n\006common\030\001 \001(\0132\031" - ".grpcagent.CommonResponse\022(\n\004body\030\002 \001(\0132" - "\032.grpcagent.ReconfigureBodyb\006proto3" + "tsEnabled\030\r \001(\010H\013\210\001\001\022\034\n\017traceSampleRate\030" + "\016 \001(\001H\014\210\001\001B\027\n\025_blockedLoopThresholdB\013\n\t_" + "intervalB\017\n\r_pauseMetricsB\022\n\020_promiseTra" + "ckingB\022\n\020_redactSnapshotsB\t\n\007_statsdB\017\n\r" + "_statsdBucketB\r\n\013_statsdTagsB\021\n\017_tracing" + "EnabledB\032\n\030_tracingModulesBlacklistB\021\n\017_" + "contCpuProfileB\020\n\016_assetsEnabledB\022\n\020_tra" + "ceSampleRate\"g\n\020ReconfigureEvent\022)\n\006comm" + "on\030\001 \001(\0132\031.grpcagent.CommonResponse\022(\n\004b" + "ody\030\002 \001(\0132\032.grpcagent.ReconfigureBodyb\006p" + "roto3" }; static const ::_pbi::DescriptorTable* PROTOBUF_NONNULL const descriptor_table_reconfigure_2eproto_deps[1] = { @@ -179,7 +184,7 @@ static ::absl::once_flag descriptor_table_reconfigure_2eproto_once; PROTOBUF_CONSTINIT const ::_pbi::DescriptorTable descriptor_table_reconfigure_2eproto = { false, false, - 755, + 805, descriptor_table_protodef_reconfigure_2eproto, "reconfigure.proto", &descriptor_table_reconfigure_2eproto_once, @@ -336,16 +341,16 @@ ReconfigureBody::GetClassData() const { return ReconfigureBody_class_data_.base(); } PROTOBUF_CONSTINIT PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 -const ::_pbi::TcParseTable<4, 13, 0, 74, 2> +const ::_pbi::TcParseTable<4, 14, 0, 74, 2> ReconfigureBody::_table_ = { { PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_._has_bits_), 0, // no _extensions_ - 13, 120, // max_field_number, fast_idx_mask + 14, 120, // max_field_number, fast_idx_mask offsetof(decltype(_table_), field_lookup_table), - 4294959104, // skipmap + 4294950912, // skipmap offsetof(decltype(_table_), field_entries), - 13, // num_field_entries + 14, // num_field_entries 0, // num_aux_entries offsetof(decltype(_table_), field_names), // no aux_entries ReconfigureBody_class_data_.base(), @@ -401,14 +406,17 @@ ReconfigureBody::_table_ = { {88, 10, 0, PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.tracingmodulesblacklist_)}}, // optional bool contCpuProfile = 12; - {::_pbi::TcParser::SingularVarintNoZag1(), - {96, 11, 0, + {::_pbi::TcParser::SingularVarintNoZag1(), + {96, 12, 0, PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.contcpuprofile_)}}, // optional bool assetsEnabled = 13; - {::_pbi::TcParser::SingularVarintNoZag1(), - {104, 12, 0, + {::_pbi::TcParser::SingularVarintNoZag1(), + {104, 13, 0, PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.assetsenabled_)}}, - {::_pbi::TcParser::MiniParse, {}}, + // optional double traceSampleRate = 14; + {::_pbi::TcParser::FastF64S1, + {113, 11, 0, + PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.tracesamplerate_)}}, {::_pbi::TcParser::MiniParse, {}}, }}, {{ 65535, 65535 @@ -436,9 +444,11 @@ ReconfigureBody::_table_ = { // optional uint32 tracingModulesBlacklist = 11; {PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.tracingmodulesblacklist_), _Internal::kHasBitsOffset + 10, 0, (0 | ::_fl::kFcOptional | ::_fl::kUInt32)}, // optional bool contCpuProfile = 12; - {PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.contcpuprofile_), _Internal::kHasBitsOffset + 11, 0, (0 | ::_fl::kFcOptional | ::_fl::kBool)}, + {PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.contcpuprofile_), _Internal::kHasBitsOffset + 12, 0, (0 | ::_fl::kFcOptional | ::_fl::kBool)}, // optional bool assetsEnabled = 13; - {PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.assetsenabled_), _Internal::kHasBitsOffset + 12, 0, (0 | ::_fl::kFcOptional | ::_fl::kBool)}, + {PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.assetsenabled_), _Internal::kHasBitsOffset + 13, 0, (0 | ::_fl::kFcOptional | ::_fl::kBool)}, + // optional double traceSampleRate = 14; + {PROTOBUF_FIELD_OFFSET(ReconfigureBody, _impl_.tracesamplerate_), _Internal::kHasBitsOffset + 11, 0, (0 | ::_fl::kFcOptional | ::_fl::kDouble)}, }}, // no aux_entries {{ @@ -477,7 +487,7 @@ PROTOBUF_NOINLINE void ReconfigureBody::Clear() { reinterpret_cast(&_impl_.promisetracking_) - reinterpret_cast(&_impl_.blockedloopthreshold_)) + sizeof(_impl_.promisetracking_)); } - if (BatchCheckHasBit(cached_has_bits, 0x00001f00U)) { + if (BatchCheckHasBit(cached_has_bits, 0x00003f00U)) { ::memset(&_impl_.redactsnapshots_, 0, static_cast<::size_t>( reinterpret_cast(&_impl_.assetsenabled_) - reinterpret_cast(&_impl_.redactsnapshots_)) + sizeof(_impl_.assetsenabled_)); @@ -589,19 +599,26 @@ ::uint8_t* PROTOBUF_NONNULL ReconfigureBody::_InternalSerialize( } // optional bool contCpuProfile = 12; - if (CheckHasBit(cached_has_bits, 0x00000800U)) { + if (CheckHasBit(cached_has_bits, 0x00001000U)) { target = stream->EnsureSpace(target); target = ::_pbi::WireFormatLite::WriteBoolToArray( 12, this_._internal_contcpuprofile(), target); } // optional bool assetsEnabled = 13; - if (CheckHasBit(cached_has_bits, 0x00001000U)) { + if (CheckHasBit(cached_has_bits, 0x00002000U)) { target = stream->EnsureSpace(target); target = ::_pbi::WireFormatLite::WriteBoolToArray( 13, this_._internal_assetsenabled(), target); } + // optional double traceSampleRate = 14; + if (CheckHasBit(cached_has_bits, 0x00000800U)) { + target = stream->EnsureSpace(target); + target = ::_pbi::WireFormatLite::WriteDoubleToArray( + 14, this_._internal_tracesamplerate(), target); + } + if (ABSL_PREDICT_FALSE(this_._internal_metadata_.have_unknown_fields())) { target = ::_pbi::WireFormat::InternalSerializeUnknownFieldsToArray( @@ -627,7 +644,8 @@ ::size_t ReconfigureBody::ByteSizeLong() const { ::_pbi::Prefetch5LinesFrom7Lines(&this_); cached_has_bits = this_._impl_._has_bits_[0]; - total_size += ::absl::popcount(0x00001bc0U & cached_has_bits) * 2; + total_size += static_cast(0x00000800U & cached_has_bits) * 9; + total_size += ::absl::popcount(0x000033c0U & cached_has_bits) * 2; if (BatchCheckHasBit(cached_has_bits, 0x0000003fU)) { // repeated string tags = 9; if (CheckHasBitForRepeated(cached_has_bits, 0x00000001U)) { @@ -718,7 +736,7 @@ void ReconfigureBody::MergeImpl(::google::protobuf::MessageLite& to_msg, _this->_impl_.promisetracking_ = from._impl_.promisetracking_; } } - if (BatchCheckHasBit(cached_has_bits, 0x00001f00U)) { + if (BatchCheckHasBit(cached_has_bits, 0x00003f00U)) { if (CheckHasBit(cached_has_bits, 0x00000100U)) { _this->_impl_.redactsnapshots_ = from._impl_.redactsnapshots_; } @@ -729,9 +747,12 @@ void ReconfigureBody::MergeImpl(::google::protobuf::MessageLite& to_msg, _this->_impl_.tracingmodulesblacklist_ = from._impl_.tracingmodulesblacklist_; } if (CheckHasBit(cached_has_bits, 0x00000800U)) { - _this->_impl_.contcpuprofile_ = from._impl_.contcpuprofile_; + _this->_impl_.tracesamplerate_ = from._impl_.tracesamplerate_; } if (CheckHasBit(cached_has_bits, 0x00001000U)) { + _this->_impl_.contcpuprofile_ = from._impl_.contcpuprofile_; + } + if (CheckHasBit(cached_has_bits, 0x00002000U)) { _this->_impl_.assetsenabled_ = from._impl_.assetsenabled_; } } diff --git a/agents/grpc/src/proto/reconfigure.pb.h b/agents/grpc/src/proto/reconfigure.pb.h index 95294467cd6..6c56b02b596 100644 --- a/agents/grpc/src/proto/reconfigure.pb.h +++ b/agents/grpc/src/proto/reconfigure.pb.h @@ -228,6 +228,7 @@ class ReconfigureBody final : public ::google::protobuf::Message kRedactSnapshotsFieldNumber = 5, kTracingEnabledFieldNumber = 10, kTracingModulesBlacklistFieldNumber = 11, + kTraceSampleRateFieldNumber = 14, kContCpuProfileFieldNumber = 12, kAssetsEnabledFieldNumber = 13, }; @@ -377,6 +378,17 @@ class ReconfigureBody final : public ::google::protobuf::Message ::uint32_t _internal_tracingmodulesblacklist() const; void _internal_set_tracingmodulesblacklist(::uint32_t value); + public: + // optional double traceSampleRate = 14; + bool has_tracesamplerate() const; + void clear_tracesamplerate() ; + double tracesamplerate() const; + void set_tracesamplerate(double value); + + private: + double _internal_tracesamplerate() const; + void _internal_set_tracesamplerate(double value); + public: // optional bool contCpuProfile = 12; bool has_contcpuprofile() const; @@ -404,7 +416,7 @@ class ReconfigureBody final : public ::google::protobuf::Message private: class _Internal; friend class ::google::protobuf::internal::TcParser; - static const ::google::protobuf::internal::TcParseTable<4, 13, + static const ::google::protobuf::internal::TcParseTable<4, 14, 0, 74, 2> _table_; @@ -437,6 +449,7 @@ class ReconfigureBody final : public ::google::protobuf::Message bool redactsnapshots_; bool tracingenabled_; ::uint32_t tracingmodulesblacklist_; + double tracesamplerate_; bool contcpuprofile_; bool assetsenabled_; PROTOBUF_TSAN_DECLARE_MEMBER @@ -1159,14 +1172,14 @@ inline void ReconfigureBody::_internal_set_tracingmodulesblacklist(::uint32_t va // optional bool contCpuProfile = 12; inline bool ReconfigureBody::has_contcpuprofile() const { - bool value = CheckHasBit(_impl_._has_bits_[0], 0x00000800U); + bool value = CheckHasBit(_impl_._has_bits_[0], 0x00001000U); return value; } inline void ReconfigureBody::clear_contcpuprofile() { ::google::protobuf::internal::TSanWrite(&_impl_); _impl_.contcpuprofile_ = false; ClearHasBit(_impl_._has_bits_[0], - 0x00000800U); + 0x00001000U); } inline bool ReconfigureBody::contcpuprofile() const { // @@protoc_insertion_point(field_get:grpcagent.ReconfigureBody.contCpuProfile) @@ -1174,7 +1187,7 @@ inline bool ReconfigureBody::contcpuprofile() const { } inline void ReconfigureBody::set_contcpuprofile(bool value) { _internal_set_contcpuprofile(value); - SetHasBit(_impl_._has_bits_[0], 0x00000800U); + SetHasBit(_impl_._has_bits_[0], 0x00001000U); // @@protoc_insertion_point(field_set:grpcagent.ReconfigureBody.contCpuProfile) } inline bool ReconfigureBody::_internal_contcpuprofile() const { @@ -1188,14 +1201,14 @@ inline void ReconfigureBody::_internal_set_contcpuprofile(bool value) { // optional bool assetsEnabled = 13; inline bool ReconfigureBody::has_assetsenabled() const { - bool value = CheckHasBit(_impl_._has_bits_[0], 0x00001000U); + bool value = CheckHasBit(_impl_._has_bits_[0], 0x00002000U); return value; } inline void ReconfigureBody::clear_assetsenabled() { ::google::protobuf::internal::TSanWrite(&_impl_); _impl_.assetsenabled_ = false; ClearHasBit(_impl_._has_bits_[0], - 0x00001000U); + 0x00002000U); } inline bool ReconfigureBody::assetsenabled() const { // @@protoc_insertion_point(field_get:grpcagent.ReconfigureBody.assetsEnabled) @@ -1203,7 +1216,7 @@ inline bool ReconfigureBody::assetsenabled() const { } inline void ReconfigureBody::set_assetsenabled(bool value) { _internal_set_assetsenabled(value); - SetHasBit(_impl_._has_bits_[0], 0x00001000U); + SetHasBit(_impl_._has_bits_[0], 0x00002000U); // @@protoc_insertion_point(field_set:grpcagent.ReconfigureBody.assetsEnabled) } inline bool ReconfigureBody::_internal_assetsenabled() const { @@ -1215,6 +1228,35 @@ inline void ReconfigureBody::_internal_set_assetsenabled(bool value) { _impl_.assetsenabled_ = value; } +// optional double traceSampleRate = 14; +inline bool ReconfigureBody::has_tracesamplerate() const { + bool value = CheckHasBit(_impl_._has_bits_[0], 0x00000800U); + return value; +} +inline void ReconfigureBody::clear_tracesamplerate() { + ::google::protobuf::internal::TSanWrite(&_impl_); + _impl_.tracesamplerate_ = 0; + ClearHasBit(_impl_._has_bits_[0], + 0x00000800U); +} +inline double ReconfigureBody::tracesamplerate() const { + // @@protoc_insertion_point(field_get:grpcagent.ReconfigureBody.traceSampleRate) + return _internal_tracesamplerate(); +} +inline void ReconfigureBody::set_tracesamplerate(double value) { + _internal_set_tracesamplerate(value); + SetHasBit(_impl_._has_bits_[0], 0x00000800U); + // @@protoc_insertion_point(field_set:grpcagent.ReconfigureBody.traceSampleRate) +} +inline double ReconfigureBody::_internal_tracesamplerate() const { + ::google::protobuf::internal::TSanRead(&_impl_); + return _impl_.tracesamplerate_; +} +inline void ReconfigureBody::_internal_set_tracesamplerate(double value) { + ::google::protobuf::internal::TSanWrite(&_impl_); + _impl_.tracesamplerate_ = value; +} + // ------------------------------------------------------------------- // ReconfigureEvent diff --git a/lib/internal/otel/trace.js b/lib/internal/otel/trace.js index bf84938a0dc..353c1a7b9fb 100644 --- a/lib/internal/otel/trace.js +++ b/lib/internal/otel/trace.js @@ -2,6 +2,7 @@ const { JSONStringify, + MathRandom, SafeMap, Uint8Array, } = primordials; @@ -278,10 +279,14 @@ class Tracer { } const internalId = newInternalSpanId(); - // Follow parent, root always sampled. - let flags = api.TraceFlags.SAMPLED; - if (parentContext) + // Root spans make the sampling decision, child spans inherit parent flags. + let flags; + if (!parentContext) { + const sampled = MathRandom() < binding.trace_sample_rate[0]; + flags = sampled ? api.TraceFlags.SAMPLED : api.TraceFlags.NONE; + } else { flags = parentContext.traceFlags; + } const spanContext = new SpanContext(internalId, spanId, flags, diff --git a/lib/nsolid.js b/lib/nsolid.js index 08f3e50d292..dc634056f5c 100644 --- a/lib/nsolid.js +++ b/lib/nsolid.js @@ -6,6 +6,7 @@ const { DateNow, JSONParse, JSONStringify, + NumberIsFinite, NumberParseInt, ObjectAssign, ObjectDefineProperty, @@ -59,6 +60,7 @@ const DEFAULT_HOSTNAME = getHostname(); const DEFAULT_APPNAME = 'untitled application'; const DEFAULT_INTERVAL = 5000; const DEFAULT_BLOCKED_LOOP_THRESHOLD = 200; +const DEFAULT_TRACING_SAMPLING_RATE = 1.0; const DEFAULT_PUBKEY = '^kvy 1) { + return undefined; + } + + return normalized; +} + + function genPackageList() { let main_path; let last_path; diff --git a/src/nsolid/nsolid_api.cc b/src/nsolid/nsolid_api.cc index b879e506336..9d7202c142d 100644 --- a/src/nsolid/nsolid_api.cc +++ b/src/nsolid/nsolid_api.cc @@ -110,6 +110,7 @@ EnvInst::EnvInst(Environment* env) res_arr_(), gc_ring_(1000), trace_flags_(EnvList::Inst()->GetTracer()->traceFlags()), + trace_sample_rate_(EnvList::Inst()->trace_sample_rate()), has_metrics_stream_hooks_( EnvList::Inst()->metrics_stream_hook_list_.size() > 0) { int er; @@ -1140,14 +1141,40 @@ void EnvList::StoreInfo(const std::string& info) { } +void EnvList::validate_trace_sample_rate(nlohmann::json* config) { + auto incoming_it = config->find("traceSampleRate"); + if (incoming_it == config->end()) { + return; + } + + if (!incoming_it->is_number()) { + config->erase(incoming_it); + return; + } + + double candidate = *incoming_it; + if (!std::isfinite(candidate) || candidate < 0.0 || candidate > 1.0) { + config->erase(incoming_it); + } +} + + +void EnvList::validate_config(nlohmann::json* config) { + validate_trace_sample_rate(config); +} + + void EnvList::UpdateConfig(const nlohmann::json& config) { nlohmann::json curr; nlohmann::json old; + nlohmann::json validated_config = config; { ns_mutex::scoped_lock lock(configuration_lock_); old = current_config_; - current_config_.merge_patch(config); + + validate_config(&validated_config); + current_config_.merge_patch(validated_config); curr = current_config_; } @@ -1204,6 +1231,13 @@ void EnvList::UpdateConfig(const nlohmann::json& config) { if (it != config.end() && !it->is_null()) { grpc::GrpcAgent::Inst()->start(); } + + it = curr.find("traceSampleRate"); + DCHECK(it == curr.end() || it->is_number()); + if (it != curr.end() && it->is_number()) { + update_tracing_sample_rate(*it); + } + // If tags have changed, update info_ accordingly it = config.find("tags"); if (it != config.end()) { @@ -1391,6 +1425,26 @@ void EnvList::update_continuous_profiler(bool enabled, uint64_t interval) { } } +void EnvList::update_tracing_sample_rate(double rate) { + trace_sample_rate_.store(rate); + decltype(env_map_) env_map; + { + // Copy the envinst map so we don't need to keep it locked the entire time. + ns_mutex::scoped_lock lock(map_lock_); + env_map = env_map_; + } + + for (auto& entry : env_map) { + SharedEnvInst envinst = entry.second; + USE(RunCommand(envinst, + CommandType::InterruptOnly, + +[](SharedEnvInst envinst_sp, double rate) { + envinst_sp->trace_sample_rate_ = rate; + }, + rate)); + } +} + void EnvList::getAllEnvInst(std::function cb) { ns_mutex::scoped_lock lock(map_lock_); @@ -3122,6 +3176,19 @@ static void SetupArrayBufferExports(Isolate* isolate, target->Set(context, OneByteString(isolate, "trace_flags"), Uint32Array::New(trace_flags_ab, 0, 1)).Check(); + + double* tsr = envinst_sp->trace_sample_rate(); + std::unique_ptr trace_sample_rate_bs = + ArrayBuffer::NewBackingStore(tsr, + sizeof(double), + [](void*, size_t, void*){}, + nullptr); + + Local trace_sample_rate_ab = + ArrayBuffer::New(isolate, std::move(trace_sample_rate_bs)); + target->Set(context, + OneByteString(isolate, "trace_sample_rate"), + Float64Array::New(trace_sample_rate_ab, 0, 1)).Check(); } diff --git a/src/nsolid/nsolid_api.h b/src/nsolid/nsolid_api.h index 2d5b13a5873..307b180a0c7 100644 --- a/src/nsolid/nsolid_api.h +++ b/src/nsolid/nsolid_api.h @@ -272,6 +272,7 @@ class EnvInst { inline std::pair provider_times(); bool can_call_into_js() const; uint32_t* get_trace_flags() { return &trace_flags_; } + double* trace_sample_rate() { return &trace_sample_rate_; } std::atomic metrics_paused = { false }; @@ -412,6 +413,7 @@ class EnvInst { std::string thread_name_; uint32_t trace_flags_; + double trace_sample_rate_; bool has_metrics_stream_hooks_; nsuv::ns_mutex source_files_lock_; @@ -590,6 +592,9 @@ class EnvList { return is_alive_.load(std::memory_order_relaxed); } inline uint64_t main_thread_id() { return main_thread_id_; } + inline double trace_sample_rate() { + return trace_sample_rate_.load(std::memory_order_relaxed); + } tracing::TracerImpl* GetTracer() { return &tracer_; } void send_trace_data(tracing::SpanItem&& item); @@ -633,6 +638,10 @@ class EnvList { void fill_trace_id_q(); void update_continuous_profiler(bool enabled, uint64_t interval); + void validate_trace_sample_rate(nlohmann::json* config); + void validate_config(nlohmann::json* config); + + void update_tracing_sample_rate(double rate); #ifdef __POSIX__ static void signal_handler_(int signum, siginfo_t* info, void* ucontext); @@ -657,6 +666,7 @@ class EnvList { static void datapoint_cb_(std::queue&&); std::atomic is_alive_ = { true }; + std::atomic trace_sample_rate_ = { 1.0 }; // unique agent id const std::string agent_id_ = utils::generate_unique_id(); // The thread that EnvList is running on. diff --git a/test/addons/nsolid-tracing/test-otel-basic2.js b/test/addons/nsolid-tracing/test-otel-basic2.js index 5aff08d4d4d..6305f8abd3f 100644 --- a/test/addons/nsolid-tracing/test-otel-basic2.js +++ b/test/addons/nsolid-tracing/test-otel-basic2.js @@ -9,6 +9,7 @@ // const common = require('../../common'); +const assert = require('assert'); const { checkTracesOnExit } = require('../../common/nsolid-traces'); const { setupNSolid } = require('./utils'); const { fixturesDir } = require('../../common/fixtures'); @@ -62,6 +63,10 @@ setupNSolid(common.mustCall(() => { let activeContext = api.context.active(); activeContext = api.trace.setSpan(activeContext, span); const childSpan = tracer.startSpan('child', {}, activeContext); + assert.strictEqual(childSpan.spanContext().traceId, + span.spanContext().traceId); + assert.strictEqual(childSpan.spanContext().traceFlags, + span.spanContext().traceFlags); childSpan.setAttribute('child_key', 'child_value'); childSpan.end(); span.end(); diff --git a/test/agents/test-grpc-reconfigure.mjs b/test/agents/test-grpc-reconfigure.mjs index a5d74af9535..e6a4029a002 100644 --- a/test/agents/test-grpc-reconfigure.mjs +++ b/test/agents/test-grpc-reconfigure.mjs @@ -38,7 +38,7 @@ function checkReconfigureData(reconfigure, metadata, requestId, agentId, nsolidC if (!key.startsWith('_')) { // Convert string numbers to actual numbers for comparison if (typeof value === 'string' && !Number.isNaN(Number(value))) { - normalizedReconfigBody[key] = parseInt(value, 10); + normalizedReconfigBody[key] = Number(value); } else { normalizedReconfigBody[key] = value; } @@ -109,6 +109,7 @@ const newConfigs = [ [ 'tracingEnabled', true ], [ 'tracingModulesBlacklist', 1 ], [ 'contCpuProfile', true ], + [ 'traceSampleRate', 0.4 ], ]; tests.push({ @@ -167,6 +168,44 @@ tests.push({ }, }); +tests.push({ + name: 'should preserve previous traceSampleRate for invalid values', + test: async (getEnv) => { + return new Promise((resolve) => { + const grpcServer = new GRPCServer(); + grpcServer.start(mustSucceed(async (port) => { + const env = getEnv(port); + const opts = { + stdio: ['inherit', 'inherit', 'inherit', 'ipc'], + env, + }; + + const client = new TestClient([], opts); + const agentId = await client.id(); + + await grpcServer.reconfigure(agentId, { traceSampleRate: 0.4 }); + let nsolidConfig = await client.config(); + assert.strictEqual(nsolidConfig.traceSampleRate, 0.4); + + const invalidRates = [2, -0.5]; + for (const invalidRate of invalidRates) { + await grpcServer.reconfigure(agentId, { traceSampleRate: invalidRate }); + nsolidConfig = await client.config(); + assert.strictEqual( + nsolidConfig.traceSampleRate, + 0.4, + `Expected traceSampleRate to remain 0.4 after invalid update ${invalidRate}, got ${nsolidConfig.traceSampleRate}`, + ); + } + + await client.shutdown(0); + grpcServer.close(); + resolve(); + })); + }); + }, +}); + const testConfigs = [ { getEnv: (port) => { diff --git a/test/fixtures/nsolid-trace-sample-rate-package.json b/test/fixtures/nsolid-trace-sample-rate-package.json new file mode 100644 index 00000000000..d46f3b6dea5 --- /dev/null +++ b/test/fixtures/nsolid-trace-sample-rate-package.json @@ -0,0 +1,7 @@ +{ + "name": "nsolid-trace-sample-rate-fixture", + "version": "1.0.0", + "nsolid": { + "traceSampleRate": 0.7 + } +} diff --git a/test/fixtures/test-nsolid-config-trace-sample-rate-env-script.js b/test/fixtures/test-nsolid-config-trace-sample-rate-env-script.js new file mode 100644 index 00000000000..962bd88ffc2 --- /dev/null +++ b/test/fixtures/test-nsolid-config-trace-sample-rate-env-script.js @@ -0,0 +1,10 @@ +'use strict'; + +require('../common'); +const nsolid = require('nsolid'); + +nsolid.start(); + +console.log(JSON.stringify({ + traceSampleRate: nsolid.config.traceSampleRate, +})); diff --git a/test/parallel/test-nsolid-config-trace-sample-rate-env.js b/test/parallel/test-nsolid-config-trace-sample-rate-env.js new file mode 100644 index 00000000000..0c0d62b2d8d --- /dev/null +++ b/test/parallel/test-nsolid-config-trace-sample-rate-env.js @@ -0,0 +1,49 @@ +'use strict'; + +require('../common'); +const assert = require('assert'); +const path = require('path'); +const { spawnSync } = require('child_process'); + +const script = path.join(__dirname, + '../fixtures/test-nsolid-config-trace-sample-rate-env-script.js'); +const pkgJson = path.join(__dirname, + '../fixtures/nsolid-trace-sample-rate-package.json'); + +function runWithEnv(envVars) { + const result = spawnSync(process.execPath, [script], { + env: { + ...process.env, + ...envVars, + }, + encoding: 'utf8', + }); + + if (result.status !== 0) { + throw new Error(result.stderr || `Script failed with status ${result.status}`); + } + + return JSON.parse(result.stdout.trim()); +} + +{ + const config = runWithEnv({ + NSOLID_TRACE_SAMPLE_RATE: '0.25', + }); + assert.strictEqual(config.traceSampleRate, 0.25); +} + +{ + const config = runWithEnv({ + NSOLID_PACKAGE_JSON: pkgJson, + }); + assert.strictEqual(config.traceSampleRate, 0.7); +} + +{ + const config = runWithEnv({ + NSOLID_PACKAGE_JSON: pkgJson, + NSOLID_TRACE_SAMPLE_RATE: '0.35', + }); + assert.strictEqual(config.traceSampleRate, 0.35); +} diff --git a/test/parallel/test-nsolid-config-trace-sample-rate.js b/test/parallel/test-nsolid-config-trace-sample-rate.js new file mode 100644 index 00000000000..1c6e1305aa7 --- /dev/null +++ b/test/parallel/test-nsolid-config-trace-sample-rate.js @@ -0,0 +1,52 @@ +'use strict'; + +require('../common'); +const assert = require('assert'); +const nsolid = require('nsolid'); + +// Keep whatever the current initial value is when an invalid update arrives. +const initialRate = nsolid.config.traceSampleRate; +nsolid.start({ + command: 9001, + traceSampleRate: 'invalid' +}); +assert.strictEqual(nsolid.config.traceSampleRate, initialRate); + +// Valid values should be persisted as-is. +nsolid.start({ + traceSampleRate: 0.4 +}); +assert.strictEqual(nsolid.config.traceSampleRate, 0.4); + +// Invalid updates must preserve the previous valid value. +nsolid.start({ + traceSampleRate: 'still-invalid' +}); +assert.strictEqual(nsolid.config.traceSampleRate, 0.4); + +// Out-of-range updates must preserve the previous valid value. +nsolid.start({ + traceSampleRate: 2 +}); +assert.strictEqual(nsolid.config.traceSampleRate, 0.4); + +nsolid.start({ + traceSampleRate: -0.5 +}); +assert.strictEqual(nsolid.config.traceSampleRate, 0.4); + +nsolid.start({ + traceSampleRate: Number.NaN +}); +assert.strictEqual(nsolid.config.traceSampleRate, 0.4); + +nsolid.start({ + traceSampleRate: Number.POSITIVE_INFINITY +}); +assert.strictEqual(nsolid.config.traceSampleRate, 0.4); + +// Partial updates that omit traceSampleRate must not reset it. +nsolid.start({ + tracingEnabled: true +}); +assert.strictEqual(nsolid.config.traceSampleRate, 0.4); diff --git a/test/parallel/test-nsolid-trace-sample-rate-sampling.js b/test/parallel/test-nsolid-trace-sample-rate-sampling.js new file mode 100644 index 00000000000..d2eb123706f --- /dev/null +++ b/test/parallel/test-nsolid-trace-sample-rate-sampling.js @@ -0,0 +1,118 @@ +// Flags: --expose-internals --no-warnings +'use strict'; + +require('../common'); +const assert = require('assert'); +const { Worker, isMainThread, parentPort, workerData } = require('worker_threads'); +const { fixturesDir } = require('../common/fixtures'); +const { internalBinding } = require('internal/test/binding'); + +const binding = internalBinding('nsolid_api'); +const nsolid = require('nsolid'); +const api = require(require.resolve('@opentelemetry/api', + { paths: [fixturesDir] })); + +function registerTracingApi() { + binding.trace_flags[0] = binding.nsolid_consts.kSpanCustom; + + try { + nsolid.otel.register(api); + } catch { + // Already registered in this thread. + } +} + +function countSampledRoots(iterations) { + const tracer = api.trace.getTracer('trace-sample-rate'); + let sampled = 0; + + for (let i = 0; i < iterations; i++) { + const span = tracer.startSpan('root'); + sampled += (span.spanContext().traceFlags & api.TraceFlags.SAMPLED) ? 1 : 0; + span.end(); + } + + return sampled; +} + +async function waitForSampleRate(expectedRate, retries = 200) { + for (let i = 0; i < retries; i++) { + if (binding.trace_sample_rate[0] === expectedRate) { + return; + } + + await new Promise((resolve) => setImmediate(resolve)); + } + + throw new Error(`Timed out waiting for trace_sample_rate=${expectedRate}, current=${binding.trace_sample_rate[0]}`); +} + +function runWorker(iterations, expectedSampled) { + return new Promise((resolve, reject) => { + let settled = false; + const worker = new Worker(__filename, { + workerData: { iterations }, + }); + + worker.once('message', (sampled) => { + if (settled) return; + settled = true; + assert.strictEqual(sampled, + expectedSampled, + `Expected worker sampled=${expectedSampled}, got ${sampled}`); + resolve(); + }); + + worker.once('error', (err) => { + if (settled) return; + settled = true; + reject(err); + }); + + worker.once('exit', (code) => { + if (settled) return; + settled = true; + if (code === 0) { + reject(new Error('Worker exited without reporting sampling result')); + } else { + reject(new Error(`Worker exited with code ${code}`)); + } + }); + }); +} + +if (isMainThread) { + (async () => { + registerTracingApi(); + nsolid.enableTraces(); + assert.strictEqual(nsolid.config.tracingEnabled, true); + + nsolid.start({ traceSampleRate: 0.0 }); + await waitForSampleRate(0.0); + assert.strictEqual(countSampledRoots(200), 0); + + nsolid.start({ traceSampleRate: 1.0 }); + await waitForSampleRate(1.0); + assert.strictEqual(countSampledRoots(200), 200); + + nsolid.start({ traceSampleRate: 0.5 }); + await waitForSampleRate(0.5); + const sampled = countSampledRoots(1000); + assert.ok(sampled >= 400 && sampled <= 600, + `Expected sampled roots in [400, 600], got ${sampled}`); + + nsolid.start({ traceSampleRate: 0.0 }); + await waitForSampleRate(0.0); + await runWorker(200, 0); + + nsolid.start({ traceSampleRate: 1.0 }); + await waitForSampleRate(1.0); + await runWorker(200, 200); + })().catch((err) => { + throw err; + }); +} else { + registerTracingApi(); + const sampled = countSampledRoots(workerData.iterations); + parentPort.postMessage(sampled); +}