diff --git a/run-trigger-timer.sh b/run-trigger-timer.sh new file mode 100755 index 00000000..edac6337 --- /dev/null +++ b/run-trigger-timer.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +IMAGE=docker-registry.services.stellar-ops.com/dev/stellar-core:25.1.2-3047.7a0d9bcd2.jammy-do-not-use-in-prd-perftests + +PROJECT="/mnt/xvdf/supercluster/src/App/App.fsproj" + +# -- Drift distribution (uncomment one) -- +# No drift: +#DRIFT_ARGS="" +# Uniform drift in [-2000, +2000]ms: +#DRIFT_ARGS="--uniform-drift=-2000,+2000 --drift-pct 70" +# Bimodal drift: first half [-5000,-2000]ms, second half [+2000,+5000]ms: +DRIFT_ARGS="--bimodal-drift=-5000,-2000,+2000,+5000 --drift-pct 70" + +dotnet run --project $PROJECT clean --namespace=garand && dotnet run --project $PROJECT --configuration Release \ + -- mission TriggerTimerMixConsensus \ + --image=$IMAGE \ + --netdelay-image=docker-registry.services.stellar-ops.com/dev/sdf-netdelay:latest \ + --postgres-image=docker-registry.services.stellar-ops.com/dev/postgres:9.5.22 \ + --nginx-image=docker-registry.services.stellar-ops.com/dev/nginx:latest \ + --prometheus-exporter-image=docker-registry.services.stellar-ops.com/dev/stellar-core-prometheus-exporter:latest \ + --ingress-internal-domain=stellar-supercluster.kube001-ssc-eks.services.stellar-ops.com \ + --avoid-node-labels=purpose:ssc \ + --namespace=garand \ + --export-to-prometheus \ + --pubnet-data=/mnt/xvdf/supercluster/topologies/theoretical-max-tps.json \ + --trigger-timer-flag-pct 100 \ + $DRIFT_ARGS diff --git a/src/App/Program.fs b/src/App/Program.fs index e8fb0560..be2b5ff8 100644 --- a/src/App/Program.fs +++ b/src/App/Program.fs @@ -131,7 +131,12 @@ type MissionOptions benchmarkInfrastructure: bool, benchmarkOnly: bool, benchmarkDurationSeconds: int, - enableTcpTuning: bool + enableTcpTuning: bool, + triggerTimerFlagPct: int, + uniformDrift: seq, + bimodalDrift: seq, + driftPct: int, + disableTriggerTimer: bool ) = [] @@ -597,6 +602,36 @@ type MissionOptions Default = false)>] member self.EnableTcpTuning = enableTcpTuning + [] + member self.TriggerTimerFlagPct = triggerTimerFlagPct + + [] + member self.UniformDrift = uniformDrift + + [] + member self.BimodalDrift = bimodalDrift + + [] + member self.DriftPct = driftPct + + [] + member self.DisableTriggerTimer = disableTriggerTimer + let splitLabel (lab: string) : (string * string option) = match lab.Split ':' |> Array.toList with | [ x ] -> x, None @@ -740,7 +775,12 @@ let main argv = benchmarkInfrastructure = Some false benchmarkInfrastructureOnly = Some false benchmarkDurationSeconds = Some 30 - enableTcpTuning = false } + enableTcpTuning = false + triggerTimerFlagPct = 100 + uniformDrift = [] + bimodalDrift = [] + driftPct = 0 + enableTriggerTimer = true } let nCfg = MakeNetworkCfg ctx [] None use formation = kube.MakeEmptyFormation(nCfg) @@ -908,7 +948,12 @@ let main argv = benchmarkInfrastructure = Some mission.BenchmarkInfrastructure benchmarkInfrastructureOnly = Some mission.BenchmarkOnly benchmarkDurationSeconds = Some mission.BenchmarkDurationSeconds - enableTcpTuning = mission.EnableTcpTuning } + enableTcpTuning = mission.EnableTcpTuning + triggerTimerFlagPct = mission.TriggerTimerFlagPct + uniformDrift = List.ofSeq mission.UniformDrift + bimodalDrift = List.ofSeq mission.BimodalDrift + driftPct = mission.DriftPct + enableTriggerTimer = not mission.DisableTriggerTimer } allMissions.[m] missionContext diff --git a/src/FSLibrary.Tests/Tests.fs b/src/FSLibrary.Tests/Tests.fs index 02ed37bf..baf5eaa5 100644 --- a/src/FSLibrary.Tests/Tests.fs +++ b/src/FSLibrary.Tests/Tests.fs @@ -137,7 +137,12 @@ let ctx : MissionContext = benchmarkInfrastructure = None benchmarkInfrastructureOnly = None benchmarkDurationSeconds = None - enableTcpTuning = false } + enableTcpTuning = false + triggerTimerFlagPct = 100 + uniformDrift = [] + bimodalDrift = [] + driftPct = 0 + enableTriggerTimer = true } let netdata = __SOURCE_DIRECTORY__ + "/../../../data/public-network-data-2024-08-01.json" let pubkeys = __SOURCE_DIRECTORY__ + "/../../../data/tier1keys.json" diff --git a/src/FSLibrary/FSLibrary.fsproj b/src/FSLibrary/FSLibrary.fsproj index 0586d390..3f4849c9 100644 --- a/src/FSLibrary/FSLibrary.fsproj +++ b/src/FSLibrary/FSLibrary.fsproj @@ -77,6 +77,7 @@ + diff --git a/src/FSLibrary/MaxTPSTest.fs b/src/FSLibrary/MaxTPSTest.fs index 5cddbe5b..0abf08fd 100644 --- a/src/FSLibrary/MaxTPSTest.fs +++ b/src/FSLibrary/MaxTPSTest.fs @@ -124,7 +124,12 @@ let upgradeSorobanLedgerLimits peer.WaitForLedgerMaxTxCount multiplier -let maxTPSTest (context: MissionContext) (baseLoadGen: LoadGen) (setupCfg: LoadGen option) = +let maxTPSTest + (context: MissionContext) + (baseLoadGen: LoadGen) + (setupCfg: LoadGen option) + (enableTriggerTimer: bool) + = let allNodes = if context.pubnetData.IsSome then FullPubnetCoreSets context true false @@ -133,6 +138,14 @@ let maxTPSTest (context: MissionContext) (baseLoadGen: LoadGen) (setupCfg: LoadG context.image (if context.flatQuorum.IsSome then context.flatQuorum.Value else false) + let allNodes = + if enableTriggerTimer then + allNodes + |> List.map (fun (cs: CoreSet) -> + { cs with options = { cs.options with experimentalTriggerTimer = Some true } }) + else + allNodes + // PayPregenerated requires node restart between failed iterations to ensure validity of the pregenerated transactions // However, large-scale simulation restarts can be slow, so for now only use the new mode on small networks let baseLoadGen = diff --git a/src/FSLibrary/MissionMaxTPSClassic.fs b/src/FSLibrary/MissionMaxTPSClassic.fs index 198a61ef..05d6c625 100644 --- a/src/FSLibrary/MissionMaxTPSClassic.fs +++ b/src/FSLibrary/MissionMaxTPSClassic.fs @@ -27,4 +27,4 @@ let maxTPSClassic (context: MissionContext) = maxfeerate = None skiplowfeetxs = false } - maxTPSTest context baseLoadGen None + maxTPSTest context baseLoadGen None context.enableTriggerTimer diff --git a/src/FSLibrary/MissionMaxTPSMixed.fs b/src/FSLibrary/MissionMaxTPSMixed.fs index 068e7ff3..2e02184c 100644 --- a/src/FSLibrary/MissionMaxTPSMixed.fs +++ b/src/FSLibrary/MissionMaxTPSMixed.fs @@ -47,4 +47,4 @@ let maxTPSMixed (baseContext: MissionContext) = let invokeSetupCfg = { baseLoadGen with mode = SorobanInvokeSetup } - maxTPSTest context baseLoadGen (Some invokeSetupCfg) + maxTPSTest context baseLoadGen (Some invokeSetupCfg) false diff --git a/src/FSLibrary/MissionTriggerTimerMixConsensus.fs b/src/FSLibrary/MissionTriggerTimerMixConsensus.fs new file mode 100644 index 00000000..d907b7cc --- /dev/null +++ b/src/FSLibrary/MissionTriggerTimerMixConsensus.fs @@ -0,0 +1,178 @@ +// Copyright 2026 Stellar Development Foundation and contributors. Licensed +// under the Apache License, Version 2.0. See the COPYING file at the root +// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 + +// This mission tests the EXPERIMENTAL_TRIGGER_TIMER feature with a mix of +// nodes that have it enabled vs disabled, under configurable clock drift +// distributions. It uses generated pubnet topologies (--pubnet-data) and +// overlays trigger timer and clock offset settings onto the CoreSets. +// +// CLI parameters: +// --trigger-timer-flag-pct N percentage of nodes with the flag (0-100, default 100) +// --drift-pct N percentage of nodes that drift (0-100, default 0) +// --uniform-drift=lower,upper uniform random drift in [lower,upper] signed ms (e.g. -2000,+2000) +// --bimodal-drift=m1,M1,m2,M2 first half in [m1,M1], second half in [m2,M2] signed ms + +module MissionTriggerTimerMixConsensus + +open Logging +open StellarCoreHTTP +open StellarCorePeer +open StellarCoreSet +open StellarFormation +open StellarMissionContext +open StellarNetworkData +open StellarStatefulSets +open StellarSupercluster + +type ClockDriftDistribution = + | NoDrift + | UniformDrift of lower: int * upper: int + | BimodalDrift of min1: int * max1: int * min2: int * max2: int + +// Round ms to whole seconds, ceiling away from zero: 1500 -> 2, -800 -> -1 +let private ceilToSec (ms: int) = + if ms >= 0 then (ms + 999) / 1000 + else -((abs ms + 999) / 1000) + +// Drift suffix for a single offset: 0 -> "", 1500 -> "-p2", -800 -> "-m1" +let private driftSuffix (ms: int) = + let s = ceilToSec ms + if s > 0 then sprintf "-p%d" s + elif s < 0 then sprintf "-m%d" (abs s) + else "" + +// Build an annotated CoreSet name: append "-expr" if flag enabled, plus drift suffix +let private annotateName (baseName: string) (flagEnabled: bool) (offsetMs: int) = + let flagPart = if flagEnabled then "-expr" else "" + CoreSetName(baseName + flagPart + driftSuffix offsetMs) + +let private parseDrift (context: MissionContext) : ClockDriftDistribution = + match context.uniformDrift, context.bimodalDrift with + | [], [] -> NoDrift + | [ lower; upper ], [] -> + if upper < lower then + failwith (sprintf "uniform-drift requires lower <= upper, got %d,%d" lower upper) + UniformDrift(lower, upper) + | [], [ min1; max1; min2; max2 ] -> + if max1 < min1 then + failwith (sprintf "bimodal-drift first range requires min <= max, got %d,%d" min1 max1) + if max2 < min2 then + failwith (sprintf "bimodal-drift second range requires min <= max, got %d,%d" min2 max2) + BimodalDrift(min1, max1, min2, max2) + | _ :: _, _ :: _ -> failwith "Cannot specify both --uniform-drift and --bimodal-drift" + | u, [] -> failwith (sprintf "--uniform-drift requires exactly 2 values (lower,upper), got %d" u.Length) + | [], b -> failwith (sprintf "--bimodal-drift requires exactly 4 values (min1,max1,min2,max2), got %d" b.Length) + +let triggerTimerMixConsensus (baseContext: MissionContext) = + let drift = parseDrift baseContext + let flagPct = baseContext.triggerTimerFlagPct + let driftPct = baseContext.driftPct + + if flagPct < 0 || flagPct > 100 then + failwith (sprintf "trigger-timer-flag-pct must be 0-100, got %d" flagPct) + + if driftPct < 0 || driftPct > 100 then + failwith (sprintf "drift-pct must be 0-100, got %d" driftPct) + + let context = + { baseContext with + numAccounts = 40000 + numTxs = 90000 + txRate = 150 + coreResources = MediumTestResources + genesisTestAccountCount = Some 40000 + installNetworkDelay = Some(baseContext.installNetworkDelay |> Option.defaultValue true) + maxConnections = Some(baseContext.maxConnections |> Option.defaultValue 65) } + + let baseCoreSets = FullPubnetCoreSets context true false + + let totalNodes = + List.sumBy (fun (cs: CoreSet) -> cs.options.nodeCount) baseCoreSets + + match drift with + | NoDrift when driftPct > 0 -> + failwith "drift-pct > 0 but no drift distribution specified (use --uniform-drift or --bimodal-drift)" + | _ -> () + + LogInfo + "TriggerTimerMixConsensus: %d total nodes, flag-pct=%d%%, drift-pct=%d%%" + totalNodes + flagPct + driftPct + + // Each node independently has a flagPct% chance of having the trigger + // timer flag enabled, and a driftPct% chance of drifting. When drifting, + // bimodal nodes have a 50/50 chance of being in the first or second group. + let rng = System.Random(context.randomSeed) + + let sampleFlag () = rng.Next(100) < flagPct + + let sampleOffset () = + match drift with + | NoDrift -> 0 + | _ when rng.Next(100) >= driftPct -> 0 + | UniformDrift (lower, upper) -> rng.Next(lower, upper + 1) + | BimodalDrift (min1, max1, min2, max2) -> + if rng.Next(2) = 0 then rng.Next(min1, max1 + 1) + else rng.Next(min2, max2 + 1) + + // Walk through CoreSets, splitting each into single-node CoreSets so that + // each node gets its own name with flag/drift annotation. + let modifiedCoreSets = + baseCoreSets + |> List.collect (fun cs -> + let nc = cs.options.nodeCount + + [ for j in 0 .. nc - 1 do + let flagEnabled = sampleFlag () + let offset = sampleOffset () + + let baseName = + if nc > 1 then sprintf "%s-%d" cs.name.StringName j + else cs.name.StringName + + let annotatedName = annotateName baseName flagEnabled offset + + LogInfo + " Node %s: trigger_timer=%b, offset=%d" + annotatedName.StringName + flagEnabled + offset + + { cs with + name = annotatedName + keys = [| cs.keys.[j] |] + options = + { cs.options with + nodeCount = 1 + nodeLocs = + cs.options.nodeLocs + |> Option.map (fun locs -> [ locs.[j] ]) + experimentalTriggerTimer = if flagEnabled then Some true else None + clockOffsets = if offset <> 0 then Some [ offset ] else None } } ]) + + let tier1 = + List.filter (fun (cs: CoreSet) -> cs.options.tier1 = Some true) modifiedCoreSets + + let nonTier1 = + List.filter (fun (cs: CoreSet) -> cs.options.tier1 <> Some true) modifiedCoreSets + + context.Execute + modifiedCoreSets + None + (fun (formation: StellarFormation) -> + formation.WaitUntilConnected modifiedCoreSets + formation.ManualClose tier1 + formation.WaitUntilSynced modifiedCoreSets + + formation.UpgradeProtocolToLatest tier1 + formation.UpgradeMaxTxSetSize tier1 (context.txRate * 10) + + let loadPeer = + if nonTier1.Length > 0 then nonTier1.[0] else tier1.[0] + + formation.RunLoadgen loadPeer context.GeneratePaymentLoad + + formation.CheckNoErrorsAndPairwiseConsistency() + formation.EnsureAllNodesInSync modifiedCoreSets) diff --git a/src/FSLibrary/StellarCoreCfg.fs b/src/FSLibrary/StellarCoreCfg.fs index e8e0dd23..cdc4d6d8 100644 --- a/src/FSLibrary/StellarCoreCfg.fs +++ b/src/FSLibrary/StellarCoreCfg.fs @@ -166,6 +166,8 @@ type StellarCoreCfg = maxBatchWriteCount: int emitMeta: bool addArtificialDelayUsec: int option // optional delay for testing in microseconds + experimentalTriggerTimer: bool option + clockOffsetMs: int option surveyPhaseDuration: int option containerType: CoreContainerType skipHighCriticalValidatorChecks: bool } @@ -265,6 +267,14 @@ type StellarCoreCfg = | None -> maybeAddGlobalDelay () | Some sleep -> t.Add("ARTIFICIALLY_SLEEP_MAIN_THREAD_FOR_TESTING", sleep) |> ignore + match self.experimentalTriggerTimer with + | Some v -> t.Add("EXPERIMENTAL_TRIGGER_TIMER", v) |> ignore + | None -> () + + match self.clockOffsetMs with + | Some offset -> t.Add("ARTIFICIALLY_SET_SYSTEM_CLOCK_OFFSET_FOR_TESTING", int64 offset) |> ignore + | None -> () + match self.network.missionContext.flowControlSendMoreBatchSize with | None -> () | Some batchSize -> t.Add("FLOW_CONTROL_SEND_MORE_BATCH_SIZE", batchSize) |> ignore @@ -635,6 +645,8 @@ type NetworkCfg with maxBatchWriteCount = opts.maxBatchWriteCount emitMeta = opts.emitMeta addArtificialDelayUsec = opts.addArtificialDelayUsec + experimentalTriggerTimer = opts.experimentalTriggerTimer + clockOffsetMs = None surveyPhaseDuration = opts.surveyPhaseDuration containerType = MainCoreContainer skipHighCriticalValidatorChecks = opts.skipHighCriticalValidatorChecks } @@ -676,6 +688,14 @@ type NetworkCfg with maxBatchWriteCount = c.options.maxBatchWriteCount emitMeta = c.options.emitMeta addArtificialDelayUsec = c.options.addArtificialDelayUsec + experimentalTriggerTimer = c.options.experimentalTriggerTimer + clockOffsetMs = + match c.options.clockOffsets with + | Some offsets -> + if offsets.Length <> c.options.nodeCount then + failwith (sprintf "clockOffsets length %d does not match nodeCount %d" offsets.Length c.options.nodeCount) + Some offsets.[i] + | None -> None surveyPhaseDuration = c.options.surveyPhaseDuration containerType = ctype skipHighCriticalValidatorChecks = c.options.skipHighCriticalValidatorChecks } diff --git a/src/FSLibrary/StellarCoreSet.fs b/src/FSLibrary/StellarCoreSet.fs index 34a2d84f..e1c1c3ca 100644 --- a/src/FSLibrary/StellarCoreSet.fs +++ b/src/FSLibrary/StellarCoreSet.fs @@ -220,6 +220,8 @@ type CoreSetOptions = maxBatchWriteCount: int emitMeta: bool addArtificialDelayUsec: int option + experimentalTriggerTimer: bool option + clockOffsets: int list option surveyPhaseDuration: int option updateSorobanCosts: bool option // `skipHighCriticalValidatorChecks` exists to allow supercluster to @@ -261,6 +263,8 @@ type CoreSetOptions = maxBatchWriteCount = 1024 emitMeta = false addArtificialDelayUsec = None + experimentalTriggerTimer = None + clockOffsets = None surveyPhaseDuration = None updateSorobanCosts = None skipHighCriticalValidatorChecks = true } diff --git a/src/FSLibrary/StellarMission.fs b/src/FSLibrary/StellarMission.fs index 677eb2f8..83e65c53 100644 --- a/src/FSLibrary/StellarMission.fs +++ b/src/FSLibrary/StellarMission.fs @@ -45,6 +45,7 @@ open MissionMaxTPSMixed open MissionSimulatePubnetMixedLoad open MissionPubnetNetworkLimitsBench open MissionMixedNominationLeaderElection +open MissionTriggerTimerMixConsensus open MissionUpgradeSCPSettings open MissionUpgradeTxClusters open MissionValidatorSetup @@ -95,6 +96,7 @@ let allMissions : Map = ("PubnetNetworkLimitsBench", pubnetNetworkLimitsBench) ("MixedNominationLeaderElectionWithOldMajority", mixedNominationLeaderElectionWithOldMajority) ("MixedNominationLeaderElectionWithNewMajority", mixedNominationLeaderElectionWithNewMajority) + ("TriggerTimerMixConsensus", triggerTimerMixConsensus) ("UpgradeSCPSettings", upgradeSCPSettings) ("UpgradeTxClusters", upgradeTxClusters) ("ValidatorSetup", validatorSetup) |] diff --git a/src/FSLibrary/StellarMissionContext.fs b/src/FSLibrary/StellarMissionContext.fs index ef18ceba..36687244 100644 --- a/src/FSLibrary/StellarMissionContext.fs +++ b/src/FSLibrary/StellarMissionContext.fs @@ -132,4 +132,9 @@ type MissionContext = benchmarkInfrastructure: bool option benchmarkInfrastructureOnly: bool option benchmarkDurationSeconds: int option - enableTcpTuning: bool } + enableTcpTuning: bool + triggerTimerFlagPct: int + uniformDrift: int list + bimodalDrift: int list + driftPct: int + enableTriggerTimer: bool }