diff --git a/src/Microsoft.DotNet.Helix/Client/CSharp/JobCreationResult.cs b/src/Microsoft.DotNet.Helix/Client/CSharp/JobCreationResult.cs
new file mode 100644
index 00000000000..efc0d725ed8
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/Client/CSharp/JobCreationResult.cs
@@ -0,0 +1,13 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Newtonsoft.Json;
+
+namespace Microsoft.DotNet.Helix.Client.Models
+{
+ public partial class JobCreationResult
+ {
+ [JsonProperty("queueStats")]
+ public QueueStats QueueStats { get; set; }
+ }
+}
diff --git a/src/Microsoft.DotNet.Helix/Client/CSharp/QueueStats.cs b/src/Microsoft.DotNet.Helix/Client/CSharp/QueueStats.cs
new file mode 100644
index 00000000000..1ffb8a5fced
--- /dev/null
+++ b/src/Microsoft.DotNet.Helix/Client/CSharp/QueueStats.cs
@@ -0,0 +1,29 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using Newtonsoft.Json;
+
+namespace Microsoft.DotNet.Helix.Client.Models
+{
+ public partial class QueueStats
+ {
+ [JsonProperty("queueName")]
+ public string QueueName { get; set; }
+
+ [JsonProperty("depth")]
+ public int? Depth { get; set; }
+
+ [JsonProperty("averageRunDuration")]
+ public TimeSpan? AverageRunDuration { get; set; }
+
+ [JsonProperty("estimatedWait")]
+ public TimeSpan? EstimatedWait { get; set; }
+
+ [JsonProperty("estimatedWaitMethod")]
+ public string EstimatedWaitMethod { get; set; }
+
+ [JsonProperty("generatedAt")]
+ public DateTimeOffset? GeneratedAt { get; set; }
+ }
+}
diff --git a/src/Microsoft.DotNet.Helix/JobSender/IJobDefinition.cs b/src/Microsoft.DotNet.Helix/JobSender/IJobDefinition.cs
index 2a99e318209..3f80365bd7d 100644
--- a/src/Microsoft.DotNet.Helix/JobSender/IJobDefinition.cs
+++ b/src/Microsoft.DotNet.Helix/JobSender/IJobDefinition.cs
@@ -175,6 +175,13 @@ public interface IJobDefinition
/// Fluent job builder.
IJobDefinition WithMaxRetryCount(int? maxRetryCount);
+ ///
+ /// Opts in to logging the preview Helix queue health summary (estimated wait, depth,
+ /// snapshot time) returned in the job creation response. Off by default.
+ ///
+ /// Fluent job builder.
+ IJobDefinition WithQueueStats();
+
///
/// Sends the fully specified job to execution.
///
diff --git a/src/Microsoft.DotNet.Helix/JobSender/JobDefinition.cs b/src/Microsoft.DotNet.Helix/JobSender/JobDefinition.cs
index 1c52f9a4811..b9ccd89ecdc 100644
--- a/src/Microsoft.DotNet.Helix/JobSender/JobDefinition.cs
+++ b/src/Microsoft.DotNet.Helix/JobSender/JobDefinition.cs
@@ -47,6 +47,7 @@ public JobDefinition(IJob jobApi)
public string ResultContainerPrefix { get; private set; }
public IDictionary CorrelationPayloads { get; } = new Dictionary();
public int? MaxRetryCount { get; private set; }
+ public bool ShowQueueStats { get; private set; }
public string StorageAccountConnectionString { get; private set; }
public string TargetContainerName { get; set; } = DefaultContainerName;
public string TargetResultsContainerName { get; set; } = DefaultContainerName;
@@ -241,9 +242,110 @@ public async Task SendAsync(Action log, CancellationToken canc
string jobStartIdentifier = Guid.NewGuid().ToString("N");
var newJob = await JobApi.NewAsync(creationRequest, jobStartIdentifier, cancellationToken: cancellationToken).ConfigureAwait(false);
+ if (ShowQueueStats)
+ {
+ LogQueueStats(log, queueId, newJob?.QueueStats);
+ }
+
return new SentJob(JobApi, newJob);
}
+ // Helix SLA threshold; estimated waits above this are flagged as queue-at-capacity / unhealthy.
+ private static readonly TimeSpan QueueWaitSlaThreshold = TimeSpan.FromMinutes(30);
+
+ // If the Observer snapshot is older than this, the reported numbers may not reflect current queue state.
+ private static readonly TimeSpan SnapshotStaleThreshold = TimeSpan.FromMinutes(15);
+
+ private const string FirstRespondersUrl = "https://teams.microsoft.com/l/channel/19%3Aafba3d1545dd45d7b79f34c1821f6055%40thread.skype/First%20Responders?groupId=4d73664c-9f2f-450d-82a5-c2f02756606d&tenantId=72f988bf-86f1-41af-91ab-2d7cd011db47";
+
+ private static int s_queueStatsHeaderShown;
+ private static int s_firstRespondersHintShown;
+
+ private static void LogQueueStats(Action log, string queueId, Models.QueueStats stats)
+ {
+ if (log == null || stats == null)
+ {
+ return;
+ }
+
+ string depth = stats.Depth?.ToString(CultureInfo.InvariantCulture) ?? "unknown";
+ string avgRun = FormatTimeSpan(stats.AverageRunDuration);
+ string estWait = FormatTimeSpan(stats.EstimatedWait);
+ string snapshot = FormatSnapshotTime(stats.GeneratedAt);
+
+ bool overSla = stats.EstimatedWait is TimeSpan wait && wait > QueueWaitSlaThreshold;
+ TimeSpan? snapshotAge = stats.GeneratedAt is DateTimeOffset gen
+ ? DateTimeOffset.UtcNow - gen
+ : (TimeSpan?)null;
+ bool stale = snapshotAge is TimeSpan age && age > SnapshotStaleThreshold;
+
+ string healthTag = overSla ? " [AT CAPACITY]" : string.Empty;
+ string staleTag = stale ? " (stale)" : string.Empty;
+
+ if (Interlocked.Exchange(ref s_queueStatsHeaderShown, 1) == 0)
+ {
+ log("note : Helix queue health reporting is a preview feature; data and format may change.");
+ }
+
+ string queueName = string.IsNullOrEmpty(stats.QueueName) ? queueId : stats.QueueName;
+
+ log($"Helix queue '{queueName}' health{healthTag}:");
+ log($" Estimated wait : {estWait} (queue depth: {depth}, avg run: {avgRun})");
+ log($" Snapshot taken : {snapshot}{staleTag}");
+
+ if (overSla)
+ {
+ log($"warning : Helix queue '{queueName}' estimated wait of {estWait} exceeds the {QueueWaitSlaThreshold.TotalMinutes:F0}-minute SLA - the queue is at capacity or unhealthy. Jobs may take longer than usual to start.");
+ }
+
+ if (stale)
+ {
+ log($"warning : Helix queue '{queueName}' health snapshot is {FormatTimeSpan(snapshotAge)} old (threshold {SnapshotStaleThreshold.TotalMinutes:F0}m) - reported wait/depth may not reflect current queue state.");
+ }
+
+ if (Interlocked.Exchange(ref s_firstRespondersHintShown, 1) == 0)
+ {
+ log($" Questions about Helix queue health? Reach the dnceng First Responders channel: {FirstRespondersUrl}");
+ }
+ }
+
+ private static string FormatTimeSpan(TimeSpan? value)
+ {
+ if (value is not TimeSpan ts)
+ {
+ return "unknown";
+ }
+
+ if (ts.TotalDays >= 1)
+ {
+ return $"{(int)ts.TotalDays}d {ts.Hours}h {ts.Minutes}m";
+ }
+ if (ts.TotalHours >= 1)
+ {
+ return $"{(int)ts.TotalHours}h {ts.Minutes}m";
+ }
+ if (ts.TotalMinutes >= 1)
+ {
+ return $"{(int)ts.TotalMinutes}m {ts.Seconds}s";
+ }
+ return $"{ts.Seconds}s";
+ }
+
+ private static string FormatSnapshotTime(DateTimeOffset? value)
+ {
+ if (value is not DateTimeOffset utc)
+ {
+ return "unknown";
+ }
+
+ DateTime local = utc.LocalDateTime;
+ string tz = TimeZoneInfo.Local.IsDaylightSavingTime(local)
+ ? TimeZoneInfo.Local.DaylightName
+ : TimeZoneInfo.Local.StandardName;
+
+ return $"{local.ToString("yyyy-MM-dd HH:mm", CultureInfo.InvariantCulture)} {tz}";
+ }
+
private void WarnForImpendingRemoval(Action log, QueueInfo queueInfo)
{
DateTime whenItExpires = DateTime.MaxValue;
@@ -354,6 +456,12 @@ public IJobDefinition WithMaxRetryCount(int? maxRetryCount)
return this;
}
+ public IJobDefinition WithQueueStats()
+ {
+ ShowQueueStats = true;
+ return this;
+ }
+
internal void AddWorkItem(WorkItemDefinition workItemDefinition)
{
_workItems.Add(workItemDefinition);
diff --git a/src/Microsoft.DotNet.Helix/Sdk/SendHelixJob.cs b/src/Microsoft.DotNet.Helix/Sdk/SendHelixJob.cs
index 58af4284325..2012872ad23 100644
--- a/src/Microsoft.DotNet.Helix/Sdk/SendHelixJob.cs
+++ b/src/Microsoft.DotNet.Helix/Sdk/SendHelixJob.cs
@@ -148,6 +148,12 @@ public static class MetadataNames
///
public int MaxRetryCount { get; set; }
+ ///
+ /// When true, the preview Helix queue health summary (estimated wait, depth, snapshot
+ /// time) is logged after job submission. Off by default.
+ ///
+ public bool EnableShowHelixQueueStats { get; set; }
+
private CommandPayload _commandPayload;
protected override async Task ExecuteCore(CancellationToken cancellationToken)
@@ -176,6 +182,10 @@ protected override async Task ExecuteCore(CancellationToken cancellationToken)
.WithType(Type)
.WithTargetQueue(TargetQueue)
.WithMaxRetryCount(MaxRetryCount);
+ if (EnableShowHelixQueueStats)
+ {
+ def = def.WithQueueStats();
+ }
Log.LogMessage($"Initialized job definition with type '{Type}', and target queue '{TargetQueue}'");
if (!string.IsNullOrEmpty(Creator))
diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MonoQueue.targets b/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MonoQueue.targets
index 62b05066a3a..4dc8eddabb2 100644
--- a/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MonoQueue.targets
+++ b/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.MonoQueue.targets
@@ -84,6 +84,7 @@
BaseUri="$(HelixBaseUri)"
AccessToken="$(HelixAccessToken)"
MaxRetryCount="$(MaxRetryCount)"
+ EnableShowHelixQueueStats="$(EnableShowHelixQueueStats)"
PreCommands="$(HelixPreCommands)"
PostCommands="$(HelixPostCommands)"
CorrelationPayloads="@(HelixCorrelationPayload)"
diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.props b/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.props
index c26e4f79ec3..f0cd20b9bbb 100644
--- a/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.props
+++ b/src/Microsoft.DotNet.Helix/Sdk/tools/Microsoft.DotNet.Helix.Sdk.props
@@ -30,6 +30,15 @@
false
+
+
+ false
+
+