From 41d77c18b1034bb1fb75b79f480daecef38260d6 Mon Sep 17 00:00:00 2001 From: John Estrada Date: Fri, 3 Apr 2026 11:57:19 -0500 Subject: [PATCH 1/3] Refactor HTTP client usage and enhance retry logic for metadata fetch --- .../Rest/TestFhirServer.cs | 133 +++++++++++------- 1 file changed, 81 insertions(+), 52 deletions(-) diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/TestFhirServer.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/TestFhirServer.cs index 8112b99059..75ded71105 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/TestFhirServer.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/TestFhirServer.cs @@ -200,98 +200,127 @@ public async Task ConfigureSecurityOptions(CancellationToken cancellationToken = { bool localSecurityEnabled = false; - var httpClient = new HttpClient(CreateMessageHandler()) + using var availabilityHttpClient = new HttpClient(CreateMessageHandler()) { BaseAddress = BaseAddress, Timeout = TimeSpan.FromSeconds(30), }; - // Retry policy for transient failures (500 errors, timeouts, etc.) during server startup - // Total timeout is 5 minutes to ensure we fail fast if the server is not coming up - var overallTimeout = TimeSpan.FromMinutes(5); - var overallStopwatch = System.Diagnostics.Stopwatch.StartNew(); + await WaitForAvailabilityAsync(availabilityHttpClient, cancellationToken); + + using var metadataHttpClient = new HttpClient(CreateMessageHandler()) + { + BaseAddress = BaseAddress, + Timeout = TimeSpan.FromSeconds(120), + }; + + string content = await GetMetadataAsync(metadataHttpClient, cancellationToken); + + CapabilityStatement metadata = new FhirJsonParser().Parse(content); + Metadata = metadata.ToResourceElement(); + +#if Stu3 || R4 || R4B + foreach (var rest in metadata.Rest.Where(r => r.Mode == RestfulCapabilityMode.Server)) +#else + foreach (var rest in metadata.Rest.Where(r => r.Mode == CapabilityStatement.RestfulCapabilityMode.Server)) +#endif + { + var oauth = rest.Security?.GetExtension(Core.Features.Security.Constants.SmartOAuthUriExtension); + if (oauth != null) + { + var tokenUrl = oauth.GetExtensionValue(Core.Features.Security.Constants.SmartOAuthUriExtensionToken).Value; + var authorizeUrl = oauth.GetExtensionValue(Core.Features.Security.Constants.SmartOAuthUriExtensionAuthorize).Value; + + localSecurityEnabled = true; + TokenUri = new Uri(tokenUrl); + AuthorizeUri = new Uri(authorizeUrl); + + break; + } + } + + SecurityEnabled = localSecurityEnabled; + } + + private async Task WaitForAvailabilityAsync(HttpClient httpClient, CancellationToken cancellationToken) + { + await SendGetWithRetryAsync( + httpClient, + new Uri(BaseAddress, "$versions"), + "Availability check", + TimeSpan.FromMinutes(5), + cancellationToken); + } + + private async Task GetMetadataAsync(HttpClient httpClient, CancellationToken cancellationToken) + { + return await SendGetWithRetryAsync( + httpClient, + new Uri(BaseAddress, "metadata"), + "Metadata fetch", + TimeSpan.FromMinutes(10), + cancellationToken); + } + + private static async Task SendGetWithRetryAsync(HttpClient httpClient, Uri requestUri, string operationName, TimeSpan overallTimeout, CancellationToken cancellationToken) + { + // Retry policy for transient failures during server startup with exponential backoff. + var overallStopwatch = Stopwatch.StartNew(); const int baseDelaySeconds = 5; const int maxDelaySeconds = 30; - HttpResponseMessage response = null; - string content = null; int attempt = 0; + HttpStatusCode? lastStatusCode = null; + string lastErrorMessage = null; while (overallStopwatch.Elapsed < overallTimeout) { attempt++; try { - using HttpRequestMessage requestMessage = new HttpRequestMessage(HttpMethod.Get, new Uri(BaseAddress, "metadata")); - response = await httpClient.SendAsync(requestMessage, cancellationToken); - content = await response.Content.ReadAsStringAsync(); + using HttpRequestMessage requestMessage = new HttpRequestMessage(HttpMethod.Get, requestUri); + using HttpResponseMessage response = await httpClient.SendAsync(requestMessage, cancellationToken); + string content = await response.Content.ReadAsStringAsync(); if (response.IsSuccessStatusCode) { - Console.WriteLine($"[ConfigureSecurityOptions] Metadata fetch successful on attempt {attempt} after {overallStopwatch.Elapsed.TotalSeconds:F1}s."); - break; + Console.WriteLine($"[ConfigureSecurityOptions] {operationName} successful on attempt {attempt} after {overallStopwatch.Elapsed.TotalSeconds:F1}s."); + return content; } - // Retry on 5xx errors (server not ready) or 401/503 (transient auth/availability issues) if within timeout + lastStatusCode = response.StatusCode; + lastErrorMessage = $"Last status: {response.StatusCode}"; + if (((int)response.StatusCode >= 500 || response.StatusCode == HttpStatusCode.Unauthorized || response.StatusCode == HttpStatusCode.ServiceUnavailable) && overallStopwatch.Elapsed < overallTimeout) { - int delaySeconds = Math.Min(baseDelaySeconds * (int)Math.Pow(2, Math.Min(attempt - 1, 3)), maxDelaySeconds); // Cap growth at attempt 4 - Console.WriteLine($"[ConfigureSecurityOptions] Metadata fetch returned {response.StatusCode} on attempt {attempt}. Elapsed: {overallStopwatch.Elapsed.TotalSeconds:F1}s. Retrying in {delaySeconds}s..."); + int delaySeconds = Math.Min(baseDelaySeconds * (int)Math.Pow(2, Math.Min(attempt - 1, 3)), maxDelaySeconds); + Console.WriteLine($"[ConfigureSecurityOptions] {operationName} returned {response.StatusCode} on attempt {attempt}. Elapsed: {overallStopwatch.Elapsed.TotalSeconds:F1}s. Retrying in {delaySeconds}s..."); await Task.Delay(TimeSpan.FromSeconds(delaySeconds), cancellationToken); - response.Dispose(); continue; } - // Non-retryable error or timeout exhausted response.EnsureSuccessStatusCode(); } catch (Exception ex) when (ex is TaskCanceledException || ex is HttpRequestException || ex is IOException) { + lastErrorMessage = $"Last error: {ex.Message}"; + if (overallStopwatch.Elapsed < overallTimeout) { int delaySeconds = Math.Min(baseDelaySeconds * (int)Math.Pow(2, Math.Min(attempt - 1, 3)), maxDelaySeconds); - Console.WriteLine($"[ConfigureSecurityOptions] Metadata fetch failed with {ex.GetType().Name} on attempt {attempt}. Elapsed: {overallStopwatch.Elapsed.TotalSeconds:F1}s. Retrying in {delaySeconds}s..."); + Console.WriteLine($"[ConfigureSecurityOptions] {operationName} failed with {ex.GetType().Name} on attempt {attempt}. Elapsed: {overallStopwatch.Elapsed.TotalSeconds:F1}s. Retrying in {delaySeconds}s..."); await Task.Delay(TimeSpan.FromSeconds(delaySeconds), cancellationToken); continue; } - throw new HttpRequestException($"ConfigureSecurityOptions failed after {attempt} attempts over {overallStopwatch.Elapsed.TotalSeconds:F1}s. Last error: {ex.Message}", ex); - } - } - - // If we exited the loop due to timeout without success - if (response == null || !response.IsSuccessStatusCode) - { - string errorMessage = response != null - ? $"ConfigureSecurityOptions failed after {attempt} attempts over {overallStopwatch.Elapsed.TotalSeconds:F1}s. Last status: {response.StatusCode}" - : $"ConfigureSecurityOptions failed after {attempt} attempts over {overallStopwatch.Elapsed.TotalSeconds:F1}s. No response received."; - throw new HttpRequestException(errorMessage); - } - - CapabilityStatement metadata = new FhirJsonParser().Parse(content); - Metadata = metadata.ToResourceElement(); - -#if Stu3 || R4 || R4B - foreach (var rest in metadata.Rest.Where(r => r.Mode == RestfulCapabilityMode.Server)) -#else - foreach (var rest in metadata.Rest.Where(r => r.Mode == CapabilityStatement.RestfulCapabilityMode.Server)) -#endif - { - var oauth = rest.Security?.GetExtension(Core.Features.Security.Constants.SmartOAuthUriExtension); - if (oauth != null) - { - var tokenUrl = oauth.GetExtensionValue(Core.Features.Security.Constants.SmartOAuthUriExtensionToken).Value; - var authorizeUrl = oauth.GetExtensionValue(Core.Features.Security.Constants.SmartOAuthUriExtensionAuthorize).Value; - - localSecurityEnabled = true; - TokenUri = new Uri(tokenUrl); - AuthorizeUri = new Uri(authorizeUrl); - - break; + throw new HttpRequestException($"{operationName} failed after {attempt} attempts over {overallStopwatch.Elapsed.TotalSeconds:F1}s. {lastErrorMessage}", ex); } } - SecurityEnabled = localSecurityEnabled; + string errorMessage = lastStatusCode.HasValue + ? $"{operationName} failed after {attempt} attempts over {overallStopwatch.Elapsed.TotalSeconds:F1}s. Last status: {lastStatusCode.Value}" + : $"{operationName} failed after {attempt} attempts over {overallStopwatch.Elapsed.TotalSeconds:F1}s. {lastErrorMessage ?? "No response received."}"; + throw new HttpRequestException(errorMessage); } public virtual ValueTask DisposeAsync() From a2d7323b9bc02410f50acd3e051e923eb13b3b98 Mon Sep 17 00:00:00 2001 From: John Estrada Date: Fri, 3 Apr 2026 12:18:53 -0500 Subject: [PATCH 2/3] Add warmup path and status properties to FHIR server configuration --- samples/templates/default-azuredeploy-docker.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/templates/default-azuredeploy-docker.json b/samples/templates/default-azuredeploy-docker.json index 4df2c03b7e..7f26df1b1a 100644 --- a/samples/templates/default-azuredeploy-docker.json +++ b/samples/templates/default-azuredeploy-docker.json @@ -422,7 +422,7 @@ "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName')))]", "[if(equals(parameters('solutionType'),'FhirServerCosmosDB'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('keyVaultName'), 'CosmosDb--Host'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('keyVaultName'), 'SqlServer--ConnectionString'))]" ], - "properties": "[union(variables('combinedFhirServerConfigProperties'), json(concat('{ \"FhirServer__ResourceManager__DataStoreResourceId\": \"', if(equals(parameters('solutionType'),'FhirServerCosmosDB'), resourceId('Microsoft.DocumentDb/databaseAccounts', variables('serviceName')), resourceId('Microsoft.Sql/servers/', variables('sqlServerDerivedName'))), '\", ', if(variables('deployAppInsights'), concat('\"Telemetry__Provider\": \"', parameters('telemetryProviderType'), '\",', '\"Telemetry__InstrumentationKey\": \"', reference(resourceId('Microsoft.Insights/components', variables('appInsightsName'))).InstrumentationKey, '\",', '\"Telemetry__ConnectionString\": \"', reference(resourceId('Microsoft.Insights/components', variables('appInsightsName'))).ConnectionString, '\"'), ''), '}')))]" + "properties": "[union(variables('combinedFhirServerConfigProperties'), json(concat('{ \"FhirServer__ResourceManager__DataStoreResourceId\": \"', if(equals(parameters('solutionType'),'FhirServerCosmosDB'), resourceId('Microsoft.DocumentDb/databaseAccounts', variables('serviceName')), resourceId('Microsoft.Sql/servers/', variables('sqlServerDerivedName'))), '\", ', if(variables('deployAppInsights'), concat('\"Telemetry__Provider\": \"', parameters('telemetryProviderType'), '\",', '\"Telemetry__InstrumentationKey\": \"', reference(resourceId('Microsoft.Insights/components', variables('appInsightsName'))).InstrumentationKey, '\",', '\"Telemetry__ConnectionString\": \"', reference(resourceId('Microsoft.Insights/components', variables('appInsightsName'))).ConnectionString, '\",'), ''), '\"WEBSITE_WARMUP_PATH\": \"/health/check\",', '\"WEBSITE_WARMUP_STATUSES\": \"200\",', '\"WEBSITES_CONTAINER_START_TIME_LIMIT\": \"600\"', '}')))]" }, { "apiVersion": "2018-11-01", From 100b30cd4c481d96be8e4d10d893f1d8d9216e45 Mon Sep 17 00:00:00 2001 From: John Estrada Date: Fri, 3 Apr 2026 13:57:30 -0500 Subject: [PATCH 3/3] Update to 180 seconds for metadata --- .../Rest/TestFhirServer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/TestFhirServer.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/TestFhirServer.cs index 75ded71105..56cc1db464 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/TestFhirServer.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/TestFhirServer.cs @@ -211,7 +211,7 @@ public async Task ConfigureSecurityOptions(CancellationToken cancellationToken = using var metadataHttpClient = new HttpClient(CreateMessageHandler()) { BaseAddress = BaseAddress, - Timeout = TimeSpan.FromSeconds(120), + Timeout = TimeSpan.FromSeconds(180), }; string content = await GetMetadataAsync(metadataHttpClient, cancellationToken);