Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6206c86
Update smart well-known with issuer/jwks-uri (#5465)
feordin Mar 27, 2026
0b18d5a
Prevent job hosting to affect reindex test (#5470)
SergeyGaluzo Mar 30, 2026
ca60e86
Use job hosting to execute reindex in ReindexJobTests (#5476)
SergeyGaluzo Mar 31, 2026
2b9e554
Detect conflicts across input search params in a bundle (#5460)
SergeyGaluzo Mar 31, 2026
effb4ec
Distributed cache sync
SergeyGaluzo Apr 2, 2026
4431f3b
merge from main
SergeyGaluzo Apr 2, 2026
163ace3
+ line
SergeyGaluzo Apr 2, 2026
1a3bfd6
Fix test queue client
SergeyGaluzo Apr 2, 2026
d33b695
Remove max last updated stored proc
SergeyGaluzo Apr 2, 2026
dfc3e86
project
SergeyGaluzo Apr 2, 2026
d9cae79
Remove double cache updates
SergeyGaluzo Apr 2, 2026
a8c1746
orch job tests
SergeyGaluzo Apr 2, 2026
fd0e68d
removed 109
SergeyGaluzo Apr 3, 2026
5398992
Skip failing reindex test to get clean run
SergeyGaluzo Apr 3, 2026
d92cd97
comments
SergeyGaluzo Apr 3, 2026
e426406
Add logging
SergeyGaluzo Apr 3, 2026
a8436c2
tests
SergeyGaluzo Apr 3, 2026
1e193e5
combining conditions
SergeyGaluzo Apr 3, 2026
ce22701
More load
SergeyGaluzo Apr 3, 2026
27f4585
7 -> 9
SergeyGaluzo Apr 3, 2026
3f8a907
eliminate events var
SergeyGaluzo Apr 3, 2026
272f01c
Remove log event from dequeue
SergeyGaluzo Apr 3, 2026
c3375a1
reduce retries on search
SergeyGaluzo Apr 3, 2026
0505c24
last updated
SergeyGaluzo Apr 4, 2026
5688af9
10 ->25
SergeyGaluzo Apr 4, 2026
9144390
remove CR
SergeyGaluzo Apr 4, 2026
19ede1e
Drop reindex job table
SergeyGaluzo Apr 4, 2026
0551437
Revert "Drop reindex job table"
SergeyGaluzo Apr 4, 2026
cdba4b9
throw
SergeyGaluzo Apr 4, 2026
3699c49
Comment
SergeyGaluzo Apr 6, 2026
506dc37
two
SergeyGaluzo Apr 6, 2026
52a5e0b
new stored proc
SergeyGaluzo Apr 6, 2026
fdf40a0
fix FhirStorageTests
SergeyGaluzo Apr 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions build/jobs/provision-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ jobs:
$additionalProperties["SqlServer__AllowDatabaseCreation"] = "true"
# Cosmos DB autoscale is configured in the ARM template (10,000 RU max)
$additionalProperties["TaskHosting__PollingFrequencyInSeconds"] = 1
$additionalProperties["FhirServer__CoreFeatures__SearchParameterCacheRefreshIntervalSeconds"] = 2
$additionalProperties["FhirServer__CoreFeatures__SearchParameterCacheRefreshIntervalSeconds"] = 1
$additionalProperties["FhirServer__Operations__Reindex__CacheRefreshWaitMultiplier"] = 6
$additionalProperties["ASPNETCORE_FORWARDEDHEADERS_ENABLED"] = "true"

$webAppName = "${{ parameters.webAppName }}".ToLower()
Expand All @@ -90,7 +91,7 @@ jobs:
enableExport = $true
enableConvertData = $true
enableImport = $true
backgroundTaskCount = 2
backgroundTaskCount = 4
enableReindex = if ("${{ parameters.reindexEnabled }}" -eq "true") { $true } else { $false }
registryName = '$(azureContainerRegistry)'
imageTag = '${{ parameters.imageTag }}'
Expand Down
45 changes: 36 additions & 9 deletions src/Microsoft.Health.Fhir.Api/Resources.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion src/Microsoft.Health.Fhir.Api/Resources.resx
Original file line number Diff line number Diff line change
Expand Up @@ -459,4 +459,15 @@
<data name="ExpandInvalidParameterCount" xml:space="preserve">
<value>The number of the parameter must not be more than one: {0}</value>
</data>
</root>
<data name="DuplicateSearchParamCodesInBundle" xml:space="preserve">
<value>Input search parameters have duplicate codes [{0}]</value>
<comment>{0} is a comma-delimited list of duplicate codes</comment>
</data>
<data name="DuplicateSearchParamUrlsInBundle" xml:space="preserve">
<value>Input search parameters have duplicate Urls [{0}].</value>
<comment>{0} is a comma-delimited list of duplicate URLs</comment>
</data>
<data name="DuplicateSearchParamCodesAndUrlsInBundle" xml:space="preserve">
<value>Input search parameters have duplicate codes [{0}] and Urls [{1}].</value>
</data>
</root>
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,22 @@ public ReindexJobConfiguration()

/// <summary>
/// Controls the multiplier applied to the SearchParameterCacheRefreshIntervalSeconds
/// to determine time to wait for search param cache refresh
/// to determine time to wait for search param cache refresh. Relevant for Cosmos only.
/// </summary>
public int CacheRefreshWaitMultiplier { get; set; } = 3;

/// <summary>
/// Controls the multiplier applied to the SearchParameterCacheRefreshIntervalSeconds
/// to determine the maximum time (in seconds) the reindex orchestrator keeps polling for
/// search parameter cache consistency before it gives up and fails the job.
/// Relevant for SQL only.
/// </summary>
/// <remarks>
/// The default of 20 yields a maximum wait of 20 cache refresh intervals.
/// </remarks>
public int CacheUpdateMaxWaitMultiplier { get; set; } = 20;

/// <summary>
/// Controls the multiplier applied to the SearchParameterCacheRefreshIntervalSeconds
/// to determine how far back from the current UTC time (in seconds) the reindex orchestrator
/// looks when retrieving active host names for the cache consistency check.
/// Relevant for SQL only.
/// </summary>
/// <remarks>
/// The default of 9 yields a lookback window of 9 cache refresh intervals.
/// </remarks>
public int ActiveHostsEventsMultiplier { get; set; } = 9;

/// <summary>
/// Controls how many surrogate ID ranges are fetched per database call when calculating
/// job ranges. Uses batched calls to avoid timeout on large tables.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,20 @@ public async Task<GetSmartConfigurationResponse> Handle(GetSmartConfigurationReq

Uri authorizationEndpoint;
Uri tokenEndpoint;
string issuer;
string jwksUri;

if (_securityConfiguration.EnableAadSmartOnFhirProxy)
{
authorizationEndpoint = new Uri(request.BaseUri, "AadSmartOnFhirProxy/authorize");
tokenEndpoint = new Uri(request.BaseUri, "AadSmartOnFhirProxy/token");

// Still resolve issuer and jwks_uri from OIDC discovery
(_, _, issuer, jwksUri) = await _oidcDiscoveryService.ResolveEndpointsAsync(baseEndpoint, cancellationToken);
}
else
{
(authorizationEndpoint, tokenEndpoint) = await _oidcDiscoveryService.ResolveEndpointsAsync(baseEndpoint, cancellationToken);
(authorizationEndpoint, tokenEndpoint, issuer, jwksUri) = await _oidcDiscoveryService.ResolveEndpointsAsync(baseEndpoint, cancellationToken);
}

ICollection<string> capabilities = new List<string>(
Expand Down Expand Up @@ -110,6 +115,10 @@ public async Task<GetSmartConfigurationResponse> Handle(GetSmartConfigurationReq
"code",
};

string introspectionEndpoint = !string.IsNullOrEmpty(_smartIdentityProviderConfiguration.Introspection)
? _smartIdentityProviderConfiguration.Introspection
: new Uri(request.BaseUri, "connect/introspect").ToString();

return new GetSmartConfigurationResponse(
authorizationEndpoint,
tokenEndpoint,
Expand All @@ -119,9 +128,11 @@ public async Task<GetSmartConfigurationResponse> Handle(GetSmartConfigurationReq
grantTypesSupported,
tokenEndpointAuthMethodsSupported,
responseTypesSupported,
_smartIdentityProviderConfiguration.Introspection,
introspectionEndpoint,
_smartIdentityProviderConfiguration.Management,
_smartIdentityProviderConfiguration.Revocation);
_smartIdentityProviderConfiguration.Revocation,
issuer,
jwksUri);
}
catch (Exception e) when (e is ArgumentNullException || e is UriFormatException)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -573,22 +573,27 @@ private bool TryGetFromTypeLookup(string resourceType, string code, out SearchPa

/// <summary>
/// Instance convenience overload: forwards to the static
/// <c>GetDerivedResourceTypes(IModelInfoProvider, IReadOnlyCollection&lt;string&gt;)</c>
/// using the model info provider held by this instance.
/// </summary>
/// <param name="resourceTypes">The resource type names to expand.</param>
/// <returns>The collection produced by the static overload for <paramref name="resourceTypes"/>.</returns>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1859:Use concrete types when possible for improved performance", Justification = "Collection defined on model")]
private ICollection<string> GetDerivedResourceTypes(IReadOnlyCollection<string> resourceTypes)
    => GetDerivedResourceTypes(_modelInfoProvider, resourceTypes);

public static ICollection<string> GetDerivedResourceTypes(IModelInfoProvider modelInfoProvider, IReadOnlyCollection<string> resourceTypes)
{
var completeResourceList = new HashSet<string>(resourceTypes);

foreach (var baseResourceType in resourceTypes)
{
if (baseResourceType == KnownResourceTypes.Resource)
{
completeResourceList.UnionWith(_modelInfoProvider.GetResourceTypeNames().ToHashSet());
completeResourceList.UnionWith(modelInfoProvider.GetResourceTypeNames().ToHashSet());

// We added all possible resource types, so no need to continue
break;
}

if (baseResourceType == KnownResourceTypes.DomainResource)
{
var domainResourceChildResourceTypes = _modelInfoProvider.GetResourceTypeNames().ToHashSet();
var domainResourceChildResourceTypes = modelInfoProvider.GetResourceTypeNames().ToHashSet();

// Remove types that inherit from Resource directly
domainResourceChildResourceTypes.Remove(KnownResourceTypes.Binary);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;
using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;
using EnsureThat;
using Hl7.Fhir.Model;
using Hl7.Fhir.Utility;
using Microsoft.AspNetCore.JsonPatch.Internal;
using Microsoft.Data.SqlClient;
using Microsoft.Extensions.Logging;
Expand Down Expand Up @@ -120,7 +122,7 @@ public ReindexOrchestratorJob(
_searchParameterStatusManager = searchParameterStatusManager;
_searchParameterOperations = searchParameterOperations;
_operationsConfiguration = operationsConfiguration.Value;
_searchParameterCacheRefreshIntervalSeconds = Math.Max(1, coreFeatureConfiguration.Value.SearchParameterCacheRefreshIntervalSeconds);
_searchParameterCacheRefreshIntervalSeconds = coreFeatureConfiguration.Value.SearchParameterCacheRefreshIntervalSeconds;

// Determine support for surrogate ID ranging once
// This is to ensure Gen1 Reindex still works as expected but we still maintain perf on job insertion to SQL
Expand Down Expand Up @@ -191,45 +193,64 @@ public async Task<string> ExecuteAsync(JobInfo jobInfo, CancellationToken cancel

private async Task RefreshSearchParameterCache(bool isReindexStart)
{
// Wait for the background cache refresh service to complete N successful refresh cycles.
// This ensures all instances (including processing pods) have the latest search parameter definitions.
var suffix = isReindexStart ? "Start" : "End";
_logger.LogJobInformation(_jobInfo, $"Reindex orchestrator job started cache refresh at the {suffix}.");
await TryLogEvent($"ReindexOrchestratorJob={_jobInfo.Id}.ExecuteAsync.{suffix}", "Warn", "Started", null, _cancellationToken);

// First, wait for the local background refresh service to complete N cycles.
// This ensures _searchParamLastUpdated is up-to-date on THIS instance before
// we use it as the convergence target for cross-instance checks.
await _searchParameterOperations.WaitForRefreshCyclesAsync(_operationsConfiguration.Reindex.CacheRefreshWaitMultiplier, _cancellationToken);

if (_isSurrogateIdRangingSupported)
{
// SQL Server: After local refresh, verify ALL instances have converged to
// the same SearchParamLastUpdated via the EventLog table. This prevents the
// SQL Server: Wait for all instances to update their cache. This prevents the
// orchestrator from creating reindex ranges while other instances still have
// stale search parameter caches and would write resources with wrong hashes.
// Use the same lookback as active host detection so we do not miss qualifying
// refresh events that occurred shortly before this instance entered the wait.
var activeHostsSince = DateTime.UtcNow.AddSeconds(-20 * _searchParameterCacheRefreshIntervalSeconds);
var syncStartDate = activeHostsSince;
await _searchParameterOperations.WaitForAllInstancesCacheConsistencyAsync(syncStartDate, activeHostsSince, _cancellationToken);
var updateEventsSince = isReindexStart ? _jobInfo.StartDate.Value : DateTime.UtcNow;
var isConsistent = await WaitForAllInstancesCacheSyncAsync(updateEventsSince, _cancellationToken);
if (!isConsistent)
{
var msg = "Unable to sync search parameter cache. Please resubmit reindex. If issue persists please contact your administrator.";
_logger.LogJobError(_jobInfo, msg);
await TryLogEvent($"ReindexOrchestratorJob={_jobInfo.Id}.ExecuteAsync.{suffix}", "Error", msg, null, _cancellationToken);
throw new JobExecutionException(msg, false);
}
}
else
{
// Cosmos DB: There is no EventLog-based convergence tracking, so wait a fixed
// delay to allow all instances to refresh their search parameter caches from
// the shared Cosmos container.
// delay to allow all instances to refresh their search parameter caches.
var delayMs = _operationsConfiguration.Reindex.CacheRefreshWaitMultiplier * _searchParameterCacheRefreshIntervalSeconds * 1000;
_logger.LogJobInformation(_jobInfo, "Cosmos DB detected — waiting {DelayMs}ms for cache propagation across instances.", delayMs);
_logger.LogJobInformation(_jobInfo, $"Cosmos DB detected — waiting {delayMs}ms for cache propagation across instances.");
await Task.Delay(delayMs, _cancellationToken);
}

// Update the reindex job record with the latest hash map
var currentDate = _searchParameterOperations.SearchParamLastUpdated.HasValue ? _searchParameterOperations.SearchParamLastUpdated.Value : DateTimeOffset.MinValue;
_searchParamLastUpdated = currentDate;

_logger.LogJobInformation(_jobInfo, $"Reindex orchestrator job completed cache refresh at the {suffix}: SearchParamLastUpdated {_searchParamLastUpdated}");
await TryLogEvent($"ReindexOrchestratorJob={_jobInfo.Id}.ExecuteAsync.{suffix}", "Warn", $"SearchParamLastUpdated={_searchParamLastUpdated.ToString("yyyy-MM-dd HH:mm:ss.fff")}", null, _cancellationToken);

// Polls CheckCacheConsistencyAsync until it reports IsConsistent or until the maximum wait
// (CacheUpdateMaxWaitMultiplier * cache refresh interval, in seconds) has elapsed.
//   updateEventsSince - lower bound passed through to the consistency check for cache update events.
//   cancellationToken - flows into the consistency check and the delay between polls.
// Returns true when a check reported consistency; false when the wait budget ran out first.
async Task<bool> WaitForAllInstancesCacheSyncAsync(DateTime updateEventsSince, CancellationToken cancellationToken)
{
    var elapsed = Stopwatch.StartNew();
    var maxWaitTime = TimeSpan.FromSeconds(_operationsConfiguration.Reindex.CacheUpdateMaxWaitMultiplier * _searchParameterCacheRefreshIntervalSeconds);
    var waitInterval = TimeSpan.FromSeconds(_searchParameterCacheRefreshIntervalSeconds);

    // Computed once up front, so every poll uses the same active-hosts lookback boundary.
    var activeHostsSince = DateTime.UtcNow.AddSeconds((-1) * _operationsConfiguration.Reindex.ActiveHostsEventsMultiplier * _searchParameterCacheRefreshIntervalSeconds);

    while (true)
    {
        // Check BEFORE testing the time budget. The refresh interval is taken from configuration
        // as-is, so a value of 0 (or less) produces a non-positive maxWaitTime; a pre-tested loop
        // would then never run and the job would fail without a single consistency check.
        // This ordering also guarantees one final check after the last delay.
        var result = await _searchParameterStatusManager.CheckCacheConsistencyAsync(updateEventsSince, activeHostsSince, cancellationToken);

        if (result.IsConsistent)
        {
            var logDate = _searchParameterOperations.SearchParamLastUpdated.HasValue ? _searchParameterOperations.SearchParamLastUpdated.Value : DateTimeOffset.MinValue;
            _logger.LogJobInformation(_jobInfo, $"Cache sync check: All {result.ActiveHosts} active host(s) have converged to SearchParamLastUpdated={logDate.ToString("yyyy-MM-dd HH:mm:ss.fff")}.");
            return true;
        }

        _logger.LogJobInformation(_jobInfo, $"Cache sync check: {result.ConvergedHosts}/{result.ActiveHosts} hosts synced. Waiting...");

        // Budget exhausted: give up without sleeping again (this also avoids passing a
        // negative TimeSpan to Task.Delay when the configured interval is negative).
        if (elapsed.Elapsed >= maxWaitTime)
        {
            return false;
        }

        await Task.Delay(waitInterval, cancellationToken);
    }
}
}

private async Task<IReadOnlyList<long>> CreateReindexProcessingJobsAsync(CancellationToken cancellationToken)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ public SmartConfigurationResult(
ICollection<string> responseTypesSupported = null,
string introspectionEndpoint = null,
string managementEndpoint = null,
string revocationEndpoint = null)
string revocationEndpoint = null,
string issuer = null,
string jwksUri = null)
{
EnsureArg.IsNotNull(authorizationEndpoint, nameof(authorizationEndpoint));
EnsureArg.IsNotNull(tokenEndpoint, nameof(tokenEndpoint));
Expand All @@ -54,6 +56,8 @@ public SmartConfigurationResult(
IntrospectionEndpoint = introspectionEndpoint;
ManagementEndpoint = managementEndpoint;
RevocationEndpoint = revocationEndpoint;
Issuer = issuer;
JwksUri = jwksUri;
}

[JsonConstructor]
Expand Down Expand Up @@ -93,5 +97,13 @@ public SmartConfigurationResult()

[JsonProperty("revocation_endpoint")]
public string RevocationEndpoint { get; }

/// <summary>
/// Gets the issuer value supplied to the constructor; serialized as the "issuer" JSON property.
/// Null when no issuer was supplied.
/// </summary>
[JsonProperty("issuer")]
public string Issuer { get; }

// Kept as a string (not System.Uri), hence the CA1056 suppression around this property.
#pragma warning disable CA1056 // URI-like properties should not be strings
/// <summary>
/// Gets the JWKS URI value supplied to the constructor; serialized as the "jwks_uri" JSON property.
/// Null when no JWKS URI was supplied.
/// </summary>
[JsonProperty("jwks_uri")]
public string JwksUri { get; }
#pragma warning restore CA1056 // URI-like properties should not be strings
}
}
Loading
Loading