-
Notifications
You must be signed in to change notification settings - Fork 2.2k
cosmos: Enable endToEndTimeout for queryDocumentChangeFeed operation #48144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,6 +53,7 @@ public final class CosmosChangeFeedRequestOptionsImpl implements OverridableRequ | |
| private boolean completeAfterAllCurrentChangesRetrieved; | ||
| private Long endLSN; | ||
| private ReadConsistencyStrategy readConsistencyStrategy; | ||
| private CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig; | ||
|
|
||
| public CosmosChangeFeedRequestOptionsImpl(CosmosChangeFeedRequestOptionsImpl toBeCloned) { | ||
| if (toBeCloned.continuationState != null) { | ||
|
|
@@ -80,6 +81,7 @@ public CosmosChangeFeedRequestOptionsImpl(CosmosChangeFeedRequestOptionsImpl toB | |
| this.keywordIdentifiers = toBeCloned.keywordIdentifiers; | ||
| this.completeAfterAllCurrentChangesRetrieved = toBeCloned.completeAfterAllCurrentChangesRetrieved; | ||
| this.endLSN = toBeCloned.endLSN; | ||
| this.endToEndOperationLatencyPolicyConfig = toBeCloned.endToEndOperationLatencyPolicyConfig; | ||
| } | ||
|
|
||
| public CosmosChangeFeedRequestOptionsImpl( | ||
|
|
@@ -296,8 +298,11 @@ public CosmosChangeFeedRequestOptionsImpl setExcludedRegions(List<String> exclud | |
|
|
||
| @Override | ||
| public CosmosEndToEndOperationLatencyPolicyConfig getCosmosEndToEndLatencyPolicyConfig() { | ||
| // @TODO: Implement this and some of the others below | ||
| return null; | ||
| return this.endToEndOperationLatencyPolicyConfig; | ||
| } | ||
|
|
||
| public void setCosmosEndToEndLatencyPolicyConfig(CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🟡 Recommendation — Incomplete Wiring: The new […] [inline code and part of this sentence were lost in extraction; the comment then quotes the following snippet:]
    this.cosmosEndToEndOperationLatencyPolicyConfig = overrideOption(
        cosmosRequestOptions.getCosmosEndToEndLatencyPolicyConfig(),
        this.cosmosEndToEndOperationLatencyPolicyConfig);
Why this matters: […] [explanation lost in extraction]
Suggested fix: Add the following to […] [target method name lost in extraction]:
    this.endToEndOperationLatencyPolicyConfig = overrideOption(
        cosmosRequestOptions.getCosmosEndToEndLatencyPolicyConfig(),
        this.endToEndOperationLatencyPolicyConfig); |
||
| this.endToEndOperationLatencyPolicyConfig = endToEndOperationLatencyPolicyConfig; | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1520,6 +1520,50 @@ private static <T> Flux<FeedResponse<T>> getFeedResponseFluxWithTimeout( | |
| }); | ||
| } | ||
|
|
||
| private static <T> Flux<FeedResponse<T>> getChangeFeedResponseFluxWithTimeout( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can […] [remainder of this review comment was lost in extraction] |
||
| Flux<FeedResponse<T>> feedResponseFlux, | ||
| CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, | ||
| DiagnosticsClientContext diagnosticsClientContext) { | ||
|
|
||
| Duration endToEndTimeout = endToEndPolicyConfig.getEndToEndOperationTimeout(); | ||
|
|
||
| if (endToEndTimeout.isNegative()) { | ||
| return feedResponseFlux | ||
| .timeout(endToEndTimeout) | ||
| .onErrorMap(throwable -> { | ||
| if (throwable instanceof TimeoutException) { | ||
| CosmosException cancellationException = getNegativeTimeoutException(null, endToEndTimeout); | ||
| cancellationException.setStackTrace(throwable.getStackTrace()); | ||
|
|
||
| CosmosDiagnostics mostRecentDiagnostics = diagnosticsClientContext.getMostRecentlyCreatedDiagnostics(); | ||
| if (mostRecentDiagnostics != null) { | ||
| BridgeInternal.setCosmosDiagnostics(cancellationException, mostRecentDiagnostics); | ||
| } | ||
|
|
||
| return cancellationException; | ||
| } | ||
| return throwable; | ||
| }); | ||
| } | ||
|
|
||
| return feedResponseFlux | ||
| .timeout(endToEndTimeout) | ||
| .onErrorMap(throwable -> { | ||
| if (throwable instanceof TimeoutException) { | ||
| CosmosException exception = new OperationCancelledException(); | ||
| exception.setStackTrace(throwable.getStackTrace()); | ||
|
|
||
| CosmosDiagnostics mostRecentDiagnostics = diagnosticsClientContext.getMostRecentlyCreatedDiagnostics(); | ||
| if (mostRecentDiagnostics != null) { | ||
| BridgeInternal.setCosmosDiagnostics(exception, mostRecentDiagnostics); | ||
| } | ||
|
|
||
| return exception; | ||
| } | ||
| return throwable; | ||
| }); | ||
| } | ||
|
|
||
| private void addUserAgentSuffix(UserAgentContainer userAgentContainer, Set<UserAgentFeatureFlags> userAgentFeatureFlags) { | ||
|
|
||
| if (!this.globalPartitionEndpointManagerForPerPartitionAutomaticFailover.isPerPartitionAutomaticFailoverEnabled()) { | ||
|
|
@@ -4775,7 +4819,25 @@ public <T> Flux<FeedResponse<T>> queryDocumentChangeFeed( | |
| diagnosticsClientContext, | ||
| crossRegionAvailabilityContextForRequest); | ||
|
|
||
| return changeFeedQueryImpl.executeAsync(); | ||
| CosmosChangeFeedRequestOptionsImpl implOptions = | ||
| ImplementationBridgeHelpers | ||
| .CosmosChangeFeedRequestOptionsHelper | ||
| .getCosmosChangeFeedRequestOptionsAccessor() | ||
| .getImpl(requestOptions); | ||
|
|
||
| CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = | ||
| this.getEffectiveEndToEndOperationLatencyPolicyConfig( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🟡 Recommendation — Behavioral Change: PPAF/client-level e2e timeout now applies to Change Feed Processor.
With […] [inline code and remainder of this sentence lost in extraction]
Previously, […] [remainder of this sentence lost in extraction]
Why this matters: The Change Feed Processor (CFP) creates its own […] [inline code and remainder of this sentence lost in extraction]
Suggested action: Consider whether CFP should explicitly set a disabled/null e2e config on its change feed options, or document this behavioral change so users can adjust their client-level config accordingly. |
||
| implOptions.getCosmosEndToEndLatencyPolicyConfig(), | ||
| ResourceType.Document, | ||
| OperationType.ReadFeed); | ||
|
|
||
| Flux<FeedResponse<T>> feedResponseFlux = changeFeedQueryImpl.executeAsync(); | ||
|
|
||
| if (endToEndPolicyConfig != null && endToEndPolicyConfig.isEnabled()) { | ||
| return getChangeFeedResponseFluxWithTimeout(feedResponseFlux, endToEndPolicyConfig, diagnosticsClientContext); | ||
| } | ||
|
|
||
| return feedResponseFlux; | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the `CosmosEndToEndOperationLatencyPolicyConfig` wiring introduces availability strategy, we could increase coverage there for change feed and availability strategy.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@FabianMeiswinkel - what is the risk here - https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java#L318? There might be some diagnostic gaps which can be fixed in a later PR but wanted to understand a bit more on the risks of enabling availability strategy for change feed against multi-writer accounts.