From 9e5113d7d197b2e928b9d0f04337aa85920cf88b Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Mon, 11 May 2026 17:05:30 -0400 Subject: [PATCH 1/4] Add durable execution Step + Wait end-to-end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the minimum viable slice of the Amazon.Lambda.DurableExecution SDK: a workflow can run StepAsync and WaitAsync against a real Lambda, with replay-aware checkpointing wired through to the AWS service. Public API surface introduced: - DurableFunction.WrapAsync — entry point that handles the durable execution envelope (input hydration, output construction, status mapping) - IDurableContext.StepAsync / WaitAsync (4 Step overloads, 1 Wait) - StepConfig (empty placeholder for now; retry is deferred to a follow-up PR, and custom serializers are passed to the StepAsync ICheckpointSerializer overload rather than through StepConfig) - ICheckpointSerializer interface - [DurableExecution] attribute (recognized by future source generator) - DurableExecutionException base + StepException Internals: - DurableExecutionHandler — Task.WhenAny race between user code and the suspension signal, returning Succeeded/Failed/Pending - ExecutionState — replay-aware operation lookup and pending checkpoint buffer - OperationIdGenerator — deterministic, replay-stable IDs - TerminationManager — TaskCompletionSource-based suspension trigger - LambdaDurableServiceClient — wraps AWSSDK.Lambda's checkpoint and state APIs Tests: - 86 unit tests covering enums, exceptions, models, configs, ID generation, termination, execution state, the handler race, the context (Step + Wait paths), and the WrapAsync entry point - 6 end-to-end integration tests deploying real Lambdas via Docker on the provided.al2023 runtime: StepWaitStep, MultipleSteps, WaitOnly, LongerWait, ReplayDeterminism, StepFails Out of scope (follow-up PRs): - IRetryStrategy, ExponentialRetryStrategy, retry decision factories - DefaultJsonCheckpointSerializer - DurableLogger replay-suppression (currently returns NullLogger) - Callbacks, 
InvokeAsync, ParallelAsync, MapAsync, RunInChildContextAsync, WaitForConditionAsync — interface intentionally does not declare them - Annotations source-generator integration - DurableTestRunner / Amazon.Lambda.DurableExecution.Testing package - dotnet new lambda.DurableFunction blueprint stack-info: PR: https://github.com/aws/aws-lambda-dotnet/pull/2360, branch: GarrettBeatty/stack/2 remove update update update update --- Docs/durable-execution-design.md | 238 +++++-- .../Amazon.Lambda.DurableExecution.csproj | 2 + .../AssemblyMarker.cs | 5 - .../Config/StepConfig.cs | 13 + .../DurableContext.cs | 147 ++++ .../DurableExecutionHandler.cs | 119 ++++ .../DurableFunction.cs | 338 +++++++++ .../Amazon.Lambda.DurableExecution/Enums.cs | 14 + .../Exceptions/DurableExecutionException.cs | 49 ++ .../ICheckpointSerializer.cs | 25 + .../IDurableContext.cs | 108 +++ .../Internal/CheckpointBatcher.cs | 216 ++++++ .../Internal/CheckpointBatcherConfig.cs | 35 + .../Internal/DurableOperation.cs | 69 ++ .../Internal/ExecutionState.cs | 93 +++ .../Internal/Operation.cs | 140 ++++ .../Internal/OperationIdGenerator.cs | 101 +++ .../ReflectionJsonCheckpointSerializer.cs | 36 + .../Internal/StepOperation.cs | 164 +++++ .../Internal/TerminationManager.cs | 77 ++ .../Internal/UpperSnakeCaseEnumConverter.cs | 64 ++ .../Internal/WaitOperation.cs | 93 +++ .../Models/DurableExecutionInvocationInput.cs | 53 ++ .../DurableExecutionInvocationOutput.cs | 29 + .../Models/ErrorObject.cs | 46 ++ .../Services/LambdaDurableServiceClient.cs | 108 +++ ...bda.DurableExecution.AotPublishTest.csproj | 24 + .../Program.cs | 81 +++ ...a.DurableExecution.IntegrationTests.csproj | 43 ++ .../DurableFunctionDeployment.cs | 468 ++++++++++++ .../LongerWaitTest.cs | 62 ++ .../MultipleStepsTest.cs | 56 ++ .../ReplayDeterminismTest.cs | 67 ++ .../StepFailsTest.cs | 51 ++ .../StepWaitStepTest.cs | 58 ++ .../LongerWaitFunction/Dockerfile | 7 + .../LongerWaitFunction/Function.cs | 40 ++ .../LongerWaitFunction.csproj | 
18 + .../MultipleStepsFunction/Dockerfile | 7 + .../MultipleStepsFunction/Function.cs | 50 ++ .../MultipleStepsFunction.csproj | 18 + .../ReplayDeterminismFunction/Dockerfile | 7 + .../ReplayDeterminismFunction/Function.cs | 43 ++ .../ReplayDeterminismFunction.csproj | 18 + .../StepFailsFunction/Dockerfile | 7 + .../StepFailsFunction/Function.cs | 38 + .../StepFailsFunction.csproj | 18 + .../StepWaitStepFunction/Dockerfile | 7 + .../StepWaitStepFunction/Function.cs | 40 ++ .../StepWaitStepFunction.csproj | 18 + .../TestFunctions/WaitOnlyFunction/Dockerfile | 7 + .../WaitOnlyFunction/Function.cs | 31 + .../WaitOnlyFunction/WaitOnlyFunction.csproj | 18 + .../WaitOnlyTest.cs | 55 ++ .../xunit.runner.json | 6 + ...mazon.Lambda.DurableExecution.Tests.csproj | 5 +- .../AssemblyLoadTests.cs | 13 - .../CheckpointBatcherTests.cs | 213 ++++++ .../ConfigTests.cs | 15 + .../DurableContextTests.cs | 669 ++++++++++++++++++ .../DurableExecutionHandlerTests.cs | 137 ++++ .../DurableFunctionTests.cs | 583 +++++++++++++++ .../EnumsTests.cs | 39 + .../ExceptionsTests.cs | 68 ++ .../ExecutionStateTests.cs | 165 +++++ .../LambdaDurableServiceClientTests.cs | 202 ++++++ .../MockLambdaClient.cs | 65 ++ .../ModelsTests.cs | 203 ++++++ .../OperationIdGeneratorTests.cs | 100 +++ .../RecordingBatcher.cs | 51 ++ .../TerminationManagerTests.cs | 88 +++ .../UpperSnakeCaseEnumConverterTests.cs | 84 +++ .../coverage.runsettings | 15 + .../coverage.sh | 29 + 74 files changed, 6428 insertions(+), 61 deletions(-) delete mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/AssemblyMarker.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Config/StepConfig.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/DurableExecutionHandler.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/DurableFunction.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Enums.cs 
create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Exceptions/DurableExecutionException.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/ICheckpointSerializer.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/IDurableContext.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/CheckpointBatcher.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/CheckpointBatcherConfig.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/DurableOperation.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/ExecutionState.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/Operation.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/OperationIdGenerator.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/ReflectionJsonCheckpointSerializer.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/TerminationManager.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/UpperSnakeCaseEnumConverter.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitOperation.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Models/DurableExecutionInvocationInput.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Models/DurableExecutionInvocationOutput.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Models/ErrorObject.cs create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Services/LambdaDurableServiceClient.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Amazon.Lambda.DurableExecution.AotPublishTest.csproj create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Program.cs create mode 100644 
Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/Amazon.Lambda.DurableExecution.IntegrationTests.csproj create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/LongerWaitTest.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/MultipleStepsTest.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/ReplayDeterminismTest.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/StepFailsTest.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/StepWaitStepTest.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Dockerfile create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Function.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/LongerWaitFunction.csproj create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Dockerfile create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Function.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/MultipleStepsFunction.csproj create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Dockerfile create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Function.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/ReplayDeterminismFunction.csproj create mode 100644 
Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Dockerfile create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Function.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/StepFailsFunction.csproj create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Dockerfile create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Function.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/StepWaitStepFunction.csproj create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/Dockerfile create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/Function.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/WaitOnlyFunction.csproj create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/WaitOnlyTest.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/xunit.runner.json delete mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/AssemblyLoadTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/CheckpointBatcherTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/ConfigTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableExecutionHandlerTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableFunctionTests.cs create mode 100644 
Libraries/test/Amazon.Lambda.DurableExecution.Tests/EnumsTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExceptionsTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExecutionStateTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/LambdaDurableServiceClientTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/MockLambdaClient.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/ModelsTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/OperationIdGeneratorTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/RecordingBatcher.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/TerminationManagerTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/UpperSnakeCaseEnumConverterTests.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/coverage.runsettings create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/coverage.sh diff --git a/Docs/durable-execution-design.md b/Docs/durable-execution-design.md index efaa41589..6df424c5f 100644 --- a/Docs/durable-execution-design.md +++ b/Docs/durable-execution-design.md @@ -158,7 +158,7 @@ public class Function { // Step 1: Validate the order (checkpointed automatically) var validation = await context.StepAsync( - async () => await ValidateOrder(input.OrderId), + async (step) => await ValidateOrder(input.OrderId), name: "validate_order"); if (!validation.IsValid) @@ -169,7 +169,7 @@ public class Function // Step 3: Process the order var result = await context.StepAsync( - async () => await ProcessOrder(input.OrderId), + async (step) => await ProcessOrder(input.OrderId), name: "process_order"); return new OrderResult { Status = "approved", OrderId = result.OrderId }; @@ -182,6 +182,7 @@ public class Function Things to notice: - `[LambdaFunction]` + 
`[DurableExecution]` triggers source generation, so you don't wire up the handler yourself +- Each step function receives an `IStepContext` with a step-scoped logger, attempt number, and operation ID - Each `StepAsync` call checkpoints its result automatically - `WaitAsync` suspends the function -- Lambda is not running (or billing you) during the wait - On replay, completed steps return their cached result without re-executing @@ -208,7 +209,7 @@ public class Function private async Task MyWorkflow(OrderEvent input, IDurableContext context) { var validation = await context.StepAsync( - async () => await ValidateOrder(input.OrderId), + async (step) => await ValidateOrder(input.OrderId), name: "validate_order"); if (!validation.IsValid) @@ -217,7 +218,7 @@ public class Function await context.WaitAsync(TimeSpan.FromSeconds(30), name: "processing_delay"); var result = await context.StepAsync( - async () => await ProcessOrder(input.OrderId), + async (step) => await ProcessOrder(input.OrderId), name: "process_order"); return new OrderResult { Status = "approved", OrderId = result.OrderId }; @@ -244,9 +245,46 @@ public Task FunctionHandler( private async Task MyWorkflow(OrderEvent input, IDurableContext context) { - await context.StepAsync(async () => await SendNotification(input.UserId), name: "notify"); + await context.StepAsync(async (step) => await SendNotification(input.UserId), name: "notify"); await context.WaitAsync(TimeSpan.FromHours(1), name: "cooldown"); - await context.StepAsync(async () => await Cleanup(input.UserId), name: "cleanup"); + await context.StepAsync(async (step) => await Cleanup(input.UserId), name: "cleanup"); +} +``` + +For **NativeAOT** deployments, pass a `JsonSerializerContext` so the SDK can serialize/deserialize your input and output types without reflection: + +```csharp +[JsonSerializable(typeof(OrderEvent))] +[JsonSerializable(typeof(OrderResult))] +internal partial class MyJsonContext : JsonSerializerContext { } + +public class Function 
+{ + public Task FunctionHandler( + DurableExecutionInvocationInput invocationInput, ILambdaContext context) + => DurableFunction.WrapAsync( + MyWorkflow, invocationInput, context, MyJsonContext.Default); + + private async Task MyWorkflow(OrderEvent input, IDurableContext context) + { + // ... + } +} +``` + +To inject a custom `IAmazonLambda` client (e.g., for VPC endpoints or unit testing), use the overload that accepts one: + +```csharp +public class Function +{ + private readonly IAmazonLambda _lambdaClient; + + public Function(IAmazonLambda lambdaClient) => _lambdaClient = lambdaClient; + + public Task FunctionHandler( + DurableExecutionInvocationInput invocationInput, ILambdaContext context) + => DurableFunction.WrapAsync( + MyWorkflow, invocationInput, context, _lambdaClient); } ``` @@ -422,7 +460,7 @@ var approval = await context.WaitForCallbackAsync( if (approval.Approved) { - await context.StepAsync(async () => await ExecutePlan(), name: "execute"); + await context.StepAsync(async (step) => await ExecutePlan(), name: "execute"); } ``` @@ -486,9 +524,9 @@ Run independent operations concurrently. 
The JS SDK uses a `DurablePromise` patt var results = await context.ParallelAsync( new Func>[] { - async (ctx) => await ctx.StepAsync(async () => await FetchUserData(userId), name: "fetch_user"), - async (ctx) => await ctx.StepAsync(async () => await FetchOrderHistory(userId), name: "fetch_orders"), - async (ctx) => await ctx.StepAsync(async () => await FetchPreferences(userId), name: "fetch_prefs"), + async (ctx) => await ctx.StepAsync(async (step) => await FetchUserData(userId), name: "fetch_user"), + async (ctx) => await ctx.StepAsync(async (step) => await FetchOrderHistory(userId), name: "fetch_orders"), + async (ctx) => await ctx.StepAsync(async (step) => await FetchPreferences(userId), name: "fetch_prefs"), }, name: "parallel_fetch", config: new ParallelConfig @@ -512,9 +550,9 @@ For better observability, you can name individual branches (matching the JS SDK var results = await context.ParallelAsync( new NamedBranch[] { - new("fetch_user", async (ctx) => await ctx.StepAsync(async () => await FetchUserData(userId))), - new("fetch_orders", async (ctx) => await ctx.StepAsync(async () => await FetchOrderHistory(userId))), - new("fetch_prefs", async (ctx) => await ctx.StepAsync(async () => await FetchPreferences(userId))), + new("fetch_user", async (ctx) => await ctx.StepAsync(async (step) => await FetchUserData(userId))), + new("fetch_orders", async (ctx) => await ctx.StepAsync(async (step) => await FetchOrderHistory(userId))), + new("fetch_prefs", async (ctx) => await ctx.StepAsync(async (step) => await FetchPreferences(userId))), }, name: "parallel_fetch"); @@ -884,7 +922,7 @@ When user code hits a pending wait or callback: 2. Calls `terminationManager.Terminate(WaitScheduled)` 3. Awaits a new never-completing `TaskCompletionSource` (blocks itself permanently) 4. `Task.WhenAny` sees the termination task resolved and picks it as the winner -5. `RunAsync` returns PENDING; Lambda terminates; the abandoned user task is GC'd +5. 
`RunAsync` returns PENDING; the abandoned user task is left to be GC'd; Lambda terminates ### Lifecycle and cleanup @@ -906,21 +944,95 @@ Static helper for the non-Annotations handler path. Wraps a workflow function, h /// public static class DurableFunction { + // ── Reflection-based overloads (JIT only) ────────────────────────── + /// /// Wrap a workflow that takes typed input and returns typed output. + /// Reflection-based JSON — not AOT-safe. /// + [RequiresUnreferencedCode("Uses reflection-based JSON. Use the JsonSerializerContext overload for AOT.")] + [RequiresDynamicCode("Uses reflection-based JSON. Use the JsonSerializerContext overload for AOT.")] public static Task WrapAsync( Func> workflow, DurableExecutionInvocationInput invocationInput, ILambdaContext lambdaContext); /// - /// Wrap a workflow that takes typed input and returns no value. + /// Wrap a workflow (typed input + output) with explicit Lambda client. + /// Reflection-based JSON — not AOT-safe. + /// + [RequiresUnreferencedCode("Uses reflection-based JSON. Use the JsonSerializerContext overload for AOT.")] + [RequiresDynamicCode("Uses reflection-based JSON. Use the JsonSerializerContext overload for AOT.")] + public static Task WrapAsync( + Func> workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + IAmazonLambda lambdaClient); + + /// + /// Wrap a void workflow (typed input, no output). + /// Reflection-based JSON — not AOT-safe. /// + [RequiresUnreferencedCode("Uses reflection-based JSON. Use the JsonSerializerContext overload for AOT.")] + [RequiresDynamicCode("Uses reflection-based JSON. Use the JsonSerializerContext overload for AOT.")] public static Task WrapAsync( Func workflow, DurableExecutionInvocationInput invocationInput, ILambdaContext lambdaContext); + + /// + /// Wrap a void workflow with explicit Lambda client. + /// Reflection-based JSON — not AOT-safe. + /// + [RequiresUnreferencedCode("Uses reflection-based JSON. 
Use the JsonSerializerContext overload for AOT.")] + [RequiresDynamicCode("Uses reflection-based JSON. Use the JsonSerializerContext overload for AOT.")] + public static Task WrapAsync( + Func workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + IAmazonLambda lambdaClient); + + // ── AOT-safe overloads (caller supplies JsonSerializerContext) ────── + + /// + /// Wrap a workflow (typed input + output). AOT-safe — requires + /// [JsonSerializable(typeof(TInput))] and [JsonSerializable(typeof(TOutput))] + /// on the supplied jsonContext. + /// + public static Task WrapAsync( + Func> workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + JsonSerializerContext jsonContext); + + /// + /// Wrap a workflow (typed input + output) with explicit Lambda client. AOT-safe. + /// + public static Task WrapAsync( + Func> workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + IAmazonLambda lambdaClient, + JsonSerializerContext jsonContext); + + /// + /// Wrap a void workflow (typed input, no output). AOT-safe. + /// + public static Task WrapAsync( + Func workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + JsonSerializerContext jsonContext); + + /// + /// Wrap a void workflow with explicit Lambda client. AOT-safe. + /// + public static Task WrapAsync( + Func workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + IAmazonLambda lambdaClient, + JsonSerializerContext jsonContext); } ``` @@ -948,11 +1060,18 @@ public interface IDurableContext /// ILambdaContext LambdaContext { get; } + // ── StepAsync overloads ──────────────────────────────────────────── + // The user's function always receives IStepContext, matching the + // Python and JS SDKs (Java has no-context overloads but deprecated + // them — see https://github.com/aws/aws-durable-execution-sdk-java). 
+ /// - /// Execute a step with automatic checkpointing. + /// Execute a step with automatic checkpointing using reflection-based JSON. /// The IStepContext provides a step-scoped logger with operation metadata /// (step name, attempt number, operation ID) and the current attempt number. /// + [RequiresUnreferencedCode("Reflection-based JSON for T. Use the ICheckpointSerializer overload for AOT/trimmed deployments.")] + [RequiresDynamicCode("Reflection-based JSON for T. Use the ICheckpointSerializer overload for AOT/trimmed deployments.")] Task StepAsync( Func> func, string? name = null, @@ -960,7 +1079,7 @@ public interface IDurableContext CancellationToken cancellationToken = default); /// - /// Execute a step that returns no value. + /// Execute a step that returns no value. AOT-safe (no payload to serialize). /// Task StepAsync( Func func, @@ -968,6 +1087,17 @@ public interface IDurableContext StepConfig? config = null, CancellationToken cancellationToken = default); + /// + /// Execute a step with AOT-safe checkpoint serialization. The supplied + /// serializer is used in place of reflection-based JSON. + /// + Task StepAsync( + Func> func, + ICheckpointSerializer serializer, + string? name = null, + StepConfig? config = null, + CancellationToken cancellationToken = default); + /// /// Suspend execution for the specified duration. /// Throws ArgumentOutOfRangeException if duration is less than 1 second. @@ -1087,7 +1217,9 @@ public record DurableBranch(string Name, Func> Func) #### CancellationToken behavior -All methods accept a `CancellationToken` that follows standard .NET semantics: cancellation throws `OperationCanceledException` and the execution fails. Cancellation does **not** trigger suspension — those are separate concepts. The durable execution service handles timeout scenarios automatically: if Lambda terminates mid-execution, the next invocation simply replays from the last checkpoint. 
For advanced users who want to suspend gracefully before timeout, check `context.LambdaContext.RemainingTime` and return early. +All methods accept a per-call `CancellationToken` that follows standard .NET semantics: cancellation throws `OperationCanceledException` and the execution fails. Cancellation does **not** trigger suspension — those are separate concepts. + +The durable execution service handles timeout scenarios automatically: if Lambda terminates mid-execution, the next invocation simply replays from the last checkpoint. For advanced users who want to suspend gracefully before timeout, check `context.LambdaContext.RemainingTime` and return early. ### Configuration Types @@ -1112,10 +1244,11 @@ public class StepConfig /// public StepSemantics Semantics { get; set; } = StepSemantics.AtLeastOncePerRetry; - /// - /// Custom serializer for the step result. Default is System.Text.Json. - /// - public ICheckpointSerializer? Serializer { get; set; } + // Note: there is no Serializer property here. Custom serializers are + // supplied via the AOT-safe StepAsync(..., ICheckpointSerializer, ...) + // overload, which is type-safe (ICheckpointSerializer instead of the + // non-generic marker) and gives one obvious way to opt into custom or + // AOT-friendly serialization. } public enum StepSemantics @@ -1543,16 +1676,17 @@ public interface ICheckpointSerializer public record SerializationContext(string OperationId, string DurableExecutionArn); ``` -Usage: +Usage — pass the serializer to the AOT-safe `StepAsync` overload directly. 
+This is the only way to override the default reflection-based JSON path; it's +intentional that there's no `StepConfig.Serializer` knob, so you have one +obvious place to opt in (and the type is `ICheckpointSerializer<T>`, not the +non-generic marker, so the compiler catches a mismatched `T`): ```csharp var result = await context.StepAsync( - async () => await GetLargeData(), - name: "get_data", - config: new StepConfig - { - Serializer = new CompressedJsonSerializer() - }); + async (step) => await GetLargeData(), + new CompressedJsonSerializer(), + name: "get_data"); ``` ### Class library vs. executable output @@ -1579,16 +1713,34 @@ Both approaches produce a self-contained executable that the Lambda custom runti ### NativeAOT compatibility -The SDK is AOT-friendly but does not require AOT. The default JSON serialization uses reflection (standard `System.Text.Json` behavior), which works in JIT mode. For NativeAOT deployments, provide a `JsonSerializerContext` via the `ICheckpointSerializer` interface — this avoids all runtime reflection and is fully trim-safe. The SDK itself avoids `Activator.CreateInstance`, `Type.GetType()`, and other reflection patterns, and uses `[DynamicallyAccessedMembers]` trimming annotations where needed. +The SDK is AOT-friendly but does not require AOT. The default JSON serialization uses reflection (standard `System.Text.Json` behavior), which works in JIT mode. For NativeAOT deployments, AOT safety is addressed at two levels — **at each level there are two overload families: a reflection-based one annotated with `[RequiresUnreferencedCode]` / `[RequiresDynamicCode]` and an AOT-safe one that requires a serializer parameter**. The trimmer warns at the call site when reflection overloads are used in AOT/trimmed builds. + +1. **Entry point (`DurableFunction.WrapAsync`)** — the AOT-safe overload takes a `JsonSerializerContext` parameter that includes type info for your `TInput` and `TOutput` types. + +2. 
**Step checkpoints (`IDurableContext.StepAsync`)** — the AOT-safe overload takes an `ICheckpointSerializer` directly as a parameter. Internally, the reflection overload constructs `ReflectionJsonCheckpointSerializer` (whose constructor carries `[RequiresUnreferencedCode]`); the AOT-safe overload uses the user-supplied serializer and never touches reflection. The void `StepAsync` overloads are AOT-safe by default — they use a built-in null-only serializer since they have no payload. + +The SDK itself avoids `Activator.CreateInstance`, `Type.GetType()`, and other reflection patterns, and uses `[DynamicallyAccessedMembers]` trimming annotations where needed. ```csharp -// Default: works with reflection (JIT mode) -var result = await context.StepAsync(async () => await GetOrder()); +// Default: works with reflection (JIT mode); flagged for AOT. +var result = await context.StepAsync(async (step) => await GetOrder()); -// AOT mode: user provides serialization context +// AOT mode — entry point: pass JsonSerializerContext to WrapAsync. +[JsonSerializable(typeof(OrderEvent))] +[JsonSerializable(typeof(OrderResult))] +[JsonSerializable(typeof(Order))] +internal partial class MyJsonContext : JsonSerializerContext { } + +public Task FunctionHandler( + DurableExecutionInvocationInput invocationInput, ILambdaContext context) + => DurableFunction.WrapAsync( + MyWorkflow, invocationInput, context, MyJsonContext.Default); + +// AOT mode — step checkpoint: pass ICheckpointSerializer to StepAsync directly. var result = await context.StepAsync( - async () => await GetOrder(), - config: new StepConfig { Serializer = new JsonCheckpointSerializer(MyJsonContext.Default.Order) }); + async (step) => await GetOrder(), + new JsonCheckpointSerializer(MyJsonContext.Default.Order), + name: "get_order"); ``` ### Large payload and checkpoint overflow @@ -1701,7 +1853,7 @@ public class Functions } ``` -When no `LambdaClientFactory` is specified, the generated code creates a default `AmazonLambdaClient`. 
For the manual handler path, pass the client directly to `DurableExecutionHandler.RunAsync`. +When no `LambdaClientFactory` is specified, the generated code creates a default `AmazonLambdaClient`. For the manual handler path (`DurableFunction.WrapAsync`), pass the client directly via the `IAmazonLambda lambdaClient` overload. > **Dependency boundaries:** `Amazon.Lambda.Annotations` has **no dependency** on the AWS SDK or on `Amazon.Lambda.DurableExecution`. The Annotations source generator references durable execution types by fully-qualified name strings only — it never takes a compile-time dependency on the durable package. The `[DurableExecution]` attribute is defined in `Amazon.Lambda.DurableExecution`, and the generated code resolves against the user's project references. There is only one source generator (Annotations) — no coordination between multiple generators is needed. @@ -1909,11 +2061,11 @@ These analyzers run at compile time in the IDE (IntelliSense squiggles) and duri ## Cross-SDK API comparison -All three SDKs expose the same core operations. The differences are naming conventions, parameter ordering, and concurrency model. +All four SDKs expose the same core operations. The differences are naming conventions, parameter ordering, and concurrency model. 
-| Operation | .NET | Python | JavaScript | -|-----------|------|--------|------------| -| Step | `context.StepAsync(func, name?, config?)` | `context.step(func, name?, config?)` | `context.step(name?, fn, config?)` → `DurablePromise` | +| Operation | .NET | Python | JavaScript | Java | +|-----------|------|--------|------------|------| +| Step | `context.StepAsync(func, name?, config?)` | `context.step(func, name?, config?)` | `context.step(name?, fn, config?)` → `DurablePromise` | `context.step(name, type, func, config?)` (blocking) / `context.stepAsync(...)` → `DurableFuture` | | Wait | `context.WaitAsync(duration, name?)` | `context.wait(duration, name?)` | `context.wait(name?, duration)` → `DurablePromise` | | Create callback | `context.CreateCallbackAsync(name?, config?)` | `context.create_callback(name?, config?)` | `context.createCallback(name?, config?)` | | Wait for callback | `context.WaitForCallbackAsync(submitter, name?, config?)` | `context.wait_for_callback(submitter, name?, config?)` | `context.waitForCallback(name?, submitter, config?)` | @@ -1943,11 +2095,13 @@ All three SDKs expose the same core operations. The differences are naming conve **Key differences:** -- **Concurrency model:** JS returns `DurablePromise` (lazy, deferred until awaited). Python is synchronous (blocks the thread). .NET returns `Task` (standard async/await). Note: `Task.WhenAll` works with durable operations but `ParallelAsync`/`MapAsync` are preferred for completion policies and observability. -- **Name parameter position:** JS puts `name` first; Python and .NET put it after the function/duration. -- **Parallel semantics in JS:** JS uses `context.promise.all/any/race/allSettled` to combine DurablePromises. .NET and Python use `CompletionConfig` on the `Parallel`/`Map` operations instead. +- **Concurrency model:** JS returns `DurablePromise` (lazy, deferred until awaited). Python is synchronous (blocks the thread). 
Java exposes both `step` (blocking) and `stepAsync` (returns `DurableFuture`). .NET returns `Task` (standard async/await). Note: `Task.WhenAll` works with durable operations but `ParallelAsync`/`MapAsync` are preferred for completion policies and observability. +- **Why .NET ships only the async form:** Java's two-API split exists because Java has no language-level `await` — `step` is the simple blocking ergonomic, `stepAsync` is the composable form. In .NET, `Task` is *already* both: `await context.StepAsync(...)` reads as sequential code, and `Task.WhenAll(...)` composes concurrently. A `Step` (blocking, returns `T`) overload would do nothing except call `.GetAwaiter().GetResult()` on the async version, which is also a Lambda-thread anti-pattern (deadlock-prone, blocks a thread the runtime needs). So .NET intentionally has one shape — `*Async` — matching the rest of `IAmazonLambda` and the broader .NET async convention. Python is single-shape for the same reason in reverse: no async runtime in scope, so blocking is the only ergonomic shape. +- **Step function signature:** Python and JS only expose `Func` — the user always receives a step context. Java has both `Function` and `Supplier` overloads, but the `Supplier` ones are deprecated (*"use the variants accepting StepContext instead"*). .NET follows Python/JS: `IStepContext` is always passed. +- **Name parameter position:** JS puts `name` first; Python, Java, and .NET put it after the function/duration. +- **Parallel semantics in JS:** JS uses `context.promise.all/any/race/allSettled` to combine DurablePromises. .NET, Python, and Java use `CompletionConfig` on the `Parallel`/`Map` operations instead. - **.NET-only:** `CancellationToken` on every method (standard .NET pattern). -- **Jitter default:** All three SDKs default to full jitter on retry strategies. +- **Jitter default:** All four SDKs default to full jitter on retry strategies. 
--- diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Amazon.Lambda.DurableExecution.csproj b/Libraries/src/Amazon.Lambda.DurableExecution/Amazon.Lambda.DurableExecution.csproj index 9139edb18..de02d8ce2 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/Amazon.Lambda.DurableExecution.csproj +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Amazon.Lambda.DurableExecution.csproj @@ -14,6 +14,8 @@ true enable enable + true + IL2026,IL2067,IL2075,IL3050 diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/AssemblyMarker.cs b/Libraries/src/Amazon.Lambda.DurableExecution/AssemblyMarker.cs deleted file mode 100644 index 770e6ccd2..000000000 --- a/Libraries/src/Amazon.Lambda.DurableExecution/AssemblyMarker.cs +++ /dev/null @@ -1,5 +0,0 @@ -namespace Amazon.Lambda.DurableExecution; - -internal static class AssemblyMarker -{ -} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Config/StepConfig.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Config/StepConfig.cs new file mode 100644 index 000000000..2380967de --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Config/StepConfig.cs @@ -0,0 +1,13 @@ +namespace Amazon.Lambda.DurableExecution; + +/// +/// Configuration for step execution. +/// +public sealed class StepConfig +{ + // TODO: Retry support is deferred to a follow-up PR. When added, this is + // where RetryStrategy and Semantics (AtLeastOncePerRetry / AtMostOncePerRetry) + // will live. The follow-up needs to use service-mediated retries (checkpoint + // a RETRY operation + suspend the Lambda) rather than an in-process Task.Delay + // loop, to avoid billing Lambda compute time during retry backoff. 
+} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs b/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs new file mode 100644 index 000000000..87a874c2d --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs @@ -0,0 +1,147 @@ +using System.Diagnostics.CodeAnalysis; +using Amazon.Lambda.Core; +using Amazon.Lambda.DurableExecution.Internal; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; + +namespace Amazon.Lambda.DurableExecution; + +/// +/// Implementation of <see cref="IDurableContext"/>. Constructs and dispatches +/// per-operation classes (<see cref="StepOperation{T}"/>, <see cref="WaitOperation"/>); +/// the replay logic lives in those classes. +/// +internal sealed class DurableContext : IDurableContext +{ + private readonly ExecutionState _state; + private readonly TerminationManager _terminationManager; + private readonly OperationIdGenerator _idGenerator; + private readonly string _durableExecutionArn; + private readonly CheckpointBatcher? _batcher; + + public DurableContext( + ExecutionState state, + TerminationManager terminationManager, + OperationIdGenerator idGenerator, + string durableExecutionArn, + ILambdaContext lambdaContext, + CheckpointBatcher? batcher = null) + { + _state = state; + _terminationManager = terminationManager; + _idGenerator = idGenerator; + _durableExecutionArn = durableExecutionArn; + _batcher = batcher; + LambdaContext = lambdaContext; + } + + // Replay-safe logger ships in a follow-up PR; see IDurableContext.Logger doc. + public ILogger Logger => NullLogger.Instance; + public IExecutionContext ExecutionContext => new DurableExecutionContext(_durableExecutionArn); + public ILambdaContext LambdaContext { get; } + + [RequiresUnreferencedCode("Reflection-based JSON for T. Use the ICheckpointSerializer overload for AOT/trimmed deployments.")] + [RequiresDynamicCode("Reflection-based JSON for T. Use the ICheckpointSerializer overload for AOT/trimmed deployments.")] + public Task StepAsync( + Func> func, + string? 
name = null, + StepConfig? config = null, + CancellationToken cancellationToken = default) + => RunStep(func, new ReflectionJsonCheckpointSerializer(), name, config, cancellationToken); + + public async Task StepAsync( + Func func, + string? name = null, + StepConfig? config = null, + CancellationToken cancellationToken = default) + { + // Void steps don't carry a meaningful payload; we wrap with a null-only + // serializer that doesn't touch reflection. + await RunStep( + async (ctx) => { await func(ctx); return null; }, + NullCheckpointSerializer.Instance, + name, config, cancellationToken); + } + + public Task StepAsync( + Func> func, + ICheckpointSerializer serializer, + string? name = null, + StepConfig? config = null, + CancellationToken cancellationToken = default) + => RunStep(func, serializer, name, config, cancellationToken); + + + private Task RunStep( + Func> func, + ICheckpointSerializer serializer, + string? name, + StepConfig? config, + CancellationToken cancellationToken) + { + var operationId = _idGenerator.NextId(); + var op = new StepOperation( + operationId, name, func, config, serializer, Logger, + _state, _terminationManager, _durableExecutionArn, _batcher); + return op.ExecuteAsync(cancellationToken); + } + + public Task WaitAsync( + TimeSpan duration, + string? name = null, + CancellationToken cancellationToken = default) + { + // Service timer granularity is 1 second; sub-second waits would round to 0. + // WaitOptions.WaitSeconds is integer in [1, 31_622_400] (1 second to ~1 year). 
+ if (duration < TimeSpan.FromSeconds(1)) + throw new ArgumentOutOfRangeException(nameof(duration), duration, "Wait duration must be at least 1 second."); + + if (duration > TimeSpan.FromSeconds(31_622_400)) + throw new ArgumentOutOfRangeException(nameof(duration), duration, "Wait duration must be at most 31,622,400 seconds (~1 year)."); + + cancellationToken.ThrowIfCancellationRequested(); + + var operationId = _idGenerator.NextId(); + var waitSeconds = (int)Math.Max(1, Math.Ceiling(duration.TotalSeconds)); + var op = new WaitOperation( + operationId, name, waitSeconds, + _state, _terminationManager, _durableExecutionArn, _batcher); + return op.ExecuteAsync(cancellationToken); + } +} + +/// +/// Trim-safe serializer used by the void StepAsync overloads, which never +/// carry a meaningful payload. Always serializes to "null" and discards +/// the stored data on deserialize. +/// +internal sealed class NullCheckpointSerializer : ICheckpointSerializer +{ + public static NullCheckpointSerializer Instance { get; } = new(); + public string Serialize(object? value, SerializationContext context) => "null"; + public object? 
Deserialize(string data, SerializationContext context) => null; +} + +internal sealed class DurableExecutionContext : IExecutionContext +{ + public DurableExecutionContext(string durableExecutionArn) + { + DurableExecutionArn = durableExecutionArn; + } + + public string DurableExecutionArn { get; } +} + +internal sealed class StepContext : IStepContext +{ + public StepContext(string operationId, int attemptNumber, ILogger logger) + { + OperationId = operationId; + AttemptNumber = attemptNumber; + Logger = logger; + } + + public ILogger Logger { get; } + public int AttemptNumber { get; } + public string OperationId { get; } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/DurableExecutionHandler.cs b/Libraries/src/Amazon.Lambda.DurableExecution/DurableExecutionHandler.cs new file mode 100644 index 000000000..300cc8654 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/DurableExecutionHandler.cs @@ -0,0 +1,119 @@ +using Amazon.Lambda.DurableExecution.Internal; + +namespace Amazon.Lambda.DurableExecution; + +/// +/// The result of running a durable execution handler. +/// +internal sealed class HandlerResult +{ + public required InvocationStatus Status { get; init; } + public TResult? Result { get; init; } + public string? Message { get; init; } + public Exception? Exception { get; init; } +} + +/// +/// Core orchestration engine for durable execution. Races user code against +/// a termination signal using Task.WhenAny. When user code completes, returns +/// SUCCEEDED/FAILED. When termination wins (wait, callback, invoke), returns PENDING. +/// +internal static class DurableExecutionHandler +{ + /// + /// Runs the user's workflow function within the durable execution engine. 
+ /// + /// + /// + /// Suspension flow — example: await ctx.WaitAsync(TimeSpan.FromSeconds(5)): + /// + /// + /// user code DurableContext TerminationMgr RunAsync + /// ───────── ────────────── ────────────── ──────── + /// WaitAsync(5s) ─────► queue WAIT START + /// checkpoint + /// Terminate() ──────► TerminationTask + /// completes + /// ◄────── new TCS().Task + /// (never completes) + /// await blocks + /// forever WhenAny: + /// ── termination wins + /// ── userTask abandoned + /// ── return Pending + /// + /// + /// Key insight: WaitAsync never returns a completed Task — it hands back + /// a TaskCompletionSource that is never resolved. The user's await blocks + /// indefinitely. The escape signal is terminationManager.Terminate(), + /// which Task.WhenAny picks up. We return Pending; the dangling user + /// Task is GC'd. The service flushes checkpoints, fires the wait timer, then + /// re-invokes Lambda — on replay, WaitAsync sees the matching SUCCEED + /// checkpoint and returns Task.CompletedTask normally. + /// + /// + /// The same pattern applies to retries (RetryScheduled), callbacks + /// (CallbackPending), and chained invokes (InvokePending). + /// + /// + /// The workflow return type. + /// Hydrated execution state from prior invocations. + /// Manages the suspension signal. + /// The user's workflow function receiving a DurableContext. + /// The handler result indicating SUCCEEDED, FAILED, or PENDING. + internal static async Task> RunAsync( + ExecutionState executionState, + TerminationManager terminationManager, + Func> userHandler) + { + // Run user code on a threadpool thread so it executes independently of + // the termination signal. When TerminationManager fires (e.g., WaitAsync), + // we need the WhenAny race below to resolve immediately without waiting + // for the user task to reach an await point. + var userTask = Task.Run(userHandler); + + // Race: user code completing vs. termination signal (wait/callback/retry). 
+ // If termination wins, we return PENDING and the abandoned userTask is never awaited. + var winner = await Task.WhenAny(userTask, terminationManager.TerminationTask); + + if (winner == terminationManager.TerminationTask) + { + var terminationResult = await terminationManager.TerminationTask; + + if (terminationResult.Exception != null) + { + return new HandlerResult + { + Status = InvocationStatus.Failed, + Message = terminationResult.Exception.Message, + Exception = terminationResult.Exception + }; + } + + return new HandlerResult + { + Status = InvocationStatus.Pending, + Message = terminationResult.Message + }; + } + + try + { + var result = await userTask; + return new HandlerResult + { + Status = InvocationStatus.Succeeded, + Result = result + }; + } + catch (Exception ex) + { + return new HandlerResult + { + Status = InvocationStatus.Failed, + Message = ex.Message, + Exception = ex + }; + } + } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/DurableFunction.cs b/Libraries/src/Amazon.Lambda.DurableExecution/DurableFunction.cs new file mode 100644 index 000000000..d629a0b2e --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/DurableFunction.cs @@ -0,0 +1,338 @@ +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Text.Json.Serialization; +using System.Text.Json.Serialization.Metadata; +using System.Threading; +using Amazon.Lambda; +using Amazon.Lambda.Core; +using Amazon.Lambda.DurableExecution.Internal; +using Amazon.Lambda.DurableExecution.Services; +using Amazon.Lambda.Model; +using Amazon.Runtime; + +namespace Amazon.Lambda.DurableExecution; + +/// +/// Static helper that wraps a durable workflow function, handling all envelope +/// translation between DurableExecutionInvocationInput/Output and user types. 
+/// +public static class DurableFunction +{ + private static readonly Lazy _cachedLambdaClient = + new(() => new AmazonLambdaClient(), LazyThreadSafetyMode.ExecutionAndPublication); + + // ────────────────────────────────────────────────────────────────────── + // Reflection-based overloads (JIT only) + // ────────────────────────────────────────────────────────────────────── + + /// + /// Wrap a workflow (typed input + output). Reflection-based JSON — not AOT-safe. + /// + [RequiresUnreferencedCode("Uses reflection-based JSON for TInput/TOutput. Use the JsonSerializerContext overload for AOT.")] + [RequiresDynamicCode("Uses reflection-based JSON for TInput/TOutput. Use the JsonSerializerContext overload for AOT.")] + public static Task WrapAsync( + Func> workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext) + { + return WrapAsyncCore(workflow, invocationInput, lambdaContext, _cachedLambdaClient.Value, jsonContext: null); + } + + /// + /// Wrap a workflow (typed input + output) with explicit Lambda client. + /// Reflection-based JSON — not AOT-safe. + /// + [RequiresUnreferencedCode("Uses reflection-based JSON for TInput/TOutput. Use the JsonSerializerContext overload for AOT.")] + [RequiresDynamicCode("Uses reflection-based JSON for TInput/TOutput. Use the JsonSerializerContext overload for AOT.")] + public static Task WrapAsync( + Func> workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + IAmazonLambda lambdaClient) + => WrapAsyncCore(workflow, invocationInput, lambdaContext, lambdaClient, jsonContext: null); + + /// + /// Wrap a void workflow (typed input, no output). Reflection-based JSON — not AOT-safe. + /// + [RequiresUnreferencedCode("Uses reflection-based JSON for TInput. Use the JsonSerializerContext overload for AOT.")] + [RequiresDynamicCode("Uses reflection-based JSON for TInput. 
Use the JsonSerializerContext overload for AOT.")] + public static Task WrapAsync( + Func workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext) + { + return WrapAsync(workflow, invocationInput, lambdaContext, _cachedLambdaClient.Value); + } + + /// + /// Wrap a void workflow with explicit Lambda client. Reflection-based JSON — not AOT-safe. + /// + [RequiresUnreferencedCode("Uses reflection-based JSON for TInput. Use the JsonSerializerContext overload for AOT.")] + [RequiresDynamicCode("Uses reflection-based JSON for TInput. Use the JsonSerializerContext overload for AOT.")] + public static Task WrapAsync( + Func workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + IAmazonLambda lambdaClient) + => WrapAsyncCore( + async (input, ctx) => { await workflow(input, ctx); return null; }, + invocationInput, lambdaContext, lambdaClient, jsonContext: null); + + // ────────────────────────────────────────────────────────────────────── + // AOT-safe overloads (caller supplies JsonSerializerContext) + // ────────────────────────────────────────────────────────────────────── + + /// + /// Wrap a workflow (typed input + output). AOT-safe — requires + /// [JsonSerializable(typeof(TInput))] and [JsonSerializable(typeof(TOutput))] + /// on the supplied <see cref="JsonSerializerContext"/>. + /// + public static Task WrapAsync( + Func> workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + JsonSerializerContext jsonContext) + { + return WrapAsyncCore(workflow, invocationInput, lambdaContext, _cachedLambdaClient.Value, jsonContext); + } + + /// + /// Wrap a workflow (typed input + output) with explicit Lambda client. AOT-safe. 
+ /// + public static Task WrapAsync( + Func> workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + IAmazonLambda lambdaClient, + JsonSerializerContext jsonContext) + => WrapAsyncCore(workflow, invocationInput, lambdaContext, lambdaClient, jsonContext); + + /// + /// Wrap a void workflow (typed input, no output). AOT-safe. + /// + public static Task WrapAsync( + Func workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + JsonSerializerContext jsonContext) + { + return WrapAsyncCore( + async (input, ctx) => { await workflow(input, ctx); return null; }, + invocationInput, lambdaContext, _cachedLambdaClient.Value, jsonContext); + } + + /// + /// Wrap a void workflow with explicit Lambda client. AOT-safe. + /// + public static Task WrapAsync( + Func workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + IAmazonLambda lambdaClient, + JsonSerializerContext jsonContext) + => WrapAsyncCore( + async (input, ctx) => { await workflow(input, ctx); return null; }, + invocationInput, lambdaContext, lambdaClient, jsonContext); + + // ────────────────────────────────────────────────────────────────────── + // Core implementation + // ────────────────────────────────────────────────────────────────────── + + [UnconditionalSuppressMessage("Trimming", "IL2026", + Justification = "When jsonContext is non-null, dispatch goes through JsonTypeInfo; when null, the caller has [RequiresUnreferencedCode].")] + [UnconditionalSuppressMessage("AOT", "IL3050", + Justification = "When jsonContext is non-null, dispatch goes through JsonTypeInfo; when null, the caller has [RequiresDynamicCode].")] + private static async Task WrapAsyncCore( + Func> workflow, + DurableExecutionInvocationInput invocationInput, + ILambdaContext lambdaContext, + IAmazonLambda lambdaClient, + JsonSerializerContext? 
jsonContext) + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(invocationInput.InitialExecutionState); + + var serviceClient = new LambdaDurableServiceClient(lambdaClient); + var checkpointToken = invocationInput.CheckpointToken; + + var nextMarker = invocationInput.InitialExecutionState?.NextMarker; + while (!string.IsNullOrEmpty(nextMarker)) + { + var (operations, marker) = await serviceClient.GetExecutionStateAsync( + invocationInput.DurableExecutionArn, checkpointToken, nextMarker); + state.AddOperations(operations); + nextMarker = marker; + } + + var userPayload = ExtractUserPayload(invocationInput, jsonContext); + var terminationManager = new TerminationManager(); + var idGenerator = new OperationIdGenerator(); + + await using var batcher = new CheckpointBatcher( + checkpointToken, + (token, ops, ct) => serviceClient.CheckpointAsync( + invocationInput.DurableExecutionArn, token, ops, ct)); + + var context = new DurableContext( + state, terminationManager, idGenerator, + invocationInput.DurableExecutionArn, lambdaContext, batcher); + + HandlerResult result; + try + { + result = await DurableExecutionHandler.RunAsync( + state, terminationManager, + async () => await workflow(userPayload, context)); + + await batcher.DrainAsync(); + } + catch (AmazonServiceException ex) when (IsTerminalCheckpointError(ex)) + { + return new DurableExecutionInvocationOutput + { + Status = InvocationStatus.Failed, + Error = ErrorObject.FromException(ex) + }; + } + + return MapToOutput(result, jsonContext); + } + + /// + /// Returns true for checkpoint-flush SDK errors that should fail the workflow + /// (Failed envelope) instead of escaping to the host (Lambda retry). + /// + /// + /// Classification rule (mirrors CheckpointError in aws-durable-execution-sdk-python): + /// - 4xx (except 429) → terminal: permanent caller-side failure (missing ARN/KMS key, + /// IAM denial, validation). Retrying will not fix it, so return Failed. 
+ /// - 429 / 5xx / no status (network or SDK-internal) → not terminal: transient, + /// allow the exception to escape so Lambda retries the invocation. + /// - Carve-out: InvalidParameterValueException with a message starting with + /// "Invalid Checkpoint Token" is treated as transient — the service rejects a + /// stale token but a retry with a fresh token will succeed. + /// + /// Only checkpoint-flush errors flow through this catch. There are two paths: + /// 1. A flush triggered synchronously from inside a user StepAsync call + /// (the user awaits EnqueueAsync → batch flush → SDK throws). + /// 2. The final <c>batcher.DrainAsync()</c> after the workflow returns. + /// + /// State-hydration errors (GetExecutionStateAsync) are NOT caught here — they + /// propagate to the host so Lambda retries, matching Python's GetExecutionStateError + /// (which extends InvocationError). + /// + /// User-code SDK errors (e.g. an SDK call inside a Step body) are caught by + /// StepRunner and surfaced as StepException for the workflow's normal + /// step-failure handling. + /// + private static bool IsTerminalCheckpointError(AmazonServiceException ex) + { + var status = (int)ex.StatusCode; + if (status < 400 || status >= 500 || status == 429) + return false; + + if (ex.ErrorCode == "InvalidParameterValueException" + && ex.Message != null + && ex.Message.StartsWith("Invalid Checkpoint Token", StringComparison.Ordinal)) + { + return false; + } + + return true; + } + + // Shared options for both user-payload deserialization (input) and user-result + // serialization (output) so the naming policy stays symmetric. We only enable + // case-insensitive matching here — keep PascalCase on the wire for output to + // preserve compatibility with existing serialized contracts. Only the user payload + // portion uses these options; the durable-execution envelope itself + // (DurableExecutionInvocationInput/Output) is serialized separately and is not + // affected. 
+ private static readonly JsonSerializerOptions UserPayloadOptions = new() + { + PropertyNameCaseInsensitive = true + }; + + [UnconditionalSuppressMessage("Trimming", "IL2026", Justification = "Guarded by jsonContext null check.")] + [UnconditionalSuppressMessage("AOT", "IL3050", Justification = "Guarded by jsonContext null check.")] + // The user's input payload is stored inside the service envelope as an EXECUTION-type + // operation. This is part of the durable execution wire format — each invocation includes + // its input as a checkpoint record so the service can validate replay consistency. + private static TInput ExtractUserPayload( + DurableExecutionInvocationInput input, + JsonSerializerContext? jsonContext) + { + if (input.InitialExecutionState?.Operations == null) + return default!; + + foreach (var op in input.InitialExecutionState.Operations) + { + if (op.Type != OperationTypes.Execution || op.ExecutionDetails?.InputPayload == null) + continue; + + var payload = op.ExecutionDetails.InputPayload; + if (jsonContext != null) + { + if (jsonContext.GetTypeInfo(typeof(TInput)) is JsonTypeInfo typeInfo) + return JsonSerializer.Deserialize(payload, typeInfo) ?? default!; + + throw new InvalidOperationException( + $"JsonSerializerContext {jsonContext.GetType().FullName} has no JsonTypeInfo for {typeof(TInput).FullName}. " + + "Add [JsonSerializable(typeof(YourInput))] to your context."); + } + + return JsonSerializer.Deserialize(payload, UserPayloadOptions) ?? default!; + } + + return default!; + } + + [UnconditionalSuppressMessage("Trimming", "IL2026", Justification = "Guarded by jsonContext null check.")] + [UnconditionalSuppressMessage("AOT", "IL3050", Justification = "Guarded by jsonContext null check.")] + private static DurableExecutionInvocationOutput MapToOutput( + HandlerResult result, + JsonSerializerContext? 
jsonContext) + { + return result.Status switch + { + InvocationStatus.Succeeded => new DurableExecutionInvocationOutput + { + Status = InvocationStatus.Succeeded, + Result = SerializeOutput(result.Result, jsonContext) + }, + InvocationStatus.Failed => new DurableExecutionInvocationOutput + { + Status = InvocationStatus.Failed, + Error = result.Exception != null + ? ErrorObject.FromException(result.Exception) + : new ErrorObject { ErrorMessage = result.Message } + }, + // Pending = workflow suspended (wait/retry/callback). No Result or Error — + // the service will re-invoke with accumulated checkpoints when ready. + InvocationStatus.Pending => new DurableExecutionInvocationOutput + { + Status = InvocationStatus.Pending + }, + _ => throw new InvalidOperationException($"Unexpected status: {result.Status}") + }; + } + + [UnconditionalSuppressMessage("Trimming", "IL2026", Justification = "Guarded by jsonContext null check.")] + [UnconditionalSuppressMessage("AOT", "IL3050", Justification = "Guarded by jsonContext null check.")] + private static string? SerializeOutput(TOutput? value, JsonSerializerContext? jsonContext) + { + if (value == null) return null; + + if (jsonContext != null) + { + if (jsonContext.GetTypeInfo(typeof(TOutput)) is JsonTypeInfo typeInfo) + return JsonSerializer.Serialize(value, typeInfo); + + throw new InvalidOperationException( + $"JsonSerializerContext {jsonContext.GetType().FullName} has no JsonTypeInfo for {typeof(TOutput).FullName}. " + + "Add [JsonSerializable(typeof(YourOutput))] to your context."); + } + + return JsonSerializer.Serialize(value, UserPayloadOptions); + } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Enums.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Enums.cs new file mode 100644 index 000000000..c1bf44403 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Enums.cs @@ -0,0 +1,14 @@ +namespace Amazon.Lambda.DurableExecution; + +/// +/// The terminal status of a durable execution invocation. 
+/// +public enum InvocationStatus +{ + /// The workflow completed successfully. + Succeeded, + /// The workflow failed with an unhandled exception. + Failed, + /// The workflow suspended (waiting for time, callback, or invocation). + Pending +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Exceptions/DurableExecutionException.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Exceptions/DurableExecutionException.cs new file mode 100644 index 000000000..0f724b4a2 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Exceptions/DurableExecutionException.cs @@ -0,0 +1,49 @@ +namespace Amazon.Lambda.DurableExecution; + +/// +/// Base exception for all durable execution errors. +/// +public class DurableExecutionException : Exception +{ + /// Creates an empty <see cref="DurableExecutionException"/>. + public DurableExecutionException() { } + /// Creates a <see cref="DurableExecutionException"/> with the given message. + public DurableExecutionException(string message) : base(message) { } + /// Creates a <see cref="DurableExecutionException"/> wrapping an inner exception. + public DurableExecutionException(string message, Exception innerException) : base(message, innerException) { } +} + +/// +/// Thrown when code has changed between invocations, causing a replay mismatch. +/// For example, a step at index 0 was previously a WAIT but is now a STEP. +/// +public class NonDeterministicExecutionException : DurableExecutionException +{ + /// Creates an empty <see cref="NonDeterministicExecutionException"/>. + public NonDeterministicExecutionException() { } + /// Creates a <see cref="NonDeterministicExecutionException"/> with the given message. + public NonDeterministicExecutionException(string message) : base(message) { } + /// Creates a <see cref="NonDeterministicExecutionException"/> wrapping an inner exception. + public NonDeterministicExecutionException(string message, Exception innerException) : base(message, innerException) { } +} + +/// +/// Thrown when user code inside a step fails (after retries exhausted). +/// Contains the original error details from the checkpoint. +/// +public class StepException : DurableExecutionException +{ + /// The fully-qualified type name of the original exception. 
+ public string? ErrorType { get; init; } + /// Optional structured error data attached by the user. + public string? ErrorData { get; init; } + /// Stack trace of the original exception, captured before serialization. + public IReadOnlyList? OriginalStackTrace { get; init; } + + /// Creates an empty <see cref="StepException"/>. + public StepException() { } + /// Creates a <see cref="StepException"/> with the given message. + public StepException(string message) : base(message) { } + /// Creates a <see cref="StepException"/> wrapping an inner exception. + public StepException(string message, Exception innerException) : base(message, innerException) { } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/ICheckpointSerializer.cs b/Libraries/src/Amazon.Lambda.DurableExecution/ICheckpointSerializer.cs new file mode 100644 index 000000000..3d7175b4d --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/ICheckpointSerializer.cs @@ -0,0 +1,25 @@ +namespace Amazon.Lambda.DurableExecution; + +/// +/// Serializes and deserializes checkpoint operation results. +/// +/// The type to serialize. +public interface ICheckpointSerializer +{ + /// + /// Serializes a value for checkpoint storage. + /// + string Serialize(T value, SerializationContext context); + + /// + /// Deserializes a value from checkpoint storage. + /// + T Deserialize(string data, SerializationContext context); +} + +/// +/// Context information available during serialization/deserialization. +/// +/// The deterministic operation ID for this step. +/// The ARN of the current durable execution. 
+public record SerializationContext(string OperationId, string DurableExecutionArn); diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/IDurableContext.cs b/Libraries/src/Amazon.Lambda.DurableExecution/IDurableContext.cs new file mode 100644 index 000000000..ff18d1218 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/IDurableContext.cs @@ -0,0 +1,108 @@ +using System.Diagnostics.CodeAnalysis; +using Amazon.Lambda.Core; +using Microsoft.Extensions.Logging; + +namespace Amazon.Lambda.DurableExecution; + +/// +/// The primary interface for durable execution operations. +/// Passed to user workflow functions to access checkpointed steps and waits. +/// Additional operations (callbacks, parallel, map, etc.) are added in +/// follow-up PRs. +/// +public interface IDurableContext +{ + /// + /// A logger scoped to the durable execution. Currently returns + /// <see cref="Microsoft.Extensions.Logging.Abstractions.NullLogger.Instance"/>; + /// the replay-safe DurableLogger (suppresses messages during replay) + /// ships in a follow-up PR. + /// + ILogger Logger { get; } + + /// + /// Metadata about the current durable execution. + /// + IExecutionContext ExecutionContext { get; } + + /// + /// The underlying Lambda context. + /// + ILambdaContext LambdaContext { get; } + + /// + /// Execute a step with automatic checkpointing. The step result is serialized + /// to a checkpoint using reflection-based System.Text.Json. + /// For NativeAOT or trimmed deployments, use the overload that takes an + /// <see cref="ICheckpointSerializer{T}"/>. + /// + [RequiresUnreferencedCode("Reflection-based JSON for T. Use the ICheckpointSerializer overload for AOT/trimmed deployments.")] + [RequiresDynamicCode("Reflection-based JSON for T. Use the ICheckpointSerializer overload for AOT/trimmed deployments.")] + Task StepAsync( + Func> func, + string? name = null, + StepConfig? config = null, + CancellationToken cancellationToken = default); + + /// + /// Execute a step that returns no value. + /// + Task StepAsync( + Func func, + string? name = null, + StepConfig? 
config = null, + CancellationToken cancellationToken = default); + + /// + /// Execute a step with AOT-safe checkpoint serialization. The supplied + /// is used in place of reflection-based JSON. + /// + Task StepAsync( + Func> func, + ICheckpointSerializer serializer, + string? name = null, + StepConfig? config = null, + CancellationToken cancellationToken = default); + + /// + /// Suspend execution for the specified duration without consuming compute time. + /// The Lambda is suspended and the service re-invokes it after the wait elapses. + /// Duration must be at least 1 second (service timer granularity). + /// + Task WaitAsync( + TimeSpan duration, + string? name = null, + CancellationToken cancellationToken = default); +} + +/// +/// Context passed to step functions. +/// +public interface IStepContext +{ + /// + /// Logger scoped to this step. + /// + ILogger Logger { get; } + + /// + /// The current retry attempt number (1-based). + /// + int AttemptNumber { get; } + + /// + /// The deterministic operation ID for this step. + /// + string OperationId { get; } +} + +/// +/// Metadata about the current execution. +/// +public interface IExecutionContext +{ + /// + /// The ARN of the current durable execution. + /// + string DurableExecutionArn { get; } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/CheckpointBatcher.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/CheckpointBatcher.cs new file mode 100644 index 000000000..8039e7c56 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/CheckpointBatcher.cs @@ -0,0 +1,216 @@ +using System.Runtime.ExceptionServices; +using System.Threading.Channels; +using SdkOperationUpdate = Amazon.Lambda.Model.OperationUpdate; + +namespace Amazon.Lambda.DurableExecution.Internal; + +/// +/// Background batcher for outbound checkpoint updates. Operations are enqueued +/// via ; a single worker drains the queue and flushes +/// each batch via the supplied flushAsync delegate. 
/// Each <c>EnqueueAsync</c>
/// call awaits the flush of its containing batch (sync semantics).
/// </summary>
/// <remarks>
/// TODO: when Map / Parallel / ChildContext / WaitForCondition land — or when
/// AtLeastOncePerRetry step START gets a non-blocking variant — they will need
/// a fire-and-forget overload like
/// <c>Task EnqueueAsync(SdkOperationUpdate update, bool sync)</c> where
/// <c>sync=false</c> returns as soon as the item is queued. Java's
/// <c>sendOperationUpdate</c> vs <c>sendOperationUpdateAsync</c> is the model.
/// Today every call site is sync, so the API stays minimal.
/// </remarks>
internal sealed class CheckpointBatcher : IAsyncDisposable
{
    // Delegate shape reconstructed from its call site in FlushBatchAsync, which passes
    // (current token, array of updates, cancellation token) and stores the awaited
    // result as the new checkpoint token.
    private readonly Func<string?, IReadOnlyList<SdkOperationUpdate>, CancellationToken, Task<string?>> _flushAsync;
    private readonly CheckpointBatcherConfig _config;
    private readonly Channel<BatchItem> _channel;
    private readonly Task _worker;
    private readonly CancellationTokenSource _shutdownCts = new();

    private string? _checkpointToken;
    private Exception? _terminalError;
    private int _disposed;

    /// <summary>
    /// Creates the batcher and immediately starts its background worker.
    /// </summary>
    /// <param name="initialCheckpointToken">Token passed to the first flush.</param>
    /// <param name="flushAsync">Sends one batch and returns the next checkpoint token.</param>
    /// <param name="config">Batching tunables; defaults are used when <c>null</c>.</param>
    public CheckpointBatcher(
        string? initialCheckpointToken,
        Func<string?, IReadOnlyList<SdkOperationUpdate>, CancellationToken, Task<string?>> flushAsync,
        CheckpointBatcherConfig? config = null)
    {
        _checkpointToken = initialCheckpointToken;
        _flushAsync = flushAsync;
        _config = config ?? new CheckpointBatcherConfig();
        _channel = Channel.CreateUnbounded<BatchItem>(new UnboundedChannelOptions
        {
            SingleReader = true,
            SingleWriter = false
        });
        _worker = Task.Run(() => RunWorkerAsync(_shutdownCts.Token));
    }

    /// <summary>
    /// The most recent checkpoint token returned by the service. Updated after
    /// every successful batch flush.
    /// </summary>
    public string? CheckpointToken => Volatile.Read(ref _checkpointToken);

    /// <summary>
    /// Queues <paramref name="update"/> for flushing. The returned Task completes
    /// when the batch containing this update has been successfully flushed to the
    /// service. If the worker has already encountered a terminal error, the
    /// exception is rethrown immediately.
+ /// + public async Task EnqueueAsync(SdkOperationUpdate update, CancellationToken cancellationToken = default) + { + var terminal = Volatile.Read(ref _terminalError); + if (terminal != null) ExceptionDispatchInfo.Throw(terminal); + + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var item = new BatchItem(update, tcs); + + if (!_channel.Writer.TryWrite(item)) + { + // Writer is completed (terminal error or disposed) — surface the cause. + terminal = Volatile.Read(ref _terminalError); + if (terminal != null) ExceptionDispatchInfo.Throw(terminal); + throw new ObjectDisposedException(nameof(CheckpointBatcher)); + } + + await tcs.Task.WaitAsync(cancellationToken).ConfigureAwait(false); + } + + /// + /// Closes the channel and awaits the worker. Any items already enqueued are + /// flushed; any subsequent call throws. + /// + public async Task DrainAsync() + { + _channel.Writer.TryComplete(); + try + { + await _worker.ConfigureAwait(false); + } + catch + { + // Surfaced via _terminalError below. + } + + var terminal = Volatile.Read(ref _terminalError); + if (terminal != null) ExceptionDispatchInfo.Throw(terminal); + } + + public async ValueTask DisposeAsync() + { + if (Interlocked.Exchange(ref _disposed, 1) != 0) return; + + _channel.Writer.TryComplete(); + _shutdownCts.Cancel(); + try { await _worker.ConfigureAwait(false); } + catch { /* swallow on dispose */ } + _shutdownCts.Dispose(); + } + + private async Task RunWorkerAsync(CancellationToken shutdownToken) + { + // TODO: also enforce _config.MaxBatchBytes here. Today we only cap by + // operation count; an item whose serialized size pushes the batch over + // ~750 KB will be sent and rejected service-side. See CheckpointBatcherConfig. + var batch = new List(_config.MaxBatchOperations); + + try + { + while (await _channel.Reader.WaitToReadAsync(shutdownToken).ConfigureAwait(false)) + { + // Drain everything currently queued. 
+ while (_channel.Reader.TryRead(out var item)) + { + batch.Add(item); + if (batch.Count >= _config.MaxBatchOperations) + { + await FlushBatchAsync(batch, shutdownToken).ConfigureAwait(false); + batch.Clear(); + } + } + + // Optionally wait for late arrivals to coalesce into one batch. + if (_config.FlushInterval > TimeSpan.Zero && batch.Count > 0) + { + using var windowCts = CancellationTokenSource.CreateLinkedTokenSource(shutdownToken); + windowCts.CancelAfter(_config.FlushInterval); + try + { + while (await _channel.Reader.WaitToReadAsync(windowCts.Token).ConfigureAwait(false)) + { + while (_channel.Reader.TryRead(out var item)) + { + batch.Add(item); + if (batch.Count >= _config.MaxBatchOperations) + { + await FlushBatchAsync(batch, shutdownToken).ConfigureAwait(false); + batch.Clear(); + } + } + } + } + catch (OperationCanceledException) when (!shutdownToken.IsCancellationRequested) + { + // Window elapsed; fall through to flush. + } + } + + if (batch.Count > 0) + { + await FlushBatchAsync(batch, shutdownToken).ConfigureAwait(false); + batch.Clear(); + } + } + } + catch (OperationCanceledException) when (shutdownToken.IsCancellationRequested) + { + // Disposed mid-wait; fall through to drain. + } + catch (Exception ex) + { + // FlushBatchAsync's exception path already records _terminalError and + // signals batch members. This catch covers anything else (channel, + // logic). Make sure we still propagate. + Volatile.Write(ref _terminalError, ex); + } + finally + { + // Anything left in the channel after the worker exits — fail it. + var failure = Volatile.Read(ref _terminalError) ?? 
new ObjectDisposedException(nameof(CheckpointBatcher)); + foreach (var leftover in batch) + leftover.Completion.TrySetException(failure); + while (_channel.Reader.TryRead(out var item)) + item.Completion.TrySetException(failure); + + _channel.Writer.TryComplete(); + } + } + + private async Task FlushBatchAsync(IReadOnlyList batch, CancellationToken cancellationToken) + { + var updates = new SdkOperationUpdate[batch.Count]; + for (int i = 0; i < batch.Count; i++) + updates[i] = batch[i].Update; + + try + { + var newToken = await _flushAsync(_checkpointToken, updates, cancellationToken).ConfigureAwait(false); + Volatile.Write(ref _checkpointToken, newToken); + foreach (var item in batch) + item.Completion.TrySetResult(true); + } + catch (Exception ex) + { + Volatile.Write(ref _terminalError, ex); + foreach (var item in batch) + item.Completion.TrySetException(ex); + _channel.Writer.TryComplete(); + // No rethrow: the worker loop exits via the completed channel and + // RunWorkerAsync's finally handles any leftovers. + } + } + + private readonly record struct BatchItem(SdkOperationUpdate Update, TaskCompletionSource Completion); +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/CheckpointBatcherConfig.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/CheckpointBatcherConfig.cs new file mode 100644 index 000000000..a5e60b98e --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/CheckpointBatcherConfig.cs @@ -0,0 +1,35 @@ +namespace Amazon.Lambda.DurableExecution.Internal; + +/// +/// Tunables for . +/// +internal sealed class CheckpointBatcherConfig +{ + /// + /// How long the worker waits for additional items to coalesce into a single + /// batch before flushing. Default = flush as soon + /// as the queue drains. Increase to reduce API calls when many checkpoints + /// are emitted concurrently (e.g. parallel branches, future Map operation). 
+ /// + public TimeSpan FlushInterval { get; init; } = TimeSpan.Zero; + + /// + /// Maximum operations per batch. Service-side limit is 200. + /// + public int MaxBatchOperations { get; init; } = 200; + + /// + /// Maximum batch size in bytes. Service-side limit is ~750 KB. + /// + /// + /// TODO: not enforced today. The worker only checks ; + /// a single oversized item (or a batch whose serialized size exceeds 750 KB) + /// will be sent to the service and rejected there. Java/JS/Python all + /// pre-flight this on the in-flight batch and split before the next add. + /// Wire this in alongside the async-flush operations (Map / Parallel / + /// child-context) since those are the scenarios that can actually fill a + /// batch — today every batch is 1 item with + /// = Zero, so the gap is latent. + /// + internal int MaxBatchBytes { get; init; } = 750 * 1024; +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/DurableOperation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/DurableOperation.cs new file mode 100644 index 000000000..e7734abf9 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/DurableOperation.cs @@ -0,0 +1,69 @@ +using SdkOperationUpdate = Amazon.Lambda.Model.OperationUpdate; + +namespace Amazon.Lambda.DurableExecution.Internal; + +/// +/// Abstract base for durable operations (Step, Wait, ...). Subclasses implement +/// (no prior checkpoint) and +/// (some checkpoint exists); the base handles lookup and dispatch. +/// +/// The operation's result type. +internal abstract class DurableOperation +{ + protected readonly ExecutionState State; + protected readonly TerminationManager Termination; + protected readonly string OperationId; + protected readonly string? Name; + protected readonly string DurableExecutionArn; + protected readonly CheckpointBatcher? Batcher; + + protected DurableOperation( + string operationId, + string? 
name, + ExecutionState state, + TerminationManager termination, + string durableExecutionArn, + CheckpointBatcher? batcher = null) + { + OperationId = operationId; + Name = name; + State = state; + Termination = termination; + DurableExecutionArn = durableExecutionArn; + Batcher = batcher; + } + + /// The wire-format operation type (e.g. "STEP", "WAIT"). + protected abstract string OperationType { get; } + + /// + /// Looks up any prior checkpoint for this op and dispatches to + /// (none) or (some). + /// + public Task ExecuteAsync(CancellationToken cancellationToken) + { + State.ValidateReplayConsistency(OperationId, OperationType, Name); + + var existing = State.GetOperation(OperationId); + return existing == null + ? StartAsync(cancellationToken) + : ReplayAsync(existing, cancellationToken); + } + + /// First-time execution path: no prior checkpoint exists. + protected abstract Task StartAsync(CancellationToken cancellationToken); + + /// + /// Replay path: a checkpoint from a prior invocation exists. Subclasses + /// switch on . + /// against constants. + /// + protected abstract Task ReplayAsync(Operation existing, CancellationToken cancellationToken); + + /// + /// Enqueues an outbound checkpoint and awaits its batch flush. No-op when + /// no batcher is wired (e.g. unit tests that don't exercise flushing). + /// + protected Task EnqueueAsync(SdkOperationUpdate update, CancellationToken cancellationToken = default) + => Batcher?.EnqueueAsync(update, cancellationToken) ?? Task.CompletedTask; +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ExecutionState.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ExecutionState.cs new file mode 100644 index 000000000..5ee690be0 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ExecutionState.cs @@ -0,0 +1,93 @@ +namespace Amazon.Lambda.DurableExecution.Internal; + +/// +/// Replay state of the current invocation. 
/// </summary>
internal enum ExecutionMode
{
    /// <summary>Re-deriving prior operations from checkpointed state.</summary>
    Replay,
    /// <summary>Executing fresh code that hasn't been checkpointed before.</summary>
    Execution
}

/// <summary>
/// In-memory store of the operations replayed from <see cref="InitialExecutionState"/>.
/// Read-only after load (apart from <see cref="AddOperations"/>); outbound
/// checkpoints are owned by <see cref="CheckpointBatcher"/>.
/// </summary>
internal sealed class ExecutionState
{
    // Keyed by Operation.Id (see AddOperations).
    private readonly Dictionary<string, Operation> _operations = new();

    /// <summary>Current mode; starts as <see cref="ExecutionMode.Replay"/>.</summary>
    public ExecutionMode Mode { get; private set; } = ExecutionMode.Replay;

    /// <summary>Number of operations currently held in the store.</summary>
    public int CheckpointedOperationCount => _operations.Count;

    /// <summary>
    /// Loads the operations from <paramref name="initialState"/>. Switches to
    /// <see cref="ExecutionMode.Execution"/> when there is nothing to replay.
    /// </summary>
    public void LoadFromCheckpoint(InitialExecutionState? initialState)
    {
        if (initialState?.Operations == null)
        {
            Mode = ExecutionMode.Execution;
            return;
        }

        AddOperations(initialState.Operations);

        if (_operations.Count == 0)
        {
            Mode = ExecutionMode.Execution;
        }
    }

    /// <summary>
    /// Adds or overwrites operations by ID. Entries without an ID are skipped.
    /// </summary>
    public void AddOperations(IEnumerable<Operation> operations)
    {
        foreach (var op in operations)
        {
            if (op.Id == null) continue;
            _operations[op.Id] = op;
        }
    }

    /// <summary>
    /// Returns the checkpointed record for <paramref name="operationId"/>, or null
    /// if none. Callers should switch on <see cref="Operation.Status"/> against
    /// <see cref="OperationStatuses"/> constants to decide replay behavior.
    /// </summary>
    public Operation? GetOperation(string operationId)
    {
        _operations.TryGetValue(operationId, out var op);
        return op;
    }

    /// <summary>
    /// While in replay mode, verifies that the recorded operation with this ID has the
    /// expected type and (when both sides have one) the expected name.
    /// </summary>
    /// <exception cref="NonDeterministicExecutionException">The recorded type or name differs.</exception>
    public void ValidateReplayConsistency(string operationId, string expectedType, string? expectedName)
    {
        if (Mode != ExecutionMode.Replay) return;

        if (!_operations.TryGetValue(operationId, out var op)) return;

        if (op.Type != null && op.Type != expectedType)
        {
            throw new NonDeterministicExecutionException(
                $"Non-deterministic execution detected for operation '{operationId}': " +
                $"expected type '{expectedType}' but found '{op.Type}' from a previous invocation. " +
                $"Code must not change the order or type of durable operations between deployments.");
        }

        if (expectedName != null && op.Name != null && op.Name != expectedName)
        {
            throw new NonDeterministicExecutionException(
                $"Non-deterministic execution detected for operation '{operationId}': " +
                $"expected name '{expectedName}' but found '{op.Name}' from a previous invocation. " +
                $"Code must not change the order or type of durable operations between deployments.");
        }
    }

    /// <summary>Whether a record with this ID is held in the store.</summary>
    public bool HasOperation(string operationId) => _operations.ContainsKey(operationId);

    /// <summary>
    /// Transitions to <see cref="ExecutionMode.Execution"/>. Called by an operation
    /// that's about to run fresh (not-yet-checkpointed) code. Idempotent.
    /// </summary>
    public void EnterExecutionMode() => Mode = ExecutionMode.Execution;
}

// ---- File: Libraries/src/Amazon.Lambda.DurableExecution/Internal/Operation.cs ----
using System.Text.Json.Serialization;

namespace Amazon.Lambda.DurableExecution.Internal;

/// <summary>
/// One operation in the durable execution service's invocation envelope.
/// Property names mirror the wire format exactly so System.Text.Json can
/// populate this type declaratively. Internal — consumed by ExecutionState
/// and DurableContext during replay; never exposed on a public surface.
/// </summary>
internal sealed class Operation
{
    [JsonPropertyName("Id")]
    public string? Id { get; set; }

    [JsonPropertyName("Type")]
    public string? Type { get; set; }

    [JsonPropertyName("Status")]
    public string? Status { get; set; }

    [JsonPropertyName("Name")]
    public string? Name { get; set; }

    [JsonPropertyName("ParentId")]
    public string? ParentId { get; set; }

    [JsonPropertyName("SubType")]
    public string? SubType { get; set; }

    [JsonPropertyName("StartTimestamp")]
    public long? StartTimestamp { get; set; }

    [JsonPropertyName("EndTimestamp")]
    public long? EndTimestamp { get; set; }

    [JsonPropertyName("StepDetails")]
    public StepDetails? StepDetails { get; set; }

    [JsonPropertyName("WaitDetails")]
    public WaitDetails? WaitDetails { get; set; }

    [JsonPropertyName("ExecutionDetails")]
    public ExecutionDetails? ExecutionDetails { get; set; }

    [JsonPropertyName("CallbackDetails")]
    public CallbackDetails? CallbackDetails { get; set; }

    [JsonPropertyName("ChainedInvokeDetails")]
    public ChainedInvokeDetails? ChainedInvokeDetails { get; set; }

    [JsonPropertyName("ContextDetails")]
    public ContextDetails? ContextDetails { get; set; }
}

/// <summary>Wire-format payload carried under an operation's <c>StepDetails</c> property.</summary>
internal sealed class StepDetails
{
    [JsonPropertyName("Result")]
    public string? Result { get; set; }

    [JsonPropertyName("Error")]
    public ErrorObject? Error { get; set; }

    [JsonPropertyName("Attempt")]
    public int? Attempt { get; set; }

    [JsonPropertyName("NextAttemptTimestamp")]
    public long? NextAttemptTimestamp { get; set; }
}

/// <summary>Wire-format payload carried under an operation's <c>WaitDetails</c> property.</summary>
internal sealed class WaitDetails
{
    [JsonPropertyName("ScheduledEndTimestamp")]
    public long? ScheduledEndTimestamp { get; set; }
}

/// <summary>Wire-format payload carried under an operation's <c>ExecutionDetails</c> property.</summary>
internal sealed class ExecutionDetails
{
    [JsonPropertyName("InputPayload")]
    public string? InputPayload { get; set; }
}

/// <summary>Wire-format payload carried under an operation's <c>CallbackDetails</c> property.</summary>
internal sealed class CallbackDetails
{
    [JsonPropertyName("CallbackId")]
    public string? CallbackId { get; set; }

    [JsonPropertyName("Result")]
    public string? Result { get; set; }

    [JsonPropertyName("Error")]
    public ErrorObject? Error { get; set; }
}

/// <summary>Wire-format payload carried under an operation's <c>ChainedInvokeDetails</c> property.</summary>
internal sealed class ChainedInvokeDetails
{
    [JsonPropertyName("Result")]
    public string? Result { get; set; }

    [JsonPropertyName("Error")]
    public ErrorObject? Error { get; set; }
}

/// <summary>Wire-format payload carried under an operation's <c>ContextDetails</c> property.</summary>
internal sealed class ContextDetails
{
    [JsonPropertyName("Result")]
    public string? Result { get; set; }

    [JsonPropertyName("Error")]
    public ErrorObject? Error { get; set; }
}

/// <summary>
/// Wire-format <see cref="Operation.Type"/> string constants.
/// Plural name avoids collision with <c>Amazon.Lambda.OperationType</c>.
/// </summary>
internal static class OperationTypes
{
    public const string Step = "STEP";
    public const string Wait = "WAIT";
    public const string Callback = "CALLBACK";
    public const string ChainedInvoke = "CHAINED_INVOKE";
    public const string Context = "CONTEXT";
    public const string Execution = "EXECUTION";
}

/// <summary>
/// Wire-format <see cref="Operation.Status"/> string constants.
/// Plural name avoids collision with <c>Amazon.Lambda.OperationStatus</c>.
/// </summary>
internal static class OperationStatuses
{
    public const string Started = "STARTED";
    public const string Succeeded = "SUCCEEDED";
    public const string Failed = "FAILED";
    public const string Pending = "PENDING";
    public const string Cancelled = "CANCELLED";
    public const string Ready = "READY";
    public const string Stopped = "STOPPED";
}

// ---- File: Libraries/src/Amazon.Lambda.DurableExecution/Internal/OperationIdGenerator.cs ----
using System.Security.Cryptography;
using System.Text;

namespace Amazon.Lambda.DurableExecution.Internal;

/// <summary>
/// Generates deterministic operation IDs for durable operations. Each call
/// increments an internal counter and SHA-256 hashes <c>"&lt;parentId&gt;-&lt;counter&gt;"</c>
/// (or just <c>"&lt;counter&gt;"</c> at the root). Hashing matches the wire format
/// used by the Java/JS/Python SDKs so the same workflow position produces a
/// stable, opaque ID across replays — and the human-readable step name is
/// carried separately on <c>OperationUpdate.Name</c>, so renaming a step does
/// not break replay correlation.
/// </summary>
internal sealed class OperationIdGenerator
{
    private int _counter;
    private readonly string _prefix;

    /// <summary>
    /// Creates a root-level generator.
    /// </summary>
    public OperationIdGenerator()
        : this(parentId: null)
    {
    }

    /// <summary>
    /// Creates a child generator scoped under a parent operation. The parent
    /// ID (already hashed) becomes part of the prefix, so child IDs are
    /// <c>hash("&lt;parentHash&gt;-1")</c>, <c>hash("&lt;parentHash&gt;-2")</c>, etc.
    /// </summary>
    /// <param name="parentId">The parent operation ID, or <c>null</c> for a root generator.</param>
    public OperationIdGenerator(string? parentId)
    {
        _counter = 0;
        ParentId = parentId;
        _prefix = parentId != null ? parentId + "-" : string.Empty;
    }

    /// <summary>
    /// Gets the parent operation ID, if any.
    /// </summary>
    public string? ParentId { get; }

    /// <summary>
    /// Generates the next operation ID. The counter is pre-incremented so the
    /// first ID is <c>hash("1")</c>, matching the reference SDKs.
    /// </summary>
    /// <returns>The hashed ID for the next counter value.</returns>
    public string NextId()
    {
        var counter = ++_counter;
        // Invariant culture so the digits hashed do not depend on the current culture.
        return HashOperationId(_prefix + counter.ToString(System.Globalization.CultureInfo.InvariantCulture));
    }

    /// <summary>
    /// SHA-256 hashes <paramref name="rawId"/> and returns a 64-char lowercase
    /// hex digest. Public so tests and child-context construction can reproduce
    /// the same hashing logic.
    /// </summary>
    /// <param name="rawId">The un-hashed ID; it is UTF-8 encoded before hashing.</param>
    public static string HashOperationId(string rawId)
    {
        var bytes = Encoding.UTF8.GetBytes(rawId);
        // 32 bytes = size of a SHA-256 digest.
        Span<byte> hash = stackalloc byte[32];
#if NET8_0_OR_GREATER
        SHA256.HashData(bytes, hash);
#else
        using var sha = SHA256.Create();
        var computed = sha.ComputeHash(bytes);
        computed.CopyTo(hash);
#endif
        return ToHex(hash);
    }

    /// <summary>Formats <paramref name="bytes"/> as lowercase hex, two characters per byte.</summary>
    private static string ToHex(ReadOnlySpan<byte> bytes)
    {
        const string Hex = "0123456789abcdef";
        var chars = new char[bytes.Length * 2];
        for (int i = 0; i < bytes.Length; i++)
        {
            chars[i * 2] = Hex[bytes[i] >> 4];
            chars[i * 2 + 1] = Hex[bytes[i] & 0xF];
        }
        return new string(chars);
    }

    /// <summary>
    /// Creates a child generator scoped under an operation ID from this generator.
+ /// + public OperationIdGenerator CreateChild(string operationId) + { + return new OperationIdGenerator(operationId); + } + + /// + /// Resets the counter (used for testing only). + /// + internal void Reset() + { + _counter = 0; + } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ReflectionJsonCheckpointSerializer.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ReflectionJsonCheckpointSerializer.cs new file mode 100644 index 000000000..f7a3d0572 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ReflectionJsonCheckpointSerializer.cs @@ -0,0 +1,36 @@ +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; + +namespace Amazon.Lambda.DurableExecution.Internal; + +/// +/// Default backed by reflection-based +/// . Constructed only by the reflection-overload +/// path of DurableContext.StepAsync; the constructor carries +/// so AOT/trimmed deployments +/// see the warning at the call site that picks this overload. +/// +internal sealed class ReflectionJsonCheckpointSerializer : ICheckpointSerializer +{ + [RequiresUnreferencedCode("Uses reflection-based JsonSerializer; not AOT-safe.")] + [RequiresDynamicCode("Uses reflection-based JsonSerializer; not AOT-safe.")] + public ReflectionJsonCheckpointSerializer() { } + + [UnconditionalSuppressMessage("Trimming", "IL2026", + Justification = "Reflection-based JsonSerializer call is acknowledged on the constructor.")] + [UnconditionalSuppressMessage("AOT", "IL3050", + Justification = "Reflection-based JsonSerializer call is acknowledged on the constructor.")] + public string Serialize(T value, SerializationContext context) + { + return JsonSerializer.Serialize(value); + } + + [UnconditionalSuppressMessage("Trimming", "IL2026", + Justification = "Reflection-based JsonSerializer call is acknowledged on the constructor.")] + [UnconditionalSuppressMessage("AOT", "IL3050", + Justification = "Reflection-based JsonSerializer call is acknowledged on the constructor.")] 
+ public T Deserialize(string data, SerializationContext context) + { + return JsonSerializer.Deserialize(data)!; + } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs new file mode 100644 index 000000000..d5084229b --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs @@ -0,0 +1,164 @@ +using Microsoft.Extensions.Logging; +using SdkErrorObject = Amazon.Lambda.Model.ErrorObject; +using SdkOperationUpdate = Amazon.Lambda.Model.OperationUpdate; + +namespace Amazon.Lambda.DurableExecution.Internal; + +/// +/// Durable step operation. Runs the user's function once across the lifetime +/// of a durable execution, persisting its result so subsequent invocations +/// replay the cached value without re-executing. +/// +/// +/// Replay semantics — example: await ctx.StepAsync(ChargeCard, "charge") +/// +/// Fresh: no prior state → run func → emit SUCCEED → return result. +/// Replay (SUCCEEDED): return cached result; func is NOT re-executed. +/// Replay (FAILED): re-throw the recorded exception. +/// +/// Serialization is delegated to the supplied ; +/// the AOT-safe overloads of IDurableContext.StepAsync wire in a +/// user-supplied serializer, while the reflection overloads inject +/// . +/// +internal sealed class StepOperation : DurableOperation +{ + private readonly Func> _func; + private readonly StepConfig? _config; + private readonly ICheckpointSerializer _serializer; + private readonly ILogger _logger; + + public StepOperation( + string operationId, + string? name, + Func> func, + StepConfig? config, + ICheckpointSerializer serializer, + ILogger logger, + ExecutionState state, + TerminationManager termination, + string durableExecutionArn, + CheckpointBatcher? 
batcher = null) + : base(operationId, name, state, termination, durableExecutionArn, batcher) + { + _func = func; + _config = config; + _serializer = serializer; + _logger = logger; + } + + protected override string OperationType => OperationTypes.Step; + + protected override Task StartAsync(CancellationToken cancellationToken) + { + State.EnterExecutionMode(); + return ExecuteFunc(cancellationToken); + } + + protected override Task ReplayAsync(Operation existing, CancellationToken cancellationToken) + { + switch (existing.Status) + { + case OperationStatuses.Succeeded: + // Side-effecting code runs at most once: replay returns the + // cached result without invoking func. + return Task.FromResult(DeserializeResult(existing.StepDetails?.Result)); + + case OperationStatuses.Failed: + // Retries were exhausted or never configured — re-throw so the + // user's catch-block flow matches the original execution. + throw CreateStepException(existing); + + default: + // STARTED/READY/PENDING from a prior invocation — no retry logic + // in this commit, so fall through and execute fresh. (Future work + // on retries will replace this default with explicit arms.) + State.EnterExecutionMode(); + return ExecuteFunc(cancellationToken); + } + } + + private async Task ExecuteFunc(CancellationToken cancellationToken) + { + cancellationToken.ThrowIfCancellationRequested(); + + // TODO: emit a STEP_STARTED checkpoint (action = "START") here when retries + // and/or AtMostOncePerRetry semantics land. AtMostOncePerRetry needs the + // START to be sync-flushed before user code runs (so replay can detect + // "we already attempted this and must not re-run"). AtLeastOncePerRetry + // wants it fire-and-forget for telemetry (attempt timing, retry count in + // history). Both require the async-flush overload in CheckpointBatcher + // (see TODO in CheckpointBatcher.cs). 
Today neither feature is wired up, + // so the START is intentionally omitted — SUCCEED alone is sufficient + // for replay correctness in the AtLeastOncePerRetry-only world this PR + // ships. Java SDK precedent: StepOperation.checkpointStarted(). + try + { + var stepContext = new StepContext(OperationId, attemptNumber: 1, _logger); + var result = await _func(stepContext); + + await EnqueueAsync(new SdkOperationUpdate + { + Id = OperationId, + Type = OperationTypes.Step, + Action = "SUCCEED", + SubType = "Step", + Name = Name, + Payload = SerializeResult(result) + }, cancellationToken); + + return result; + } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + throw; + } + catch (Exception ex) + { + // No retry logic in this commit: any thrown exception becomes a + // FAIL checkpoint and is re-thrown as a StepException. On replay, + // the FAILED branch above will re-throw without re-executing. + await EnqueueAsync(new SdkOperationUpdate + { + Id = OperationId, + Type = OperationTypes.Step, + Action = "FAIL", + SubType = "Step", + Name = Name, + Error = ToSdkError(ex) + }, cancellationToken); + + throw new StepException(ex.Message, ex) + { + ErrorType = ex.GetType().FullName + }; + } + } + + private T DeserializeResult(string? serialized) + { + if (serialized == null) return default!; + return _serializer.Deserialize(serialized, new SerializationContext(OperationId, DurableExecutionArn)); + } + + private string SerializeResult(T value) + => _serializer.Serialize(value, new SerializationContext(OperationId, DurableExecutionArn)); + + private static StepException CreateStepException(Operation failedOp) + { + var err = failedOp.StepDetails?.Error; + return new StepException(err?.ErrorMessage ?? 
"Step failed") + { + ErrorType = err?.ErrorType, + ErrorData = err?.ErrorData, + OriginalStackTrace = err?.StackTrace + }; + } + + private static SdkErrorObject ToSdkError(Exception ex) => new() + { + ErrorType = ex.GetType().FullName, + ErrorMessage = ex.Message, + StackTrace = ex.StackTrace?.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries).ToList() + }; +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/TerminationManager.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/TerminationManager.cs new file mode 100644 index 000000000..1350c3d70 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/TerminationManager.cs @@ -0,0 +1,77 @@ +namespace Amazon.Lambda.DurableExecution.Internal; + +/// +/// The reason the execution was terminated. +/// +internal enum TerminationReason +{ + WaitScheduled, + CallbackPending, + InvokePending, + CheckpointFailed +} + +/// +/// The result of a termination signal. +/// +internal sealed class TerminationResult +{ + public required TerminationReason Reason { get; init; } + public string? Message { get; init; } + public Exception? Exception { get; init; } +} + +/// +/// Manages the suspension signal for durable execution. +/// Uses a TaskCompletionSource that resolves when the function should suspend. +/// Only the first Terminate() call wins; subsequent calls are ignored. +/// +internal sealed class TerminationManager +{ + private readonly TaskCompletionSource _tcs = new(TaskCreationOptions.RunContinuationsAsynchronously); + private int _terminated; + + /// + /// A Task that resolves when Terminate() is called. Used in Task.WhenAny + /// to race against user code. + /// + public Task TerminationTask => _tcs.Task; + + /// + /// Whether Terminate() has been called. + /// + public bool IsTerminated => Volatile.Read(ref _terminated) == 1; + + /// + /// Signals that the execution should suspend. Thread-safe; only the first + /// call has effect. 
+ /// + /// true if this call triggered termination, false if already terminated. + public bool Terminate(TerminationReason reason, string? message = null, Exception? exception = null) + { + if (Interlocked.CompareExchange(ref _terminated, 1, 0) != 0) + return false; + + _tcs.TrySetResult(new TerminationResult + { + Reason = reason, + Message = message, + Exception = exception + }); + + return true; + } + + /// + /// Trips the termination signal and returns a Task that never completes. + /// This is the standard suspension idiom: the caller awaits the returned + /// Task, and 's Task.WhenAny + /// race picks up instead, returning Pending + /// to the service. The returned Task is abandoned and GC'd. + /// + public Task SuspendAndAwait(TerminationReason reason, string? message = null, Exception? exception = null) + { + Terminate(reason, message, exception); + return new TaskCompletionSource().Task; + } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/UpperSnakeCaseEnumConverter.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/UpperSnakeCaseEnumConverter.cs new file mode 100644 index 000000000..9610ca5f4 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/UpperSnakeCaseEnumConverter.cs @@ -0,0 +1,64 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace Amazon.Lambda.DurableExecution; + +/// +/// Converts between UPPER_SNAKE_CASE wire format (e.g., CHAINED_INVOKE) +/// and PascalCase enum values (e.g., ChainedInvoke). 
+/// Casing uses invariant-culture rules, so the wire format does not depend on
+/// the host's current culture.
+public sealed class UpperSnakeCaseEnumConverter<T> : JsonConverter<T> where T : struct, Enum
+{
+    /// <summary>
+    /// Reads an UPPER_SNAKE_CASE string and returns the matching member of
+    /// <typeparamref name="T"/>; a null token or null string yields <c>default</c>.
+    /// </summary>
+    /// <exception cref="JsonException">The string matches no member.</exception>
+    public override T Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
+    {
+        if (reader.TokenType == JsonTokenType.Null)
+            return default;
+
+        var value = reader.GetString();
+        if (value == null)
+            return default;
+
+        // The lookup ignores case, so removing the underscores is enough to make
+        // CHAINED_INVOKE match ChainedInvoke. Re-casing the segments with
+        // ToLower()/ToUpper() would be culture-sensitive (tr-TR maps "I" to "ı").
+        var compact = value.Replace("_", string.Empty);
+        if (Enum.TryParse<T>(compact, ignoreCase: true, out var result))
+            return result;
+
+        // Fallback: try direct case-insensitive parse of the raw value
+        if (Enum.TryParse<T>(value, ignoreCase: true, out result))
+            return result;
+
+        throw new JsonException($"Unable to parse '{value}' as {typeof(T).Name}.");
+    }
+
+    /// <summary>
+    /// Writes the member name of <paramref name="value"/> as an UPPER_SNAKE_CASE string.
+    /// </summary>
+    public override void Write(Utf8JsonWriter writer, T value, JsonSerializerOptions options)
+    {
+        writer.WriteStringValue(PascalToSnake(value.ToString()));
+    }
+
+    // Upper-cases every character and inserts '_' before each upper-case character
+    // except the first (ChainedInvoke -> CHAINED_INVOKE). Consecutive capitals each
+    // get their own separator.
+    private static string PascalToSnake(string pascal)
+    {
+        var result = new System.Text.StringBuilder();
+        for (int i = 0; i < pascal.Length; i++)
+        {
+            if (i > 0 && char.IsUpper(pascal[i]))
+                result.Append('_');
+            // Invariant casing: char.ToUpper follows the current culture.
+            result.Append(char.ToUpperInvariant(pascal[i]));
+        }
+        return result.ToString();
+    }
+}
diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitOperation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitOperation.cs
new file mode 100644
index 000000000..4fb069bf3
--- /dev/null
+++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitOperation.cs
@@ -0,0 +1,93 @@
+using SdkOperationUpdate = Amazon.Lambda.Model.OperationUpdate;
+using SdkWaitOptions = Amazon.Lambda.Model.WaitOptions;
+
+namespace Amazon.Lambda.DurableExecution.Internal;
+
+///
+/// Durable wait operation. Suspends the workflow for a given duration without +/// consuming compute time; the service schedules a timer and re-invokes Lambda +/// when it fires. +/// +/// +/// Replay semantics — example: await ctx.WaitAsync(TimeSpan.FromHours(1)) +/// +/// Fresh: emit WAIT START → flush → suspend → service schedules timer. +/// Replay (SUCCEEDED): timer fired, return CompletedTask. +/// Replay (STARTED/PENDING): timer still ticking → re-suspend (or +/// short-circuit if the deadline already elapsed but SUCCEEDED hasn't +/// been stamped yet). +/// +/// See for the +/// suspension mechanics (Task.WhenAny race against TerminationManager). +/// +internal sealed class WaitOperation : DurableOperation +{ + private readonly int _waitSeconds; + + public WaitOperation( + string operationId, + string? name, + int waitSeconds, + ExecutionState state, + TerminationManager termination, + string durableExecutionArn, + CheckpointBatcher? batcher = null) + : base(operationId, name, state, termination, durableExecutionArn, batcher) + { + _waitSeconds = waitSeconds; + } + + protected override string OperationType => OperationTypes.Wait; + + protected override async Task StartAsync(CancellationToken cancellationToken) + { + State.EnterExecutionMode(); + + // Sync-flush WAIT START before suspending — the service can't schedule + // a timer for a checkpoint it hasn't received. + await EnqueueAsync(new SdkOperationUpdate + { + Id = OperationId, + Type = OperationTypes.Wait, + Action = "START", + SubType = "Wait", + Name = Name, + WaitOptions = new SdkWaitOptions { WaitSeconds = _waitSeconds } + }, cancellationToken); + + return await Termination.SuspendAndAwait( + TerminationReason.WaitScheduled, $"wait:{Name ?? 
OperationId}"); + } + + protected override Task ReplayAsync(Operation existing, CancellationToken cancellationToken) + { + switch (existing.Status) + { + case OperationStatuses.Succeeded: + // Common post-timer case: service stamped the wait as SUCCEEDED + // and re-invoked Lambda. Workflow proceeds to the next step. + return Task.FromResult(null); + + case OperationStatuses.Started: + case OperationStatuses.Pending: + // Service hasn't marked the wait complete yet. Either the timer + // is still ticking, or the deadline elapsed but SUCCEEDED hasn't + // been stamped yet — treat elapsed deadlines as "done" to avoid + // a pointless extra round-trip. + var expiresAtMs = existing.WaitDetails?.ScheduledEndTimestamp; + if (expiresAtMs is { } ts && DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() >= ts) + { + return Task.FromResult(null); + } + + // Timer still ticking — re-suspend without re-checkpointing. + // The original WAIT START is still authoritative. + return Termination.SuspendAndAwait( + TerminationReason.WaitScheduled, $"wait:{Name ?? OperationId}"); + + default: + throw new NonDeterministicExecutionException( + $"Wait operation '{Name ?? OperationId}' has unexpected status '{existing.Status}' on replay."); + } + } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Models/DurableExecutionInvocationInput.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Models/DurableExecutionInvocationInput.cs new file mode 100644 index 000000000..35bc32ecd --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Models/DurableExecutionInvocationInput.cs @@ -0,0 +1,53 @@ +using System.Text.Json.Serialization; +using Amazon.Lambda.DurableExecution.Internal; + +namespace Amazon.Lambda.DurableExecution; + +/// +/// The service envelope input for a durable execution invocation. +/// This is what Lambda receives from the durable execution service. 
+/// +public sealed class DurableExecutionInvocationInput +{ + /// + /// The unique ARN identifying this durable execution. + /// + [JsonPropertyName("DurableExecutionArn")] + public required string DurableExecutionArn { get; set; } + + /// + /// Token for optimistic concurrency on checkpoint operations. + /// + [JsonPropertyName("CheckpointToken")] + public string? CheckpointToken { get; set; } + + /// + /// Previously checkpointed operation state for replay. Internal — consumed + /// only by DurableFunction.WrapAsync for replay correlation; user code + /// should never read or modify this. Marked + /// so System.Text.Json populates it during deserialization despite being internal + /// (framework needs it, but it's not part of the public API contract). + /// + [JsonPropertyName("InitialExecutionState")] + [JsonInclude] + internal InitialExecutionState? InitialExecutionState { get; set; } +} + +/// +/// The previously checkpointed execution state provided on replay invocations. +/// +internal sealed class InitialExecutionState +{ + /// + /// The list of operations from prior invocations. + /// + [JsonPropertyName("Operations")] + public IReadOnlyList? Operations { get; set; } + + /// + /// If present, indicates that more operations are available. Use this value + /// with GetDurableExecutionState to fetch the next page. + /// + [JsonPropertyName("NextMarker")] + public string? NextMarker { get; set; } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Models/DurableExecutionInvocationOutput.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Models/DurableExecutionInvocationOutput.cs new file mode 100644 index 000000000..602f0b245 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Models/DurableExecutionInvocationOutput.cs @@ -0,0 +1,29 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace Amazon.Lambda.DurableExecution; + +/// +/// The service envelope output returned by a durable execution invocation. 
+/// +public sealed class DurableExecutionInvocationOutput +{ + /// + /// The terminal status of this invocation. + /// + [JsonPropertyName("Status")] + [JsonConverter(typeof(UpperSnakeCaseEnumConverter))] + public required InvocationStatus Status { get; set; } + + /// + /// The serialized result (only present when Status is Succeeded). + /// + [JsonPropertyName("Result")] + public string? Result { get; set; } + + /// + /// Error details (only present when Status is Failed). + /// + [JsonPropertyName("Error")] + public ErrorObject? Error { get; set; } +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Models/ErrorObject.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Models/ErrorObject.cs new file mode 100644 index 000000000..20acac47f --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Models/ErrorObject.cs @@ -0,0 +1,46 @@ +using System.Text.Json.Serialization; + +namespace Amazon.Lambda.DurableExecution; + +/// +/// Serializable error representation stored in checkpoint state. +/// +public sealed class ErrorObject +{ + /// + /// The fully-qualified exception type name. + /// + [JsonPropertyName("ErrorType")] + public string? ErrorType { get; set; } + + /// + /// The exception message. + /// + [JsonPropertyName("ErrorMessage")] + public string? ErrorMessage { get; set; } + + /// + /// Stack trace frames. + /// + [JsonPropertyName("StackTrace")] + public IReadOnlyList? StackTrace { get; set; } + + /// + /// Additional serialized error data. + /// + [JsonPropertyName("ErrorData")] + public string? ErrorData { get; set; } + + /// + /// Creates an ErrorObject from an exception. 
+    ///
+    public static ErrorObject FromException(Exception exception)
+    {
+        return new ErrorObject
+        {
+            ErrorType = exception.GetType().FullName,
+            ErrorMessage = exception.Message,
+            StackTrace = exception.StackTrace?.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
+        };
+    }
+}
diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Services/LambdaDurableServiceClient.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Services/LambdaDurableServiceClient.cs
new file mode 100644
index 000000000..709341760
--- /dev/null
+++ b/Libraries/src/Amazon.Lambda.DurableExecution/Services/LambdaDurableServiceClient.cs
@@ -0,0 +1,130 @@
+using Amazon.Lambda.DurableExecution.Internal;
+using Amazon.Lambda.Model;
+using SdkOperationUpdate = Amazon.Lambda.Model.OperationUpdate;
+using SdkOperation = Amazon.Lambda.Model.Operation;
+
+namespace Amazon.Lambda.DurableExecution.Services;
+
+/// <summary>
+/// Calls the real AWS Lambda Durable Execution APIs via the AWSSDK.Lambda client.
+/// </summary>
+internal sealed class LambdaDurableServiceClient
+{
+    private readonly IAmazonLambda _lambdaClient;
+
+    public LambdaDurableServiceClient(IAmazonLambda lambdaClient)
+    {
+        _lambdaClient = lambdaClient;
+    }
+
+    /// <summary>
+    /// Flushes pending checkpoint operations to the durable execution service.
+    /// </summary>
+    /// <returns>
+    /// The checkpoint token returned by the service, or <paramref name="checkpointToken"/>
+    /// unchanged when there is nothing to flush (no request is sent in that case).
+    /// </returns>
+    public async Task<string?> CheckpointAsync(
+        string durableExecutionArn,
+        string? checkpointToken,
+        IReadOnlyList<SdkOperationUpdate> pendingOperations,
+        CancellationToken cancellationToken = default)
+    {
+        if (pendingOperations.Count == 0)
+            return checkpointToken;
+
+        var request = new CheckpointDurableExecutionRequest
+        {
+            DurableExecutionArn = durableExecutionArn,
+            CheckpointToken = checkpointToken ?? "",
+            // Reuse the caller's list when it already is one; otherwise copy.
+            Updates = pendingOperations is List<SdkOperationUpdate> list ? list : pendingOperations.ToList()
+        };
+
+        var response = await _lambdaClient.CheckpointDurableExecutionAsync(request, cancellationToken);
+        return response.CheckpointToken;
+    }
+
+    /// <summary>
+    /// Fetches additional pages of execution state when the initial state is paginated.
+    /// </summary>
+    /// <returns>The operations on the requested page and the marker for the next page, if any.</returns>
+    public async Task<(List<Internal.Operation> Operations, string? NextMarker)> GetExecutionStateAsync(
+        string durableExecutionArn,
+        string? checkpointToken,
+        string marker,
+        CancellationToken cancellationToken = default)
+    {
+        var request = new GetDurableExecutionStateRequest
+        {
+            DurableExecutionArn = durableExecutionArn,
+            CheckpointToken = checkpointToken ?? "",
+            Marker = marker
+        };
+
+        var response = await _lambdaClient.GetDurableExecutionStateAsync(request, cancellationToken);
+
+        var operations = new List<Internal.Operation>();
+        if (response.Operations != null)
+        {
+            foreach (var sdkOp in response.Operations)
+            {
+                operations.Add(MapFromSdkOperation(sdkOp));
+            }
+        }
+
+        return (operations, response.NextMarker);
+    }
+
+    // Copies an SDK operation into the internal model used for replay.
+    private static Internal.Operation MapFromSdkOperation(SdkOperation sdkOp)
+    {
+        return new Internal.Operation
+        {
+            Id = sdkOp.Id,
+            Type = sdkOp.Type,
+            Status = sdkOp.Status,
+            Name = sdkOp.Name,
+            ParentId = sdkOp.ParentId,
+            SubType = sdkOp.SubType,
+            StepDetails = sdkOp.StepDetails != null ? new Internal.StepDetails
+            {
+                Result = sdkOp.StepDetails.Result,
+                Error = sdkOp.StepDetails.Error != null ? new ErrorObject
+                {
+                    ErrorType = sdkOp.StepDetails.Error.ErrorType,
+                    ErrorMessage = sdkOp.StepDetails.Error.ErrorMessage,
+                    // Carry the stack trace and error data across too; without them a
+                    // failed step replayed from a fetched page loses those details.
+                    StackTrace = sdkOp.StepDetails.Error.StackTrace,
+                    ErrorData = sdkOp.StepDetails.Error.ErrorData
+                } : null,
+                Attempt = sdkOp.StepDetails.Attempt,
+                NextAttemptTimestamp = ToUnixMilliseconds(sdkOp.StepDetails.NextAttemptTimestamp)
+            } : null,
+            WaitDetails = sdkOp.WaitDetails != null ? new Internal.WaitDetails
+            {
+                ScheduledEndTimestamp = ToUnixMilliseconds(sdkOp.WaitDetails.ScheduledEndTimestamp)
+            } : null,
+            ExecutionDetails = sdkOp.ExecutionDetails != null ? new Internal.ExecutionDetails
+            {
+                InputPayload = sdkOp.ExecutionDetails.InputPayload
+            } : null
+        };
+    }
+
+    // Converts an SDK timestamp to Unix epoch milliseconds (null stays null).
+    private static long? ToUnixMilliseconds(DateTime? timestamp)
+    {
+        if (timestamp is not { } value)
+            return null;
+
+        // new DateTimeOffset(local, TimeSpan.Zero) throws when the machine's UTC offset
+        // is non-zero, so convert Local values first. Utc and Unspecified values are
+        // interpreted as UTC, as before.
+        var utc = value.Kind == DateTimeKind.Local
+            ? value.ToUniversalTime()
+            : DateTime.SpecifyKind(value, DateTimeKind.Utc);
+        return new DateTimeOffset(utc, TimeSpan.Zero).ToUnixTimeMilliseconds();
+    }
+}
diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Amazon.Lambda.DurableExecution.AotPublishTest.csproj b/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Amazon.Lambda.DurableExecution.AotPublishTest.csproj
new file mode 100644
index 000000000..ec4d0ffd0
--- /dev/null
+++ b/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Amazon.Lambda.DurableExecution.AotPublishTest.csproj
@@ -0,0 +1,24 @@
+ + + + Exe + net8.0 + enable + enable + true + true + full + false + true + IL2026,IL2067,IL2075,IL3050 + false + + + + + + + + + +
diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Program.cs b/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Program.cs
new file mode 100644
index 000000000..af84aca8c
--- /dev/null
+++ b/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Program.cs
@@ -0,0 +1,81 @@
+using System.Text.Json.Serialization;
+using Amazon.Lambda.Core;
+using Amazon.Lambda.DurableExecution;
+using Amazon.Lambda.RuntimeSupport;
+using Amazon.Lambda.Serialization.SystemTextJson;
+
+namespace Amazon.Lambda.DurableExecution.AotPublishTest;
+
+///
+/// AOT publish smoke check. This program must publish under NativeAOT with
+/// zero IL2026/IL3050 warnings (promoted to errors by the csproj). It uses
+/// the JsonSerializerContext overload of WrapAsync.
+/// +public class Program +{ + public static async Task Main() + { + var serializer = new SourceGeneratorLambdaJsonSerializer(); + Func> handler = HandlerAsync; + await LambdaBootstrapBuilder + .Create(handler, serializer) + .Build() + .RunAsync(); + } + + public static Task HandlerAsync( + DurableExecutionInvocationInput input, ILambdaContext context) => + DurableFunction.WrapAsync( + WorkflowAsync, input, context, AotJsonContext.Default); + + private static async Task WorkflowAsync(OrderEvent input, IDurableContext context) + { + var validation = await context.StepAsync( + async (_) => + { + await Task.CompletedTask; + return new ValidationResult { IsValid = true }; + }, + new ValidationResultSerializer(), + name: "validate"); + + await context.WaitAsync(TimeSpan.FromSeconds(30), name: "delay"); + + return new OrderResult { Status = validation.IsValid ? "approved" : "rejected", OrderId = input.OrderId }; + } + + private sealed class ValidationResultSerializer : ICheckpointSerializer + { + public string Serialize(ValidationResult value, SerializationContext ctx) => + System.Text.Json.JsonSerializer.Serialize(value, AotJsonContext.Default.ValidationResult); + + public ValidationResult Deserialize(string data, SerializationContext ctx) => + System.Text.Json.JsonSerializer.Deserialize(data, AotJsonContext.Default.ValidationResult) + ?? new ValidationResult(); + } + + public class OrderEvent + { + public string? OrderId { get; set; } + } + + public class OrderResult + { + public string? Status { get; set; } + public string? 
OrderId { get; set; } + } + + public class ValidationResult + { + public bool IsValid { get; set; } + } +} + +[JsonSerializable(typeof(DurableExecutionInvocationInput))] +[JsonSerializable(typeof(DurableExecutionInvocationOutput))] +[JsonSerializable(typeof(Program.OrderEvent))] +[JsonSerializable(typeof(Program.OrderResult))] +[JsonSerializable(typeof(Program.ValidationResult))] +public partial class AotJsonContext : JsonSerializerContext +{ +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/Amazon.Lambda.DurableExecution.IntegrationTests.csproj b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/Amazon.Lambda.DurableExecution.IntegrationTests.csproj new file mode 100644 index 000000000..0ef2e561d --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/Amazon.Lambda.DurableExecution.IntegrationTests.csproj @@ -0,0 +1,43 @@ + + + + + + + $(DefaultPackageTargets) + enable + enable + false + true + $(NoWarn);NU1903;CS1591 + + + + + + + + + + + + + PreserveNewest + + + + + + + + + + + + + + + + + + diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs new file mode 100644 index 000000000..8b5bb2e1b --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs @@ -0,0 +1,468 @@ +using System.Text; +using System.Text.Json; +using Amazon; +using Amazon.ECR; +using Amazon.ECR.Model; +using Amazon.IdentityManagement; +using Amazon.IdentityManagement.Model; +using Amazon.Lambda; +using Amazon.Lambda.Model; +using Xunit.Abstractions; + +namespace Amazon.Lambda.DurableExecution.IntegrationTests; + +/// +/// Builds, deploys, and invokes a single durable Lambda function for an integration test. +/// Manages the full lifecycle: IAM role, ECR repo, Docker image, Lambda function. 
+/// All resources are torn down on DisposeAsync. +/// +internal sealed class DurableFunctionDeployment : IAsyncDisposable +{ + private readonly ITestOutputHelper _output; + private readonly IAmazonLambda _lambdaClient; + private readonly IAmazonECR _ecrClient; + private readonly IAmazonIdentityManagementService _iamClient; + + private readonly string _functionName; + private readonly string _repoName; + private readonly string _roleName; + private string? _roleArn; + private string? _imageUri; + private bool _functionCreated; + private bool _ecrRepoCreated; + + public string FunctionName => _functionName; + public IAmazonLambda LambdaClient => _lambdaClient; + + private DurableFunctionDeployment(ITestOutputHelper output, string suffix) + { + _output = output; + _lambdaClient = new AmazonLambdaClient(RegionEndpoint.USEast1); + _ecrClient = new AmazonECRClient(RegionEndpoint.USEast1); + _iamClient = new AmazonIdentityManagementServiceClient(RegionEndpoint.USEast1); + + // Truncate the GUID (not the suffix) so CloudTrail entries stay readable. + // Keep the GUID short enough that the total stays well under 40 chars even for long suffixes. + static string ShortId() => Guid.NewGuid().ToString("N")[..Math.Min(8, 32)]; + _functionName = $"durable-integ-{suffix}-{ShortId()}"; + _repoName = $"durable-integ-{suffix}-{ShortId()}"; + _roleName = $"durable-integ-{suffix}-{ShortId()}"; + } + + public static async Task CreateAsync( + string testFunctionDir, + string scenarioSuffix, + ITestOutputHelper output) + { + var deployment = new DurableFunctionDeployment(output, scenarioSuffix); + try + { + await deployment.InitializeAsync(testFunctionDir); + } + catch + { + // Tear down anything that did get created (IAM role, ECR repo) so we + // don't leak resources when init fails part-way through. + await deployment.DisposeAsync(); + throw; + } + return deployment; + } + + private async Task InitializeAsync(string testFunctionDir) + { + // 1. 
Create IAM role + _output.WriteLine($"Creating IAM role: {_roleName}"); + var assumeRolePolicy = """ + { + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Principal": {"Service": "lambda.amazonaws.com"}, + "Action": "sts:AssumeRole" + }] + } + """; + + var createRoleResponse = await _iamClient.CreateRoleAsync(new CreateRoleRequest + { + RoleName = _roleName, + AssumeRolePolicyDocument = assumeRolePolicy + }); + _roleArn = createRoleResponse.Role.Arn; + + await _iamClient.AttachRolePolicyAsync(new AttachRolePolicyRequest + { + RoleName = _roleName, + PolicyArn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + }); + + await _iamClient.AttachRolePolicyAsync(new AttachRolePolicyRequest + { + RoleName = _roleName, + PolicyArn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicDurableExecutionRolePolicy" + }); + + // Wait for IAM propagation + await Task.Delay(TimeSpan.FromSeconds(10)); + + // 2. Create ECR repository + _output.WriteLine($"Creating ECR repository: {_repoName}"); + var createRepoResponse = await _ecrClient.CreateRepositoryAsync(new CreateRepositoryRequest + { + RepositoryName = _repoName + }); + _ecrRepoCreated = true; + var repositoryUri = createRepoResponse.Repository.RepositoryUri; + + // 3. Build and push Docker image + _output.WriteLine($"Building and pushing Docker image from {testFunctionDir}..."); + _imageUri = await BuildAndPushImage(testFunctionDir, repositoryUri); + _output.WriteLine($"Image pushed: {_imageUri}"); + + // 4. 
Create Lambda function + _output.WriteLine($"Creating Lambda function: {_functionName}"); + await _lambdaClient.CreateFunctionAsync(new CreateFunctionRequest + { + FunctionName = _functionName, + PackageType = PackageType.Image, + Role = _roleArn, + Code = new FunctionCode { ImageUri = _imageUri }, + Timeout = 30, + MemorySize = 256, + DurableConfig = new DurableConfig { ExecutionTimeout = 60 } + }); + _functionCreated = true; + + _output.WriteLine("Waiting for function to become Active..."); + await WaitForFunctionActive(); + } + + public async Task<(InvokeResponse Response, string ExecutionName)> InvokeAsync(string payload, string? executionName = null) + { + var name = executionName ?? $"integ-test-{Guid.NewGuid():N}"; + var response = await _lambdaClient.InvokeAsync(new InvokeRequest + { + FunctionName = _functionName, + Qualifier = "$LATEST", + Payload = payload, + DurableExecutionName = name + }); + return (response, name); + } + + /// + /// Polls ListDurableExecutionsByFunction until an execution with the given name appears. + /// Useful when the synchronous Invoke response gives no ARN (e.g., failed workflows return null). + /// + public async Task FindDurableExecutionArnByNameAsync(string executionName, TimeSpan timeout) + { + var deadline = DateTime.UtcNow + timeout; + var attempt = 0; + _output.WriteLine($"[FindArn] Starting search for execution name '{executionName}' on function '{_functionName}' (timeout: {timeout.TotalSeconds}s)"); + + while (DateTime.UtcNow < deadline) + { + attempt++; + try + { + var resp = await _lambdaClient.ListDurableExecutionsByFunctionAsync( + new ListDurableExecutionsByFunctionRequest + { + FunctionName = _functionName, + DurableExecutionName = executionName // server-side exact match + }); + + var count = resp.DurableExecutions?.Count ?? 0; + _output.WriteLine($"[FindArn] attempt {attempt}: List returned {count} executions"); + + if (count > 0) + { + foreach (var e in resp.DurableExecutions!) 
+ { + _output.WriteLine($"[FindArn] - name='{e.DurableExecutionName}' status={e.Status} arn={e.DurableExecutionArn}"); + } + var match = resp.DurableExecutions.FirstOrDefault(e => e.DurableExecutionName == executionName); + if (match != null) + { + _output.WriteLine($"[FindArn] matched on attempt {attempt}"); + return match.DurableExecutionArn; + } + } + } + catch (Exception ex) + { + _output.WriteLine($"[FindArn] attempt {attempt} error (will retry): {ex.Message}"); + } + await Task.Delay(TimeSpan.FromSeconds(2)); + } + _output.WriteLine($"[FindArn] gave up after {attempt} attempts ({timeout.TotalSeconds}s)"); + return null; + } + + public async Task PollForCompletionAsync(string durableExecutionArn, TimeSpan timeout) + { + var deadline = DateTime.UtcNow + timeout; + + while (DateTime.UtcNow < deadline) + { + try + { + var resp = await _lambdaClient.GetDurableExecutionAsync( + new GetDurableExecutionRequest { DurableExecutionArn = durableExecutionArn }); + + var status = resp.Status?.ToString(); + if (status == "SUCCEEDED" || status == "FAILED" || + status == "TIMED_OUT" || status == "STOPPED") + { + return status; + } + } + catch (Exception ex) + { + _output.WriteLine($"Poll error (will retry): {ex.Message}"); + } + + await Task.Delay(TimeSpan.FromSeconds(2)); + } + + return "TIMEOUT"; + } + + public async Task GetExecutionAsync(string durableExecutionArn) + => await _lambdaClient.GetDurableExecutionAsync( + new GetDurableExecutionRequest { DurableExecutionArn = durableExecutionArn }); + + public async Task GetHistoryAsync(string durableExecutionArn, bool includeExecutionData = true) + => await _lambdaClient.GetDurableExecutionHistoryAsync( + new GetDurableExecutionHistoryRequest + { + DurableExecutionArn = durableExecutionArn, + IncludeExecutionData = includeExecutionData + }); + + /// + /// Repeatedly fetches history until is satisfied or the + /// timeout elapses. 
Needed because the history endpoint is eventually consistent — + /// the execution status can flip to SUCCEEDED before all events are indexed. + /// + public async Task WaitForHistoryAsync( + string durableExecutionArn, + Func predicate, + TimeSpan timeout, + bool includeExecutionData = true) + { + var deadline = DateTime.UtcNow + timeout; + GetDurableExecutionHistoryResponse? last = null; + var attempt = 0; + + while (DateTime.UtcNow < deadline) + { + attempt++; + try + { + last = await GetHistoryAsync(durableExecutionArn, includeExecutionData); + var eventCount = last.Events?.Count ?? 0; + _output.WriteLine($"[WaitForHistory] attempt {attempt}: {eventCount} events"); + if (predicate(last)) return last; + } + catch (Exception ex) + { + _output.WriteLine($"[WaitForHistory] attempt {attempt} error (will retry): {ex.Message}"); + } + await Task.Delay(TimeSpan.FromSeconds(2)); + } + + _output.WriteLine($"[WaitForHistory] gave up after {attempt} attempts; returning last response with {last?.Events?.Count ?? 0} events"); + return last ?? throw new TimeoutException($"GetDurableExecutionHistory never succeeded within {timeout.TotalSeconds}s"); + } + + public string? 
ExtractDurableExecutionArn(string responsePayload) + { + try + { + var doc = JsonDocument.Parse(responsePayload); + if (doc.RootElement.TryGetProperty("durableExecutionArn", out var arnProp)) + return arnProp.GetString(); + } + catch { } + return null; + } + + private async Task WaitForFunctionActive() + { + for (int i = 0; i < 60; i++) + { + try + { + var config = await _lambdaClient.GetFunctionConfigurationAsync( + new GetFunctionConfigurationRequest { FunctionName = _functionName }); + if (config.State == State.Active) return; + if (config.State == State.Failed) + throw new Exception($"Function creation failed: {config.StateReasonCode} - {config.StateReason}"); + } + catch (ResourceNotFoundException) { } + await Task.Delay(TimeSpan.FromSeconds(2)); + } + throw new TimeoutException("Function did not become Active within 120 seconds"); + } + + private async Task BuildAndPushImage(string testFunctionDir, string repositoryUri) + { + var publishDir = Path.Combine(testFunctionDir, "bin", "publish"); + if (Directory.Exists(publishDir)) Directory.Delete(publishDir, true); + + await RunProcess("dotnet", + $"publish -c Release -r linux-x64 --self-contained true -o \"{publishDir}\"", + testFunctionDir); + + var imageTag = $"{repositoryUri}:latest"; + await RunProcess("docker", + $"build --platform linux/amd64 --provenance=false -t {imageTag} .", + testFunctionDir); + + var authResponse = await _ecrClient.GetAuthorizationTokenAsync(new GetAuthorizationTokenRequest()); + var authData = authResponse.AuthorizationData[0]; + var token = Encoding.UTF8.GetString(Convert.FromBase64String(authData.AuthorizationToken)); + var parts = token.Split(':'); + var registryUrl = authData.ProxyEndpoint; + + await RunProcess("docker", + $"login --username {parts[0]} --password-stdin {registryUrl}", + testFunctionDir, + stdin: parts[1]); + + await RunProcess("docker", $"push {imageTag}", testFunctionDir); + + return imageTag; + } + + private async Task RunProcess(string fileName, string 
arguments, string workingDir, string? stdin = null) + { + _output.WriteLine($"Running: {fileName} {arguments}"); + var psi = new System.Diagnostics.ProcessStartInfo + { + FileName = fileName, + Arguments = arguments, + WorkingDirectory = workingDir, + RedirectStandardOutput = true, + RedirectStandardError = true, + RedirectStandardInput = stdin != null, + UseShellExecute = false + }; + + var process = System.Diagnostics.Process.Start(psi)!; + + if (stdin != null) + { + await process.StandardInput.WriteAsync(stdin); + process.StandardInput.Close(); + } + + var stdoutTask = process.StandardOutput.ReadToEndAsync(); + var stderrTask = process.StandardError.ReadToEndAsync(); + + await Task.WhenAny( + process.WaitForExitAsync(), + Task.Delay(TimeSpan.FromMinutes(5))); + + if (!process.HasExited) + { + process.Kill(); + throw new TimeoutException($"{fileName} timed out after 5 minutes"); + } + + var stdout = await stdoutTask; + var stderr = await stderrTask; + + if (!string.IsNullOrWhiteSpace(stdout)) + _output.WriteLine($"stdout: {stdout[..Math.Min(stdout.Length, 1000)]}"); + + if (process.ExitCode != 0) + { + _output.WriteLine($"stderr: {stderr}"); + throw new Exception($"{fileName} failed (exit {process.ExitCode}): {stderr}"); + } + } + + public async ValueTask DisposeAsync() + { + if (_functionCreated) + { + try + { + _output.WriteLine($"Deleting function: {_functionName}"); + await _lambdaClient.DeleteFunctionAsync(new DeleteFunctionRequest { FunctionName = _functionName }); + } + catch (Exception ex) { _output.WriteLine($"Cleanup error (function): {ex.Message}"); } + } + + if (_ecrRepoCreated) + { + try + { + _output.WriteLine($"Deleting ECR repository: {_repoName}"); + await _ecrClient.DeleteRepositoryAsync(new DeleteRepositoryRequest + { + RepositoryName = _repoName, + Force = true + }); + } + catch (Exception ex) { _output.WriteLine($"Cleanup error (ECR): {ex.Message}"); } + } + + if (_roleArn != null) + { + // Detach each policy independently — if one detach 
fails (e.g., the + // policy was never attached because init bailed out early) we still + // want to attempt the others and the final DeleteRole. + await TryDetachPolicy("arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"); + await TryDetachPolicy("arn:aws:iam::aws:policy/service-role/AWSLambdaBasicDurableExecutionRolePolicy"); + try + { + await _iamClient.DeleteRoleAsync(new DeleteRoleRequest { RoleName = _roleName }); + } + catch (Exception ex) { _output.WriteLine($"Cleanup error (IAM DeleteRole): {ex.Message}"); } + } + + async Task TryDetachPolicy(string policyArn) + { + try + { + await _iamClient.DetachRolePolicyAsync(new DetachRolePolicyRequest + { + RoleName = _roleName, + PolicyArn = policyArn + }); + } + catch (Exception ex) { _output.WriteLine($"Cleanup error (IAM Detach {policyArn}): {ex.Message}"); } + } + } + + public static string FindTestFunctionDir(string functionDirName) + { + var dir = AppContext.BaseDirectory; + while (dir != null) + { + var candidate = Path.Combine(dir, "TestFunctions", functionDirName); + if (Directory.Exists(candidate)) + return candidate; + + // Also check legacy "TestFunction" location for backwards compat + var legacy = Path.Combine(dir, functionDirName); + if (Directory.Exists(legacy) && File.Exists(Path.Combine(legacy, $"{functionDirName}.csproj"))) + return legacy; + + dir = Path.GetDirectoryName(dir); + } + + // Fallback: relative from test source directory + var fallback = Path.GetFullPath( + Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "TestFunctions", functionDirName)); + if (Directory.Exists(fallback)) + return fallback; + + throw new DirectoryNotFoundException( + $"Could not find TestFunctions/{functionDirName}/ directory. 
Looked up from: {AppContext.BaseDirectory}"); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/LongerWaitTest.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/LongerWaitTest.cs new file mode 100644 index 000000000..0592d0d44 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/LongerWaitTest.cs @@ -0,0 +1,62 @@ +using System.Linq; +using System.Text; +using Amazon.Lambda.Model; +using Xunit; +using Xunit.Abstractions; + +namespace Amazon.Lambda.DurableExecution.IntegrationTests; + +public class LongerWaitTest +{ + private readonly ITestOutputHelper _output; + public LongerWaitTest(ITestOutputHelper output) => _output = output; + + [Fact] + public async Task LongerWait_ExpiresAndCompletes() + { + await using var deployment = await DurableFunctionDeployment.CreateAsync( + DurableFunctionDeployment.FindTestFunctionDir("LongerWaitFunction"), + "longwait", _output); + + var (invokeResponse, executionName) = await deployment.InvokeAsync("""{"orderId": "long-wait-test"}"""); + var responsePayload = Encoding.UTF8.GetString(invokeResponse.Payload.ToArray()); + _output.WriteLine($"Response: {responsePayload}"); + + var arn = await deployment.FindDurableExecutionArnByNameAsync(executionName, TimeSpan.FromSeconds(60)); + Assert.NotNull(arn); + + var status = await deployment.PollForCompletionAsync(arn!, TimeSpan.FromSeconds(90)); + Assert.Equal("SUCCEEDED", status, ignoreCase: true); + + var history = await deployment.WaitForHistoryAsync( + arn!, + h => (h.Events?.Count(e => e.StepSucceededDetails != null) ?? 0) >= 2 + && (h.Events?.Any(e => e.WaitSucceededDetails != null) ?? false), + TimeSpan.FromSeconds(60)); + var events = history.Events ?? new List(); + + // Steps before and after the wait both ran, with the post-wait step seeing + // the pre-wait step's value via replay. 
+ var stepResults = events + .Where(e => e.StepSucceededDetails != null) + .Select(e => (Name: e.Name, Payload: e.StepSucceededDetails.Result?.Payload?.Trim('"'))) + .ToList(); + Assert.Equal(2, stepResults.Count); + Assert.Equal("before_wait", stepResults[0].Name); + Assert.Equal("started-long-wait-test", stepResults[0].Payload); + Assert.Equal("after_wait", stepResults[1].Name); + Assert.Equal("after_wait-started-long-wait-test", stepResults[1].Payload); + + // The wait was checkpointed for the configured 15-second duration. + var waitStarted = events.FirstOrDefault(e => e.WaitStartedDetails != null && e.Name == "long_wait"); + Assert.NotNull(waitStarted); + Assert.Equal(15, waitStarted!.WaitStartedDetails.Duration); + + // The wait spanned at least two invocations: one to schedule it and at + // least one to resume after the timer fires. + var invocations = events.Where(e => e.InvocationCompletedDetails != null).ToList(); + Assert.True( + invocations.Count >= 2, + $"Expected at least 2 InvocationCompleted events (suspend + resume), got {invocations.Count}"); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/MultipleStepsTest.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/MultipleStepsTest.cs new file mode 100644 index 000000000..573ecc082 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/MultipleStepsTest.cs @@ -0,0 +1,56 @@ +using System.Linq; +using System.Text; +using Amazon.Lambda.Model; +using Xunit; +using Xunit.Abstractions; + +namespace Amazon.Lambda.DurableExecution.IntegrationTests; + +public class MultipleStepsTest +{ + private readonly ITestOutputHelper _output; + public MultipleStepsTest(ITestOutputHelper output) => _output = output; + + [Fact] + public async Task MultipleSteps_AllCheckpointed() + { + await using var deployment = await DurableFunctionDeployment.CreateAsync( + DurableFunctionDeployment.FindTestFunctionDir("MultipleStepsFunction"), + "multi", 
_output); + + var (invokeResponse, executionName) = await deployment.InvokeAsync("""{"orderId": "chain"}"""); + var responsePayload = Encoding.UTF8.GetString(invokeResponse.Payload.ToArray()); + _output.WriteLine($"Response: {responsePayload}"); + + var arn = await deployment.FindDurableExecutionArnByNameAsync(executionName, TimeSpan.FromSeconds(60)); + Assert.NotNull(arn); + + var status = await deployment.PollForCompletionAsync(arn!, TimeSpan.FromSeconds(60)); + Assert.Equal("SUCCEEDED", status, ignoreCase: true); + + // History is eventually consistent — the execution can be SUCCEEDED before + // all events are indexed. Wait until we see all 5 step-succeeded events. + var history = await deployment.WaitForHistoryAsync( + arn!, + h => (h.Events?.Count(e => e.StepSucceededDetails != null) ?? 0) >= 5, + TimeSpan.FromSeconds(60)); + var events = history.Events ?? new List(); + + // Each step ran exactly once (no replay-induced duplicates) in declaration order, + // and each step's output chained from the previous one. 
+ var stepResults = events + .Where(e => e.StepSucceededDetails != null) + .Select(e => $"{e.Name}={e.StepSucceededDetails.Result?.Payload?.Trim('"')}") + .ToList(); + Assert.Equal( + new[] + { + "step_1=a-chain", + "step_2=a-chain-b", + "step_3=a-chain-b-c", + "step_4=a-chain-b-c-d", + "step_5=a-chain-b-c-d-e", + }, + stepResults); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/ReplayDeterminismTest.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/ReplayDeterminismTest.cs new file mode 100644 index 000000000..0fd7aa569 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/ReplayDeterminismTest.cs @@ -0,0 +1,67 @@ +using System.Linq; +using System.Text; +using Amazon.Lambda.Model; +using Xunit; +using Xunit.Abstractions; + +namespace Amazon.Lambda.DurableExecution.IntegrationTests; + +public class ReplayDeterminismTest +{ + private readonly ITestOutputHelper _output; + public ReplayDeterminismTest(ITestOutputHelper output) => _output = output; + + [Fact] + public async Task ReplayDeterminism_SameGuidAcrossInvocations() + { + await using var deployment = await DurableFunctionDeployment.CreateAsync( + DurableFunctionDeployment.FindTestFunctionDir("ReplayDeterminismFunction"), + "replay", _output); + + var (invokeResponse, executionName) = await deployment.InvokeAsync("""{"orderId": "replay-test"}"""); + var responsePayload = Encoding.UTF8.GetString(invokeResponse.Payload.ToArray()); + _output.WriteLine($"Response: {responsePayload}"); + + var arn = await deployment.FindDurableExecutionArnByNameAsync(executionName, TimeSpan.FromSeconds(60)); + Assert.NotNull(arn); + + var status = await deployment.PollForCompletionAsync(arn!, TimeSpan.FromSeconds(60)); + Assert.Equal("SUCCEEDED", status, ignoreCase: true); + + // History is eventually consistent — wait until both step-succeeded events are visible. 
+ var history = await deployment.WaitForHistoryAsync( + arn!, + h => (h.Events?.Count(e => e.StepSucceededDetails != null) ?? 0) >= 2, + TimeSpan.FromSeconds(60)); + var events = history.Events ?? new List(); + + // Each step succeeded exactly once — generate_id was NOT re-executed on replay + // (a duplicate would show up as two succeeded events for the same name). + var stepSucceededEvents = events.Where(e => e.StepSucceededDetails != null).ToList(); + Assert.Equal(2, stepSucceededEvents.Count); + Assert.Single(stepSucceededEvents.Where(e => e.Name == "generate_id")); + Assert.Single(stepSucceededEvents.Where(e => e.Name == "echo_id")); + + var generateEvent = stepSucceededEvents.First(e => e.Name == "generate_id"); + var echoEvent = stepSucceededEvents.First(e => e.Name == "echo_id"); + + var generatedGuid = generateEvent.StepSucceededDetails.Result?.Payload?.Trim('"'); + var echoedResult = echoEvent.StepSucceededDetails.Result?.Payload?.Trim('"'); + Assert.NotNull(generatedGuid); + Assert.NotNull(echoedResult); + Assert.True(Guid.TryParse(generatedGuid, out _), + $"generate_id should produce a valid GUID, got: {generatedGuid}"); + + // The echoed value matches the cached GUID — proves replay returned the + // checkpointed value rather than running generate_id again. + Assert.Equal($"echo:{generatedGuid}", echoedResult); + + // The boundary wait actually caused a suspend/resume cycle. 
+ var waitStarted = events.FirstOrDefault(e => e.WaitStartedDetails != null && e.Name == "boundary_wait"); + Assert.NotNull(waitStarted); + var invocations = events.Where(e => e.InvocationCompletedDetails != null).ToList(); + Assert.True( + invocations.Count >= 2, + $"Expected at least 2 InvocationCompleted events (proves replay actually happened), got {invocations.Count}"); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/StepFailsTest.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/StepFailsTest.cs new file mode 100644 index 000000000..7b2afd427 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/StepFailsTest.cs @@ -0,0 +1,51 @@ +using System.Linq; +using System.Text; +using Amazon.Lambda.Model; +using Xunit; +using Xunit.Abstractions; + +namespace Amazon.Lambda.DurableExecution.IntegrationTests; + +public class StepFailsTest +{ + private readonly ITestOutputHelper _output; + public StepFailsTest(ITestOutputHelper output) => _output = output; + + [Fact] + public async Task StepFails_PropagatesAsFailedStatus() + { + await using var deployment = await DurableFunctionDeployment.CreateAsync( + DurableFunctionDeployment.FindTestFunctionDir("StepFailsFunction"), + "stepfail", _output); + + var (invokeResponse, executionName) = await deployment.InvokeAsync("""{"orderId": "x"}"""); + var responsePayload = Encoding.UTF8.GetString(invokeResponse.Payload.ToArray()); + _output.WriteLine($"Response: {responsePayload}"); + + // Failed workflows return null payload to the Invoke caller. Locate the execution + // by name and verify the service marked it FAILED. 
+ var arn = await deployment.FindDurableExecutionArnByNameAsync(executionName, TimeSpan.FromSeconds(60)); + Assert.NotNull(arn); + + var status = await deployment.PollForCompletionAsync(arn!, TimeSpan.FromSeconds(60)); + Assert.Equal("FAILED", status, ignoreCase: true); + + var execution = await deployment.GetExecutionAsync(arn!); + Assert.NotNull(execution.Error); + Assert.Contains("intentional failure", execution.Error.ErrorMessage); + + var history = await deployment.WaitForHistoryAsync( + arn!, + h => h.Events?.Any(e => e.StepFailedDetails != null) ?? false, + TimeSpan.FromSeconds(60)); + var events = history.Events ?? new List(); + + // The failing step recorded a StepFailed event with the exception message. + var stepFailed = events.FirstOrDefault(e => e.StepFailedDetails != null && e.Name == "fail_step"); + Assert.NotNull(stepFailed); + Assert.Contains("intentional failure", stepFailed!.StepFailedDetails.Error?.Payload?.ErrorMessage ?? string.Empty); + + // No step ever succeeded — the workflow body was unreachable past the throw. 
+ Assert.Empty(events.Where(e => e.StepSucceededDetails != null)); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/StepWaitStepTest.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/StepWaitStepTest.cs new file mode 100644 index 000000000..684486dd9 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/StepWaitStepTest.cs @@ -0,0 +1,58 @@ +using System.Linq; +using System.Text; +using Amazon.Lambda.Model; +using Xunit; +using Xunit.Abstractions; + +namespace Amazon.Lambda.DurableExecution.IntegrationTests; + +public class StepWaitStepTest +{ + private readonly ITestOutputHelper _output; + public StepWaitStepTest(ITestOutputHelper output) => _output = output; + + [Fact] + public async Task StepWaitStep_CompletesViaService() + { + await using var deployment = await DurableFunctionDeployment.CreateAsync( + DurableFunctionDeployment.FindTestFunctionDir("StepWaitStepFunction"), + "stepwait", _output); + + var (invokeResponse, executionName) = await deployment.InvokeAsync("""{"orderId": "integ-test-123"}"""); + Assert.Equal(200, invokeResponse.StatusCode); + + var responsePayload = Encoding.UTF8.GetString(invokeResponse.Payload.ToArray()); + _output.WriteLine($"Response: {responsePayload}"); + + var arn = await deployment.FindDurableExecutionArnByNameAsync(executionName, TimeSpan.FromSeconds(60)); + Assert.NotNull(arn); + + var status = await deployment.PollForCompletionAsync(arn!, TimeSpan.FromSeconds(60)); + Assert.Equal("SUCCEEDED", status, ignoreCase: true); + + var history = await deployment.WaitForHistoryAsync( + arn!, + h => (h.Events?.Count(e => e.StepSucceededDetails != null) ?? 0) >= 2 + && (h.Events?.Any(e => e.WaitSucceededDetails != null) ?? false), + TimeSpan.FromSeconds(60)); + var events = history.Events ?? new List(); + + // Both steps ran in order and produced the expected chained outputs. 
+ var stepResults = events + .Where(e => e.StepSucceededDetails != null) + .Select(e => (Name: e.Name, Payload: e.StepSucceededDetails.Result?.Payload?.Trim('"'))) + .ToList(); + Assert.Equal(2, stepResults.Count); + Assert.Equal("validate", stepResults[0].Name); + Assert.Equal("validated-integ-test-123", stepResults[0].Payload); + Assert.Equal("process", stepResults[1].Name); + Assert.Equal("processed-validated-integ-test-123", stepResults[1].Payload); + + // The wait was actually scheduled with the expected duration. + var waitStarted = events.FirstOrDefault(e => e.WaitStartedDetails != null && e.Name == "short_wait"); + Assert.NotNull(waitStarted); + Assert.Equal(3, waitStarted!.WaitStartedDetails.Duration); + var waitSucceeded = events.FirstOrDefault(e => e.WaitSucceededDetails != null && e.Name == "short_wait"); + Assert.NotNull(waitSucceeded); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Dockerfile b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Dockerfile new file mode 100644 index 000000000..c1913d56a --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Dockerfile @@ -0,0 +1,7 @@ +FROM public.ecr.aws/lambda/provided:al2023 + +RUN dnf install -y libicu + +COPY bin/publish/ ${LAMBDA_TASK_ROOT} + +ENTRYPOINT ["/var/task/bootstrap"] diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Function.cs new file mode 100644 index 000000000..e73a6da7e --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Function.cs @@ -0,0 +1,40 @@ +using Amazon.Lambda.Core; +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.RuntimeSupport; +using 
Amazon.Lambda.Serialization.SystemTextJson; + +namespace DurableExecutionTestFunction; + +public class Function +{ + public static async Task Main(string[] args) + { + var handler = new Function(); + var serializer = new DefaultLambdaJsonSerializer(); + using var handlerWrapper = HandlerWrapper.GetHandlerWrapper(handler.Handler, serializer); + using var bootstrap = new LambdaBootstrap(handlerWrapper); + await bootstrap.RunAsync(); + } + + public Task Handler( + DurableExecutionInvocationInput input, ILambdaContext context) + => DurableFunction.WrapAsync(Workflow, input, context); + + private async Task Workflow(TestEvent input, IDurableContext context) + { + var step1 = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"started-{input.OrderId}"; }, + name: "before_wait"); + + await context.WaitAsync(TimeSpan.FromSeconds(15), name: "long_wait"); + + var step2 = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"after_wait-{step1}"; }, + name: "after_wait"); + + return new TestResult { Status = "completed", Data = step2 }; + } +} + +public class TestEvent { public string? OrderId { get; set; } } +public class TestResult { public string? Status { get; set; } public string? 
Data { get; set; } } diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/LongerWaitFunction.csproj b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/LongerWaitFunction.csproj new file mode 100644 index 000000000..6f5f657e4 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/LongerWaitFunction.csproj @@ -0,0 +1,18 @@ + + + + net8.0 + Exe + true + bootstrap + enable + enable + + + + + + + + + diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Dockerfile b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Dockerfile new file mode 100644 index 000000000..c1913d56a --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Dockerfile @@ -0,0 +1,7 @@ +FROM public.ecr.aws/lambda/provided:al2023 + +RUN dnf install -y libicu + +COPY bin/publish/ ${LAMBDA_TASK_ROOT} + +ENTRYPOINT ["/var/task/bootstrap"] diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Function.cs new file mode 100644 index 000000000..cc80e6afa --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Function.cs @@ -0,0 +1,50 @@ +using Amazon.Lambda.Core; +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.RuntimeSupport; +using Amazon.Lambda.Serialization.SystemTextJson; + +namespace DurableExecutionTestFunction; + +public class Function +{ + public static async Task Main(string[] args) + { + var handler = new Function(); + var serializer = new DefaultLambdaJsonSerializer(); + using var handlerWrapper = 
HandlerWrapper.GetHandlerWrapper(handler.Handler, serializer); + using var bootstrap = new LambdaBootstrap(handlerWrapper); + await bootstrap.RunAsync(); + } + + public Task Handler( + DurableExecutionInvocationInput input, ILambdaContext context) + => DurableFunction.WrapAsync(Workflow, input, context); + + private async Task Workflow(TestEvent input, IDurableContext context) + { + var step1 = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"a-{input.OrderId}"; }, + name: "step_1"); + + var step2 = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"{step1}-b"; }, + name: "step_2"); + + var step3 = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"{step2}-c"; }, + name: "step_3"); + + var step4 = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"{step3}-d"; }, + name: "step_4"); + + var step5 = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"{step4}-e"; }, + name: "step_5"); + + return new TestResult { Status = "completed", Data = step5 }; + } +} + +public class TestEvent { public string? OrderId { get; set; } } +public class TestResult { public string? Status { get; set; } public string? 
Data { get; set; } } diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/MultipleStepsFunction.csproj b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/MultipleStepsFunction.csproj new file mode 100644 index 000000000..6f5f657e4 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/MultipleStepsFunction.csproj @@ -0,0 +1,18 @@ + + + + net8.0 + Exe + true + bootstrap + enable + enable + + + + + + + + + diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Dockerfile b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Dockerfile new file mode 100644 index 000000000..c1913d56a --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Dockerfile @@ -0,0 +1,7 @@ +FROM public.ecr.aws/lambda/provided:al2023 + +RUN dnf install -y libicu + +COPY bin/publish/ ${LAMBDA_TASK_ROOT} + +ENTRYPOINT ["/var/task/bootstrap"] diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Function.cs new file mode 100644 index 000000000..ce2a333b1 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Function.cs @@ -0,0 +1,43 @@ +using Amazon.Lambda.Core; +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.RuntimeSupport; +using Amazon.Lambda.Serialization.SystemTextJson; + +namespace DurableExecutionTestFunction; + +public class Function +{ + public static async Task Main(string[] args) + { + var handler = new Function(); + var serializer = new DefaultLambdaJsonSerializer(); + using var 
handlerWrapper = HandlerWrapper.GetHandlerWrapper(handler.Handler, serializer); + using var bootstrap = new LambdaBootstrap(handlerWrapper); + await bootstrap.RunAsync(); + } + + public Task Handler( + DurableExecutionInvocationInput input, ILambdaContext context) + => DurableFunction.WrapAsync(Workflow, input, context); + + private async Task Workflow(TestEvent input, IDurableContext context) + { + // Step 1 generates a fresh GUID. On replay, this MUST return the cached value. + var generatedId = await context.StepAsync( + async (_) => { await Task.CompletedTask; return Guid.NewGuid().ToString(); }, + name: "generate_id"); + + // Force a suspend/resume cycle to trigger replay + await context.WaitAsync(TimeSpan.FromSeconds(3), name: "boundary_wait"); + + // Step 2 echoes the GUID. After replay, it should see the SAME GUID from step 1. + var echoed = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"echo:{generatedId}"; }, + name: "echo_id"); + + return new TestResult { Status = "completed", Data = echoed }; + } +} + +public class TestEvent { public string? OrderId { get; set; } } +public class TestResult { public string? Status { get; set; } public string? 
Data { get; set; } } diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/ReplayDeterminismFunction.csproj b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/ReplayDeterminismFunction.csproj new file mode 100644 index 000000000..6f5f657e4 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/ReplayDeterminismFunction.csproj @@ -0,0 +1,18 @@ + + + + net8.0 + Exe + true + bootstrap + enable + enable + + + + + + + + + diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Dockerfile b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Dockerfile new file mode 100644 index 000000000..c1913d56a --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Dockerfile @@ -0,0 +1,7 @@ +FROM public.ecr.aws/lambda/provided:al2023 + +RUN dnf install -y libicu + +COPY bin/publish/ ${LAMBDA_TASK_ROOT} + +ENTRYPOINT ["/var/task/bootstrap"] diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Function.cs new file mode 100644 index 000000000..9aeeed2a2 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Function.cs @@ -0,0 +1,38 @@ +using Amazon.Lambda.Core; +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.RuntimeSupport; +using Amazon.Lambda.Serialization.SystemTextJson; + +namespace DurableExecutionTestFunction; + +public class Function +{ + public static async Task Main(string[] args) + { + var handler = new Function(); + var serializer = new DefaultLambdaJsonSerializer(); + using var handlerWrapper = 
HandlerWrapper.GetHandlerWrapper(handler.Handler, serializer); + using var bootstrap = new LambdaBootstrap(handlerWrapper); + await bootstrap.RunAsync(); + } + + public Task Handler( + DurableExecutionInvocationInput input, ILambdaContext context) + => DurableFunction.WrapAsync(Workflow, input, context); + + private async Task Workflow(TestEvent input, IDurableContext context) + { + await context.StepAsync( + async (_) => + { + await Task.CompletedTask; + throw new InvalidOperationException("intentional failure for integration test"); + }, + name: "fail_step"); + + return new TestResult { Status = "should_not_reach" }; + } +} + +public class TestEvent { public string? OrderId { get; set; } } +public class TestResult { public string? Status { get; set; } public string? Data { get; set; } } diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/StepFailsFunction.csproj b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/StepFailsFunction.csproj new file mode 100644 index 000000000..6f5f657e4 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/StepFailsFunction.csproj @@ -0,0 +1,18 @@ + + + + net8.0 + Exe + true + bootstrap + enable + enable + + + + + + + + + diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Dockerfile b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Dockerfile new file mode 100644 index 000000000..c1913d56a --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Dockerfile @@ -0,0 +1,7 @@ +FROM public.ecr.aws/lambda/provided:al2023 + +RUN dnf install -y libicu + +COPY bin/publish/ ${LAMBDA_TASK_ROOT} + +ENTRYPOINT ["/var/task/bootstrap"] diff --git 
a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Function.cs new file mode 100644 index 000000000..5b6c291df --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Function.cs @@ -0,0 +1,40 @@ +using Amazon.Lambda.Core; +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.RuntimeSupport; +using Amazon.Lambda.Serialization.SystemTextJson; + +namespace DurableExecutionTestFunction; + +public class Function +{ + public static async Task Main(string[] args) + { + var handler = new Function(); + var serializer = new DefaultLambdaJsonSerializer(); + using var handlerWrapper = HandlerWrapper.GetHandlerWrapper(handler.Handler, serializer); + using var bootstrap = new LambdaBootstrap(handlerWrapper); + await bootstrap.RunAsync(); + } + + public Task Handler( + DurableExecutionInvocationInput input, ILambdaContext context) + => DurableFunction.WrapAsync(Workflow, input, context); + + private async Task Workflow(TestEvent input, IDurableContext context) + { + var step1 = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"validated-{input.OrderId}"; }, + name: "validate"); + + await context.WaitAsync(TimeSpan.FromSeconds(3), name: "short_wait"); + + var step2 = await context.StepAsync( + async (_) => { await Task.CompletedTask; return $"processed-{step1}"; }, + name: "process"); + + return new TestResult { Status = "completed", Data = step2 }; + } +} + +public class TestEvent { public string? OrderId { get; set; } } +public class TestResult { public string? Status { get; set; } public string? 
Data { get; set; } } diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/StepWaitStepFunction.csproj b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/StepWaitStepFunction.csproj new file mode 100644 index 000000000..6f5f657e4 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/StepWaitStepFunction.csproj @@ -0,0 +1,18 @@ + + + + net8.0 + Exe + true + bootstrap + enable + enable + + + + + + + + + diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/Dockerfile b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/Dockerfile new file mode 100644 index 000000000..c1913d56a --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/Dockerfile @@ -0,0 +1,7 @@ +FROM public.ecr.aws/lambda/provided:al2023 + +RUN dnf install -y libicu + +COPY bin/publish/ ${LAMBDA_TASK_ROOT} + +ENTRYPOINT ["/var/task/bootstrap"] diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/Function.cs new file mode 100644 index 000000000..54e4ab737 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/Function.cs @@ -0,0 +1,31 @@ +using Amazon.Lambda.Core; +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.RuntimeSupport; +using Amazon.Lambda.Serialization.SystemTextJson; + +namespace DurableExecutionTestFunction; + +public class Function +{ + public static async Task Main(string[] args) + { + var handler = new Function(); + var serializer = new DefaultLambdaJsonSerializer(); + using var handlerWrapper = HandlerWrapper.GetHandlerWrapper(handler.Handler, 
serializer); + using var bootstrap = new LambdaBootstrap(handlerWrapper); + await bootstrap.RunAsync(); + } + + public Task Handler( + DurableExecutionInvocationInput input, ILambdaContext context) + => DurableFunction.WrapAsync(Workflow, input, context); + + private async Task Workflow(TestEvent input, IDurableContext context) + { + await context.WaitAsync(TimeSpan.FromSeconds(5), name: "only_wait"); + return new TestResult { Status = "completed", Data = "wait_only" }; + } +} + +public class TestEvent { public string? OrderId { get; set; } } +public class TestResult { public string? Status { get; set; } public string? Data { get; set; } } diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/WaitOnlyFunction.csproj b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/WaitOnlyFunction.csproj new file mode 100644 index 000000000..6f5f657e4 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitOnlyFunction/WaitOnlyFunction.csproj @@ -0,0 +1,18 @@ + + + + net8.0 + Exe + true + bootstrap + enable + enable + + + + + + + + + diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/WaitOnlyTest.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/WaitOnlyTest.cs new file mode 100644 index 000000000..213ce0186 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/WaitOnlyTest.cs @@ -0,0 +1,55 @@ +using System.Linq; +using System.Text; +using Amazon.Lambda.Model; +using Xunit; +using Xunit.Abstractions; + +namespace Amazon.Lambda.DurableExecution.IntegrationTests; + +public class WaitOnlyTest +{ + private readonly ITestOutputHelper _output; + public WaitOnlyTest(ITestOutputHelper output) => _output = output; + + [Fact] + public async Task WaitOnly_NoSteps() + { + await using var deployment = await DurableFunctionDeployment.CreateAsync( + 
DurableFunctionDeployment.FindTestFunctionDir("WaitOnlyFunction"), + "waitonly", _output); + + var (invokeResponse, executionName) = await deployment.InvokeAsync("""{"orderId": "wait-only"}"""); + var responsePayload = Encoding.UTF8.GetString(invokeResponse.Payload.ToArray()); + _output.WriteLine($"Response: {responsePayload}"); + + var arn = await deployment.FindDurableExecutionArnByNameAsync(executionName, TimeSpan.FromSeconds(60)); + Assert.NotNull(arn); + + var status = await deployment.PollForCompletionAsync(arn!, TimeSpan.FromSeconds(60)); + Assert.Equal("SUCCEEDED", status, ignoreCase: true); + + var history = await deployment.WaitForHistoryAsync( + arn!, + h => (h.Events?.Any(e => e.WaitSucceededDetails != null) ?? false), + TimeSpan.FromSeconds(60)); + var events = history.Events ?? new List(); + + // The wait was checkpointed and ran for the configured duration. + var waitStarted = events.FirstOrDefault(e => e.WaitStartedDetails != null && e.Name == "only_wait"); + Assert.NotNull(waitStarted); + Assert.Equal(5, waitStarted!.WaitStartedDetails.Duration); + + var waitSucceeded = events.FirstOrDefault(e => e.WaitSucceededDetails != null && e.Name == "only_wait"); + Assert.NotNull(waitSucceeded); + + // No step events: this workflow body contains only a wait. + Assert.DoesNotContain(events, e => e.StepStartedDetails != null); + + // The wait genuinely caused a suspend/resume, not an in-process delay: + // expect at least 2 invocations recorded (initial + resume after timer fires).
+ var invocations = events.Where(e => e.InvocationCompletedDetails != null).ToList(); + Assert.True( + invocations.Count >= 2, + $"Expected at least 2 InvocationCompleted events (initial + post-wait resume), got {invocations.Count}"); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/xunit.runner.json b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/xunit.runner.json new file mode 100644 index 000000000..b6de9b357 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/xunit.runner.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://xunit.net/schema/current/xunit.runner.schema.json", + "parallelizeTestCollections": false, + "parallelizeAssembly": false, + "maxParallelThreads": 1 +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/Amazon.Lambda.DurableExecution.Tests.csproj b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/Amazon.Lambda.DurableExecution.Tests.csproj index d8d1615c9..6fa422e0a 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/Amazon.Lambda.DurableExecution.Tests.csproj +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/Amazon.Lambda.DurableExecution.Tests.csproj @@ -11,17 +11,20 @@ true enable enable - $(NoWarn);CS1591 + $(NoWarn);CS1591 + true + + diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/AssemblyLoadTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/AssemblyLoadTests.cs deleted file mode 100644 index 84295a2e1..000000000 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/AssemblyLoadTests.cs +++ /dev/null @@ -1,13 +0,0 @@ -using Xunit; - -namespace Amazon.Lambda.DurableExecution.Tests; - -public class AssemblyLoadTests -{ - [Fact] - public void DurableExecutionAssembly_Loads() - { - var assembly = typeof(AssemblyMarker).Assembly; - Assert.Equal("Amazon.Lambda.DurableExecution", assembly.GetName().Name); - } -} diff --git 
a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CheckpointBatcherTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CheckpointBatcherTests.cs new file mode 100644 index 000000000..c81998eaa --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CheckpointBatcherTests.cs @@ -0,0 +1,213 @@ +using Amazon.Lambda.DurableExecution.Internal; +using Xunit; +using SdkOperationUpdate = Amazon.Lambda.Model.OperationUpdate; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class CheckpointBatcherTests +{ + private static SdkOperationUpdate Update(string id) => new() + { + Id = id, + Type = "STEP", + Action = "SUCCEED" + }; + + [Fact] + public async Task EnqueueAsync_AwaitsUntilBatchFlushes() + { + var flushedTokens = new List(); + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => + { + flushedTokens.Add(token); + return Task.FromResult("token-1"); + }); + + await batcher.EnqueueAsync(Update("0-step")); + + Assert.Equal(new string?[] { "token-0" }, flushedTokens); + Assert.Equal("token-1", batcher.CheckpointToken); + + await batcher.DrainAsync(); + } + + [Fact] + public async Task MultipleEnqueueAsync_BatchedWithinWindow() + { + var batches = new List(); + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => + { + batches.Add(ops.Count); + return Task.FromResult(token); + }, + new CheckpointBatcherConfig { FlushInterval = TimeSpan.FromMilliseconds(50) }); + + // Fire several enqueues concurrently and await all — they should + // coalesce into a single batch since FlushInterval > 0. 
+ var tasks = Enumerable.Range(0, 5) + .Select(i => batcher.EnqueueAsync(Update($"{i}-step"))) + .ToArray(); + + await Task.WhenAll(tasks); + await batcher.DrainAsync(); + + Assert.Single(batches); + Assert.Equal(5, batches[0]); + } + + [Fact] + public async Task EnqueueAsync_OverflowOps_SplitsBatches() + { + var batches = new List(); + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => + { + batches.Add(ops.Count); + return Task.FromResult(token); + }, + new CheckpointBatcherConfig + { + MaxBatchOperations = 3, + FlushInterval = TimeSpan.FromMilliseconds(100) + }); + + var tasks = Enumerable.Range(0, 7) + .Select(i => batcher.EnqueueAsync(Update($"{i}-step"))) + .ToArray(); + + await Task.WhenAll(tasks); + await batcher.DrainAsync(); + + // 7 items, max 3 per batch → 3, 3, 1 (or some permutation summing to 7 + // with no batch over 3). + Assert.Equal(7, batches.Sum()); + Assert.All(batches, count => Assert.True(count <= 3)); + Assert.True(batches.Count >= 3); + } + + [Fact] + public async Task FlushAsync_Throws_PropagatesToAllAwaiters() + { + var failure = new InvalidOperationException("service unavailable"); + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => Task.FromException(failure), + new CheckpointBatcherConfig { FlushInterval = TimeSpan.FromMilliseconds(50) }); + + var tasks = Enumerable.Range(0, 3) + .Select(i => batcher.EnqueueAsync(Update($"{i}-step"))) + .ToArray(); + + // Each awaiter should see the same exception. + foreach (var t in tasks) + { + var ex = await Assert.ThrowsAsync(() => t); + Assert.Equal("service unavailable", ex.Message); + } + } + + [Fact] + public async Task EnqueueAsync_AfterTerminalError_FailsFast() + { + var failure = new InvalidOperationException("kaboom"); + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => Task.FromException(failure)); + + // First enqueue trips the terminal error. 
+ await Assert.ThrowsAsync(() => batcher.EnqueueAsync(Update("0-step"))); + + // Subsequent enqueue should fail fast with the same exception. + var second = await Assert.ThrowsAsync(() => batcher.EnqueueAsync(Update("1-step"))); + Assert.Equal("kaboom", second.Message); + } + + [Fact] + public async Task DrainAsync_FlushesRemainingItems() + { + var totalFlushed = 0; + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => + { + Interlocked.Add(ref totalFlushed, ops.Count); + return Task.FromResult(token); + }); + + // Fire enqueues without awaiting them individually. + var tasks = Enumerable.Range(0, 4) + .Select(i => batcher.EnqueueAsync(Update($"{i}-step"))) + .ToArray(); + + await batcher.DrainAsync(); + await Task.WhenAll(tasks); + + Assert.Equal(4, totalFlushed); + } + + [Fact] + public async Task DrainAsync_AfterTerminalError_Throws() + { + var failure = new InvalidOperationException("nope"); + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => Task.FromException(failure)); + + // Trip the terminal error. + await Assert.ThrowsAsync(() => batcher.EnqueueAsync(Update("0-step"))); + + // Drain should rethrow. 
+ await Assert.ThrowsAsync(() => batcher.DrainAsync()); + } + + [Fact] + public async Task EnqueueAsync_AfterDispose_Throws() + { + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => Task.FromResult(token)); + + await batcher.DisposeAsync(); + + await Assert.ThrowsAnyAsync(() => batcher.EnqueueAsync(Update("0-step"))); + } + + [Fact] + public async Task CheckpointToken_UpdatesAfterEachFlush() + { + var counter = 0; + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => + { + var next = $"token-{Interlocked.Increment(ref counter)}"; + return Task.FromResult(next); + }); + + await batcher.EnqueueAsync(Update("0-step")); + Assert.Equal("token-1", batcher.CheckpointToken); + + await batcher.EnqueueAsync(Update("1-step")); + Assert.Equal("token-2", batcher.CheckpointToken); + + await batcher.DrainAsync(); + } + + [Fact] + public async Task ConcurrentEnqueueAsync_AllComplete() + { + var totalFlushed = 0; + var batcher = new CheckpointBatcher("token-0", + (token, ops, ct) => + { + Interlocked.Add(ref totalFlushed, ops.Count); + return Task.FromResult(token); + }, + new CheckpointBatcherConfig { FlushInterval = TimeSpan.FromMilliseconds(20) }); + + var tasks = Enumerable.Range(0, 100) + .Select(i => Task.Run(() => batcher.EnqueueAsync(Update($"{i}-step")))) + .ToArray(); + + await Task.WhenAll(tasks); + await batcher.DrainAsync(); + + Assert.Equal(100, totalFlushed); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ConfigTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ConfigTests.cs new file mode 100644 index 000000000..f31586ea0 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ConfigTests.cs @@ -0,0 +1,15 @@ +using Amazon.Lambda.DurableExecution; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class ConfigTests +{ + [Fact] + public void SerializationContext_RecordEquality() + { + var ctx1 = new SerializationContext("op-1", 
"arn:aws:lambda:us-east-1:123:function:my-func"); + var ctx2 = new SerializationContext("op-1", "arn:aws:lambda:us-east-1:123:function:my-func"); + Assert.Equal(ctx1, ctx2); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs new file mode 100644 index 000000000..806ebd844 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs @@ -0,0 +1,669 @@ +using Amazon.Lambda.Core; +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.DurableExecution.Internal; +using Amazon.Lambda.TestUtilities; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class DurableContextTests +{ + /// Reproduces the Id that emits for the n-th root-level operation. + private static string IdAt(int position) => OperationIdGenerator.HashOperationId(position.ToString()); + + private static DurableContext CreateContext( + InitialExecutionState? initialState = null, + TerminationManager? terminationManager = null) + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(initialState); + var tm = terminationManager ?? 
new TerminationManager(); + var idGen = new OperationIdGenerator(); + var lambdaContext = new TestLambdaContext(); + + return new DurableContext(state, tm, idGen, "arn:aws:lambda:us-east-1:123:durable-execution:test", lambdaContext); + } + + #region StepAsync Tests + + [Fact] + public async Task StepAsync_NewExecution_RunsFunction() + { + var context = CreateContext(); + var executed = false; + + var result = await context.StepAsync(async (_) => + { + executed = true; + await Task.CompletedTask; + return 42; + }, name: "my_step"); + + Assert.True(executed); + Assert.Equal(42, result); + } + + [Fact] + public async Task StepAsync_Replay_ReturnsCachedResult() + { + var context = CreateContext(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = "\"cached_value\"" } + } + } + }); + + var executed = false; + var result = await context.StepAsync(async (_) => + { + executed = true; + await Task.CompletedTask; + return "fresh_value"; + }, name: "cached_step"); + + Assert.False(executed); + Assert.Equal("cached_value", result); + } + + [Fact] + public async Task StepAsync_ReplayFailed_ThrowsStepException() + { + var context = CreateContext(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Failed, + StepDetails = new StepDetails + { + Error = new ErrorObject + { + ErrorType = "System.TimeoutException", + ErrorMessage = "timed out" + } + } + } + } + }); + + var ex = await Assert.ThrowsAsync(() => + context.StepAsync(async (_) => { await Task.CompletedTask; return "x"; }, name: "bad_step")); + + Assert.Equal("System.TimeoutException", ex.ErrorType); + Assert.Equal("timed out", ex.Message); + } + + [Fact] + public async Task StepAsync_Throws_FailsWithStepException() + { + var context = CreateContext(); + var attempts = 0; + + await 
Assert.ThrowsAsync(() => + context.StepAsync(async (_) => + { + attempts++; + await Task.CompletedTask; + throw new InvalidOperationException("boom"); + }, name: "fail_step")); + + // No retry support yet — the step runs once. + Assert.Equal(1, attempts); + } + + [Fact] + public async Task StepAsync_WithStepContext_ReceivesMetadata() + { + var context = CreateContext(); + string? receivedOpId = null; + int receivedAttempt = 0; + Microsoft.Extensions.Logging.ILogger? receivedLogger = null; + + await context.StepAsync(async (step) => + { + receivedOpId = step.OperationId; + receivedAttempt = step.AttemptNumber; + receivedLogger = step.Logger; + await Task.CompletedTask; + return "done"; + }, name: "meta_step"); + + Assert.Equal(IdAt(1), receivedOpId); + Assert.Equal(1, receivedAttempt); + Assert.NotNull(receivedLogger); + } + + [Fact] + public async Task StepAsync_VoidOverload_Works() + { + var context = CreateContext(); + var executed = false; + + await context.StepAsync(async (_) => + { + executed = true; + await Task.CompletedTask; + }, name: "void_step"); + + Assert.True(executed); + } + + [Fact] + public async Task StepAsync_MultipleSteps_DeterministicIds() + { + var context = CreateContext(); + + var r1 = await context.StepAsync(async (_) => { await Task.CompletedTask; return "a"; }, name: "first"); + var r2 = await context.StepAsync(async (_) => { await Task.CompletedTask; return "b"; }, name: "second"); + var r3 = await context.StepAsync(async (_) => { await Task.CompletedTask; return "c"; }); + + Assert.Equal("a", r1); + Assert.Equal("b", r2); + Assert.Equal("c", r3); + } + + [Fact] + public async Task StepAsync_ComplexType_SerializesCorrectly() + { + var context = CreateContext(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = "{\"Name\":\"Alice\",\"Age\":30}" } + } + } + }); + + var result = await 
context.StepAsync( + async (_) => { await Task.CompletedTask; return new TestPerson { Name = "Bob", Age = 25 }; }, + name: "fetch"); + + Assert.Equal("Alice", result.Name); + Assert.Equal(30, result.Age); + } + + [Fact] + public async Task StepAsync_CustomSerializer_UsedForSerialization() + { + var serializer = new RecordingSerializer(); + var context = CreateContext(); + + var result = await context.StepAsync( + async (_) => { await Task.CompletedTask; return new TestPerson { Name = "Charlie", Age = 40 }; }, + serializer, + name: "with_custom"); + + Assert.Equal("Charlie", result.Name); + Assert.True(serializer.SerializeCalled); + Assert.False(serializer.DeserializeCalled); + } + + [Fact] + public void Logger_Defaults_ToNullLogger() + { + var context = CreateContext(); + Assert.NotNull(context.Logger); + } + + [Fact] + public void ExecutionContext_ExposesArn() + { + var context = CreateContext(); + Assert.Equal("arn:aws:lambda:us-east-1:123:durable-execution:test", context.ExecutionContext.DurableExecutionArn); + } + + [Fact] + public void LambdaContext_IsExposed() + { + var context = CreateContext(); + Assert.NotNull(context.LambdaContext); + } + + [Fact] + public async Task StepAsync_Replay_NullResult_ReturnsDefault() + { + var context = CreateContext(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = null } + } + } + }); + + var result = await context.StepAsync( + async (_) => { await Task.CompletedTask; return "fresh"; }, + name: "no_result"); + + Assert.Null(result); + } + + [Fact] + public async Task StepAsync_CancelledToken_ThrowsOperationCanceled() + { + var context = CreateContext(); + using var cts = new CancellationTokenSource(); + cts.Cancel(); + + await Assert.ThrowsAnyAsync(() => + context.StepAsync( + async (_) => + { + cts.Token.ThrowIfCancellationRequested(); + await Task.CompletedTask; + return 
"unreachable"; + }, + name: "cancelled_step", + cancellationToken: cts.Token)); + } + + [Fact] + public async Task StepAsync_CustomSerializer_UsedForReplayDeserialization() + { + var serializer = new RecordingSerializer(); + var context = CreateContext(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = "Dana,55" } + } + } + }); + + var result = await context.StepAsync( + async (_) => { await Task.CompletedTask; return new TestPerson { Name = "ignored", Age = 0 }; }, + serializer, + name: "replay_step"); + + Assert.True(serializer.DeserializeCalled); + Assert.Equal("Dana", result.Name); + Assert.Equal(55, result.Age); + } + + #endregion + + #region WaitAsync Tests + + [Fact] + public async Task WaitAsync_SubSecond_ThrowsArgumentOutOfRange() + { + var context = CreateContext(); + + await Assert.ThrowsAsync(() => + context.WaitAsync(TimeSpan.FromMilliseconds(500))); + } + + [Fact] + public async Task WaitAsync_AboveOneYear_ThrowsArgumentOutOfRange() + { + var context = CreateContext(); + + await Assert.ThrowsAsync(() => + context.WaitAsync(TimeSpan.FromSeconds(31_622_401))); + } + + [Fact] + public async Task WaitAsync_NewExecution_SignalsTermination() + { + var tm = new TerminationManager(); + var context = CreateContext(terminationManager: tm); + + // WaitAsync should signal termination and return a never-completing task + var waitTask = context.WaitAsync(TimeSpan.FromSeconds(30), name: "my_wait"); + + // Give it a moment to execute + await Task.Delay(10); + + Assert.True(tm.IsTerminated); + Assert.False(waitTask.IsCompleted); + } + + [Fact] + public async Task WaitAsync_Elapsed_ContinuesImmediately() + { + var pastExpirationMs = DateTimeOffset.UtcNow.AddSeconds(-10).ToUnixTimeMilliseconds(); + var context = CreateContext(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = 
OperationTypes.Wait, + Status = OperationStatuses.Pending, + WaitDetails = new WaitDetails { ScheduledEndTimestamp = pastExpirationMs } + } + } + }); + + await context.WaitAsync(TimeSpan.FromSeconds(30), name: "cooldown"); + // If we got here, the wait was correctly skipped + } + + [Fact] + public async Task WaitAsync_StartedButNotExpired_ResuspendsWithoutNewCheckpoint() + { + var futureExpirationMs = DateTimeOffset.UtcNow.AddSeconds(300).ToUnixTimeMilliseconds(); + var tm = new TerminationManager(); + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Wait, + Status = OperationStatuses.Pending, + WaitDetails = new WaitDetails { ScheduledEndTimestamp = futureExpirationMs } + } + } + }); + var idGen = new OperationIdGenerator(); + var lambdaContext = new TestLambdaContext(); + var recorder = new RecordingBatcher(); + var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + + var waitTask = context.WaitAsync(TimeSpan.FromSeconds(30), name: "pending_wait"); + + await Task.Delay(10); + + Assert.True(tm.IsTerminated); + Assert.False(waitTask.IsCompleted); + Assert.Empty(recorder.Flushed); + } + + [Fact] + public async Task WaitAsync_AlreadySucceeded_ContinuesImmediately() + { + var context = CreateContext(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Wait, + Status = OperationStatuses.Succeeded + } + } + }); + + await context.WaitAsync(TimeSpan.FromSeconds(30), name: "done_wait"); + // Completed without blocking + } + + [Fact] + public async Task WaitAsync_UnknownStatus_ThrowsNonDeterministicException() + { + // Unrecognized status on a replayed wait checkpoint must surface as + // NonDeterministicExecutionException — silently re-emitting WAIT START + // would either fail at the service or duplicate work. 
+ var context = CreateContext(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Wait, + Status = "TOTALLY_BOGUS_STATUS" + } + } + }); + + await Assert.ThrowsAsync(() => + context.WaitAsync(TimeSpan.FromSeconds(30), name: "mystery_wait")); + } + + #endregion + + #region End-to-end: Step + Wait + Step + + [Fact] + public async Task EndToEnd_StepWaitStep_FirstInvocation_SuspendsOnWait() + { + var tm = new TerminationManager(); + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + var idGen = new OperationIdGenerator(); + var lambdaContext = new TestLambdaContext(); + var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + + var result = await DurableExecutionHandler.RunAsync( + state, tm, + async () => + { + await context.StepAsync(async (_) => { await Task.CompletedTask; return "fetched"; }, name: "fetch"); + await context.WaitAsync(TimeSpan.FromSeconds(30), name: "delay"); + var final = await context.StepAsync(async (_) => { await Task.CompletedTask; return "processed"; }, name: "process"); + return final; + }); + + Assert.Equal(InvocationStatus.Pending, result.Status); + } + + [Fact] + public async Task EndToEnd_StepWaitStep_SecondInvocation_Completes() + { + var pastExpirationMs = DateTimeOffset.UtcNow.AddSeconds(-5).ToUnixTimeMilliseconds(); + var tm = new TerminationManager(); + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = "\"fetched\"" } + }, + new() + { + Id = IdAt(2), + Type = OperationTypes.Wait, + Status = OperationStatuses.Pending, + WaitDetails = new WaitDetails { ScheduledEndTimestamp = pastExpirationMs } + } + } + }); + + var idGen = new OperationIdGenerator(); + var lambdaContext = new TestLambdaContext(); + var context = new 
DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var processExecuted = false; + + var result = await DurableExecutionHandler.RunAsync( + state, tm, + async () => + { + var fetched = await context.StepAsync(async (_) => { await Task.CompletedTask; return "fresh_fetch"; }, name: "fetch"); + Assert.Equal("fetched", fetched); // cached from replay + + await context.WaitAsync(TimeSpan.FromSeconds(30), name: "delay"); + // wait is elapsed, continues + + var final = await context.StepAsync(async (_) => + { + processExecuted = true; + await Task.CompletedTask; + return "processed"; + }, name: "process"); + return final; + }); + + Assert.Equal(InvocationStatus.Succeeded, result.Status); + Assert.Equal("processed", result.Result); + Assert.True(processExecuted); + } + + #endregion + + #region Non-Determinism Detection Tests + + [Fact] + public async Task StepAsync_ReplayTypeMismatch_ThrowsNonDeterministicException() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Wait, + Status = OperationStatuses.Succeeded + } + } + }); + var tm = new TerminationManager(); + var idGen = new OperationIdGenerator(); + var lambdaContext = new TestLambdaContext(); + var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + + var ex = await Assert.ThrowsAsync(async () => + await context.StepAsync( + async (_) => { await Task.CompletedTask; return "should not run"; }, + name: "my_op")); + + Assert.Contains("expected type 'STEP'", ex.Message); + Assert.Contains("found 'WAIT'", ex.Message); + } + + [Fact] + public async Task WaitAsync_ReplayTypeMismatch_ThrowsNonDeterministicException() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new 
StepDetails { Result = "\"hello\"" } + } + } + }); + var tm = new TerminationManager(); + var idGen = new OperationIdGenerator(); + var lambdaContext = new TestLambdaContext(); + var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + + var ex = await Assert.ThrowsAsync(async () => + await context.WaitAsync(TimeSpan.FromSeconds(10), name: "my_op")); + + Assert.Contains("expected type 'WAIT'", ex.Message); + Assert.Contains("found 'STEP'", ex.Message); + } + + [Fact] + public async Task StepAsync_ReplayNameMismatch_ThrowsNonDeterministicException() + { + // Simulate a scenario where the operation was stored with a different name + // than what the current code passes (e.g., service returned stale data). + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + Name = "old_name", + StepDetails = new StepDetails { Result = "\"old_result\"" } + } + } + }); + var tm = new TerminationManager(); + var idGen = new OperationIdGenerator(); + var lambdaContext = new TestLambdaContext(); + var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + + var ex = await Assert.ThrowsAsync(async () => + await context.StepAsync( + async (_) => { await Task.CompletedTask; return "new"; }, + name: "my_step")); + + Assert.Contains("expected name 'my_step'", ex.Message); + Assert.Contains("found 'old_name'", ex.Message); + } + + [Fact] + public async Task StepAsync_NoReplay_SkipsValidation() + { + var context = CreateContext(); + + var result = await context.StepAsync( + async (_) => { await Task.CompletedTask; return "ok"; }, + name: "anything"); + + Assert.Equal("ok", result); + } + + #endregion + + private class TestPerson + { + public string? Name { get; set; } + public int Age { get; set; } + } + + /// + /// AOT-friendly test serializer using a trivial format. 
Demonstrates that + /// passing an ICheckpointSerializer to the AOT-safe + /// StepAsync overload fully replaces the reflection-based + /// System.Text.Json path. + /// + private class RecordingSerializer : ICheckpointSerializer + { + public bool SerializeCalled { get; private set; } + public bool DeserializeCalled { get; private set; } + + public string Serialize(TestPerson value, SerializationContext context) + { + SerializeCalled = true; + return $"{value.Name},{value.Age}"; + } + + public TestPerson Deserialize(string data, SerializationContext context) + { + DeserializeCalled = true; + // Payload is the bare "Name,Age" text that Serialize writes; string.Replace throws ArgumentException for an empty oldValue, so nothing is stripped here. + var parts = data.Split(','); + return new TestPerson { Name = parts[0], Age = int.Parse(parts[1]) }; + } + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableExecutionHandlerTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableExecutionHandlerTests.cs new file mode 100644 index 000000000..b5abc5882 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableExecutionHandlerTests.cs @@ -0,0 +1,137 @@ +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.DurableExecution.Internal; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class DurableExecutionHandlerTests +{ + [Fact] + public async Task RunAsync_UserCodeCompletes_ReturnsSucceeded() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + var termination = new TerminationManager(); + + var result = await DurableExecutionHandler.RunAsync( + state, + termination, + async () => + { + await Task.Delay(1); + return "hello"; + }); + + Assert.Equal(InvocationStatus.Succeeded, result.Status); + Assert.Equal("hello", result.Result); + Assert.Null(result.Exception); + } + + [Fact] + public async Task RunAsync_UserCodeThrows_ReturnsFailed() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + var termination = new TerminationManager(); + + var result = await
DurableExecutionHandler.RunAsync( + state, + termination, + async () => + { + await Task.Delay(1); + throw new InvalidOperationException("something broke"); + }); + + Assert.Equal(InvocationStatus.Failed, result.Status); + Assert.Equal("something broke", result.Message); + Assert.IsType(result.Exception); + } + + [Fact] + public async Task RunAsync_TerminationWins_ReturnsPending() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + var termination = new TerminationManager(); + + var result = await DurableExecutionHandler.RunAsync( + state, + termination, + async () => + { + // Simulate: user code hits a wait, signals termination, then blocks forever + termination.Terminate(TerminationReason.WaitScheduled, "waiting 30s"); + await new TaskCompletionSource().Task; // blocks forever + return "unreachable"; + }); + + Assert.Equal(InvocationStatus.Pending, result.Status); + Assert.Equal("waiting 30s", result.Message); + Assert.Null(result.Exception); + } + + [Fact] + public async Task RunAsync_TerminationWithException_ReturnsFailed() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + var termination = new TerminationManager(); + + var result = await DurableExecutionHandler.RunAsync( + state, + termination, + async () => + { + termination.Terminate( + TerminationReason.CheckpointFailed, + "checkpoint error", + new InvalidOperationException("service unavailable")); + await new TaskCompletionSource().Task; + return "unreachable"; + }); + + Assert.Equal(InvocationStatus.Failed, result.Status); + Assert.IsType(result.Exception); + } + + [Fact] + public async Task RunAsync_FastUserCode_BeatsTermination() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + var termination = new TerminationManager(); + + var result = await DurableExecutionHandler.RunAsync( + state, + termination, + async () => + { + // User code completes before termination is called + return 42; + }); + + 
Assert.Equal(InvocationStatus.Succeeded, result.Status); + Assert.Equal(42, result.Result); + } + + [Fact] + public async Task RunAsync_IntResult_WorksWithValueTypes() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + var termination = new TerminationManager(); + + var result = await DurableExecutionHandler.RunAsync( + state, + termination, + async () => + { + await Task.CompletedTask; + return 100; + }); + + Assert.Equal(InvocationStatus.Succeeded, result.Status); + Assert.Equal(100, result.Result); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableFunctionTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableFunctionTests.cs new file mode 100644 index 000000000..032a25a66 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableFunctionTests.cs @@ -0,0 +1,583 @@ +using System.Net; +using System.Text.Json; +using Amazon.Lambda; +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.DurableExecution.Internal; +using Amazon.Lambda.TestUtilities; +using Amazon.Runtime; +using Xunit; +using Operation = Amazon.Lambda.DurableExecution.Internal.Operation; +using StepDetails = Amazon.Lambda.DurableExecution.Internal.StepDetails; +using WaitDetails = Amazon.Lambda.DurableExecution.Internal.WaitDetails; +using ExecutionDetails = Amazon.Lambda.DurableExecution.Internal.ExecutionDetails; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class DurableFunctionTests +{ + /// Reproduces the Id that OperationIdGenerator emits for the n-th root-level operation.
+ private static string IdAt(int position) => OperationIdGenerator.HashOperationId(position.ToString()); + + private readonly IAmazonLambda _mockClient = new MockLambdaClient(); + + [Fact] + public async Task WrapAsync_FreshExecution_StepThenWait_ReturnsPending() + { + var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:order-123", + InitialExecutionState = new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "exec-0", + Type = OperationTypes.Execution, + Status = OperationStatuses.Started, + ExecutionDetails = new ExecutionDetails { InputPayload = "{\"orderId\":\"order-123\"}" } + } + } + } + }; + + var output = await DurableFunction.WrapAsync( + MyWorkflow, + input, + new TestLambdaContext(), + _mockClient); + + Assert.Equal(InvocationStatus.Pending, output.Status); + } + + [Fact] + public async Task WrapAsync_ReplayWithElapsedWait_ReturnsSucceeded() + { + var pastExpirationMs = DateTimeOffset.UtcNow.AddSeconds(-5).ToUnixTimeMilliseconds(); + var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:order-123", + InitialExecutionState = new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "exec-0", + Type = OperationTypes.Execution, + Status = OperationStatuses.Started, + ExecutionDetails = new ExecutionDetails { InputPayload = "{\"orderId\":\"order-123\"}" } + }, + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = "{\"IsValid\":true}" } + }, + new() + { + Id = IdAt(2), + Type = OperationTypes.Wait, + Status = OperationStatuses.Pending, + WaitDetails = new WaitDetails { ScheduledEndTimestamp = pastExpirationMs } + } + } + } + }; + + var output = await DurableFunction.WrapAsync( + MyWorkflow, + input, + new TestLambdaContext(), + _mockClient); + + Assert.Equal(InvocationStatus.Succeeded, 
output.Status); + Assert.NotNull(output.Result); + var result = JsonSerializer.Deserialize(output.Result!); + Assert.Equal("approved", result!.Status); + } + + [Fact] + public async Task WrapAsync_WorkflowThrows_ReturnsFailed() + { + var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:fail-test", + InitialExecutionState = new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "exec-0", + Type = OperationTypes.Execution, + Status = OperationStatuses.Started, + ExecutionDetails = new ExecutionDetails { InputPayload = "{\"orderId\":\"bad-order\"}" } + } + } + } + }; + + var output = await DurableFunction.WrapAsync( + async (evt, ctx) => throw new InvalidOperationException("workflow error"), + input, + new TestLambdaContext(), + _mockClient); + + Assert.Equal(InvocationStatus.Failed, output.Status); + Assert.NotNull(output.Error); + Assert.Equal("workflow error", output.Error!.ErrorMessage); + Assert.Contains("InvalidOperationException", output.Error.ErrorType!); + } + + [Fact] + public async Task WrapAsync_VoidWorkflow_ReturnSucceeded() + { + var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:void-test", + InitialExecutionState = new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "exec-0", + Type = OperationTypes.Execution, + Status = OperationStatuses.Started, + ExecutionDetails = new ExecutionDetails { InputPayload = "{\"orderId\":\"order-1\"}" } + } + } + } + }; + + var executed = false; + var output = await DurableFunction.WrapAsync( + async (evt, ctx) => + { + await ctx.StepAsync(async (_) => { await Task.CompletedTask; executed = true; }, name: "do_work"); + }, + input, + new TestLambdaContext(), + _mockClient); + + Assert.Equal(InvocationStatus.Succeeded, output.Status); + Assert.True(executed); + } + + [Fact] + public async Task WrapAsync_CheckpointsAreSentToService() + 
{ + var mockClient = new MockLambdaClient(); + var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:checkpoint-test", + CheckpointToken = "initial-token", + InitialExecutionState = new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "exec-0", + Type = OperationTypes.Execution, + Status = OperationStatuses.Started, + ExecutionDetails = new ExecutionDetails { InputPayload = "{\"orderId\":\"order-1\"}" } + } + } + } + }; + + var output = await DurableFunction.WrapAsync( + MyWorkflow, + input, + new TestLambdaContext(), + mockClient); + + Assert.Equal(InvocationStatus.Pending, output.Status); + Assert.Equal(2, mockClient.CheckpointCalls.Count); + + // First flush: step SUCCEED (the user awaits StepAsync, which awaits + // its SUCCEED enqueue, which blocks until the batcher flushes it). + var firstCall = mockClient.CheckpointCalls[0]; + Assert.Equal("arn:aws:lambda:us-east-1:123:durable-execution:checkpoint-test", firstCall.DurableExecutionArn); + Assert.Equal("initial-token", firstCall.CheckpointToken); + Assert.Single(firstCall.Updates); + var stepUpdate = firstCall.Updates[0]; + Assert.Equal("STEP", stepUpdate.Type); + Assert.Equal("SUCCEED", stepUpdate.Action); + Assert.Equal("validate", stepUpdate.Name); + Assert.NotNull(stepUpdate.Payload); + + // Second flush: wait START (blocks until the service has the timer + // recorded before WaitAsync suspends). 
+ var secondCall = mockClient.CheckpointCalls[1]; + Assert.Single(secondCall.Updates); + var waitUpdate = secondCall.Updates[0]; + Assert.Equal("WAIT", waitUpdate.Type); + Assert.Equal("START", waitUpdate.Action); + Assert.Equal("delay", waitUpdate.Name); + Assert.NotNull(waitUpdate.WaitOptions); + Assert.Equal(30, waitUpdate.WaitOptions.WaitSeconds); + } + + [Fact] + public async Task WrapAsync_UserPayload_BindsCamelCaseToPascalCaseProperty() + { + // The wire payload uses camelCase ("orderId"), the user POCO uses PascalCase (OrderId). + // ExtractUserPayload must do case-insensitive binding so workflows can read input.OrderId. + var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:case-test", + InitialExecutionState = new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "exec-0", + Type = OperationTypes.Execution, + Status = OperationStatuses.Started, + ExecutionDetails = new ExecutionDetails { InputPayload = "{\"orderId\":\"abc-123\"}" } + } + } + } + }; + + string? observedOrderId = null; + var output = await DurableFunction.WrapAsync( + async (evt, ctx) => + { + observedOrderId = evt.OrderId; + await Task.CompletedTask; + return new OrderResult { Status = "ok", OrderId = evt.OrderId }; + }, + input, + new TestLambdaContext(), + _mockClient); + + Assert.Equal(InvocationStatus.Succeeded, output.Status); + Assert.Equal("abc-123", observedOrderId); + } + + [Fact] + public async Task WrapAsync_NoExecutionOp_ReceivesDefaultPayload() + { + // No EXECUTION operation in the envelope — ExtractUserPayload returns default(TInput). + // Exercises the "loop falls through without finding EXECUTION" branch in DurableFunction.ExtractUserPayload. + var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:no-exec", + InitialExecutionState = new InitialExecutionState + { + Operations = new List() + } + }; + + OrderEvent? 
observed = null; + var output = await DurableFunction.WrapAsync( + async (evt, ctx) => + { + observed = evt; + await Task.CompletedTask; + return new OrderResult { Status = "ok" }; + }, + input, + new TestLambdaContext(), + _mockClient); + + Assert.Equal(InvocationStatus.Succeeded, output.Status); + Assert.Null(observed); // default(OrderEvent) for a reference type is null + } + + [Fact] + public async Task WrapAsync_PaginatedInitialState_HydratesAllPages() + { + // The service can return execution state across multiple pages — the first + // page comes inline on the invocation envelope (InitialExecutionState) and + // subsequent pages must be fetched via GetDurableExecutionState. Verify the + // pagination loop in WrapAsyncCore (DurableFunction.cs:160-167) walks every + // page so the workflow sees the full operation history on replay. + var arn = "arn:aws:lambda:us-east-1:123:durable-execution:paginated"; + + // Page 0 (in InitialExecutionState): EXECUTION op + step1 SUCCEEDED. + // Page 1 (fetched with marker "marker-1"): step2 SUCCEEDED, points to marker-2. + // Page 2 (fetched with marker "marker-2"): step3 SUCCEEDED, no NextMarker — loop exits. 
+ var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = arn, + CheckpointToken = "ckpt-0", + InitialExecutionState = new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "exec-0", + Type = OperationTypes.Execution, + Status = OperationStatuses.Started, + ExecutionDetails = new ExecutionDetails { InputPayload = "{\"orderId\":\"order-1\"}" } + }, + new() + { + Id = IdAt(1), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = "\"page-0-result\"" } + } + }, + NextMarker = "marker-1" + } + }; + + var mockClient = new MockLambdaClient + { + GetExecutionStateHandler = req => req.Marker switch + { + "marker-1" => new Amazon.Lambda.Model.GetDurableExecutionStateResponse + { + Operations = new List + { + new() + { + Id = IdAt(2), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new Amazon.Lambda.Model.StepDetails { Result = "\"page-1-result\"" } + } + }, + NextMarker = "marker-2" + }, + "marker-2" => new Amazon.Lambda.Model.GetDurableExecutionStateResponse + { + Operations = new List + { + new() + { + Id = IdAt(3), + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new Amazon.Lambda.Model.StepDetails { Result = "\"page-2-result\"" } + } + } + // NextMarker omitted -> loop terminates. + }, + _ => throw new InvalidOperationException($"Unexpected marker: {req.Marker}") + } + }; + + var observed = new List(); + var output = await DurableFunction.WrapAsync( + async (evt, ctx) => + { + // All three steps must replay the cached results from the paginated state + // without re-executing — if the loop missed a page, the corresponding step + // would run fresh and append a different value to `observed`. 
+ observed.Add(await ctx.StepAsync( + async (_) => { await Task.CompletedTask; return "fresh"; }, name: "step1")); + observed.Add(await ctx.StepAsync( + async (_) => { await Task.CompletedTask; return "fresh"; }, name: "step2")); + observed.Add(await ctx.StepAsync( + async (_) => { await Task.CompletedTask; return "fresh"; }, name: "step3")); + return new OrderResult { Status = "ok", OrderId = evt.OrderId }; + }, + input, + new TestLambdaContext(), + mockClient); + + Assert.Equal(InvocationStatus.Succeeded, output.Status); + + // Two GetDurableExecutionState calls — one per fetched page (page 0 was inline). + Assert.Equal(2, mockClient.GetExecutionStateCalls.Count); + Assert.Equal("marker-1", mockClient.GetExecutionStateCalls[0].Marker); + Assert.Equal(arn, mockClient.GetExecutionStateCalls[0].DurableExecutionArn); + Assert.Equal("ckpt-0", mockClient.GetExecutionStateCalls[0].CheckpointToken); + Assert.Equal("marker-2", mockClient.GetExecutionStateCalls[1].Marker); + + // The workflow saw replayed results from ALL three pages — none re-executed. + Assert.Equal(new[] { "page-0-result", "page-1-result", "page-2-result" }, observed); + + // No checkpoints were written: every step replayed from cache. + Assert.Empty(mockClient.CheckpointCalls); + } + + [Fact] + public async Task WrapAsync_NullInitialExecutionState_ReceivesDefaultPayload() + { + // No initial execution state at all. Same default-return branch in ExtractUserPayload. + var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:null-state" + }; + + OrderEvent? 
observed = null; + var output = await DurableFunction.WrapAsync( + async (evt, ctx) => + { + observed = evt; + await Task.CompletedTask; + return new OrderResult { Status = "ok" }; + }, + input, + new TestLambdaContext(), + _mockClient); + + Assert.Equal(InvocationStatus.Succeeded, output.Status); + Assert.Null(observed); + } + + // ────────────────────────────────────────────────────────────────────── + // IsTerminalCheckpointError classification (mirrors CheckpointError in + // aws-durable-execution-sdk-python): + // 4xx (except 429) → terminal (Failed envelope) + // 429 / 5xx / no status → transient (escapes to host for Lambda retry) + // Carve-out: InvalidParameterValueException "Invalid Checkpoint Token" → transient + // + // Driven through CheckpointDurableExecution: a workflow that succeeds a single Step + // forces the batcher to flush, which is wrapped by the try/catch in WrapAsyncCore. + // ────────────────────────────────────────────────────────────────────── + + public static IEnumerable TerminalCheckpointErrorCases() => new[] + { + new object[] { MakeServiceException("ResourceNotFoundException", HttpStatusCode.NotFound, "ARN not found") }, + new object[] { MakeServiceException("AccessDeniedException", HttpStatusCode.Forbidden, "denied") }, + new object[] { MakeServiceException("KMSAccessDeniedException", HttpStatusCode.BadRequest, "kms denied") }, + new object[] { MakeServiceException("ValidationException", HttpStatusCode.BadRequest, "bad input") }, + new object[] { MakeServiceException("InvalidParameterValueException", HttpStatusCode.BadRequest, "Some other parameter") }, + }; + + [Theory] + [MemberData(nameof(TerminalCheckpointErrorCases))] + public async Task WrapAsync_CheckpointThrowsTerminal_ReturnsFailed(AmazonServiceException ex) + { + var input = MakeCheckpointInput(); + var mockClient = new MockLambdaClient { CheckpointThrows = ex }; + + var output = await DurableFunction.WrapAsync( + SingleStepWorkflow, input, new TestLambdaContext(), 
mockClient); + + Assert.Equal(InvocationStatus.Failed, output.Status); + Assert.NotNull(output.Error); + Assert.Equal(ex.Message, output.Error!.ErrorMessage); + } + + public static IEnumerable TransientCheckpointErrorCases() => new[] + { + // 5xx + new object[] { MakeServiceException("InternalServerError", HttpStatusCode.InternalServerError, "boom") }, + new object[] { MakeServiceException("ServiceUnavailable", HttpStatusCode.ServiceUnavailable, "down") }, + // 429 + new object[] { MakeServiceException("TooManyRequestsException", (HttpStatusCode)429, "throttled") }, + // No status (network / SDK-internal). HttpStatusCode default (0) → classifier treats < 400 as transient. + new object[] { MakeServiceException("RequestTimeout", 0, "timeout") }, + // Carve-out: stale checkpoint token is transient. + new object[] { MakeServiceException("InvalidParameterValueException", HttpStatusCode.BadRequest, "Invalid Checkpoint Token: stale") }, + }; + + [Theory] + [MemberData(nameof(TransientCheckpointErrorCases))] + public async Task WrapAsync_CheckpointThrowsTransient_PropagatesToHost(AmazonServiceException ex) + { + var input = MakeCheckpointInput(); + var mockClient = new MockLambdaClient { CheckpointThrows = ex }; + + var thrown = await Assert.ThrowsAsync(ex.GetType(), () => + DurableFunction.WrapAsync( + SingleStepWorkflow, input, new TestLambdaContext(), mockClient)); + + Assert.Same(ex, thrown); + } + + [Fact] + public async Task WrapAsync_HydrationThrows_AlwaysPropagatesToHost() + { + // State hydration is OUTSIDE the IsTerminalCheckpointError try/catch — every + // GetExecutionStateAsync failure escapes for Lambda retry, matching Python's + // GetExecutionStateError (an InvocationError). Use a 4xx that *would* be terminal + // if it came from a checkpoint flush to prove the path isn't classified. 
+ var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:hydrate-fail", + InitialExecutionState = new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "exec-0", + Type = OperationTypes.Execution, + Status = OperationStatuses.Started, + ExecutionDetails = new ExecutionDetails { InputPayload = "{\"orderId\":\"order-1\"}" } + } + }, + NextMarker = "page-1" // force the hydration loop to run + } + }; + var ex = MakeServiceException("ResourceNotFoundException", HttpStatusCode.NotFound, "ARN gone"); + var mockClient = new MockLambdaClient { GetExecutionStateThrows = ex }; + + var thrown = await Assert.ThrowsAsync(() => + DurableFunction.WrapAsync( + MyWorkflow, input, new TestLambdaContext(), mockClient)); + + Assert.Same(ex, thrown); + } + + private static AmazonServiceException MakeServiceException(string code, HttpStatusCode status, string message) + { + return new AmazonServiceException(message, innerException: null, ErrorType.Unknown, code, requestId: "req-1", statusCode: status); + } + + private static DurableExecutionInvocationInput MakeCheckpointInput() => new() + { + DurableExecutionArn = "arn:aws:lambda:us-east-1:123:durable-execution:checkpoint-fail", + InitialExecutionState = new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "exec-0", + Type = OperationTypes.Execution, + Status = OperationStatuses.Started, + ExecutionDetails = new ExecutionDetails { InputPayload = "{\"orderId\":\"order-1\"}" } + } + } + } + }; + + private static async Task SingleStepWorkflow(OrderEvent input, IDurableContext context) + { + // One step succeed → forces a checkpoint flush, which the mock fails. 
+ await context.StepAsync(async (_) => { await Task.CompletedTask; return "ok"; }, name: "s1"); + return new OrderResult { Status = "done" }; + } + + private static async Task MyWorkflow(OrderEvent input, IDurableContext context) + { + var validation = await context.StepAsync( + async (_) => { await Task.CompletedTask; return new ValidationResult { IsValid = true }; }, + name: "validate"); + + await context.WaitAsync(TimeSpan.FromSeconds(30), name: "delay"); + + return new OrderResult { Status = "approved", OrderId = input.OrderId }; + } + + private class OrderEvent + { + public string? OrderId { get; set; } + } + + private class OrderResult + { + public string? Status { get; set; } + public string? OrderId { get; set; } + } + + private class ValidationResult + { + public bool IsValid { get; set; } + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/EnumsTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/EnumsTests.cs new file mode 100644 index 000000000..1626f118a --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/EnumsTests.cs @@ -0,0 +1,39 @@ +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.DurableExecution.Internal; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class EnumsTests +{ + [Fact] + public void InvocationStatus_HasExpectedValues() + { + Assert.Equal(0, (int)InvocationStatus.Succeeded); + Assert.Equal(1, (int)InvocationStatus.Failed); + Assert.Equal(2, (int)InvocationStatus.Pending); + } + + [Fact] + public void OperationTypes_HasExpectedConstants() + { + Assert.Equal("STEP", OperationTypes.Step); + Assert.Equal("WAIT", OperationTypes.Wait); + Assert.Equal("CALLBACK", OperationTypes.Callback); + Assert.Equal("CHAINED_INVOKE", OperationTypes.ChainedInvoke); + Assert.Equal("CONTEXT", OperationTypes.Context); + Assert.Equal("EXECUTION", OperationTypes.Execution); + } + + [Fact] + public void OperationStatuses_HasExpectedConstants() + { + Assert.Equal("STARTED", 
OperationStatuses.Started); + Assert.Equal("SUCCEEDED", OperationStatuses.Succeeded); + Assert.Equal("FAILED", OperationStatuses.Failed); + Assert.Equal("PENDING", OperationStatuses.Pending); + Assert.Equal("CANCELLED", OperationStatuses.Cancelled); + Assert.Equal("READY", OperationStatuses.Ready); + Assert.Equal("STOPPED", OperationStatuses.Stopped); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExceptionsTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExceptionsTests.cs new file mode 100644 index 000000000..7105849bb --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExceptionsTests.cs @@ -0,0 +1,68 @@ +using Amazon.Lambda.DurableExecution; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class ExceptionsTests +{ + [Fact] + public void DurableExecutionException_IsBaseException() + { + var ex = new DurableExecutionException("test error"); + Assert.IsAssignableFrom(ex); + Assert.Equal("test error", ex.Message); + } + + [Fact] + public void DurableExecutionException_WrapsInnerException() + { + var inner = new InvalidOperationException("inner"); + var ex = new DurableExecutionException("outer", inner); + Assert.Same(inner, ex.InnerException); + } + + [Fact] + public void DurableExecutionException_ParameterlessCtor() + { + var ex = new DurableExecutionException(); + Assert.IsAssignableFrom(ex); + } + + [Fact] + public void StepException_ParameterlessCtor() + { + var ex = new StepException(); + Assert.IsAssignableFrom(ex); + } + + [Fact] + public void StepException_MessageOnlyCtor() + { + var ex = new StepException("step blew up"); + Assert.Equal("step blew up", ex.Message); + } + + [Fact] + public void StepException_WithInnerException() + { + var inner = new InvalidOperationException("inner"); + var ex = new StepException("wrapped", inner); + Assert.Same(inner, ex.InnerException); + } + + [Fact] + public void StepException_HasErrorProperties() + { + var ex = new StepException("step 
failed") + { + ErrorType = "System.TimeoutException", + ErrorData = "operation timed out", + OriginalStackTrace = new[] { "at Foo.Bar()", "at Baz.Qux()" } + }; + + Assert.IsAssignableFrom(ex); + Assert.Equal("System.TimeoutException", ex.ErrorType); + Assert.Equal("operation timed out", ex.ErrorData); + Assert.Equal(2, ex.OriginalStackTrace!.Count); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExecutionStateTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExecutionStateTests.cs new file mode 100644 index 000000000..3aad57e2d --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExecutionStateTests.cs @@ -0,0 +1,165 @@ +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.DurableExecution.Internal; +using Xunit; +using Operation = Amazon.Lambda.DurableExecution.Internal.Operation; +using StepDetails = Amazon.Lambda.DurableExecution.Internal.StepDetails; +namespace Amazon.Lambda.DurableExecution.Tests; + +public class ExecutionStateTests +{ + [Fact] + public void LoadFromCheckpoint_NullState_EntersExecutionMode() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + + Assert.Equal(ExecutionMode.Execution, state.Mode); + Assert.Equal(0, state.CheckpointedOperationCount); + } + + [Fact] + public void LoadFromCheckpoint_EmptyOperations_EntersExecutionMode() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState { Operations = new List() }); + + Assert.Equal(ExecutionMode.Execution, state.Mode); + Assert.Equal(0, state.CheckpointedOperationCount); + } + + [Fact] + public void LoadFromCheckpoint_WithOperations_StaysInReplayMode() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "0-fetch_user", + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = "{\"name\":\"Alice\"}" } + } + } + }); + + 
Assert.Equal(ExecutionMode.Replay, state.Mode); + Assert.Equal(1, state.CheckpointedOperationCount); + } + + [Fact] + public void GetOperation_ReturnsCheckpointedRecord() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "0-validate", + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = "true" } + } + } + }); + + var op = state.GetOperation("0-validate"); + Assert.NotNull(op); + Assert.Equal(OperationStatuses.Succeeded, op!.Status); + Assert.Equal("true", op.StepDetails?.Result); + } + + [Fact] + public void GetOperation_ReturnsNull_WhenNotFound() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + + var op = state.GetOperation("0-nonexistent"); + Assert.Null(op); + } + + [Fact] + public void HasOperation_ReturnsTrueForExisting() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "0-step_a", + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded + } + } + }); + + Assert.True(state.HasOperation("0-step_a")); + Assert.False(state.HasOperation("1-step_b")); + } + + [Fact] + public void EnterExecutionMode_FlipsModeAndIsIdempotent() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "0", + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded + } + } + }); + + Assert.Equal(ExecutionMode.Replay, state.Mode); + + state.EnterExecutionMode(); + Assert.Equal(ExecutionMode.Execution, state.Mode); + + state.EnterExecutionMode(); + Assert.Equal(ExecutionMode.Execution, state.Mode); + } + + [Fact] + public void GetOperation_ReturnsLatestRecord_WhenIdAppearsMultipleTimes() + { + // Wire format: when the service replays an envelope it includes the + // most recent record per ID. 
Java/Python/JS reference SDKs all key by + // ID alone and rely on the service to provide the authoritative record. + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = "0-payment", + Type = OperationTypes.Step, + Status = OperationStatuses.Started + }, + new() + { + Id = "0-payment", + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + StepDetails = new StepDetails { Result = "\"paid\"" } + } + } + }); + + var op = state.GetOperation("0-payment"); + Assert.NotNull(op); + Assert.Equal(OperationStatuses.Succeeded, op!.Status); + Assert.Equal("\"paid\"", op.StepDetails?.Result); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/LambdaDurableServiceClientTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/LambdaDurableServiceClientTests.cs new file mode 100644 index 000000000..2326f8544 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/LambdaDurableServiceClientTests.cs @@ -0,0 +1,202 @@ +using Amazon.Lambda.DurableExecution.Services; +using Amazon.Lambda.Model; +using SdkErrorObject = Amazon.Lambda.Model.ErrorObject; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class LambdaDurableServiceClientTests +{ + [Fact] + public async Task CheckpointAsync_EmptyOperations_NoApiCallReturnsToken() + { + var mockClient = new MockLambdaClient(); + var client = new LambdaDurableServiceClient(mockClient); + + var token = await client.CheckpointAsync( + "arn:aws:lambda:us-east-1:123:durable-execution:e1", + "input-token", + Array.Empty()); + + Assert.Equal("input-token", token); + Assert.Empty(mockClient.CheckpointCalls); + } + + [Fact] + public async Task CheckpointAsync_NullCheckpointToken_SendsEmptyString() + { + var mockClient = new MockLambdaClient(); + var client = new LambdaDurableServiceClient(mockClient); + + await client.CheckpointAsync( + 
"arn:aws:lambda:us-east-1:123:durable-execution:e1", + checkpointToken: null, + new[] + { + new OperationUpdate + { + Id = "0-step", + Type = "STEP", + Action = "SUCCEED", + SubType = "Step", + Name = "do_thing", + Payload = "\"ok\"" + } + }); + + var call = Assert.Single(mockClient.CheckpointCalls); + Assert.Equal("", call.CheckpointToken); + } + + [Fact] + public async Task CheckpointAsync_StepWithError_PropagatesError() + { + var mockClient = new MockLambdaClient(); + var client = new LambdaDurableServiceClient(mockClient); + + await client.CheckpointAsync( + "arn:aws:lambda:us-east-1:123:durable-execution:e1", + "tok", + new[] + { + new OperationUpdate + { + Id = "0-bad", + Type = "STEP", + Action = "FAIL", + SubType = "Step", + Name = "bad", + Error = new SdkErrorObject + { + ErrorType = "System.TimeoutException", + ErrorMessage = "timed out", + ErrorData = "{\"detail\":\"x\"}", + StackTrace = new List { "at A.B()", "at C.D()" } + } + } + }); + + var call = Assert.Single(mockClient.CheckpointCalls); + var update = Assert.Single(call.Updates); + Assert.Equal("STEP", update.Type); + Assert.Equal("FAIL", update.Action); + Assert.NotNull(update.Error); + Assert.Equal("System.TimeoutException", update.Error.ErrorType); + Assert.Equal("timed out", update.Error.ErrorMessage); + Assert.Equal("{\"detail\":\"x\"}", update.Error.ErrorData); + Assert.Equal(2, update.Error.StackTrace.Count); + } + + [Fact] + public async Task CheckpointAsync_WaitWithOptions_PropagatesWaitOptions() + { + var mockClient = new MockLambdaClient(); + var client = new LambdaDurableServiceClient(mockClient); + + await client.CheckpointAsync( + "arn", + "tok", + new[] + { + new OperationUpdate + { + Id = "0-wait", + Type = "WAIT", + Action = "START", + SubType = "Wait", + Name = "delay", + WaitOptions = new WaitOptions { WaitSeconds = 45 } + } + }); + + var update = mockClient.CheckpointCalls[0].Updates[0]; + Assert.NotNull(update.WaitOptions); + Assert.Equal(45, update.WaitOptions.WaitSeconds); + 
} + + [Fact] + public async Task CheckpointAsync_ParentIdAndPayload_ArePropagated() + { + var mockClient = new MockLambdaClient(); + var client = new LambdaDurableServiceClient(mockClient); + + await client.CheckpointAsync( + "arn", + "tok", + new[] + { + new OperationUpdate + { + Id = "child-1", + ParentId = "parent-0", + Type = "STEP", + Action = "SUCCEED", + SubType = "Step", + Payload = "{\"a\":1}" + } + }); + + var update = mockClient.CheckpointCalls[0].Updates[0]; + Assert.Equal("parent-0", update.ParentId); + Assert.Equal("{\"a\":1}", update.Payload); + } + + [Fact] + public async Task CheckpointAsync_MultipleUpdates_AllForwarded() + { + var mockClient = new MockLambdaClient(); + var client = new LambdaDurableServiceClient(mockClient); + + await client.CheckpointAsync( + "arn", + "tok", + new[] + { + new OperationUpdate + { + Id = "0-step", + Type = "STEP", + Action = "SUCCEED", + SubType = "Step", + Name = "validate" + }, + new OperationUpdate + { + Id = "1-wait", + Type = "WAIT", + Action = "START", + SubType = "Wait", + Name = "delay", + WaitOptions = new WaitOptions { WaitSeconds = 30 } + } + }); + + var call = Assert.Single(mockClient.CheckpointCalls); + Assert.Equal(2, call.Updates.Count); + Assert.Equal("STEP", call.Updates[0].Type); + Assert.Equal("WAIT", call.Updates[1].Type); + } + + [Fact] + public async Task CheckpointAsync_ReturnsNewToken() + { + var mockClient = new MockLambdaClient(); + var client = new LambdaDurableServiceClient(mockClient); + + var newToken = await client.CheckpointAsync( + "arn", + "old-token", + new[] + { + new OperationUpdate + { + Id = "0-x", + Type = "STEP", + Action = "SUCCEED" + } + }); + + // MockLambdaClient returns "token-1", "token-2", etc. 
+ Assert.Equal("token-1", newToken); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/MockLambdaClient.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/MockLambdaClient.cs new file mode 100644 index 000000000..8df98a67d --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/MockLambdaClient.cs @@ -0,0 +1,65 @@ +using Amazon.Lambda; +using Amazon.Lambda.Model; +using Amazon.Runtime; + +namespace Amazon.Lambda.DurableExecution.Tests; + +/// <summary> +/// A mock that subclasses AmazonLambdaClient and overrides CheckpointDurableExecutionAsync and GetDurableExecutionStateAsync +/// to avoid real API calls. Records every checkpoint and state-fetch request for test assertions. +/// </summary> +internal class MockLambdaClient : AmazonLambdaClient +{ + public List<CheckpointDurableExecutionRequest> CheckpointCalls { get; } = new(); + public List<GetDurableExecutionStateRequest> GetExecutionStateCalls { get; } = new(); + + /// <summary> + /// Optional handler for <see cref="GetDurableExecutionStateAsync"/> calls. Tests + /// that exercise the paginated-state path can set this to control the response + /// for each page. + /// </summary> + public Func<GetDurableExecutionStateRequest, GetDurableExecutionStateResponse>? GetExecutionStateHandler { get; set; } + + private int _tokenCounter; + + public MockLambdaClient() : base("fake-access-key", "fake-secret-key", Amazon.RegionEndpoint.USEast1) { } + + /// <summary> + /// Optional exception thrown by <see cref="CheckpointDurableExecutionAsync"/>. Tests + /// that exercise checkpoint-error classification can set this to inject a specific + /// SDK exception on the orchestration-path drain. + /// </summary> + public Exception? CheckpointThrows { get; set; } + + /// <summary> + /// Optional exception thrown by <see cref="GetDurableExecutionStateAsync"/>. Tests + /// that exercise hydration-error classification can set this to inject a specific + /// SDK exception on the initial state-fetch path. + /// </summary> + public Exception? 
GetExecutionStateThrows { get; set; } + + public override Task<CheckpointDurableExecutionResponse> CheckpointDurableExecutionAsync( + CheckpointDurableExecutionRequest request, + CancellationToken cancellationToken = default) + { + CheckpointCalls.Add(request); + if (CheckpointThrows != null) throw CheckpointThrows; + return Task.FromResult(new CheckpointDurableExecutionResponse + { + CheckpointToken = $"token-{++_tokenCounter}" + }); + } + + public override Task<GetDurableExecutionStateResponse> GetDurableExecutionStateAsync( + GetDurableExecutionStateRequest request, + CancellationToken cancellationToken = default) + { + GetExecutionStateCalls.Add(request); + if (GetExecutionStateThrows != null) throw GetExecutionStateThrows; + if (GetExecutionStateHandler != null) + { + return Task.FromResult(GetExecutionStateHandler(request)); + } + return Task.FromResult(new GetDurableExecutionStateResponse()); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ModelsTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ModelsTests.cs new file mode 100644 index 000000000..2b7d3489e --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ModelsTests.cs @@ -0,0 +1,203 @@ +using System.Text.Json; +using Amazon.Lambda.DurableExecution; +using Amazon.Lambda.DurableExecution.Internal; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class ModelsTests +{ + [Fact] + public void Operation_PropertiesAssignable() + { + var op = new Operation + { + Id = "op-1", + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + Name = "fetch_user", + StepDetails = new StepDetails { Result = "{\"name\":\"Alice\"}" } + }; + + Assert.Equal("op-1", op.Id); + Assert.Equal(OperationTypes.Step, op.Type); + Assert.Equal(OperationStatuses.Succeeded, op.Status); + Assert.Equal("fetch_user", op.Name); + Assert.Equal("{\"name\":\"Alice\"}", op.StepDetails?.Result); + } + + [Fact] + public void Operation_WaitWithScheduledEndTimestamp() + { + var op = new Operation + { + Id = "op-2", + Type = 
OperationTypes.Wait, + Status = OperationStatuses.Pending, + Name = "cooldown", + WaitDetails = new WaitDetails + { + ScheduledEndTimestamp = 1767268830000L // 2026-01-01T12:00:30Z in ms + } + }; + + Assert.Equal(OperationTypes.Wait, op.Type); + Assert.Equal(1767268830000L, op.WaitDetails?.ScheduledEndTimestamp); + } + + [Fact] + public void ErrorObject_FromException() + { + var ex = new InvalidOperationException("something went wrong"); + var error = ErrorObject.FromException(ex); + + Assert.Equal("System.InvalidOperationException", error.ErrorType); + Assert.Equal("something went wrong", error.ErrorMessage); + } + + [Fact] + public void ErrorObject_RoundTripSerialization() + { + var error = new ErrorObject + { + ErrorType = "System.TimeoutException", + ErrorMessage = "timed out", + StackTrace = new[] { "at Foo.Bar()", "at Baz.Qux()" }, + ErrorData = "{\"key\":\"value\"}" + }; + + var json = JsonSerializer.Serialize(error); + var deserialized = JsonSerializer.Deserialize(json)!; + + Assert.Equal("System.TimeoutException", deserialized.ErrorType); + Assert.Equal("timed out", deserialized.ErrorMessage); + Assert.Equal(2, deserialized.StackTrace!.Count); + Assert.Equal("{\"key\":\"value\"}", deserialized.ErrorData); + } + + [Fact] + public void DurableExecutionInvocationInput_Deserialization() + { + var json = """ + { + "DurableExecutionArn": "arn:aws:lambda:us-east-1:123:durable-execution:abc", + "CheckpointToken": "token-1", + "InitialExecutionState": { + "Operations": [ + { + "Id": "exec-1", + "Type": "EXECUTION", + "Status": "STARTED", + "ExecutionDetails": { + "InputPayload": "{\"orderId\":\"order-123\",\"amount\":99.99}" + } + }, + { + "Id": "op-1", + "Type": "STEP", + "Status": "SUCCEEDED", + "Name": "validate", + "StepDetails": { + "Result": "true" + } + } + ] + } + } + """; + + var input = JsonSerializer.Deserialize(json)!; + + Assert.Equal("arn:aws:lambda:us-east-1:123:durable-execution:abc", input.DurableExecutionArn); + Assert.Equal("token-1", 
input.CheckpointToken); + Assert.NotNull(input.InitialExecutionState); + Assert.Equal(2, input.InitialExecutionState!.Operations!.Count); + + var stepOp = input.InitialExecutionState.Operations![1]; + Assert.Equal("op-1", stepOp.Id); + Assert.Equal(OperationTypes.Step, stepOp.Type); + Assert.Equal("true", stepOp.StepDetails?.Result); + + // The EXECUTION operation carries the user payload in ExecutionDetails.InputPayload. + var execOp = input.InitialExecutionState.Operations[0]; + Assert.Equal(OperationTypes.Execution, execOp.Type); + var payload = JsonSerializer.Deserialize(execOp.ExecutionDetails!.InputPayload!); + Assert.Equal("order-123", payload!.OrderId); + Assert.Equal(99.99m, payload.Amount); + } + + [Fact] + public void DurableExecutionInvocationInput_NoExecutionOp_HasNullPayload() + { + var input = new DurableExecutionInvocationInput + { + DurableExecutionArn = "arn:test" + }; + + // No InitialExecutionState means no EXECUTION operation and thus no user payload + Assert.Null(input.InitialExecutionState); + } + + [Fact] + public void DurableExecutionInvocationOutput_Succeeded() + { + var output = new DurableExecutionInvocationOutput + { + Status = InvocationStatus.Succeeded, + Result = "{\"status\":\"approved\"}" + }; + + var json = JsonSerializer.Serialize(output); + var deserialized = JsonSerializer.Deserialize(json)!; + + Assert.Equal(InvocationStatus.Succeeded, deserialized.Status); + Assert.Equal("{\"status\":\"approved\"}", deserialized.Result); + } + + [Fact] + public void DurableExecutionInvocationOutput_Failed() + { + var output = new DurableExecutionInvocationOutput + { + Status = InvocationStatus.Failed, + Error = new ErrorObject + { + ErrorMessage = "step failed", + ErrorType = "StepException" + } + }; + + var json = JsonSerializer.Serialize(output); + var deserialized = JsonSerializer.Deserialize(json)!; + + Assert.Equal(InvocationStatus.Failed, deserialized.Status); + Assert.NotNull(deserialized.Error); + Assert.Equal("step failed", 
deserialized.Error!.ErrorMessage); + Assert.Equal("StepException", deserialized.Error.ErrorType); + } + + [Fact] + public void DurableExecutionInvocationOutput_Pending() + { + var output = new DurableExecutionInvocationOutput + { + Status = InvocationStatus.Pending + }; + + var json = JsonSerializer.Serialize(output); + var deserialized = JsonSerializer.Deserialize(json)!; + + Assert.Equal(InvocationStatus.Pending, deserialized.Status); + Assert.Null(deserialized.Result); + Assert.Null(deserialized.Error); + } + + private class TestOrderEvent + { + [System.Text.Json.Serialization.JsonPropertyName("orderId")] + public string? OrderId { get; set; } + + [System.Text.Json.Serialization.JsonPropertyName("amount")] + public decimal Amount { get; set; } + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/OperationIdGeneratorTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/OperationIdGeneratorTests.cs new file mode 100644 index 000000000..6eb63551b --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/OperationIdGeneratorTests.cs @@ -0,0 +1,100 @@ +using System.Security.Cryptography; +using System.Text; +using Amazon.Lambda.DurableExecution.Internal; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class OperationIdGeneratorTests +{ + private static string Sha256Hex(string input) + { + using var sha = SHA256.Create(); + var bytes = sha.ComputeHash(Encoding.UTF8.GetBytes(input)); + var sb = new StringBuilder(bytes.Length * 2); + foreach (var b in bytes) sb.Append(b.ToString("x2")); + return sb.ToString(); + } + + [Fact] + public void NextId_ProducesSha256OfPositionString_StartingAtOne() + { + var gen = new OperationIdGenerator(); + Assert.Equal(Sha256Hex("1"), gen.NextId()); + Assert.Equal(Sha256Hex("2"), gen.NextId()); + Assert.Equal(Sha256Hex("3"), gen.NextId()); + } + + [Fact] + public void NextId_NameIsNotPartOfId() + { + // Name must not influence the deterministic ID — replays must still + 
// correlate after a step is renamed. The reference SDKs (Java/JS/Python) + // all keep Name in a separate field on OperationUpdate. + var gen = new OperationIdGenerator(); + Assert.Equal(Sha256Hex("1"), gen.NextId()); + Assert.Equal(Sha256Hex("2"), gen.NextId()); + } + + [Fact] + public void HashOperationId_IsStable() + { + Assert.Equal(Sha256Hex("hello"), OperationIdGenerator.HashOperationId("hello")); + Assert.Equal(Sha256Hex("1"), OperationIdGenerator.HashOperationId("1")); + } + + [Fact] + public void ChildGenerator_PrefixesPositionWithParentHash() + { + var gen = new OperationIdGenerator(); + var parentId = gen.NextId(); + var child = gen.CreateChild(parentId); + + Assert.Equal(Sha256Hex(parentId + "-1"), child.NextId()); + Assert.Equal(Sha256Hex(parentId + "-2"), child.NextId()); + } + + [Fact] + public void ChildGenerator_ParentIdProperty() + { + var gen = new OperationIdGenerator(); + Assert.Null(gen.ParentId); + + var child = new OperationIdGenerator("op-5"); + Assert.Equal("op-5", child.ParentId); + } + + [Fact] + public void MultipleChildren_IndependentCounters() + { + var child1 = new OperationIdGenerator("parent-1"); + var child2 = new OperationIdGenerator("parent-2"); + + Assert.Equal(Sha256Hex("parent-1-1"), child1.NextId()); + Assert.Equal(Sha256Hex("parent-2-1"), child2.NextId()); + Assert.Equal(Sha256Hex("parent-1-2"), child1.NextId()); + Assert.Equal(Sha256Hex("parent-2-2"), child2.NextId()); + } + + [Fact] + public void Deterministic_SameSequenceOnReplay() + { + var gen1 = new OperationIdGenerator(); + var ids1 = new[] { gen1.NextId(), gen1.NextId(), gen1.NextId() }; + + var gen2 = new OperationIdGenerator(); + var ids2 = new[] { gen2.NextId(), gen2.NextId(), gen2.NextId() }; + + Assert.Equal(ids1, ids2); + } + + [Fact] + public void Reset_RewindsCounter() + { + var gen = new OperationIdGenerator(); + gen.NextId(); + gen.NextId(); + gen.Reset(); + Assert.Equal(Sha256Hex("1"), gen.NextId()); + } +} diff --git 
a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/RecordingBatcher.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/RecordingBatcher.cs new file mode 100644 index 000000000..8fe7b6d6d --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/RecordingBatcher.cs @@ -0,0 +1,51 @@ +using Amazon.Lambda.DurableExecution.Internal; +using SdkOperationUpdate = Amazon.Lambda.Model.OperationUpdate; + +namespace Amazon.Lambda.DurableExecution.Tests; + +/// +/// Test helper: a that records every flushed +/// update without making any network calls. Tests construct one of these in +/// place of a real batcher to inspect what would have been sent to the service. +/// +internal sealed class RecordingBatcher +{ + private readonly List _flushed = new(); + private readonly List _flushBatchSizes = new(); + private readonly object _lock = new(); + + public CheckpointBatcher Batcher { get; } + + public RecordingBatcher(CheckpointBatcherConfig? config = null) + { + Batcher = new CheckpointBatcher("test-token", Flush, config); + } + + /// + /// Cumulative list of every update that has been flushed, in order. + /// + public IReadOnlyList Flushed + { + get { lock (_lock) return _flushed.ToArray(); } + } + + /// + /// One entry per batch flushed, recording the batch size. With + /// = Zero (default), + /// every produces one batch. + /// + public IReadOnlyList FlushBatchSizes + { + get { lock (_lock) return _flushBatchSizes.ToArray(); } + } + + private Task Flush(string? 
token, IReadOnlyList ops, CancellationToken ct) + { + lock (_lock) + { + _flushed.AddRange(ops); + _flushBatchSizes.Add(ops.Count); + } + return Task.FromResult(token); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/TerminationManagerTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/TerminationManagerTests.cs new file mode 100644 index 000000000..a12ff4a6c --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/TerminationManagerTests.cs @@ -0,0 +1,88 @@ +using Amazon.Lambda.DurableExecution.Internal; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +public class TerminationManagerTests +{ + [Fact] + public async Task Terminate_ResolvesTerminationTask() + { + var manager = new TerminationManager(); + Assert.False(manager.IsTerminated); + + manager.Terminate(TerminationReason.WaitScheduled, "wait pending"); + + Assert.True(manager.IsTerminated); + var result = await manager.TerminationTask; + Assert.Equal(TerminationReason.WaitScheduled, result.Reason); + Assert.Equal("wait pending", result.Message); + } + + [Fact] + public void Terminate_OnlyFirstCallWins() + { + var manager = new TerminationManager(); + + var first = manager.Terminate(TerminationReason.WaitScheduled, "first"); + var second = manager.Terminate(TerminationReason.CallbackPending, "second"); + + Assert.True(first); + Assert.False(second); + } + + [Fact] + public async Task Terminate_FirstReasonIsPreserved() + { + var manager = new TerminationManager(); + + manager.Terminate(TerminationReason.CallbackPending, "callback"); + manager.Terminate(TerminationReason.WaitScheduled, "wait"); + + var result = await manager.TerminationTask; + Assert.Equal(TerminationReason.CallbackPending, result.Reason); + Assert.Equal("callback", result.Message); + } + + [Fact] + public async Task Terminate_WithException() + { + var manager = new TerminationManager(); + var ex = new Exception("checkpoint failed"); + + 
manager.Terminate(TerminationReason.CheckpointFailed, "error", ex); + + var result = await manager.TerminationTask; + Assert.Equal(TerminationReason.CheckpointFailed, result.Reason); + Assert.Same(ex, result.Exception); + } + + [Fact] + public async Task TerminationTask_WinsRaceAgainstNeverCompletingTask() + { + var manager = new TerminationManager(); + var neverCompletes = new TaskCompletionSource().Task; + + manager.Terminate(TerminationReason.WaitScheduled); + + var winner = await Task.WhenAny(neverCompletes, manager.TerminationTask); + Assert.Same(manager.TerminationTask, winner); + } + + [Fact] + public async Task ConcurrentTerminate_OnlyOneSucceeds() + { + var manager = new TerminationManager(); + var results = new bool[10]; + + var tasks = Enumerable.Range(0, 10).Select(i => Task.Run(() => + { + results[i] = manager.Terminate(TerminationReason.WaitScheduled, $"caller-{i}"); + })); + + await Task.WhenAll(tasks); + + Assert.Equal(1, results.Count(r => r)); + Assert.True(manager.IsTerminated); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/UpperSnakeCaseEnumConverterTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/UpperSnakeCaseEnumConverterTests.cs new file mode 100644 index 000000000..7ac6df052 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/UpperSnakeCaseEnumConverterTests.cs @@ -0,0 +1,84 @@ +using System.Text.Json; +using System.Text.Json.Serialization; +using Amazon.Lambda.DurableExecution; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +/// +/// Direct tests for UpperSnakeCaseEnumConverter via a sample enum, exercising +/// every branch (Read with multi-word value, Read with single word, Read with +/// null/unparsable, plus the Write path for outbound serialization). 
+/// +public class UpperSnakeCaseEnumConverterTests +{ + public enum Sample + { + None, + FooBar, + BazQuxQuux + } + + public class Holder + { + [JsonConverter(typeof(UpperSnakeCaseEnumConverter))] + public Sample Value { get; set; } + } + + [Theory] + [InlineData("\"FOO_BAR\"", Sample.FooBar)] + [InlineData("\"BAZ_QUX_QUUX\"", Sample.BazQuxQuux)] + [InlineData("\"NONE\"", Sample.None)] + public void Read_UpperSnakeCase_ReturnsExpectedEnum(string json, Sample expected) + { + var holder = JsonSerializer.Deserialize($"{{\"Value\":{json}}}")!; + Assert.Equal(expected, holder.Value); + } + + [Fact] + public void Read_NullValue_ReturnsDefault() + { + var holder = JsonSerializer.Deserialize("{\"Value\":null}")!; + Assert.Equal(Sample.None, holder.Value); + } + + [Fact] + public void Read_AlreadyPascalCase_ParsesCaseInsensitively() + { + // The converter first tries snake→pascal, then a raw case-insensitive parse. + // A camel-case input like "fooBar" hits the fallback path. + var holder = JsonSerializer.Deserialize("{\"Value\":\"fooBar\"}")!; + Assert.Equal(Sample.FooBar, holder.Value); + } + + [Fact] + public void Read_UnparsableValue_ThrowsJsonException() + { + // Unknown wire values must surface as JsonException rather than + // silently coercing to default(T) — otherwise an unrecognized + // service status would be indistinguishable from the zero value. 
+ Assert.Throws(() => + JsonSerializer.Deserialize("{\"Value\":\"NOT_A_REAL_VALUE\"}")); + } + + [Fact] + public void Write_PascalCase_EmitsUpperSnake() + { + var json = JsonSerializer.Serialize(new Holder { Value = Sample.FooBar }); + Assert.Contains("\"FOO_BAR\"", json); + } + + [Fact] + public void Write_MultiWord_EmitsUpperSnake() + { + var json = JsonSerializer.Serialize(new Holder { Value = Sample.BazQuxQuux }); + Assert.Contains("\"BAZ_QUX_QUUX\"", json); + } + + [Fact] + public void Write_SingleWord_EmitsUpperWithoutUnderscores() + { + var json = JsonSerializer.Serialize(new Holder { Value = Sample.None }); + Assert.Contains("\"NONE\"", json); + } +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/coverage.runsettings b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/coverage.runsettings new file mode 100644 index 000000000..6c38b1258 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/coverage.runsettings @@ -0,0 +1,15 @@ + + + + + + + cobertura + [Amazon.Lambda.DurableExecution]* + [Amazon.Lambda.DurableExecution.Tests]* + GeneratedCodeAttribute + + + + + diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/coverage.sh b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/coverage.sh new file mode 100644 index 000000000..b953bd07e --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/coverage.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +set -e +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT="$(cd "$HERE/../../.." 
&& pwd)" +PROJ="$HERE/Amazon.Lambda.DurableExecution.Tests.csproj" +OUT="$HERE/TestResults" + +rm -rf "$OUT" +dotnet test "$PROJ" -c Release \ + --collect:"XPlat Code Coverage" \ + --settings "$HERE/coverage.runsettings" \ + --results-directory "$OUT" + +REPORT_FILE=$(find "$OUT" -name "coverage.cobertura.xml" -type f | head -1) +if [ -z "$REPORT_FILE" ]; then + echo "No coverage report found under $OUT" + exit 1 +fi + +reportgenerator \ + "-reports:$REPORT_FILE" \ + "-targetdir:$OUT/report" \ + "-reporttypes:Html;TextSummary" + +echo +echo "==================== Coverage Summary ====================" +cat "$OUT/report/Summary.txt" +echo "==========================================================" +echo "Full HTML report: $OUT/report/index.html" From 5cdab8d8d3b08b210168c0a73d7dbaf3bca0be87 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Thu, 14 May 2026 13:41:15 -0400 Subject: [PATCH 2/4] Track replay state per operation rather than via a global flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Match the Python / Java / JavaScript reference SDKs' replay-mode model: the workflow is "replaying" iff it has not yet revisited every checkpointed completed user-replayable operation. A single global flag flipped on the first fresh op (the prior model) misclassified workflow- body code that runs before the first step and would not generalize to Map/Parallel/Callback later. ExecutionState changes: - Replace `Mode`/`ExecutionMode`/`EnterExecutionMode()` with `IsReplaying` + `TrackReplay(operationId)`. - Initial replay decision: any non-EXECUTION op present means we're replaying. The service always sends an EXECUTION-type op carrying the input payload — that's bookkeeping, not user history, so it does not count toward replay (matches Python execution.py:258, Java ExecutionManager:81, JS execution-context.ts:62). - TrackReplay flips IsReplaying false once every checkpointed terminal- status non-EXECUTION op has been visited. 
Terminal set matches Python's: SUCCEEDED, FAILED, CANCELLED, STOPPED. Operation changes: - DurableOperation.ExecuteAsync calls TrackReplay(OperationId) at the top, so every operation participates in visit accounting without each subclass needing to remember. - StepOperation/WaitOperation drop their manual EnterExecutionMode calls. Tests: - ExecutionStateTests rewritten around IsReplaying/TrackReplay, including pinning regressions: only-EXECUTION-op ⇒ NotReplaying, all-visited ⇒ flips out of replay, PENDING ops do not block transition, idempotency. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Internal/DurableOperation.cs | 4 + .../Internal/ExecutionState.cs | 105 +++++++---- .../Internal/StepOperation.cs | 6 +- .../Internal/WaitOperation.cs | 2 - .../ExecutionStateTests.cs | 168 ++++++++++++------ 5 files changed, 195 insertions(+), 90 deletions(-) diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/DurableOperation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/DurableOperation.cs index e7734abf9..907d6e128 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/DurableOperation.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/DurableOperation.cs @@ -44,6 +44,10 @@ public Task ExecuteAsync(CancellationToken cancellationToken) { State.ValidateReplayConsistency(OperationId, OperationType, Name); + // Record that the workflow has reached this op. If every completed + // checkpointed op has now been visited, the state flips out of replay. + State.TrackReplay(OperationId); + var existing = State.GetOperation(OperationId); return existing == null ? 
StartAsync(cancellationToken) diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ExecutionState.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ExecutionState.cs index 5ee690be0..606614621 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ExecutionState.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ExecutionState.cs @@ -1,43 +1,50 @@ namespace Amazon.Lambda.DurableExecution.Internal; /// -/// Replay state of the current invocation. -/// -internal enum ExecutionMode -{ - /// Re-deriving prior operations from checkpointed state. - Replay, - /// Executing fresh code that hasn't been checkpointed before. - Execution -} - -/// -/// In-memory store of the operations replayed from . -/// Read-only after load (apart from ); outbound -/// checkpoints are owned by . +/// In-memory store of the operations replayed from +/// plus replay-mode tracking. Outbound checkpoints are owned by +/// ; this type is the inbound side only. /// +/// +/// Replay tracking mirrors the Python / Java / JavaScript reference SDKs: +/// +/// At construction the workflow is "replaying" iff any user-replayable +/// op is present. The service always sends one EXECUTION-type op +/// carrying the input payload — that's bookkeeping, not user history, +/// so it doesn't count. +/// is called by every DurableOperation.ExecuteAsync +/// at the top of the call. Once every checkpointed completed +/// non-EXECUTION op has been visited, the workflow has caught up +/// to the replay frontier and flips to false +/// for the rest of the invocation. 
+/// +/// internal sealed class ExecutionState { private readonly Dictionary _operations = new(); - - public ExecutionMode Mode { get; private set; } = ExecutionMode.Replay; + private readonly HashSet _visitedOperations = new(); + private bool _isReplaying; public int CheckpointedOperationCount => _operations.Count; + /// + /// True when the workflow is re-deriving prior operations from checkpointed + /// state. False when running fresh (not-yet-checkpointed) code. + /// + public bool IsReplaying => _isReplaying; + public void LoadFromCheckpoint(InitialExecutionState? initialState) { - if (initialState?.Operations == null) + if (initialState?.Operations != null) { - Mode = ExecutionMode.Execution; - return; + AddOperations(initialState.Operations); } - AddOperations(initialState.Operations); - - if (_operations.Count == 0) - { - Mode = ExecutionMode.Execution; - } + // Only user-replayable ops put us into replay mode. The service-side + // EXECUTION op (input payload bookkeeping) is always present and must + // not count — see Python execution.py:258 / Java ExecutionManager:81 / + // JS execution-context.ts:62 for the same rule. + _isReplaying = HasReplayableOperations(); } public void AddOperations(IEnumerable operations) @@ -60,9 +67,36 @@ public void AddOperations(IEnumerable operations) return op; } + public bool HasOperation(string operationId) => _operations.ContainsKey(operationId); + + /// + /// Records that the workflow has reached . + /// Once every checkpointed completed non-EXECUTION op has been + /// visited the workflow has caught up to the replay frontier and + /// flips to false. Idempotent: calling more than + /// once with the same id has no additional effect. + /// + public void TrackReplay(string operationId) + { + if (!_isReplaying) return; + + _visitedOperations.Add(operationId); + + // Have we visited every completed non-EXECUTION op? If so, anything + // emitted from here on is fresh execution. 
+ foreach (var op in _operations.Values) + { + if (op.Type == OperationTypes.Execution) continue; + if (!IsTerminalStatus(op.Status)) continue; + if (!_visitedOperations.Contains(op.Id!)) return; + } + + _isReplaying = false; + } + public void ValidateReplayConsistency(string operationId, string expectedType, string? expectedName) { - if (Mode != ExecutionMode.Replay) return; + if (!_isReplaying) return; if (!_operations.TryGetValue(operationId, out var op)) return; @@ -83,11 +117,18 @@ public void ValidateReplayConsistency(string operationId, string expectedType, s } } - public bool HasOperation(string operationId) => _operations.ContainsKey(operationId); + private bool HasReplayableOperations() + { + foreach (var op in _operations.Values) + { + if (op.Type != OperationTypes.Execution) return true; + } + return false; + } - /// - /// Transitions to . Called by an operation - /// that's about to run fresh (not-yet-checkpointed) code. Idempotent. - /// - public void EnterExecutionMode() => Mode = ExecutionMode.Execution; + private static bool IsTerminalStatus(string? 
status) => + status == OperationStatuses.Succeeded + || status == OperationStatuses.Failed + || status == OperationStatuses.Cancelled + || status == OperationStatuses.Stopped; } diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs index d5084229b..2decdb309 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs @@ -50,10 +50,7 @@ public StepOperation( protected override string OperationType => OperationTypes.Step; protected override Task StartAsync(CancellationToken cancellationToken) - { - State.EnterExecutionMode(); - return ExecuteFunc(cancellationToken); - } + => ExecuteFunc(cancellationToken); protected override Task ReplayAsync(Operation existing, CancellationToken cancellationToken) { @@ -73,7 +70,6 @@ protected override Task ReplayAsync(Operation existing, CancellationToken can // STARTED/READY/PENDING from a prior invocation — no retry logic // in this commit, so fall through and execute fresh. (Future work // on retries will replace this default with explicit arms.) - State.EnterExecutionMode(); return ExecuteFunc(cancellationToken); } } diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitOperation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitOperation.cs index 4fb069bf3..59254827d 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitOperation.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitOperation.cs @@ -41,8 +41,6 @@ public WaitOperation( protected override async Task StartAsync(CancellationToken cancellationToken) { - State.EnterExecutionMode(); - // Sync-flush WAIT START before suspending — the service can't schedule // a timer for a checkpoint it hasn't received. 
await EnqueueAsync(new SdkOperationUpdate diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExecutionStateTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExecutionStateTests.cs index 3aad57e2d..6500879c1 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExecutionStateTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ExecutionStateTests.cs @@ -7,127 +7,193 @@ namespace Amazon.Lambda.DurableExecution.Tests; public class ExecutionStateTests { + private const string ExecutionInputId = "exec-input"; + + private static Operation ExecutionInputOp(string id = ExecutionInputId) => new() + { + Id = id, + Type = OperationTypes.Execution, + Status = OperationStatuses.Started + }; + + private static Operation StepOp(string id, string status, string? name = null) => new() + { + Id = id, + Type = OperationTypes.Step, + Status = status, + Name = name, + StepDetails = new StepDetails { Result = "true" } + }; + [Fact] - public void LoadFromCheckpoint_NullState_EntersExecutionMode() + public void LoadFromCheckpoint_NullState_NotReplaying() { var state = new ExecutionState(); state.LoadFromCheckpoint(null); - Assert.Equal(ExecutionMode.Execution, state.Mode); + Assert.False(state.IsReplaying); Assert.Equal(0, state.CheckpointedOperationCount); } [Fact] - public void LoadFromCheckpoint_EmptyOperations_EntersExecutionMode() + public void LoadFromCheckpoint_EmptyOperations_NotReplaying() { var state = new ExecutionState(); state.LoadFromCheckpoint(new InitialExecutionState { Operations = new List() }); - Assert.Equal(ExecutionMode.Execution, state.Mode); + Assert.False(state.IsReplaying); Assert.Equal(0, state.CheckpointedOperationCount); } [Fact] - public void LoadFromCheckpoint_WithOperations_StaysInReplayMode() + public void LoadFromCheckpoint_OnlyExecutionInputOp_NotReplaying() + { + // The service sends one EXECUTION-type op carrying the input payload + // even on the first invocation. 
That op is bookkeeping, not user + // history — it must not put us into replay mode. (Matches Python + // execution.py:258, Java ExecutionManager:81, JS execution-context.ts:62.) + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List { ExecutionInputOp() } + }); + + Assert.False(state.IsReplaying); + Assert.Equal(1, state.CheckpointedOperationCount); + } + + [Fact] + public void LoadFromCheckpoint_WithReplayableOperations_IsReplaying() { var state = new ExecutionState(); state.LoadFromCheckpoint(new InitialExecutionState { Operations = new List { - new() - { - Id = "0-fetch_user", - Type = OperationTypes.Step, - Status = OperationStatuses.Succeeded, - StepDetails = new StepDetails { Result = "{\"name\":\"Alice\"}" } - } + ExecutionInputOp(), + StepOp("0-fetch_user", OperationStatuses.Succeeded) } }); - Assert.Equal(ExecutionMode.Replay, state.Mode); - Assert.Equal(1, state.CheckpointedOperationCount); + Assert.True(state.IsReplaying); + Assert.Equal(2, state.CheckpointedOperationCount); } [Fact] - public void GetOperation_ReturnsCheckpointedRecord() + public void TrackReplay_FlipsOutOfReplay_OnceAllCompletedOpsVisited() { var state = new ExecutionState(); state.LoadFromCheckpoint(new InitialExecutionState { Operations = new List { - new() - { - Id = "0-validate", - Type = OperationTypes.Step, - Status = OperationStatuses.Succeeded, - StepDetails = new StepDetails { Result = "true" } - } + ExecutionInputOp(), + StepOp("0", OperationStatuses.Succeeded), + StepOp("1", OperationStatuses.Succeeded), } }); + Assert.True(state.IsReplaying); - var op = state.GetOperation("0-validate"); - Assert.NotNull(op); - Assert.Equal(OperationStatuses.Succeeded, op!.Status); - Assert.Equal("true", op.StepDetails?.Result); + state.TrackReplay("0"); + Assert.True(state.IsReplaying); // 1-of-2 completed ops visited + + state.TrackReplay("1"); + Assert.False(state.IsReplaying); // all completed ops visited → fresh } [Fact] - 
public void GetOperation_ReturnsNull_WhenNotFound() + public void TrackReplay_PendingOpDoesNotBlockTransition() { + // A PENDING op (e.g. retry timer waiting) is not "completed" in the + // checkpoint sense — once the workflow has visited every terminally- + // completed op the SDK treats subsequent code as fresh. Matches Python's + // {SUCCEEDED, FAILED, CANCELLED, STOPPED, TIMED_OUT} terminal set. var state = new ExecutionState(); - state.LoadFromCheckpoint(null); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + ExecutionInputOp(), + StepOp("0", OperationStatuses.Succeeded), + StepOp("1", OperationStatuses.Pending), + } + }); + Assert.True(state.IsReplaying); - var op = state.GetOperation("0-nonexistent"); - Assert.Null(op); + state.TrackReplay("0"); + Assert.False(state.IsReplaying); } [Fact] - public void HasOperation_ReturnsTrueForExisting() + public void TrackReplay_IsIdempotent() { var state = new ExecutionState(); state.LoadFromCheckpoint(new InitialExecutionState { Operations = new List { - new() - { - Id = "0-step_a", - Type = OperationTypes.Step, - Status = OperationStatuses.Succeeded - } + ExecutionInputOp(), + StepOp("0", OperationStatuses.Succeeded), } }); - Assert.True(state.HasOperation("0-step_a")); - Assert.False(state.HasOperation("1-step_b")); + state.TrackReplay("0"); + Assert.False(state.IsReplaying); + + // Second call is a no-op. 
+ state.TrackReplay("0"); + Assert.False(state.IsReplaying); } [Fact] - public void EnterExecutionMode_FlipsModeAndIsIdempotent() + public void TrackReplay_NoOpWhenNotReplaying() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); + Assert.False(state.IsReplaying); + + state.TrackReplay("anything"); + Assert.False(state.IsReplaying); + } + + [Fact] + public void GetOperation_ReturnsCheckpointedRecord() { var state = new ExecutionState(); state.LoadFromCheckpoint(new InitialExecutionState { Operations = new List { - new() - { - Id = "0", - Type = OperationTypes.Step, - Status = OperationStatuses.Succeeded - } + StepOp("0-validate", OperationStatuses.Succeeded) } }); - Assert.Equal(ExecutionMode.Replay, state.Mode); + var op = state.GetOperation("0-validate"); + Assert.NotNull(op); + Assert.Equal(OperationStatuses.Succeeded, op!.Status); + } + + [Fact] + public void GetOperation_ReturnsNull_WhenNotFound() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(null); - state.EnterExecutionMode(); - Assert.Equal(ExecutionMode.Execution, state.Mode); + var op = state.GetOperation("0-nonexistent"); + Assert.Null(op); + } - state.EnterExecutionMode(); - Assert.Equal(ExecutionMode.Execution, state.Mode); + [Fact] + public void HasOperation_ReturnsTrueForExisting() + { + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List { StepOp("0-step_a", OperationStatuses.Succeeded) } + }); + + Assert.True(state.HasOperation("0-step_a")); + Assert.False(state.HasOperation("1-step_b")); } [Fact] From 4961809c19f79f8f9c540a2148873c0c32a8d3df Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Thu, 14 May 2026 14:00:50 -0400 Subject: [PATCH 3/4] Add to sln --- Libraries/Libraries.sln | 58 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/Libraries/Libraries.sln b/Libraries/Libraries.sln index e42c40045..1bc34a173 100644 --- 
a/Libraries/Libraries.sln +++ b/Libraries/Libraries.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 18 -VisualStudioVersion = 18.5.11709.299 stable +VisualStudioVersion = 18.5.11709.299 MinimumVisualStudioVersion = 10.0.40219.1 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{AAB54E74-20B1-42ED-BC3D-CE9F7BC7FD12}" EndProject @@ -155,6 +155,14 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ResponseStreamingFunctionHa EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AspNetCoreStreamingApiGatewayTest", "test\Amazon.Lambda.RuntimeSupport.Tests\AspNetCoreStreamingApiGatewayTest\AspNetCoreStreamingApiGatewayTest.csproj", "{0768FA72-CF49-2B59-BC4C-E4CE579E5D93}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Amazon.Lambda.DurableExecution", "src\Amazon.Lambda.DurableExecution\Amazon.Lambda.DurableExecution.csproj", "{9097B5A4-E100-47FD-A676-0B666A36FAFF}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Amazon.Lambda.DurableExecution.Tests", "test\Amazon.Lambda.DurableExecution.Tests\Amazon.Lambda.DurableExecution.Tests.csproj", "{57150BA6-3826-431F-8F58-B1D11FAFC5D4}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Amazon.Lambda.DurableExecution.IntegrationTests", "test\Amazon.Lambda.DurableExecution.IntegrationTests\Amazon.Lambda.DurableExecution.IntegrationTests.csproj", "{CA132CAB-FF4F-4312-B3A3-66DE9D360F27}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Amazon.Lambda.DurableExecution.AotPublishTest", "test\Amazon.Lambda.DurableExecution.AotPublishTest\Amazon.Lambda.DurableExecution.AotPublishTest.csproj", "{16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -969,6 +977,54 @@ Global {0768FA72-CF49-2B59-BC4C-E4CE579E5D93}.Release|x64.Build.0 = Release|Any CPU 
{0768FA72-CF49-2B59-BC4C-E4CE579E5D93}.Release|x86.ActiveCfg = Release|Any CPU {0768FA72-CF49-2B59-BC4C-E4CE579E5D93}.Release|x86.Build.0 = Release|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Debug|x64.ActiveCfg = Debug|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Debug|x64.Build.0 = Debug|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Debug|x86.ActiveCfg = Debug|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Debug|x86.Build.0 = Debug|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Release|Any CPU.Build.0 = Release|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Release|x64.ActiveCfg = Release|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Release|x64.Build.0 = Release|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Release|x86.ActiveCfg = Release|Any CPU + {9097B5A4-E100-47FD-A676-0B666A36FAFF}.Release|x86.Build.0 = Release|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Debug|x64.ActiveCfg = Debug|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Debug|x64.Build.0 = Debug|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Debug|x86.ActiveCfg = Debug|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Debug|x86.Build.0 = Debug|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Release|Any CPU.Build.0 = Release|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Release|x64.ActiveCfg = Release|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Release|x64.Build.0 = Release|Any CPU + {57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Release|x86.ActiveCfg = Release|Any CPU + 
{57150BA6-3826-431F-8F58-B1D11FAFC5D4}.Release|x86.Build.0 = Release|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Debug|Any CPU.Build.0 = Debug|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Debug|x64.ActiveCfg = Debug|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Debug|x64.Build.0 = Debug|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Debug|x86.ActiveCfg = Debug|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Debug|x86.Build.0 = Debug|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Release|Any CPU.ActiveCfg = Release|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Release|Any CPU.Build.0 = Release|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Release|x64.ActiveCfg = Release|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Release|x64.Build.0 = Release|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Release|x86.ActiveCfg = Release|Any CPU + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27}.Release|x86.Build.0 = Release|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Debug|Any CPU.Build.0 = Debug|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Debug|x64.ActiveCfg = Debug|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Debug|x64.Build.0 = Debug|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Debug|x86.ActiveCfg = Debug|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Debug|x86.Build.0 = Debug|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Release|Any CPU.ActiveCfg = Release|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Release|Any CPU.Build.0 = Release|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Release|x64.ActiveCfg = Release|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Release|x64.Build.0 = Release|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Release|x86.ActiveCfg = Release|Any CPU + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection 
GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE From f17750765657327e46026842b173701cf2ebdcf3 Mon Sep 17 00:00:00 2001 From: Norm Johanson Date: Thu, 14 May 2026 14:27:10 -0700 Subject: [PATCH 4/4] Update Libraries.sln to put Durable Function project in right solution folder --- Libraries/Libraries.sln | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Libraries/Libraries.sln b/Libraries/Libraries.sln index 1bc34a173..65b4cd9e0 100644 --- a/Libraries/Libraries.sln +++ b/Libraries/Libraries.sln @@ -1101,6 +1101,10 @@ Global {80594C21-C6EB-469E-83CC-68F9F661CA5E} = {1DE4EE60-45BA-4EF7-BE00-B9EB861E4C69} {E404A7AC-812B-BC03-CA76-02C0BC2BA7F9} = {B5BD0336-7D08-492C-8489-42C987E29B39} {0768FA72-CF49-2B59-BC4C-E4CE579E5D93} = {B5BD0336-7D08-492C-8489-42C987E29B39} + {9097B5A4-E100-47FD-A676-0B666A36FAFF} = {AAB54E74-20B1-42ED-BC3D-CE9F7BC7FD12} + {57150BA6-3826-431F-8F58-B1D11FAFC5D4} = {1DE4EE60-45BA-4EF7-BE00-B9EB861E4C69} + {CA132CAB-FF4F-4312-B3A3-66DE9D360F27} = {1DE4EE60-45BA-4EF7-BE00-B9EB861E4C69} + {16B1B1CC-3AFC-4DC7-8DB6-D14AE12924A2} = {1DE4EE60-45BA-4EF7-BE00-B9EB861E4C69} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {503678A4-B8D1-4486-8915-405A3E9CF0EB}