diff --git a/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py new file mode 100644 index 00000000..9d773db8 --- /dev/null +++ b/agentex/database/migrations/alembic/versions/2026_06_22_1200_add_agent_run_schedules_3b1c9d2e4f6a.py @@ -0,0 +1,81 @@ +"""add agent_run_schedules + +Revision ID: 3b1c9d2e4f6a +Revises: c7a1b2d3e4f5 +Create Date: 2026-06-22 12:00:00.000000 + +Creates the agent_run_schedules table backing the scheduled-agent-runs feature. +Schema-only. Not idempotent at the DDL level: create_table/create_index are +called without if_not_exists, so only Alembic's revision tracking prevents +re-runs. The indexes target the just-created table, so they are non-blocking. +""" +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = '3b1c9d2e4f6a' +down_revision: str | None = 'c7a1b2d3e4f5' +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.create_table( + 'agent_run_schedules', + sa.Column('id', sa.String(), nullable=False), + sa.Column('agent_id', sa.String(length=64), nullable=False), + sa.Column('name', sa.String(length=256), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('cron_expression', sa.String(), nullable=True), + sa.Column('interval_seconds', sa.Integer(), nullable=True), + sa.Column( + 'timezone', sa.String(), server_default='UTC', nullable=False + ), + sa.Column('start_at', sa.DateTime(timezone=True), nullable=True), + sa.Column('end_at', sa.DateTime(timezone=True), nullable=True), + sa.Column( + 'paused', sa.Boolean(), server_default='false', nullable=False + ), + sa.Column('creator_principal', sa.JSON(), nullable=False), + sa.Column('task_params', sa.JSON(), nullable=True), + 
sa.Column('task_metadata', sa.JSON(), nullable=True), + sa.Column('initial_input', sa.JSON(), nullable=False), + sa.Column( + 'created_at', + sa.DateTime(timezone=True), + server_default=sa.text('now()'), + nullable=True, + ), + sa.Column( + 'updated_at', + sa.DateTime(timezone=True), + server_default=sa.text('now()'), + nullable=True, + ), + sa.ForeignKeyConstraint(['agent_id'], ['agents.id']), + sa.PrimaryKeyConstraint('id'), + ) + # Indexes target the table created in this same migration, so they hold no + # write-blocking lock against live traffic (the table has no rows yet). + op.create_index( + 'uq_agent_run_schedules_agent_name', + 'agent_run_schedules', + ['agent_id', 'name'], + unique=True, + ) + op.create_index( + 'idx_agent_run_schedules_agent', + 'agent_run_schedules', + ['agent_id'], + unique=False, + ) + + +def downgrade() -> None: + op.drop_index('idx_agent_run_schedules_agent', table_name='agent_run_schedules') + op.drop_index( + 'uq_agent_run_schedules_agent_name', table_name='agent_run_schedules' + ) + op.drop_table('agent_run_schedules') diff --git a/agentex/docker-compose.yml b/agentex/docker-compose.yml index a917de42..b203ae22 100644 --- a/agentex/docker-compose.yml +++ b/agentex/docker-compose.yml @@ -166,6 +166,8 @@ services: - MONGODB_DATABASE_NAME=agentex - WATCHFILES_FORCE_POLLING=true - ENABLE_HEALTH_CHECK_WORKFLOW=true + # Disabled by default; enable when testing, e.g. `ENABLE_AGENT_RUN_SCHEDULES=true ./dev.sh`. + - ENABLE_AGENT_RUN_SCHEDULES=${ENABLE_AGENT_RUN_SCHEDULES:-false} - AGENTEX_SERVER_TASK_QUEUE=agentex-server - ALLOWED_ORIGINS=http://localhost:3000 - OTEL_EXPORTER_OTLP_ENDPOINT=http://agentex-otel-collector:4317 diff --git a/agentex/openapi.yaml b/agentex/openapi.yaml index 2725babc..33c73450 100644 --- a/agentex/openapi.yaml +++ b/agentex/openapi.yaml @@ -3255,9 +3255,10 @@ paths: post: tags: - Schedules - summary: Create Schedule - description: Create a new schedule for recurring workflow execution for an agent. 
- operationId: create_schedule_agents__agent_id__schedules_post + summary: Create Run Schedule + description: Create a recurring schedule that starts a fresh agent run on each + fire. + operationId: create_run_schedule_agents__agent_id__schedules_post parameters: - name: agent_id in: path @@ -3270,14 +3271,14 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateScheduleRequest' + $ref: '#/components/schemas/CreateAgentRunScheduleRequest' responses: '200': description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ScheduleResponse' + $ref: '#/components/schemas/AgentRunScheduleResponse' '422': description: Validation Error content: @@ -3287,9 +3288,9 @@ paths: get: tags: - Schedules - summary: List Agent Schedules - description: List all schedules for an agent. - operationId: list_schedules_agents__agent_id__schedules_get + summary: List Run Schedules + description: List run schedules for an agent. + operationId: list_run_schedules_agents__agent_id__schedules_get parameters: - name: agent_id in: path @@ -3297,7 +3298,7 @@ paths: schema: type: string title: Agent Id - - name: page_size + - name: limit in: query required: false schema: @@ -3305,27 +3306,27 @@ paths: maximum: 1000 minimum: 1 default: 100 - title: Page Size + title: Limit responses: '200': description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ScheduleListResponse' + $ref: '#/components/schemas/AgentRunScheduleListResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /agents/{agent_id}/schedules/{schedule_name}: + /agents/{agent_id}/schedules/{name}: get: tags: - Schedules - summary: Get Schedule - description: Get details of a schedule by its name. - operationId: get_schedule_agents__agent_id__schedules__schedule_name__get + summary: Get Run Schedule + description: Get a run schedule by its name. 
+ operationId: get_run_schedule_agents__agent_id__schedules__name__get parameters: - name: agent_id in: path @@ -3333,19 +3334,58 @@ paths: schema: type: string title: Agent Id - - name: schedule_name + - name: name in: path required: true schema: type: string - title: Schedule Name + title: Name responses: '200': description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ScheduleResponse' + $ref: '#/components/schemas/AgentRunScheduleResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + patch: + tags: + - Schedules + summary: Update Run Schedule + description: Partially update a run schedule's definition (cadence, window, + input, etc.). + operationId: update_run_schedule_agents__agent_id__schedules__name__patch + parameters: + - name: agent_id + in: path + required: true + schema: + type: string + title: Agent Id + - name: name + in: path + required: true + schema: + type: string + title: Name + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateAgentRunScheduleRequest' + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/AgentRunScheduleResponse' '422': description: Validation Error content: @@ -3355,9 +3395,9 @@ paths: delete: tags: - Schedules - summary: Delete Schedule - description: Delete a schedule permanently. - operationId: delete_schedule_agents__agent_id__schedules__schedule_name__delete + summary: Delete Run Schedule + description: Delete a run schedule permanently. 
+ operationId: delete_run_schedule_agents__agent_id__schedules__name__delete parameters: - name: agent_id in: path @@ -3365,12 +3405,12 @@ paths: schema: type: string title: Agent Id - - name: schedule_name + - name: name in: path required: true schema: type: string - title: Schedule Name + title: Name responses: '200': description: Successful Response @@ -3384,13 +3424,14 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /agents/{agent_id}/schedules/{schedule_name}/pause: + /agents/{agent_id}/schedules/{name}/trigger: post: tags: - Schedules - summary: Pause Schedule - description: Pause a schedule to stop it from executing. - operationId: pause_schedule_agents__agent_id__schedules__schedule_name__pause_post + summary: Trigger Run Schedule + description: Trigger an immediate, out-of-band run of the schedule (in addition + to its cadence). + operationId: trigger_run_schedule_agents__agent_id__schedules__name__trigger_post parameters: - name: agent_id in: path @@ -3398,40 +3439,32 @@ paths: schema: type: string title: Agent Id - - name: schedule_name + - name: name in: path required: true schema: type: string - title: Schedule Name - requestBody: - content: - application/json: - schema: - anyOf: - - $ref: '#/components/schemas/PauseScheduleRequest' - - type: 'null' - title: Request + title: Name responses: '200': description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ScheduleResponse' + $ref: '#/components/schemas/AgentRunScheduleResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /agents/{agent_id}/schedules/{schedule_name}/unpause: + /agents/{agent_id}/schedules/{name}/pause: post: tags: - Schedules - summary: Unpause Schedule - description: Unpause/resume a schedule to allow it to execute again. 
- operationId: unpause_schedule_agents__agent_id__schedules__schedule_name__unpause_post + summary: Pause Run Schedule + description: Pause a run schedule so it stops firing. + operationId: pause_run_schedule_agents__agent_id__schedules__name__pause_post parameters: - name: agent_id in: path @@ -3439,18 +3472,18 @@ paths: schema: type: string title: Agent Id - - name: schedule_name + - name: name in: path required: true schema: type: string - title: Schedule Name + title: Name requestBody: content: application/json: schema: anyOf: - - $ref: '#/components/schemas/UnpauseScheduleRequest' + - $ref: '#/components/schemas/PauseRunScheduleRequest' - type: 'null' title: Request responses: @@ -3459,21 +3492,20 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ScheduleResponse' + $ref: '#/components/schemas/AgentRunScheduleResponse' '422': description: Validation Error content: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /agents/{agent_id}/schedules/{schedule_name}/trigger: + /agents/{agent_id}/schedules/{name}/resume: post: tags: - Schedules - summary: Trigger Schedule - description: Trigger a schedule to run immediately, regardless of its regular - schedule. - operationId: trigger_schedule_agents__agent_id__schedules__schedule_name__trigger_post + summary: Resume Run Schedule + description: Resume a paused run schedule so it fires again. 
+ operationId: resume_run_schedule_agents__agent_id__schedules__name__resume_post parameters: - name: agent_id in: path @@ -3481,19 +3513,27 @@ paths: schema: type: string title: Agent Id - - name: schedule_name + - name: name in: path required: true schema: type: string - title: Schedule Name + title: Name + requestBody: + content: + application/json: + schema: + anyOf: + - $ref: '#/components/schemas/ResumeRunScheduleRequest' + - type: 'null' + title: Request responses: '200': description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ScheduleResponse' + $ref: '#/components/schemas/AgentRunScheduleResponse' '422': description: Validation Error content: @@ -3976,6 +4016,152 @@ components: - $ref: '#/components/schemas/Event' - type: 'null' title: AgentRPCResult + AgentRunScheduleListResponse: + properties: + run_schedules: + items: + $ref: '#/components/schemas/AgentRunScheduleResponse' + type: array + title: Run Schedules + description: The list of run schedules. + total: + type: integer + title: Total + description: The number of run schedules returned. + type: object + required: + - run_schedules + - total + title: AgentRunScheduleListResponse + description: Response model for listing run schedules. + AgentRunScheduleResponse: + properties: + id: + type: string + title: Id + description: The unique identifier of the run schedule. + agent_id: + type: string + title: Agent Id + description: The agent this schedule belongs to. + name: + type: string + title: Name + description: Schedule name, unique per agent. + description: + anyOf: + - type: string + - type: 'null' + title: Description + description: Optional description. + cron_expression: + anyOf: + - type: string + - type: 'null' + title: Cron Expression + description: Cron cadence, if cron-based. + interval_seconds: + anyOf: + - type: integer + - type: 'null' + title: Interval Seconds + description: Interval cadence in seconds, if interval-based. 
+ timezone: + type: string + title: Timezone + description: Timezone the cron expression is evaluated in. + default: UTC + start_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Start At + description: Schedule activation time. + end_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: End At + description: Schedule deactivation time. + paused: + type: boolean + title: Paused + description: Whether the schedule is paused. + default: false + task_params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Params + description: Task params at fire time. + task_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Metadata + description: Task metadata at fire time. + initial_input: + $ref: '#/components/schemas/ScheduleInitialInput' + description: The initial input. + initial_input_method: + type: string + title: Initial Input Method + description: Delivery method, inferred from the agent's ACP type. + creator_principal: + anyOf: + - $ref: '#/components/schemas/ScheduleCreatorPrincipal' + - type: 'null' + description: Credential-free creator identity. + created_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Created At + description: When the schedule was created. + updated_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Updated At + description: When the schedule was updated. + state: + $ref: '#/components/schemas/RunScheduleState' + description: Live schedule state from Temporal. + default: ACTIVE + next_action_times: + items: + type: string + format: date-time + type: array + title: Next Action Times + description: Upcoming scheduled fire times. + last_action_time: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Last Action Time + description: When the schedule last fired. 
+ num_actions_taken: + type: integer + title: Num Actions Taken + description: Number of times the schedule has fired. + default: 0 + type: object + required: + - id + - agent_id + - name + - initial_input + - initial_input_method + title: AgentRunScheduleResponse + description: Response model describing a scheduled agent run. AgentStatus: type: string enum: @@ -4327,37 +4513,7 @@ components: - api_key_type - api_key title: CreateAPIKeyResponse - CreateDeploymentRequest: - properties: - docker_image: - type: string - title: Docker Image - description: Full Docker image URI. - registration_metadata: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - title: Registration Metadata - description: Git/build metadata (commit_hash, branch_name, author_name, - author_email, build_timestamp). - sgp_deploy_id: - anyOf: - - type: string - - type: 'null' - title: Sgp Deploy Id - description: SGP deployment ID. - helm_release_name: - anyOf: - - type: string - - type: 'null' - title: Helm Release Name - description: Helm release name. - type: object - required: - - docker_image - title: CreateDeploymentRequest - CreateScheduleRequest: + CreateAgentRunScheduleRequest: properties: name: type: string @@ -4365,70 +4521,104 @@ components: minLength: 1 pattern: ^[a-z0-9][a-z0-9-]*[a-z0-9]$|^[a-z0-9]$ title: Schedule Name - description: Human-readable name for the schedule (e.g., 'weekly-profiling'). - Will be combined with agent_id to form the full schedule_id. - workflow_name: - type: string - title: Workflow Name - description: Name of the Temporal workflow to execute (e.g., 'sae-orchestrator') - task_queue: - type: string - title: Task Queue - description: Temporal task queue where the agent's worker is listening - workflow_params: + description: Human-readable name, unique per agent (e.g. 'daily-granola-summary'). 
+ description: anyOf: - - additionalProperties: true - type: object + - type: string - type: 'null' - title: Workflow Parameters - description: Parameters to pass to the workflow + title: Description + description: Optional description of what this schedule does. cron_expression: anyOf: - type: string - type: 'null' title: Cron Expression - description: Cron expression for scheduling (e.g., '0 0 * * 0' for weekly - on Sunday) + description: Cron expression for the cadence (e.g. '0 17 * * MON-FRI'). + Mutually exclusive with interval_seconds. interval_seconds: anyOf: - type: integer minimum: 1.0 - type: 'null' title: Interval Seconds - description: Alternative to cron - run every N seconds - execution_timeout_seconds: - anyOf: - - type: integer - minimum: 1.0 - - type: 'null' - title: Execution Timeout - description: Maximum time in seconds for each workflow execution + description: Interval cadence in seconds. Mutually exclusive with cron_expression. + timezone: + type: string + title: Timezone + description: IANA timezone the cron expression is evaluated in (e.g. 'America/New_York'). + default: UTC start_at: anyOf: - type: string format: date-time - type: 'null' title: Start At - description: When the schedule should start being active + description: When the schedule should start being active. end_at: anyOf: - type: string format: date-time - type: 'null' title: End At - description: When the schedule should stop being active + description: When the schedule should stop being active. paused: type: boolean title: Paused - description: Whether to create the schedule in a paused state + description: Whether to create the schedule in a paused state. default: false + task_params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Params + description: Resolved config forwarded as task `params` at fire time. 
+ task_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Metadata + description: Metadata copied onto each created task at fire time. + initial_input: + $ref: '#/components/schemas/ScheduleInitialInput' + description: The first input delivered to each created task. type: object required: - name - - workflow_name - - task_queue - title: CreateScheduleRequest - description: Request model for creating a new schedule for an agent + - initial_input + title: CreateAgentRunScheduleRequest + description: Request body for creating a scheduled agent run. + CreateDeploymentRequest: + properties: + docker_image: + type: string + title: Docker Image + description: Full Docker image URI. + registration_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Registration Metadata + description: Git/build metadata (commit_hash, branch_name, author_name, + author_email, build_timestamp). + sgp_deploy_id: + anyOf: + - type: string + - type: 'null' + title: Sgp Deploy Id + description: SGP deployment ID. + helm_release_name: + anyOf: + - type: string + - type: 'null' + title: Helm Release Name + description: Helm release name. + type: object + required: + - docker_image + title: CreateDeploymentRequest CreateSpanRequest: properties: id: @@ -5141,17 +5331,16 @@ components: - data title: PaginatedMessagesResponse description: Response with cursor pagination metadata. - PauseScheduleRequest: + PauseRunScheduleRequest: properties: note: anyOf: - type: string - type: 'null' title: Note - description: Optional note explaining why the schedule was paused + description: Optional note explaining the pause. type: object - title: PauseScheduleRequest - description: Request model for pausing a schedule + title: PauseRunScheduleRequest PutCheckpointRequest: properties: thread_id: @@ -5551,195 +5740,79 @@ components: snapshot → clean → rehydrate round-trips cleanly without serialization changes.' 
- ScheduleActionInfo: + ResumeRunScheduleRequest: properties: - workflow_name: - type: string - title: Workflow Name - description: Name of the workflow being executed - workflow_id_prefix: - type: string - title: Workflow ID Prefix - description: Prefix for workflow execution IDs - task_queue: - type: string - title: Task Queue - description: Task queue for the workflow - workflow_params: + note: anyOf: - - items: {} - type: array + - type: string - type: 'null' - title: Workflow Parameters - description: Parameters passed to the workflow + title: Note + description: Optional note explaining the resume. type: object - required: - - workflow_name - - workflow_id_prefix - - task_queue - title: ScheduleActionInfo - description: Information about the scheduled action - ScheduleListItem: + title: ResumeRunScheduleRequest + RunScheduleState: + type: string + enum: + - ACTIVE + - PAUSED + title: RunScheduleState + description: Live state of a run schedule, derived from Temporal. + ScheduleCreatorPrincipal: properties: - schedule_id: - type: string - title: Schedule ID - description: Unique identifier for the schedule - name: - type: string - title: Schedule Name - description: Human-readable name for the schedule - agent_id: - type: string - title: Agent ID - description: ID of the agent this schedule belongs to - state: - $ref: '#/components/schemas/ScheduleState' - title: State - description: Current state of the schedule - workflow_name: + principal_type: anyOf: - type: string - type: 'null' - title: Workflow Name - description: Name of the scheduled workflow - next_action_time: + title: Principal Type + description: e.g. 'user' or 'service_account'. 
+ user_id: anyOf: - type: string - format: date-time - type: 'null' - title: Next Action Time - description: Next scheduled execution time - type: object - required: - - schedule_id - - name - - agent_id - - state - title: ScheduleListItem - description: Abbreviated schedule info for list responses - ScheduleListResponse: - properties: - schedules: - items: - $ref: '#/components/schemas/ScheduleListItem' - type: array - title: Schedules - description: List of schedules - total: - type: integer - title: Total - description: Total number of schedules - type: object - required: - - schedules - - total - title: ScheduleListResponse - description: Response model for listing schedules - ScheduleResponse: - properties: - schedule_id: - type: string - title: Schedule ID - description: Unique identifier for the schedule - name: - type: string - title: Schedule Name - description: Human-readable name for the schedule - agent_id: - type: string - title: Agent ID - description: ID of the agent this schedule belongs to - state: - $ref: '#/components/schemas/ScheduleState' - title: State - description: Current state of the schedule - action: - $ref: '#/components/schemas/ScheduleActionInfo' - title: Action - spec: - $ref: '#/components/schemas/ScheduleSpecInfo' - title: Spec - description: Schedule specification - num_actions_taken: - type: integer - title: Number of Actions Taken - description: Number of times the schedule has executed - default: 0 - num_actions_missed: - type: integer - title: Number of Actions Missed - description: Number of scheduled executions that were missed - default: 0 - next_action_times: - items: - type: string - format: date-time - type: array - title: Next Action Times - description: Upcoming scheduled execution times - last_action_time: + title: User Id + description: Creator user id, if a user principal. 
+ service_account_id: anyOf: - type: string - format: date-time - type: 'null' - title: Last Action Time - description: When the schedule last executed - created_at: + title: Service Account Id + description: Creator service-account id, if a service principal. + account_id: anyOf: - type: string - format: date-time - type: 'null' - title: Created At - description: When the schedule was created + title: Account Id + description: Account/workspace id of the creator. type: object - required: - - schedule_id - - name - - agent_id - - state - - action - - spec - title: ScheduleResponse - description: Response model for schedule operations - ScheduleSpecInfo: + title: ScheduleCreatorPrincipal + description: 'Credential-free creator identity stored with the schedule. + + + Never carries cookies, JWTs, API keys, OAuth tokens, or request headers — + it + + is creator *context* used only for AuthZ and ownership at fire time.' + ScheduleInitialInput: properties: - cron_expressions: - items: - type: string - type: array - title: Cron Expressions - description: Cron expressions for the schedule - intervals_seconds: - items: - type: integer - type: array - title: Interval Seconds - description: Interval specifications in seconds - start_at: - anyOf: - - type: string - format: date-time - - type: 'null' - title: Start At - description: When the schedule starts being active - end_at: - anyOf: - - type: string - format: date-time - - type: 'null' - title: End At - description: When the schedule stops being active + type: + type: string + const: text + title: Type + description: Input content type. + default: text + author: + $ref: '#/components/schemas/MessageAuthor' + description: The author attributed to the initial input. + default: user + content: + type: string + title: Content + description: The initial prompt delivered to the task. 
type: object - title: ScheduleSpecInfo - description: Information about the schedule specification - ScheduleState: - type: string - enum: - - ACTIVE - - PAUSED - title: ScheduleState - description: Schedule state enum + required: + - content + title: ScheduleInitialInput + description: The first input delivered to each freshly created scheduled task. SendEventRequest: properties: task_id: @@ -6676,17 +6749,83 @@ components: - name title: ToolResponseDelta description: Delta for tool response updates - UnpauseScheduleRequest: + UpdateAgentRunScheduleRequest: properties: - note: + description: anyOf: - type: string - type: 'null' - title: Note - description: Optional note explaining why the schedule was unpaused + title: Description + description: Optional description of what this schedule does. + cron_expression: + anyOf: + - type: string + - type: 'null' + title: Cron Expression + description: New cron cadence. Mutually exclusive with interval_seconds. + interval_seconds: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + title: Interval Seconds + description: New interval cadence in seconds. Mutually exclusive with cron_expression. + timezone: + anyOf: + - type: string + - type: 'null' + title: Timezone + description: IANA timezone the cron expression is evaluated in. + start_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Start At + description: When the schedule should start being active. + end_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: End At + description: When the schedule should stop being active. + paused: + anyOf: + - type: boolean + - type: 'null' + title: Paused + description: Pause/resume the schedule as part of the update. + task_params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Params + description: Resolved config forwarded as task `params` at fire time. 
+ task_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + title: Task Metadata + description: Metadata copied onto each created task at fire time. + initial_input: + anyOf: + - $ref: '#/components/schemas/ScheduleInitialInput' + - type: 'null' + description: Replacement initial input delivered to each created task. type: object - title: UnpauseScheduleRequest - description: Request model for unpausing a schedule + title: UpdateAgentRunScheduleRequest + description: 'Partial update for a scheduled agent run. + + + Only fields present in the request body are changed; the schedule ``name`` + is + + immutable (it is the natural key). Setting ``cron_expression`` clears + + ``interval_seconds`` and vice versa; providing both is rejected.' UpdateAgentTaskTrackerRequest: properties: last_processed_event_id: diff --git a/agentex/scripts/generate_openapi_spec.py b/agentex/scripts/generate_openapi_spec.py index d76a16b9..f043c650 100644 --- a/agentex/scripts/generate_openapi_spec.py +++ b/agentex/scripts/generate_openapi_spec.py @@ -8,6 +8,9 @@ os.environ.setdefault("ENVIRONMENT", "development") os.environ.setdefault("ALLOWED_ORIGINS", "*") +# Document feature-flagged endpoints in the spec/SDK regardless of where the +# feature is enabled at runtime. Runtime serving stays gated by the live env var. 
+os.environ.setdefault("ENABLE_AGENT_RUN_SCHEDULES", "true") def main() -> int: diff --git a/agentex/src/adapters/orm.py b/agentex/src/adapters/orm.py index 42a66c1a..49016124 100644 --- a/agentex/src/adapters/orm.py +++ b/agentex/src/adapters/orm.py @@ -197,6 +197,42 @@ class AgentAPIKeyORM(BaseORM): ) +class AgentRunScheduleORM(BaseORM): + __tablename__ = "agent_run_schedules" + id = Column(String, primary_key=True, default=orm_id) + agent_id = Column(String(64), ForeignKey("agents.id"), nullable=False) + name = Column(String(256), nullable=False) + description = Column(Text, nullable=True) + cron_expression = Column(String, nullable=True) + interval_seconds = Column(Integer, nullable=True) + timezone = Column(String, nullable=False, server_default="UTC") + start_at = Column(DateTime(timezone=True), nullable=True) + end_at = Column(DateTime(timezone=True), nullable=True) + paused = Column(Boolean, nullable=False, server_default="false") + # Credential-free creator context (see ScheduleCreatorPrincipal): no cookies, + # JWTs, API keys, OAuth tokens, or request headers are ever stored here. + creator_principal = Column(JSON, nullable=False) + task_params = Column(JSON, nullable=True) + task_metadata = Column(JSON, nullable=True) + initial_input = Column(JSON, nullable=False) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now() + ) + + __table_args__ = ( + # Schedule names are unique per agent: every per-schedule endpoint + # (get/update/delete/trigger/pause/resume) addresses it by agent_id + name. 
+ Index( + "uq_agent_run_schedules_agent_name", + "agent_id", + "name", + unique=True, + ), + Index("idx_agent_run_schedules_agent", "agent_id"), + ) + + class DeploymentHistoryORM(BaseORM): __tablename__ = "deployment_history" diff --git a/agentex/src/adapters/temporal/adapter_temporal.py b/agentex/src/adapters/temporal/adapter_temporal.py index f64d0d29..5d516fee 100644 --- a/agentex/src/adapters/temporal/adapter_temporal.py +++ b/agentex/src/adapters/temporal/adapter_temporal.py @@ -10,8 +10,12 @@ ScheduleDescription, ScheduleHandle, ScheduleIntervalSpec, + ScheduleOverlapPolicy, + SchedulePolicy, ScheduleSpec, ScheduleState, + ScheduleUpdate, + ScheduleUpdateInput, WorkflowExecution, WorkflowHandle, ) @@ -362,9 +366,20 @@ async def create_schedule( start_at: Any | None = None, end_at: Any | None = None, paused: bool = False, + time_zone_name: str | None = None, + overlap_policy: str | None = None, ) -> ScheduleHandle: """ Create a new schedule for recurring workflow execution. + + ``time_zone_name`` is an optional IANA timezone (e.g. ``America/New_York``) + the cron expression is evaluated in; when omitted, cron is evaluated in + UTC. Ignored for interval-based schedules. + + ``overlap_policy`` is an optional ScheduleOverlapPolicy name (e.g. + ``"skip"``, ``"buffer_one"``) controlling what happens when a fire is due + while a prior run is still executing. When omitted, Temporal's default + (SKIP) applies. 
async def update_schedule(
    self,
    schedule_id: str,
    cron_expressions: list[str] | None = None,
    interval_seconds: int | None = None,
    start_at: Any | None = None,
    end_at: Any | None = None,
    time_zone_name: str | None = None,
    paused: bool | None = None,
) -> None:
    """
    Replace the spec of an existing schedule and optionally set its paused flag.

    A brand-new ``ScheduleSpec`` is built from the arguments and swapped in for
    the current one; the schedule's action is not modified. Because the whole
    spec is replaced, callers must pass the complete desired cadence, window
    and timezone (as with ``create_schedule``): ``time_zone_name`` is only
    forwarded when truthy, so omitting it yields a spec built without one.
    ``paused`` is left untouched when ``None``.

    Raises:
        TemporalConnectionError: no Temporal client is connected.
        TemporalInvalidArgumentError: neither a cron expression nor an
            interval was supplied.
        TemporalScheduleNotFoundError: the failure message contains
            "not found" (case-insensitive).
        TemporalScheduleError: any other failure while updating.
    """
    if not self.client:
        raise TemporalConnectionError("Temporal client is not connected")

    has_cadence = bool(cron_expressions) or bool(interval_seconds)
    if not has_cadence:
        raise TemporalInvalidArgumentError(
            message="Either cron_expressions or interval_seconds must be provided",
            detail="A schedule requires at least one scheduling specification",
        )

    interval_specs = []
    if interval_seconds:
        interval_specs.append(
            ScheduleIntervalSpec(every=timedelta(seconds=interval_seconds))
        )
    # Only pass a timezone through when one was actually given.
    timezone_kwargs: dict[str, Any] = (
        {"time_zone_name": time_zone_name} if time_zone_name else {}
    )
    replacement_spec = ScheduleSpec(
        cron_expressions=cron_expressions or [],
        intervals=interval_specs,
        start_at=start_at,
        end_at=end_at,
        **timezone_kwargs,
    )

    def _replace_spec(update_input: ScheduleUpdateInput) -> ScheduleUpdate:
        # Mutate the schedule described by Temporal and hand it back as the update.
        current = update_input.description.schedule
        current.spec = replacement_spec
        if paused is not None:
            current.state.paused = paused
        return ScheduleUpdate(schedule=current)

    try:
        await self.client.get_schedule_handle(schedule_id).update(_replace_spec)
        logger.info(f"Updated schedule {schedule_id}")
    except Exception as e:
        if "not found" not in str(e).lower():
            logger.error(f"Failed to update schedule {schedule_id}: {e}")
            raise TemporalScheduleError(
                message=f"Failed to update schedule '{schedule_id}'",
                detail=str(e),
            ) from e
        logger.error(f"Schedule {schedule_id} not found: {e}")
        raise TemporalScheduleNotFoundError(
            message=f"Schedule '{schedule_id}' not found",
            detail=str(e),
        ) from e
diff --git a/agentex/src/api/app.py b/agentex/src/api/app.py index 0131ad30..07853a2b 100644 --- a/agentex/src/api/app.py +++ b/agentex/src/api/app.py @@ -30,6 +30,7 @@ from src.api.RequestLoggingMiddleware import RequestLoggingMiddleware from src.api.routes import ( agent_api_keys, + agent_run_schedules, agent_task_tracker, agents, checkpoints, @@ -37,7 +38,6 @@ deployments, events, messages, - schedules, spans, states, task_retention, @@ -204,7 +204,11 @@ async def handle_unexpected(request, exc): fastapi_app.include_router(agent_api_keys.router) fastapi_app.include_router(deployment_history.router) fastapi_app.include_router(deployments.router) -fastapi_app.include_router(schedules.router) +# Agent run schedules are feature-flagged (off by default, enabled in development). +# When disabled the routes are not registered, so the API surface is absent +# entirely in environments that haven't opted in. +if resolve_environment_variable_dependency(EnvVarKeys.ENABLE_AGENT_RUN_SCHEDULES): + fastapi_app.include_router(agent_run_schedules.router) fastapi_app.include_router(checkpoints.router) fastapi_app.include_router(task_retention.router) diff --git a/agentex/src/api/routes/agent_run_schedules.py b/agentex/src/api/routes/agent_run_schedules.py new file mode 100644 index 00000000..fe4c5167 --- /dev/null +++ b/agentex/src/api/routes/agent_run_schedules.py @@ -0,0 +1,247 @@ +from typing import Any + +from fastapi import APIRouter, Query, Request + +from src.api.schemas.agent_run_schedules import ( + AgentRunScheduleListResponse, + AgentRunScheduleResponse, + CreateAgentRunScheduleRequest, + PauseRunScheduleRequest, + ResumeRunScheduleRequest, + UpdateAgentRunScheduleRequest, +) +from src.api.schemas.authorization_types import ( + AgentexResourceType, + AuthorizedOperationType, +) +from src.api.schemas.delete_response import DeleteResponse +from src.domain.services.agent_run_schedule_service import ( + build_run_schedule_authz_selector, +) +from 
# Identity selectors copied from the request principal; nothing else is kept.
_CREATOR_PRINCIPAL_FIELDS = (
    "principal_type",
    "user_id",
    "service_account_id",
    "account_id",
)


def _extract_creator_principal(principal_context: Any) -> dict[str, Any]:
    """Capture the credential-free creator subset from the request principal.

    Only the keys in ``_CREATOR_PRINCIPAL_FIELDS`` are copied, and only when
    their value is not ``None``; every other key or attribute on the principal
    (cookies, tokens, headers, ...) is ignored.

    Args:
        principal_context: The request principal, either a ``dict`` (read by
            key) or any other object (read by attribute), or ``None``.

    Returns:
        A new dict holding the non-``None`` identity selectors. Empty when
        ``principal_context`` is ``None`` or carries none of the fields.
    """
    if principal_context is None:
        return {}

    if isinstance(principal_context, dict):
        lookup = principal_context.get
    else:

        def lookup(key: str) -> Any:
            return getattr(principal_context, key, None)

    # Read each field exactly once: the value that passes the ``None`` filter is
    # the value that gets stored, even for property-backed principals.
    return {
        field: value
        for field in _CREATOR_PRINCIPAL_FIELDS
        if (value := lookup(field)) is not None
    }
async def _authorize_schedule(
    authorization: DAuthorizationService,
    agent_id: str,
    name: str,
    operation: AuthorizedOperationType,
) -> None:
    """Run the per-schedule authorization check shared by the handlers below.

    Builds the selector for ``agent_id`` + ``name`` and delegates to
    ``_check_schedule_or_collapse_to_404`` (which, per its name, presumably
    surfaces a denial as a 404 -- its body is not visible here).
    """
    selector = build_run_schedule_authz_selector(agent_id, name)
    await _check_schedule_or_collapse_to_404(authorization, selector, operation)


@router.get(
    "/{name}",
    response_model=AgentRunScheduleResponse,
    summary="Get Run Schedule",
    description="Get a run schedule by its name.",
)
async def get_run_schedule(
    agent_id: str,
    name: str,
    run_schedules_use_case: DAgentRunSchedulesUseCase,
    authorization: DAuthorizationService,
) -> AgentRunScheduleResponse:
    """Return one schedule after a ``read`` authorization check."""
    await _authorize_schedule(
        authorization, agent_id, name, AuthorizedOperationType.read
    )
    return await run_schedules_use_case.get_schedule(agent_id, name)


@router.patch(
    "/{name}",
    response_model=AgentRunScheduleResponse,
    summary="Update Run Schedule",
    description="Partially update a run schedule's definition (cadence, window, input, etc.).",
)
async def update_run_schedule(
    agent_id: str,
    name: str,
    request: UpdateAgentRunScheduleRequest,
    run_schedules_use_case: DAgentRunSchedulesUseCase,
    authorization: DAuthorizationService,
) -> AgentRunScheduleResponse:
    """Apply a partial update after an ``update`` authorization check."""
    await _authorize_schedule(
        authorization, agent_id, name, AuthorizedOperationType.update
    )
    return await run_schedules_use_case.update_schedule(agent_id, name, request)


@router.post(
    "/{name}/trigger",
    response_model=AgentRunScheduleResponse,
    summary="Trigger Run Schedule",
    description="Trigger an immediate, out-of-band run of the schedule (in addition to its cadence).",
)
async def trigger_run_schedule(
    agent_id: str,
    name: str,
    run_schedules_use_case: DAgentRunSchedulesUseCase,
    authorization: DAuthorizationService,
) -> AgentRunScheduleResponse:
    """Trigger the schedule once after an ``update`` authorization check."""
    await _authorize_schedule(
        authorization, agent_id, name, AuthorizedOperationType.update
    )
    return await run_schedules_use_case.trigger_schedule(agent_id, name)


@router.post(
    "/{name}/pause",
    response_model=AgentRunScheduleResponse,
    summary="Pause Run Schedule",
    description="Pause a run schedule so it stops firing.",
)
async def pause_run_schedule(
    agent_id: str,
    name: str,
    run_schedules_use_case: DAgentRunSchedulesUseCase,
    authorization: DAuthorizationService,
    request: PauseRunScheduleRequest | None = None,
) -> AgentRunScheduleResponse:
    """Pause the schedule; the request body (and its note) is optional."""
    await _authorize_schedule(
        authorization, agent_id, name, AuthorizedOperationType.update
    )
    return await run_schedules_use_case.pause_schedule(
        agent_id, name, note=request.note if request else None
    )


@router.post(
    "/{name}/resume",
    response_model=AgentRunScheduleResponse,
    summary="Resume Run Schedule",
    description="Resume a paused run schedule so it fires again.",
)
async def resume_run_schedule(
    agent_id: str,
    name: str,
    run_schedules_use_case: DAgentRunSchedulesUseCase,
    authorization: DAuthorizationService,
    request: ResumeRunScheduleRequest | None = None,
) -> AgentRunScheduleResponse:
    """Resume the schedule; the request body (and its note) is optional."""
    await _authorize_schedule(
        authorization, agent_id, name, AuthorizedOperationType.update
    )
    return await run_schedules_use_case.resume_schedule(
        agent_id, name, note=request.note if request else None
    )


@router.delete(
    "/{name}",
    response_model=DeleteResponse,
    summary="Delete Run Schedule",
    description="Delete a run schedule permanently.",
)
async def delete_run_schedule(
    agent_id: str,
    name: str,
    run_schedules_use_case: DAgentRunSchedulesUseCase,
    authorization: DAuthorizationService,
) -> DeleteResponse:
    """Delete the schedule after a ``delete`` check and echo the id the use case returns."""
    await _authorize_schedule(
        authorization, agent_id, name, AuthorizedOperationType.delete
    )
    deleted_id = await run_schedules_use_case.delete_schedule(agent_id, name)
    return DeleteResponse(
        id=deleted_id,
        message=f"Run schedule '{name}' deleted successfully",
    )
``agent.update`` matches the ``parent_agent->update`` gate every - schedule mutation transitively requires. - """ - agent = await agents_use_case.get(id=agent_id) - return await schedules_use_case.create_schedule(agent, request) - - -@router.get( - "", - response_model=ScheduleListResponse, - summary="List Agent Schedules", - description="List all schedules for an agent.", -) -async def list_schedules( - agent_id: str, - schedules_use_case: DSchedulesUseCase, - authorized_schedule_ids: DAuthorizedResourceIds(AgentexResourceType.schedule), - page_size: int = Query(default=100, ge=1, le=1000), -) -> ScheduleListResponse: - """List schedules for an agent, filtered to those the caller owns. - - Filter-only (never 403s): ``authorized_schedule_ids`` is ``None`` under authz - bypass (return all), else the set of readable ids (empty returns nothing). - """ - return await schedules_use_case.list_schedules( - agent_id, - page_size=page_size, - authorized_schedule_ids=authorized_schedule_ids, - ) - - -@router.get( - "/{schedule_name}", - response_model=ScheduleResponse, - summary="Get Schedule", - description="Get details of a schedule by its name.", -) -async def get_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, -) -> ScheduleResponse: - """Get details of a schedule.""" - await _check_schedule_or_collapse_to_404( - authorization, - build_schedule_id(agent_id, schedule_name), - AuthorizedOperationType.read, - ) - return await schedules_use_case.get_schedule(agent_id, schedule_name) - - -@router.post( - "/{schedule_name}/pause", - response_model=ScheduleResponse, - summary="Pause Schedule", - description="Pause a schedule to stop it from executing.", -) -async def pause_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, - request: PauseScheduleRequest | None = None, -) -> ScheduleResponse: - """Pause a schedule.""" - 
await _check_schedule_or_collapse_to_404( - authorization, - build_schedule_id(agent_id, schedule_name), - AuthorizedOperationType.update, - ) - note = request.note if request else None - return await schedules_use_case.pause_schedule(agent_id, schedule_name, note=note) - - -@router.post( - "/{schedule_name}/unpause", - response_model=ScheduleResponse, - summary="Unpause Schedule", - description="Unpause/resume a schedule to allow it to execute again.", -) -async def unpause_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, - request: UnpauseScheduleRequest | None = None, -) -> ScheduleResponse: - """Unpause/resume a schedule.""" - await _check_schedule_or_collapse_to_404( - authorization, - build_schedule_id(agent_id, schedule_name), - AuthorizedOperationType.update, - ) - note = request.note if request else None - return await schedules_use_case.unpause_schedule(agent_id, schedule_name, note=note) - - -@router.post( - "/{schedule_name}/trigger", - response_model=ScheduleResponse, - summary="Trigger Schedule", - description="Trigger a schedule to run immediately, regardless of its regular schedule.", -) -async def trigger_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, -) -> ScheduleResponse: - """Trigger a schedule to run immediately.""" - await _check_schedule_or_collapse_to_404( - authorization, - build_schedule_id(agent_id, schedule_name), - AuthorizedOperationType.update, - ) - return await schedules_use_case.trigger_schedule(agent_id, schedule_name) - - -@router.delete( - "/{schedule_name}", - response_model=DeleteResponse, - summary="Delete Schedule", - description="Delete a schedule permanently.", -) -async def delete_schedule( - agent_id: str, - schedule_name: str, - schedules_use_case: DSchedulesUseCase, - authorization: DAuthorizationService, -) -> DeleteResponse: - """Delete a schedule.""" - 
schedule_id = build_schedule_id(agent_id, schedule_name) - await _check_schedule_or_collapse_to_404( - authorization, schedule_id, AuthorizedOperationType.delete - ) - await schedules_use_case.delete_schedule(agent_id, schedule_name) - return DeleteResponse( - id=schedule_id, - message=f"Schedule '{schedule_name}' deleted successfully", - ) diff --git a/agentex/src/api/schemas/agent_run_schedules.py b/agentex/src/api/schemas/agent_run_schedules.py new file mode 100644 index 00000000..d49fb101 --- /dev/null +++ b/agentex/src/api/schemas/agent_run_schedules.py @@ -0,0 +1,208 @@ +from datetime import datetime +from enum import Enum +from typing import Any, Literal + +from pydantic import Field + +from src.domain.entities.task_messages import MessageAuthor +from src.utils.model_utils import BaseModel + + +class RunScheduleState(str, Enum): + """Live state of a run schedule, derived from Temporal.""" + + ACTIVE = "ACTIVE" + PAUSED = "PAUSED" + + +class ScheduleInitialInput(BaseModel): + """The first input delivered to each freshly created scheduled task.""" + + type: Literal["text"] = Field("text", description="Input content type.") + author: MessageAuthor = Field( + MessageAuthor.USER, description="The author attributed to the initial input." + ) + content: str = Field(..., description="The initial prompt delivered to the task.") + + +class ScheduleCreatorPrincipal(BaseModel): + """Credential-free creator identity stored with the schedule. + + Never carries cookies, JWTs, API keys, OAuth tokens, or request headers — it + is creator *context* used only for AuthZ and ownership at fire time. + """ + + principal_type: str | None = Field( + None, description="e.g. 'user' or 'service_account'." + ) + user_id: str | None = Field( + None, description="Creator user id, if a user principal." + ) + service_account_id: str | None = Field( + None, description="Creator service-account id, if a service principal." 
class CreateAgentRunScheduleRequest(BaseModel):
    """Request body for creating a scheduled agent run."""

    # NOTE: the class docstring above is emitted into the generated API schema,
    # so explanatory notes live in comments instead.
    #
    # Identity: lowercase alphanumerics and hyphens, no leading/trailing hyphen,
    # 1-64 characters.
    name: str = Field(
        ...,
        title="Schedule Name",
        description="Human-readable name, unique per agent (e.g. 'daily-granola-summary').",
        pattern=r"^[a-z0-9][a-z0-9-]*[a-z0-9]$|^[a-z0-9]$",
        min_length=1,
        max_length=64,
    )
    description: str | None = Field(
        default=None,
        description="Optional description of what this schedule does.",
    )

    # Cadence: the descriptions call the two fields mutually exclusive, but no
    # validator in this model enforces that -- presumably checked downstream
    # (TODO confirm).
    cron_expression: str | None = Field(
        default=None,
        description=(
            "Cron expression for the cadence (e.g. '0 17 * * MON-FRI'). "
            "Mutually exclusive with interval_seconds."
        ),
    )
    interval_seconds: int | None = Field(
        default=None,
        ge=1,
        description="Interval cadence in seconds. Mutually exclusive with cron_expression.",
    )
    timezone: str = Field(
        default="UTC",
        description="IANA timezone the cron expression is evaluated in (e.g. 'America/New_York').",
    )

    # Activity window and initial state.
    start_at: datetime | None = Field(
        default=None,
        description="When the schedule should start being active.",
    )
    end_at: datetime | None = Field(
        default=None,
        description="When the schedule should stop being active.",
    )
    paused: bool = Field(
        default=False,
        description="Whether to create the schedule in a paused state.",
    )

    # What each fire creates.
    task_params: dict[str, Any] | None = Field(
        default=None,
        description="Resolved config forwarded as task `params` at fire time.",
    )
    task_metadata: dict[str, Any] | None = Field(
        default=None,
        description="Metadata copied onto each created task at fire time.",
    )
    initial_input: ScheduleInitialInput = Field(
        ...,
        description="The first input delivered to each created task.",
    )
class UpdateAgentRunScheduleRequest(BaseModel):
    """Partial update for a scheduled agent run.

    Only fields present in the request body are changed; the schedule ``name`` is
    immutable (it is the natural key). Setting ``cron_expression`` clears
    ``interval_seconds`` and vice versa; providing both is rejected.
    """

    # NOTE: the class docstring above is emitted into the generated API schema,
    # so explanatory notes live in comments instead.
    #
    # Every field defaults to ``None``. This model has no validator of its own,
    # so the "clears the other cadence" and "both is rejected" rules described
    # in the docstring are presumably applied by the caller that consumes this
    # request (TODO confirm).
    description: str | None = Field(
        default=None,
        description="Optional description of what this schedule does.",
    )

    # Cadence.
    cron_expression: str | None = Field(
        default=None,
        description="New cron cadence. Mutually exclusive with interval_seconds.",
    )
    interval_seconds: int | None = Field(
        default=None,
        ge=1,
        description="New interval cadence in seconds. Mutually exclusive with cron_expression.",
    )
    timezone: str | None = Field(
        default=None,
        description="IANA timezone the cron expression is evaluated in.",
    )

    # Activity window and paused flag.
    start_at: datetime | None = Field(
        default=None,
        description="When the schedule should start being active.",
    )
    end_at: datetime | None = Field(
        default=None,
        description="When the schedule should stop being active.",
    )
    paused: bool | None = Field(
        default=None,
        description="Pause/resume the schedule as part of the update.",
    )

    # What each fire creates.
    task_params: dict[str, Any] | None = Field(
        default=None,
        description="Resolved config forwarded as task `params` at fire time.",
    )
    task_metadata: dict[str, Any] | None = Field(
        default=None,
        description="Metadata copied onto each created task at fire time.",
    )
    initial_input: ScheduleInitialInput | None = Field(
        default=None,
        description="Replacement initial input delivered to each created task.",
    )
" - "Will be combined with agent_id to form the full schedule_id.", - pattern=r"^[a-z0-9][a-z0-9-]*[a-z0-9]$|^[a-z0-9]$", - min_length=1, - max_length=64, - ) - workflow_name: str = Field( - ..., - title="Workflow Name", - description="Name of the Temporal workflow to execute (e.g., 'sae-orchestrator')", - ) - task_queue: str = Field( - ..., - title="Task Queue", - description="Temporal task queue where the agent's worker is listening", - ) - workflow_params: dict[str, Any] | None = Field( - default=None, - title="Workflow Parameters", - description="Parameters to pass to the workflow", - ) - cron_expression: str | None = Field( - default=None, - title="Cron Expression", - description="Cron expression for scheduling (e.g., '0 0 * * 0' for weekly on Sunday)", - ) - interval_seconds: int | None = Field( - default=None, - title="Interval Seconds", - description="Alternative to cron - run every N seconds", - ge=1, - ) - execution_timeout_seconds: int | None = Field( - default=None, - title="Execution Timeout", - description="Maximum time in seconds for each workflow execution", - ge=1, - ) - start_at: datetime | None = Field( - default=None, - title="Start At", - description="When the schedule should start being active", - ) - end_at: datetime | None = Field( - default=None, - title="End At", - description="When the schedule should stop being active", - ) - paused: bool = Field( - default=False, - title="Paused", - description="Whether to create the schedule in a paused state", - ) - - -class ScheduleActionInfo(BaseModel): - """Information about the scheduled action""" - - workflow_name: str = Field( - ..., - title="Workflow Name", - description="Name of the workflow being executed", - ) - workflow_id_prefix: str = Field( - ..., - title="Workflow ID Prefix", - description="Prefix for workflow execution IDs", - ) - task_queue: str = Field( - ..., - title="Task Queue", - description="Task queue for the workflow", - ) - workflow_params: list[Any] | None = Field( - 
default=None, - title="Workflow Parameters", - description="Parameters passed to the workflow", - ) - - -class ScheduleSpecInfo(BaseModel): - """Information about the schedule specification""" - - cron_expressions: list[str] = Field( - default_factory=list, - title="Cron Expressions", - description="Cron expressions for the schedule", - ) - intervals_seconds: list[int] = Field( - default_factory=list, - title="Interval Seconds", - description="Interval specifications in seconds", - ) - start_at: datetime | None = Field( - default=None, - title="Start At", - description="When the schedule starts being active", - ) - end_at: datetime | None = Field( - default=None, - title="End At", - description="When the schedule stops being active", - ) - - -class ScheduleResponse(BaseModel): - """Response model for schedule operations""" - - schedule_id: str = Field( - ..., - title="Schedule ID", - description="Unique identifier for the schedule", - ) - name: str = Field( - ..., - title="Schedule Name", - description="Human-readable name for the schedule", - ) - agent_id: str = Field( - ..., - title="Agent ID", - description="ID of the agent this schedule belongs to", - ) - state: ScheduleState = Field( - ..., - title="State", - description="Current state of the schedule", - ) - action: ScheduleActionInfo = Field( - ..., - title="Action", - description="Information about the scheduled action", - ) - spec: ScheduleSpecInfo = Field( - ..., - title="Spec", - description="Schedule specification", - ) - num_actions_taken: int = Field( - default=0, - title="Number of Actions Taken", - description="Number of times the schedule has executed", - ) - num_actions_missed: int = Field( - default=0, - title="Number of Actions Missed", - description="Number of scheduled executions that were missed", - ) - next_action_times: list[datetime] = Field( - default_factory=list, - title="Next Action Times", - description="Upcoming scheduled execution times", - ) - last_action_time: datetime | None = 
Field( - default=None, - title="Last Action Time", - description="When the schedule last executed", - ) - created_at: datetime | None = Field( - default=None, - title="Created At", - description="When the schedule was created", - ) - - -class ScheduleListItem(BaseModel): - """Abbreviated schedule info for list responses""" - - schedule_id: str = Field( - ..., - title="Schedule ID", - description="Unique identifier for the schedule", - ) - name: str = Field( - ..., - title="Schedule Name", - description="Human-readable name for the schedule", - ) - agent_id: str = Field( - ..., - title="Agent ID", - description="ID of the agent this schedule belongs to", - ) - state: ScheduleState = Field( - ..., - title="State", - description="Current state of the schedule", - ) - workflow_name: str | None = Field( - default=None, - title="Workflow Name", - description="Name of the scheduled workflow", - ) - next_action_time: datetime | None = Field( - default=None, - title="Next Action Time", - description="Next scheduled execution time", - ) - - -class ScheduleListResponse(BaseModel): - """Response model for listing schedules""" - - schedules: list[ScheduleListItem] = Field( - ..., - title="Schedules", - description="List of schedules", - ) - total: int = Field( - ..., - title="Total", - description="Total number of schedules", - ) - - -class PauseScheduleRequest(BaseModel): - """Request model for pausing a schedule""" - - note: str | None = Field( - default=None, - title="Note", - description="Optional note explaining why the schedule was paused", - ) - - -class UnpauseScheduleRequest(BaseModel): - """Request model for unpausing a schedule""" - - note: str | None = Field( - default=None, - title="Note", - description="Optional note explaining why the schedule was unpaused", - ) diff --git a/agentex/src/config/environment_variables.py b/agentex/src/config/environment_variables.py index 0872c0cf..477a78e5 100644 --- a/agentex/src/config/environment_variables.py +++ 
b/agentex/src/config/environment_variables.py @@ -57,6 +57,7 @@ class EnvVarKeys(str, Enum): SSE_KEEPALIVE_PING_INTERVAL = "SSE_KEEPALIVE_PING_INTERVAL" AGENTEX_SERVER_TASK_QUEUE = "AGENTEX_SERVER_TASK_QUEUE" ENABLE_HEALTH_CHECK_WORKFLOW = "ENABLE_HEALTH_CHECK_WORKFLOW" + ENABLE_AGENT_RUN_SCHEDULES = "ENABLE_AGENT_RUN_SCHEDULES" WEBHOOK_REQUEST_TIMEOUT = "WEBHOOK_REQUEST_TIMEOUT" RETENTION_CLEANUP_ENABLED = "RETENTION_CLEANUP_ENABLED" RETENTION_CLEANUP_AGENT_ALLOWLIST = "RETENTION_CLEANUP_AGENT_ALLOWLIST" @@ -120,6 +121,8 @@ class EnvironmentVariables(BaseModel): SSE_KEEPALIVE_PING_INTERVAL: int = 15 # SSE keepalive ping interval in seconds AGENTEX_SERVER_TASK_QUEUE: str | None = None ENABLE_HEALTH_CHECK_WORKFLOW: bool = False + # Gates the agent run schedules API. Off by default; enabled in development. + ENABLE_AGENT_RUN_SCHEDULES: bool = False WEBHOOK_REQUEST_TIMEOUT: float = 15.0 # Webhook request timeout in seconds RETENTION_CLEANUP_ENABLED: bool = False RETENTION_CLEANUP_AGENT_ALLOWLIST: list[str] = [] @@ -214,6 +217,9 @@ def refresh(cls, force_refresh: bool = False) -> EnvironmentVariables | None: os.environ.get(EnvVarKeys.ENABLE_HEALTH_CHECK_WORKFLOW, "false") == "true" ), + ENABLE_AGENT_RUN_SCHEDULES=( + os.environ.get(EnvVarKeys.ENABLE_AGENT_RUN_SCHEDULES, "false") == "true" + ), WEBHOOK_REQUEST_TIMEOUT=float( os.environ.get(EnvVarKeys.WEBHOOK_REQUEST_TIMEOUT, "15.0") ), diff --git a/agentex/src/domain/entities/agent_run_schedules.py b/agentex/src/domain/entities/agent_run_schedules.py new file mode 100644 index 00000000..b51a46e7 --- /dev/null +++ b/agentex/src/domain/entities/agent_run_schedules.py @@ -0,0 +1,89 @@ +from datetime import datetime +from enum import Enum +from typing import Any + +from pydantic import Field + +from src.domain.entities.agents import ACPType +from src.utils.model_utils import BaseModel + + +class InitialInputMethod(str, Enum): + """How the configured first input is delivered to the freshly created task. 
+ + Always inferred from the target agent's ACP type at fire time. + """ + + EVENT_SEND = "event/send" # async / agentic agents + MESSAGE_SEND = "message/send" # sync agents + + +def infer_initial_input_method(acp_type: ACPType) -> InitialInputMethod: + """Map an agent's ACP type to the delivery method for the initial input. + + async / agentic agents receive the first input as an ``event/send``; sync + agents receive it as a ``message/send``. + """ + if acp_type == ACPType.SYNC: + return InitialInputMethod.MESSAGE_SEND + return InitialInputMethod.EVENT_SEND + + +class AgentRunScheduleEntity(BaseModel): + """A persisted definition of a recurring agent run. + + The Postgres row is the source of truth for what each future fire should do; + the Temporal Schedule is only the recurring clock and carries nothing but the + schedule id. + + JSON-backed fields (``creator_principal``, ``task_params``, ``task_metadata``, + ``initial_input``) are stored as plain dicts so they round-trip cleanly through + the JSON columns. Their typed shapes are validated at the API schema layer + (``ScheduleCreatorPrincipal`` / ``ScheduleInitialInput``). + """ + + id: str = Field(..., description="The unique identifier of the run schedule.") + agent_id: str = Field(..., description="The agent this schedule belongs to.") + name: str = Field( + ..., description="Human-readable schedule name, unique per agent." + ) + description: str | None = Field( + None, description="Optional description of the schedule." + ) + cron_expression: str | None = Field( + None, description="Cron expression for the cadence (mutually exclusive)." + ) + interval_seconds: int | None = Field( + None, description="Interval cadence in seconds (mutually exclusive)." + ) + timezone: str = Field( + "UTC", description="IANA timezone the cron expression is evaluated in." + ) + start_at: datetime | None = Field( + None, description="When the schedule should start being active." 
+ ) + end_at: datetime | None = Field( + None, description="When the schedule should stop being active." + ) + paused: bool = Field(False, description="Whether the schedule is currently paused.") + # Credential-free creator context: principal_type / user_id / service_account_id / + # account_id only. Never cookies, JWTs, API keys, OAuth tokens, or headers. + creator_principal: dict[str, Any] = Field( + ..., + description="Credential-free creator identity used for AuthZ at fire time.", + ) + task_params: dict[str, Any] | None = Field( + None, description="Resolved config forwarded as task `params` at fire time." + ) + task_metadata: dict[str, Any] | None = Field( + None, description="Metadata copied onto each created task at fire time." + ) + initial_input: dict[str, Any] = Field( + ..., description="The first input delivered to each created task." + ) + created_at: datetime | None = Field( + None, description="When the schedule was created." + ) + updated_at: datetime | None = Field( + None, description="When the schedule was last updated." 
+ ) diff --git a/agentex/src/domain/repositories/agent_run_schedule_repository.py b/agentex/src/domain/repositories/agent_run_schedule_repository.py new file mode 100644 index 00000000..a498859e --- /dev/null +++ b/agentex/src/domain/repositories/agent_run_schedule_repository.py @@ -0,0 +1,78 @@ +from typing import Annotated + +from fastapi import Depends +from sqlalchemy import select +from src.adapters.crud_store.adapter_postgres import PostgresCRUDRepository +from src.adapters.crud_store.exceptions import ItemDoesNotExist +from src.adapters.orm import AgentRunScheduleORM +from src.config.dependencies import ( + DDatabaseAsyncReadOnlySessionMaker, + DDatabaseAsyncReadWriteSessionMaker, +) +from src.domain.entities.agent_run_schedules import AgentRunScheduleEntity +from src.utils.logging import make_logger + +logger = make_logger(__name__) + + +class AgentRunScheduleRepository( + PostgresCRUDRepository[AgentRunScheduleORM, AgentRunScheduleEntity] +): + def __init__( + self, + async_read_write_session_maker: DDatabaseAsyncReadWriteSessionMaker, + async_read_only_session_maker: DDatabaseAsyncReadOnlySessionMaker, + ): + super().__init__( + async_read_write_session_maker, + async_read_only_session_maker, + AgentRunScheduleORM, + AgentRunScheduleEntity, + ) + + async def list_by_agent_id( + self, + agent_id: str, + limit: int | None = None, + page_number: int | None = None, + ) -> list[AgentRunScheduleEntity]: + """List run schedules for a single agent, newest first.""" + query = select(AgentRunScheduleORM).where( + AgentRunScheduleORM.agent_id == agent_id + ) + return await super().list( + query=query, + order_by="created_at", + order_direction="desc", + limit=limit, + page_number=page_number, + ) + + async def get_by_agent_id_and_name( + self, agent_id: str, name: str + ) -> AgentRunScheduleEntity | None: + """Get a run schedule by its (agent_id, name) natural key, or None.""" + async with self.start_async_db_session(allow_writes=False) as session: + query = 
select(AgentRunScheduleORM).where( + AgentRunScheduleORM.agent_id == agent_id, + AgentRunScheduleORM.name == name, + ) + result = await session.execute(query) + row = result.scalars().first() + return AgentRunScheduleEntity.model_validate(row) if row else None + + async def get_by_agent_id_and_name_or_raise( + self, agent_id: str, name: str + ) -> AgentRunScheduleEntity: + """Get a run schedule by (agent_id, name) or raise ItemDoesNotExist.""" + schedule = await self.get_by_agent_id_and_name(agent_id, name) + if schedule is None: + raise ItemDoesNotExist( + f"Run schedule '{name}' for agent '{agent_id}' does not exist." + ) + return schedule + + +DAgentRunScheduleRepository = Annotated[ + AgentRunScheduleRepository, Depends(AgentRunScheduleRepository) +] diff --git a/agentex/src/domain/services/agent_run_schedule_service.py b/agentex/src/domain/services/agent_run_schedule_service.py new file mode 100644 index 00000000..c1786a18 --- /dev/null +++ b/agentex/src/domain/services/agent_run_schedule_service.py @@ -0,0 +1,505 @@ +from datetime import datetime +from typing import Annotated, Any, cast + +from fastapi import Depends +from temporalio.client import ScheduleDescription + +from src.adapters.crud_store.exceptions import DuplicateItemError, ItemDoesNotExist +from src.adapters.temporal.adapter_temporal import DTemporalAdapter +from src.adapters.temporal.exceptions import TemporalScheduleNotFoundError +from src.api.schemas.agent_run_schedules import ( + AgentRunScheduleListResponse, + AgentRunScheduleResponse, + CreateAgentRunScheduleRequest, + RunScheduleState, + ScheduleCreatorPrincipal, + ScheduleInitialInput, + UpdateAgentRunScheduleRequest, +) +from src.api.schemas.authorization_types import AgentexResource +from src.domain.entities.agent_run_schedules import ( + AgentRunScheduleEntity, + infer_initial_input_method, +) +from src.domain.entities.agents import AgentEntity +from src.domain.exceptions import ClientError +from 
src.domain.repositories.agent_repository import DAgentRepository +from src.domain.repositories.agent_run_schedule_repository import ( + DAgentRunScheduleRepository, +) +from src.domain.services.authorization_service import DAuthorizationService +from src.utils.ids import orm_id +from src.utils.logging import make_logger + +logger = make_logger(__name__) + +# Temporal schedule id derived from the Postgres row id. The prefix namespaces +# these schedules within the shared Temporal namespace and keeps the id stable +# and small (the row id is the only thing the workflow needs). +RUN_SCHEDULE_TEMPORAL_ID_PREFIX = "agent-run-schedule" + +# Registered (class) name of the workflow each fire starts. Referenced by name so +# the API/service layer doesn't import the Temporal workflow definition. +SCHEDULED_AGENT_RUN_WORKFLOW_NAME = "ScheduledAgentRunWorkflow" + + +def build_run_schedule_temporal_id(schedule_row_id: str) -> str: + return f"{RUN_SCHEDULE_TEMPORAL_ID_PREFIX}:{schedule_row_id}" + + +def build_run_schedule_authz_selector(agent_id: str, name: str) -> str: + """Authorization selector for a run schedule's ``schedule`` resource. + + Derivable from the (agent_id, name) path params so the CRUD endpoints can + authorize without a prior DB lookup. The ``run-schedule::`` prefix namespaces + the selector within the ``schedule`` resource type. + """ + return f"run-schedule::{agent_id}::{name}" + + +class AgentRunScheduleService: + """Manage Postgres-backed scheduled agent runs and their Temporal Schedules. + + The Postgres row is the source of truth for the schedule definition; the + Temporal Schedule is only the recurring clock and is given nothing but the + schedule row id as its workflow argument. 
+ """ + + def __init__( + self, + temporal_adapter: DTemporalAdapter, + authorization_service: DAuthorizationService, + schedule_repository: DAgentRunScheduleRepository, + agent_repository: DAgentRepository, + ): + self.temporal_adapter = temporal_adapter + self.authorization_service = authorization_service + self.schedule_repository = schedule_repository + self.agent_repository = agent_repository + + async def create_schedule( + self, + agent: AgentEntity, + request: CreateAgentRunScheduleRequest, + creator_principal: dict[str, Any], + ) -> AgentRunScheduleResponse: + existing = await self.schedule_repository.get_by_agent_id_and_name( + agent.id, request.name + ) + if existing is not None: + raise ClientError( + f"Run schedule '{request.name}' already exists for agent '{agent.id}'" + ) + + entity = AgentRunScheduleEntity( + id=orm_id(), + agent_id=agent.id, + name=request.name, + description=request.description, + cron_expression=request.cron_expression, + interval_seconds=request.interval_seconds, + timezone=request.timezone, + start_at=request.start_at, + end_at=request.end_at, + paused=request.paused, + creator_principal=creator_principal, + task_params=request.task_params, + task_metadata=request.task_metadata, + initial_input=request.initial_input.to_dict(mode="json"), + ) + + try: + created = await self.schedule_repository.create(entity) + except DuplicateItemError as exc: + raise ClientError( + f"Run schedule '{request.name}' already exists for agent '{agent.id}'" + ) from exc + + temporal_id = build_run_schedule_temporal_id(created.id) + authz_selector = build_run_schedule_authz_selector(agent.id, created.name) + # Register (fail-closed, before the Temporal write) and create the schedule + # under one rollback scope: if EITHER the auth registration or the Temporal + # create fails, the persisted row is removed so a failed create leaves + # nothing behind. Registration happens first so an auth failure aborts + # before the Temporal write. 
+ registered = False + try: + registered = await self._register_schedule_in_auth( + authz_selector=authz_selector, agent_id=agent.id + ) + await self.temporal_adapter.create_schedule( + schedule_id=temporal_id, + workflow=SCHEDULED_AGENT_RUN_WORKFLOW_NAME, + workflow_id=f"{temporal_id}-run", + args=[created.id], + task_queue=self._task_queue(), + cron_expressions=( + [created.cron_expression] if created.cron_expression else None + ), + interval_seconds=created.interval_seconds, + start_at=created.start_at, + end_at=created.end_at, + paused=created.paused, + time_zone_name=created.timezone if created.cron_expression else None, + overlap_policy="skip", + ) + except Exception: + if registered: + await self._deregister_schedule_from_auth(authz_selector=authz_selector) + await self._best_effort_delete_row(created.id) + raise + + return await self._to_response(created, agent=agent, temporal_id=temporal_id) + + async def list_schedules( + self, + agent_id: str, + authorized_schedule_ids: list[str] | None = None, + limit: int = 100, + ) -> AgentRunScheduleListResponse: + rows = await self.schedule_repository.list_by_agent_id(agent_id, limit=limit) + + # Gate on ``is not None``: an empty list means the caller owns nothing and + # everything is filtered out; None means authorization is bypassed. + authorized = ( + set(authorized_schedule_ids) + if authorized_schedule_ids is not None + else None + ) + agent = await self.agent_repository.get(id=agent_id) + items: list[AgentRunScheduleResponse] = [] + for row in rows: + selector = build_run_schedule_authz_selector(agent_id, row.name) + if authorized is not None and selector not in authorized: + continue + temporal_id = build_run_schedule_temporal_id(row.id) + # Serve the list from Postgres only — no per-row Temporal describe. + # Fanning out one RPC per row (up to the route's limit of 1000) makes + # list latency scale with Temporal round-trips; live fields are + # available on the single-schedule GET instead. 
+ items.append( + await self._to_response( + row, agent=agent, temporal_id=temporal_id, include_live=False + ) + ) + return AgentRunScheduleListResponse(run_schedules=items, total=len(items)) + + async def get_schedule(self, agent_id: str, name: str) -> AgentRunScheduleResponse: + row = await self.schedule_repository.get_by_agent_id_and_name_or_raise( + agent_id, name + ) + agent = await self.agent_repository.get(id=agent_id) + return await self._to_response( + row, agent=agent, temporal_id=build_run_schedule_temporal_id(row.id) + ) + + async def pause_schedule( + self, agent_id: str, name: str, note: str | None = None + ) -> AgentRunScheduleResponse: + return await self._set_paused(agent_id, name, paused=True, note=note) + + async def resume_schedule( + self, agent_id: str, name: str, note: str | None = None + ) -> AgentRunScheduleResponse: + return await self._set_paused(agent_id, name, paused=False, note=note) + + async def delete_schedule(self, agent_id: str, name: str) -> str: + row = await self.schedule_repository.get_by_agent_id_and_name_or_raise( + agent_id, name + ) + temporal_id = build_run_schedule_temporal_id(row.id) + # Temporal is the recurring clock; delete it first so no further fires can + # occur, then drop the row and the auth entry. A missing Temporal schedule + # is treated as success (the clock is already gone) so a prior partial + # delete — Temporal removed but the row write failed — can still be cleaned + # up through this path rather than being stranded forever. 
+ try: + await self.temporal_adapter.delete_schedule(temporal_id) + except TemporalScheduleNotFoundError: + logger.warning( + "run_schedule_temporal_already_absent_on_delete", + extra={"temporal_id": temporal_id, "schedule_id": row.id}, + ) + await self.schedule_repository.delete(id=row.id) + await self._deregister_schedule_from_auth( + authz_selector=build_run_schedule_authz_selector(agent_id, row.name) + ) + return row.id + + async def update_schedule( + self, agent_id: str, name: str, request: UpdateAgentRunScheduleRequest + ) -> AgentRunScheduleResponse: + """Apply a partial update to a schedule's definition and Temporal spec. + + Only fields present in the request are changed. Setting one of + cron_expression / interval_seconds clears the other; the merged result + must still have exactly one cadence. + """ + row = await self.schedule_repository.get_by_agent_id_and_name_or_raise( + agent_id, name + ) + provided = request.model_dump(exclude_unset=True) + if "description" in provided: + row.description = request.description + if "cron_expression" in provided: + row.cron_expression = request.cron_expression + if request.cron_expression is not None: + row.interval_seconds = None + if "interval_seconds" in provided: + row.interval_seconds = request.interval_seconds + if request.interval_seconds is not None: + row.cron_expression = None + if "timezone" in provided and request.timezone is not None: + row.timezone = request.timezone + if "start_at" in provided: + row.start_at = request.start_at + if "end_at" in provided: + row.end_at = request.end_at + if "paused" in provided and request.paused is not None: + row.paused = request.paused + if "task_params" in provided: + row.task_params = request.task_params + if "task_metadata" in provided: + row.task_metadata = request.task_metadata + if "initial_input" in provided and request.initial_input is not None: + row.initial_input = request.initial_input.to_dict(mode="json") + + if not row.cron_expression and not 
row.interval_seconds: + raise ClientError( + "Schedule must have exactly one of cron_expression or interval_seconds" + ) + if row.cron_expression and row.interval_seconds: + raise ClientError( + "Provide only one of cron_expression or interval_seconds, not both" + ) + + temporal_id = build_run_schedule_temporal_id(row.id) + # Push the merged cadence/window/paused state to the Temporal clock BEFORE + # committing the row. This closes the common divergence: a rejected spec + # (invalid cron / timezone) or a transient Temporal error aborts the + # update with nothing persisted. A residual window remains — if Temporal + # accepts the update and the row write below then fails, the clock leads + # the row — but there is no cross-store transaction, and the row stays the + # declared source of truth, so any later successful update re-converges + # them. (Create keeps the analogous invariant by rolling the row back on + # failure; update has no in-place rollback, so it orders the writes + # instead.) A missing schedule is logged rather than raised so the + # persisted row stays authoritative (mirrors the describe/delete + # tolerance) and the merged definition is still committed. 
+ try: + await self.temporal_adapter.update_schedule( + schedule_id=temporal_id, + cron_expressions=( + [row.cron_expression] if row.cron_expression else None + ), + interval_seconds=row.interval_seconds, + start_at=row.start_at, + end_at=row.end_at, + time_zone_name=row.timezone if row.cron_expression else None, + paused=row.paused, + ) + except TemporalScheduleNotFoundError: + logger.warning( + "run_schedule_temporal_missing_on_update", + extra={"temporal_id": temporal_id, "schedule_id": row.id}, + ) + updated = await self.schedule_repository.update(row) + agent = await self.agent_repository.get(id=agent_id) + return await self._to_response(updated, agent=agent, temporal_id=temporal_id) + + async def trigger_schedule( + self, agent_id: str, name: str + ) -> AgentRunScheduleResponse: + """Trigger an immediate, out-of-band fire of the schedule.""" + row = await self.schedule_repository.get_by_agent_id_and_name_or_raise( + agent_id, name + ) + temporal_id = build_run_schedule_temporal_id(row.id) + await self.temporal_adapter.trigger_schedule(temporal_id) + agent = await self.agent_repository.get(id=agent_id) + return await self._to_response(row, agent=agent, temporal_id=temporal_id) + + # -- internals --------------------------------------------------------- + + async def _set_paused( + self, agent_id: str, name: str, *, paused: bool, note: str | None + ) -> AgentRunScheduleResponse: + row = await self.schedule_repository.get_by_agent_id_and_name_or_raise( + agent_id, name + ) + temporal_id = build_run_schedule_temporal_id(row.id) + # A missing Temporal schedule is logged rather than raised: the persisted + # ``paused`` flag is authoritative and the activity honors it defensively, + # so a missing clock can't strand the row in an un-toggleable state. 
+ try: + if paused: + await self.temporal_adapter.pause_schedule(temporal_id, note=note) + else: + await self.temporal_adapter.unpause_schedule(temporal_id, note=note) + except TemporalScheduleNotFoundError: + logger.warning( + "run_schedule_temporal_missing_on_pause_toggle", + extra={ + "temporal_id": temporal_id, + "schedule_id": row.id, + "paused": paused, + }, + ) + row.paused = paused + updated = await self.schedule_repository.update(row) + agent = await self.agent_repository.get(id=agent_id) + return await self._to_response(updated, agent=agent, temporal_id=temporal_id) + + def _task_queue(self) -> str: + # Local import avoids a circular import (run_worker imports the factory, + # which would otherwise transitively import this service). + from src.temporal.run_worker import AGENTEX_SERVER_TASK_QUEUE + + return AGENTEX_SERVER_TASK_QUEUE + + async def _to_response( + self, + entity: AgentRunScheduleEntity, + agent: AgentEntity, + temporal_id: str, + include_live: bool = True, + ) -> AgentRunScheduleResponse: + effective_method = infer_initial_input_method(agent.acp_type).value + + state = RunScheduleState.PAUSED if entity.paused else RunScheduleState.ACTIVE + next_action_times: list[datetime] = [] + last_action_time: datetime | None = None + num_actions_taken = 0 + + # Live Temporal fields are best-effort and opt-in. ``include_live=False`` + # (list path) skips the describe RPC entirely and serves state from the + # persisted ``paused`` flag. When enabled (single GET), a describe failure + # (e.g. right after creation, or a transient Temporal error) must not break + # the response, which is fully serviceable from the persisted row. 
+ if include_live: + try: + description = await self.temporal_adapter.describe_schedule(temporal_id) + live = self._extract_live_fields(description) + state = live["state"] + next_action_times = live["next_action_times"] + last_action_time = live["last_action_time"] + num_actions_taken = live["num_actions_taken"] + except Exception as exc: + logger.warning( + "run_schedule_describe_failed", + extra={ + "temporal_id": temporal_id, + "error_type": type(exc).__name__, + }, + ) + + return AgentRunScheduleResponse( + id=entity.id, + agent_id=entity.agent_id, + name=entity.name, + description=entity.description, + cron_expression=entity.cron_expression, + interval_seconds=entity.interval_seconds, + timezone=entity.timezone, + start_at=entity.start_at, + end_at=entity.end_at, + paused=entity.paused, + task_params=entity.task_params, + task_metadata=entity.task_metadata, + initial_input=ScheduleInitialInput.model_validate(entity.initial_input), + initial_input_method=effective_method, + creator_principal=ScheduleCreatorPrincipal.model_validate( + entity.creator_principal + ), + created_at=entity.created_at, + updated_at=entity.updated_at, + state=state, + next_action_times=next_action_times, + last_action_time=last_action_time, + num_actions_taken=num_actions_taken, + ) + + @staticmethod + def _extract_live_fields(description: ScheduleDescription) -> dict[str, Any]: + state = RunScheduleState.ACTIVE + if description.schedule.state and description.schedule.state.paused: + state = RunScheduleState.PAUSED + + info = description.info + next_action_times = ( + list(info.next_action_times) if info.next_action_times else [] + ) + last_action_time: datetime | None = None + if getattr(info, "recent_actions", None): + last_action = info.recent_actions[-1] + last_action_time = getattr(last_action, "started_at", None) or getattr( + last_action, "scheduled_at", None + ) + num_actions_taken = ( + cast(int, info.num_actions) if hasattr(info, "num_actions") else 0 + ) + return { + 
"state": state, + "next_action_times": next_action_times, + "last_action_time": last_action_time, + "num_actions_taken": num_actions_taken, + } + + async def _register_schedule_in_auth( + self, *, authz_selector: str, agent_id: str + ) -> bool: + """Register the schedule under its parent agent so permissions cascade. + + Returns True when registered, or False when no creator identity is + resolvable (mirrors ScheduleService: registration is skipped under authz + bypass / when no principal is present). + """ + principal_context = self.authorization_service.principal_context + if isinstance(principal_context, dict): + user_id = principal_context.get("user_id") + service_account_id = principal_context.get("service_account_id") + else: + user_id = getattr(principal_context, "user_id", None) + service_account_id = getattr(principal_context, "service_account_id", None) + if user_id is None and service_account_id is None: + logger.warning( + "Skipping auth registration for run schedule: no creator resolvable", + extra={"authz_selector": authz_selector, "agent_id": agent_id}, + ) + return False + await self.authorization_service.register_resource( + resource=AgentexResource.schedule(authz_selector), + parent=AgentexResource.agent(agent_id), + ) + return True + + async def _deregister_schedule_from_auth(self, *, authz_selector: str) -> None: + try: + await self.authorization_service.deregister_resource( + resource=AgentexResource.schedule(authz_selector), + ) + except Exception as exc: + logger.warning( + "Auth deregister failed for run schedule; entry may be orphaned", + extra={ + "authz_selector": authz_selector, + "error_type": type(exc).__name__, + }, + exc_info=True, + ) + + async def _best_effort_delete_row(self, schedule_id: str) -> None: + try: + await self.schedule_repository.delete(id=schedule_id) + except ItemDoesNotExist: + pass + except Exception: + logger.exception( + "Failed to roll back run schedule row after Temporal create failure", + extra={"schedule_id": 
schedule_id}, + ) + + +DAgentRunScheduleService = Annotated[ + AgentRunScheduleService, Depends(AgentRunScheduleService) +] diff --git a/agentex/src/domain/services/schedule_service.py b/agentex/src/domain/services/schedule_service.py deleted file mode 100644 index 8f5ae02f..00000000 --- a/agentex/src/domain/services/schedule_service.py +++ /dev/null @@ -1,471 +0,0 @@ -from datetime import datetime, timedelta -from typing import Annotated, Any, cast - -from fastapi import Depends -from temporalio.client import ScheduleActionStartWorkflow, ScheduleDescription - -from src.adapters.temporal.adapter_temporal import DTemporalAdapter -from src.api.schemas.authorization_types import AgentexResource -from src.api.schemas.schedules import ( - CreateScheduleRequest, - ScheduleActionInfo, - ScheduleListItem, - ScheduleListResponse, - ScheduleResponse, - ScheduleSpecInfo, - ScheduleState, -) -from src.domain.entities.agents import AgentEntity -from src.domain.services.authorization_service import DAuthorizationService -from src.utils.logging import make_logger - -logger = make_logger(__name__) - -# Schedule ID format: {agent_id}--{schedule_name} -SCHEDULE_ID_SEPARATOR = "--" - - -def build_schedule_id(agent_id: str, schedule_name: str) -> str: - """Build a schedule ID from agent ID and schedule name.""" - return f"{agent_id}{SCHEDULE_ID_SEPARATOR}{schedule_name}" - - -def parse_schedule_id(schedule_id: str) -> tuple[str, str]: - """Parse a schedule ID into (agent_id, schedule_name).""" - parts = schedule_id.split(SCHEDULE_ID_SEPARATOR, 1) - if len(parts) != 2: - return schedule_id, "" - return parts[0], parts[1] - - -class ScheduleService: - """ - Service for managing Temporal schedules scoped to agents. 
- """ - - def __init__( - self, - temporal_adapter: DTemporalAdapter, - authorization_service: DAuthorizationService, - ): - self.temporal_adapter = temporal_adapter - self.authorization_service = authorization_service - - async def create_schedule( - self, - agent: AgentEntity, - request: CreateScheduleRequest, - ) -> ScheduleResponse: - """ - Create a new schedule for recurring workflow execution. - - Args: - agent: The agent this schedule belongs to - request: The schedule creation request - - Returns: - ScheduleResponse with the created schedule details - """ - schedule_id = build_schedule_id(agent.id, request.name) - workflow_id_prefix = f"{schedule_id}-run" - - # Build args for the workflow - args = [request.workflow_params] if request.workflow_params else None - - # Convert cron_expression to list if provided - cron_expressions = ( - [request.cron_expression] if request.cron_expression else None - ) - - # Convert execution timeout to timedelta - execution_timeout = ( - timedelta(seconds=request.execution_timeout_seconds) - if request.execution_timeout_seconds - else None - ) - - # Schedules have no Postgres row: Temporal is the store and the auth - # selector is the schedule id ({agent_id}--{schedule_name}). Register - # before the Temporal write so an auth failure fails closed. If the - # Temporal create fails after registration, compensate with a deregister. - # The read-back below is intentionally outside the compensation scope - # because the schedule was already created. 
- registered = await self._register_schedule_in_auth( - schedule_id=schedule_id, agent_id=agent.id - ) - try: - await self.temporal_adapter.create_schedule( - schedule_id=schedule_id, - workflow=request.workflow_name, - workflow_id=workflow_id_prefix, - args=args, - task_queue=request.task_queue, - cron_expressions=cron_expressions, - interval_seconds=request.interval_seconds, - execution_timeout=execution_timeout, - start_at=request.start_at, - end_at=request.end_at, - paused=request.paused, - ) - except Exception: - # Orphan guard: the auth entry was written but the schedule never - # landed in Temporal. Best-effort compensating deregister, then - # re-raise the original error. - if registered: - await self._deregister_schedule_from_auth(schedule_id=schedule_id) - raise - - # Fetch and return the created schedule - return await self.get_schedule(agent.id, request.name) - - async def _register_schedule_in_auth( - self, *, schedule_id: str, agent_id: str - ) -> bool: - """Register the schedule in the authorization graph before creating it. - - The schedule is registered under its parent agent so permissions - cascade from the owning agent. Registering before the Temporal create - fails closed: an auth failure aborts the create, and the caller - compensates with a deregister if the Temporal create later fails. - - Returns True when the schedule was registered, or False when no creator - identity is resolvable and registration is skipped. - """ - principal_context = self.authorization_service.principal_context - # principal_context is `Any` (a dict from /v1/authn), not a typed model, - # so getattr always yields None and silently skips the Spark register. 
- if isinstance(principal_context, dict): - user_id = principal_context.get("user_id") - service_account_id = principal_context.get("service_account_id") - else: - user_id = getattr(principal_context, "user_id", None) - service_account_id = getattr(principal_context, "service_account_id", None) - if user_id is None and service_account_id is None: - logger.warning( - "Skipping auth registration for schedule: no creator resolvable", - extra={"schedule_id": schedule_id, "agent_id": agent_id}, - ) - return False - try: - await self.authorization_service.register_resource( - resource=AgentexResource.schedule(schedule_id), - parent=AgentexResource.agent(agent_id), - ) - except Exception as exc: - logger.exception( - "Auth registration failed for agent_schedule; aborting create", - extra={ - "schedule_id": schedule_id, - "agent_id": agent_id, - "error_type": type(exc).__name__, - }, - ) - raise - return True - - async def _deregister_schedule_from_auth(self, *, schedule_id: str) -> None: - """Best-effort removal of the schedule from the authorization graph. - - Temporal is the source of truth for schedule existence. Once Temporal - delete succeeds, a deregister failure is logged but does not block the - delete response. - """ - try: - await self.authorization_service.deregister_resource( - resource=AgentexResource.schedule(schedule_id), - ) - except Exception as exc: - logger.warning( - "Auth deregister failed for agent_schedule; entry may be orphaned", - extra={ - "schedule_id": schedule_id, - "error_type": type(exc).__name__, - }, - exc_info=True, - ) - - async def get_schedule(self, agent_id: str, schedule_name: str) -> ScheduleResponse: - """ - Get details of a schedule. 
- - Args: - agent_id: The agent ID - schedule_name: The schedule name - - Returns: - ScheduleResponse with schedule details - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - description = await self.temporal_adapter.describe_schedule(schedule_id) - - return self._description_to_response(schedule_id, description) - - async def list_schedules( - self, - agent_id: str | None = None, - page_size: int = 100, - authorized_schedule_ids: list[str] | None = None, - ) -> ScheduleListResponse: - """ - List schedules, optionally filtered by agent. - - Args: - agent_id: Optional agent ID to filter schedules - page_size: Number of results to return - authorized_schedule_ids: Ownership filter applied against the - schedule id ({agent_id}--{schedule_name}). ``None`` means "no - filter" (authorization bypass); any list (including empty) - restricts results to those ids, so an empty list yields no - schedules. - - Returns: - ScheduleListResponse with list of schedules - - Note: - ``page_size`` caps the upstream Temporal listing, which is then - filtered in-process by ``agent_id`` and ``authorized_schedule_ids``, - so fewer than ``page_size`` rows may be returned even when more - matching schedules exist. Pre-dates the ownership filter (the - ``agent_id`` filter already had it). Server-side filtering isn't - available (Temporal standard visibility can't filter on the schedule - id); the fix is to loop pages until the requested page is filled, - tracked separately. - """ - schedules = await self.temporal_adapter.list_schedules(page_size=page_size) - - # Gate on ``is not None``, not truthiness: an empty list means the caller - # owns nothing and must filter everything out, not pass through unfiltered. 
- authorized_ids = ( - set(authorized_schedule_ids) - if authorized_schedule_ids is not None - else None - ) - - items = [] - for schedule in schedules: - parsed_agent_id, schedule_name = parse_schedule_id(schedule.id) - - if agent_id and parsed_agent_id != agent_id: - continue - - if authorized_ids is not None and schedule.id not in authorized_ids: - continue - - # Extract workflow name from action if available - workflow_name = None - if hasattr(schedule, "info") and hasattr(schedule.info, "action"): - action = schedule.info.action - if isinstance(action, ScheduleActionStartWorkflow): - workflow_name = action.workflow - - # Extract next action time - next_action_time = None - if hasattr(schedule, "info") and schedule.info.next_action_times: - next_action_time = schedule.info.next_action_times[0] - - # Determine state - state = ScheduleState.ACTIVE - if hasattr(schedule, "info") and hasattr(schedule.info, "paused"): - state = ( - ScheduleState.PAUSED - if schedule.info.paused - else ScheduleState.ACTIVE - ) - - items.append( - ScheduleListItem( - schedule_id=schedule.id, - name=schedule_name or schedule.id, - agent_id=parsed_agent_id, - state=state, - workflow_name=workflow_name, - next_action_time=next_action_time, - ) - ) - - return ScheduleListResponse( - schedules=items, - total=len(items), - ) - - async def pause_schedule( - self, agent_id: str, schedule_name: str, note: str | None = None - ) -> ScheduleResponse: - """ - Pause a schedule. 
- - Args: - agent_id: The agent ID - schedule_name: The schedule name - note: Optional note explaining why the schedule was paused - - Returns: - ScheduleResponse with updated schedule details - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - await self.temporal_adapter.pause_schedule(schedule_id, note=note) - return await self.get_schedule(agent_id, schedule_name) - - async def unpause_schedule( - self, agent_id: str, schedule_name: str, note: str | None = None - ) -> ScheduleResponse: - """ - Unpause/resume a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - note: Optional note explaining why the schedule was unpaused - - Returns: - ScheduleResponse with updated schedule details - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - await self.temporal_adapter.unpause_schedule(schedule_id, note=note) - return await self.get_schedule(agent_id, schedule_name) - - async def trigger_schedule( - self, agent_id: str, schedule_name: str - ) -> ScheduleResponse: - """ - Trigger a schedule to run immediately. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - - Returns: - ScheduleResponse with updated schedule details - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - await self.temporal_adapter.trigger_schedule(schedule_id) - return await self.get_schedule(agent_id, schedule_name) - - async def delete_schedule(self, agent_id: str, schedule_name: str) -> None: - """ - Delete a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - """ - schedule_id = build_schedule_id(agent_id, schedule_name) - await self.temporal_adapter.delete_schedule(schedule_id) - # Best-effort: drop the auth tuple after the Temporal delete. A failure - # here is logged but never blocks the delete. 
- await self._deregister_schedule_from_auth(schedule_id=schedule_id) - - def _description_to_response( - self, schedule_id: str, description: ScheduleDescription - ) -> ScheduleResponse: - """ - Convert a Temporal ScheduleDescription to a ScheduleResponse. - - Args: - schedule_id: The schedule ID - description: Temporal ScheduleDescription object - - Returns: - ScheduleResponse - """ - # Parse agent_id and name from schedule_id - agent_id, schedule_name = parse_schedule_id(schedule_id) - - # Extract action info - action = description.schedule.action - workflow_name = "" - workflow_id_prefix = "" - task_queue = "" - workflow_params: list[Any] | None = None - - if isinstance(action, ScheduleActionStartWorkflow): - workflow_name = action.workflow - workflow_id_prefix = action.id - task_queue = action.task_queue or "" - # Convert Temporal Payload objects to JSON-serializable format - # The args are raw Temporal payloads that can't be directly serialized - if action.args: - try: - # Try to extract data from payloads if they have a data attribute - workflow_params = [] - for arg in action.args: - if hasattr(arg, "data"): - # Decode bytes to string if possible - try: - import json - - workflow_params.append( - json.loads(arg.data.decode("utf-8")) - ) - except (json.JSONDecodeError, UnicodeDecodeError): - workflow_params.append(str(arg.data)) - else: - workflow_params.append(str(arg)) - except Exception: - # If conversion fails, just indicate params exist but can't be displayed - workflow_params = None - else: - workflow_params = None - - # Extract spec info - spec = description.schedule.spec - cron_expressions = list(spec.cron_expressions) if spec.cron_expressions else [] - intervals_seconds = [ - int(interval.every.total_seconds()) for interval in (spec.intervals or []) - ] - - # Extract state - state = ScheduleState.ACTIVE - if description.schedule.state and description.schedule.state.paused: - state = ScheduleState.PAUSED - - # Extract info - info = description.info - 
num_actions_taken = info.num_actions if hasattr(info, "num_actions") else 0 - num_actions_missed = ( - info.num_actions_missed_catchup_window - if hasattr(info, "num_actions_missed_catchup_window") - else 0 - ) - next_action_times = ( - list(info.next_action_times) if info.next_action_times else [] - ) - last_action_time = None - if hasattr(info, "recent_actions") and info.recent_actions: - # ScheduleActionResult has started_at (when action started) and scheduled_at (when it was scheduled) - last_action = info.recent_actions[-1] - last_action_time = getattr(last_action, "started_at", None) or getattr( - last_action, "scheduled_at", None - ) - created_at: datetime | None = ( - cast(datetime, info.create_time) - if hasattr(info, "create_time") and info.create_time - else None - ) - - return ScheduleResponse( - schedule_id=schedule_id, - name=schedule_name or schedule_id, - agent_id=agent_id, - state=state, - action=ScheduleActionInfo( - workflow_name=workflow_name, - workflow_id_prefix=workflow_id_prefix, - task_queue=task_queue, - workflow_params=workflow_params, - ), - spec=ScheduleSpecInfo( - cron_expressions=cron_expressions, - intervals_seconds=intervals_seconds, - start_at=spec.start_at, - end_at=spec.end_at, - ), - num_actions_taken=num_actions_taken, - num_actions_missed=num_actions_missed, - next_action_times=next_action_times, - last_action_time=last_action_time, - created_at=created_at, - ) - - -DScheduleService = Annotated[ScheduleService, Depends(ScheduleService)] diff --git a/agentex/src/domain/use_cases/agent_run_schedules_use_case.py b/agentex/src/domain/use_cases/agent_run_schedules_use_case.py new file mode 100644 index 00000000..5dd55661 --- /dev/null +++ b/agentex/src/domain/use_cases/agent_run_schedules_use_case.py @@ -0,0 +1,93 @@ +from typing import Annotated, Any + +from fastapi import Depends + +from src.api.schemas.agent_run_schedules import ( + AgentRunScheduleListResponse, + AgentRunScheduleResponse, + CreateAgentRunScheduleRequest, + 
UpdateAgentRunScheduleRequest, +) +from src.domain.entities.agents import AgentEntity +from src.domain.exceptions import ClientError +from src.domain.services.agent_run_schedule_service import DAgentRunScheduleService +from src.utils.logging import make_logger + +logger = make_logger(__name__) + + +class AgentRunSchedulesUseCase: + """Use case for managing scheduled agent runs.""" + + def __init__( + self, + run_schedule_service: DAgentRunScheduleService, + ): + self.run_schedule_service = run_schedule_service + + async def create_schedule( + self, + agent: AgentEntity, + request: CreateAgentRunScheduleRequest, + creator_principal: dict[str, Any], + ) -> AgentRunScheduleResponse: + if not request.cron_expression and not request.interval_seconds: + raise ClientError( + "Either cron_expression or interval_seconds must be provided" + ) + if request.cron_expression and request.interval_seconds: + raise ClientError( + "Provide only one of cron_expression or interval_seconds, not both" + ) + return await self.run_schedule_service.create_schedule( + agent, request, creator_principal + ) + + async def list_schedules( + self, + agent_id: str, + authorized_schedule_ids: list[str] | None = None, + limit: int = 100, + ) -> AgentRunScheduleListResponse: + return await self.run_schedule_service.list_schedules( + agent_id, + authorized_schedule_ids=authorized_schedule_ids, + limit=limit, + ) + + async def get_schedule(self, agent_id: str, name: str) -> AgentRunScheduleResponse: + return await self.run_schedule_service.get_schedule(agent_id, name) + + async def pause_schedule( + self, agent_id: str, name: str, note: str | None = None + ) -> AgentRunScheduleResponse: + return await self.run_schedule_service.pause_schedule(agent_id, name, note=note) + + async def resume_schedule( + self, agent_id: str, name: str, note: str | None = None + ) -> AgentRunScheduleResponse: + return await self.run_schedule_service.resume_schedule( + agent_id, name, note=note + ) + + async def 
update_schedule( + self, agent_id: str, name: str, request: UpdateAgentRunScheduleRequest + ) -> AgentRunScheduleResponse: + if request.cron_expression and request.interval_seconds: + raise ClientError( + "Provide only one of cron_expression or interval_seconds, not both" + ) + return await self.run_schedule_service.update_schedule(agent_id, name, request) + + async def trigger_schedule( + self, agent_id: str, name: str + ) -> AgentRunScheduleResponse: + return await self.run_schedule_service.trigger_schedule(agent_id, name) + + async def delete_schedule(self, agent_id: str, name: str) -> str: + return await self.run_schedule_service.delete_schedule(agent_id, name) + + +DAgentRunSchedulesUseCase = Annotated[ + AgentRunSchedulesUseCase, Depends(AgentRunSchedulesUseCase) +] diff --git a/agentex/src/domain/use_cases/schedules_use_case.py b/agentex/src/domain/use_cases/schedules_use_case.py deleted file mode 100644 index 1d00bf45..00000000 --- a/agentex/src/domain/use_cases/schedules_use_case.py +++ /dev/null @@ -1,153 +0,0 @@ -from typing import Annotated - -from fastapi import Depends - -from src.api.schemas.schedules import ( - CreateScheduleRequest, - ScheduleListResponse, - ScheduleResponse, -) -from src.domain.entities.agents import AgentEntity -from src.domain.exceptions import ClientError -from src.domain.services.schedule_service import DScheduleService -from src.utils.logging import make_logger - -logger = make_logger(__name__) - - -class SchedulesUseCase: - """ - Use case for managing Temporal schedules scoped to agents. - """ - - def __init__( - self, - schedule_service: DScheduleService, - ): - self.schedule_service = schedule_service - - async def create_schedule( - self, - agent: AgentEntity, - request: CreateScheduleRequest, - ) -> ScheduleResponse: - """ - Create a new schedule for recurring workflow execution. 
- - Args: - agent: The agent this schedule belongs to - request: The schedule creation request - - Returns: - ScheduleResponse with the created schedule details - - Raises: - ClientError: If neither cron_expression nor interval_seconds is provided - """ - if not request.cron_expression and not request.interval_seconds: - raise ClientError( - "Either cron_expression or interval_seconds must be provided" - ) - - return await self.schedule_service.create_schedule(agent, request) - - async def get_schedule(self, agent_id: str, schedule_name: str) -> ScheduleResponse: - """ - Get details of a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - - Returns: - ScheduleResponse with schedule details - """ - return await self.schedule_service.get_schedule(agent_id, schedule_name) - - async def list_schedules( - self, - agent_id: str, - page_size: int = 100, - authorized_schedule_ids: list[str] | None = None, - ) -> ScheduleListResponse: - """ - List schedules for an agent. - - Args: - agent_id: The agent ID - page_size: Number of results to return - authorized_schedule_ids: Ownership filter forwarded to the service - (``None`` = bypass → all; a list restricts; empty → none). - - Returns: - ScheduleListResponse with list of schedules - """ - return await self.schedule_service.list_schedules( - agent_id=agent_id, - page_size=page_size, - authorized_schedule_ids=authorized_schedule_ids, - ) - - async def pause_schedule( - self, agent_id: str, schedule_name: str, note: str | None = None - ) -> ScheduleResponse: - """ - Pause a schedule. 
- - Args: - agent_id: The agent ID - schedule_name: The schedule name - note: Optional note explaining why the schedule was paused - - Returns: - ScheduleResponse with updated schedule details - """ - return await self.schedule_service.pause_schedule( - agent_id, schedule_name, note=note - ) - - async def unpause_schedule( - self, agent_id: str, schedule_name: str, note: str | None = None - ) -> ScheduleResponse: - """ - Unpause/resume a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - note: Optional note explaining why the schedule was unpaused - - Returns: - ScheduleResponse with updated schedule details - """ - return await self.schedule_service.unpause_schedule( - agent_id, schedule_name, note=note - ) - - async def trigger_schedule( - self, agent_id: str, schedule_name: str - ) -> ScheduleResponse: - """ - Trigger a schedule to run immediately. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - - Returns: - ScheduleResponse with updated schedule details - """ - return await self.schedule_service.trigger_schedule(agent_id, schedule_name) - - async def delete_schedule(self, agent_id: str, schedule_name: str) -> None: - """ - Delete a schedule. - - Args: - agent_id: The agent ID - schedule_name: The schedule name - """ - await self.schedule_service.delete_schedule(agent_id, schedule_name) - - -DSchedulesUseCase = Annotated[SchedulesUseCase, Depends(SchedulesUseCase)] diff --git a/agentex/src/temporal/activities/scheduled_agent_run_activities.py b/agentex/src/temporal/activities/scheduled_agent_run_activities.py new file mode 100644 index 00000000..07628083 --- /dev/null +++ b/agentex/src/temporal/activities/scheduled_agent_run_activities.py @@ -0,0 +1,308 @@ +""" +Temporal activity for scheduled agent runs. + +``launch_scheduled_agent_run`` is the single activity each scheduled fire runs. 
+It loads the persisted schedule, creates a fresh Agentex task with a deterministic +name, and delivers the configured initial input through the same path a manual +agent run uses — ``task/create`` then ``event/send`` (async / agentic agents) or +``message/send`` (sync agents) — attributed to the schedule's stored creator +principal. + +Correctness: +- Deterministic task name ``scheduled-run:{schedule_id}:{fire_id}`` makes + ``task/create`` get-or-create, so an activity retry returns the same task + instead of duplicating it. +- A ``scheduled_input_delivered`` marker on the task metadata guards against + re-delivering the initial input when the activity retries after a prior + successful delivery. + +Boundary types are JSON-native (the backend data converter does not serialize +Pydantic models), so args and the return value are plain str / dict. +""" + +import re +from datetime import UTC, datetime +from typing import Any + +from src.adapters.authorization.exceptions import AuthorizationError +from src.adapters.crud_store.exceptions import ItemDoesNotExist +from src.api.schemas.authorization_types import ( + AgentexResource, + AuthorizedOperationType, +) +from src.config.dependencies import GlobalDependencies +from src.domain.entities.agent_run_schedules import ( + InitialInputMethod, + infer_initial_input_method, +) +from src.domain.entities.agents import AgentStatus +from src.domain.entities.agents_rpc import ( + AgentRPCMethod, + CreateTaskRequestEntity, + SendEventRequestEntity, + SendMessageRequestEntity, +) +from src.domain.entities.task_messages import ( + MessageAuthor, + TaskMessageContentEntity, + TextContentEntity, +) +from src.domain.repositories.agent_run_schedule_repository import ( + AgentRunScheduleRepository, +) +from src.domain.use_cases.agents_acp_use_case import AgentsACPUseCase +from src.temporal.scheduled_agent_run_factory import build_acp_use_case_for_principal +from src.utils.logging import make_logger +from temporalio import activity + 
+logger = make_logger(__name__) + +LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY = "launch_scheduled_agent_run_activity" + +_INPUT_DELIVERED_MARKER = "scheduled_input_delivered" + +# Temporal suffixes a scheduled workflow id with the nominal fire time +# (e.g. ``...-run-2026-06-23T15:19:00Z``). Matching the trailing ISO-8601 lets +# the display label use the *scheduled* time, which is stable across activity +# retries, rather than wall-clock now() (which drifts on a delayed retry). +_NOMINAL_FIRE_TIME_RE = re.compile( + r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z?)$" +) + + +def _format_fire_time(fire_id: str) -> str: + """Format the schedule's nominal fire time for the task display name. + + Falls back to the current time when ``fire_id`` carries no recognizable + timestamp suffix (e.g. a manually triggered fire). + """ + match = _NOMINAL_FIRE_TIME_RE.search(fire_id) + if match: + try: + parsed = datetime.fromisoformat(match.group(1).replace("Z", "+00:00")) + return parsed.strftime("%Y-%m-%d %H:%M UTC") + except ValueError: + pass + return datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC") + + +def _build_initial_content(initial_input: dict[str, Any]) -> TaskMessageContentEntity: + """Build the message content delivered as the scheduled task's first input. + + Only text input is supported (enforced by ``ScheduleInitialInput.type``). + """ + author = initial_input.get("author", MessageAuthor.USER.value) + if not isinstance(author, MessageAuthor): + author = MessageAuthor(author) + return TextContentEntity( + author=author, + content=initial_input.get("content", ""), + ) + + +async def _authorize_or_skip( + authorization_service: Any, + checks: list[tuple[Any, Any]], + *, + schedule_id: str, + task_id: str | None = None, +) -> dict[str, Any] | None: + """Run fire-time AuthZ checks under the stored creator principal. + + Returns ``None`` when every check passes (or authz is bypassed). 
On a + permanent ``AuthorizationError`` (403) it returns a + ``skipped`` / ``permission_denied`` outcome so a revoked principal stops + future fires; transient authz errors propagate so Temporal retries. + """ + for resource, operation in checks: + try: + await authorization_service.check(resource=resource, operation=operation) + except AuthorizationError as exc: + logger.warning( + "scheduled_run_permission_denied", + extra={ + "schedule_id": schedule_id, + "resource": f"{resource.type}:{resource.selector}", + "operation": str(operation), + }, + ) + outcome: dict[str, Any] = { + "status": "skipped", + "reason": "permission_denied", + "schedule_id": schedule_id, + "detail": str(exc), + } + if task_id is not None: + outcome["task_id"] = task_id + return outcome + return None + + +class ScheduledAgentRunActivities: + def __init__( + self, + global_dependencies: GlobalDependencies, + schedule_repository: AgentRunScheduleRepository, + ): + self.global_dependencies = global_dependencies + self.schedule_repository = schedule_repository + + @activity.defn(name=LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY) + async def launch_scheduled_agent_run( + self, schedule_id: str, fire_id: str + ) -> dict[str, Any]: + """Create a task for the scheduled fire and deliver its initial input. + + Args: + schedule_id: The persisted ``agent_run_schedules`` row id. + fire_id: A token unique to this scheduled fire (the workflow id, + which Temporal makes unique per fire and stable across activity + retries within the same execution). Used to build the + deterministic, idempotent task name. + + Returns: + A JSON-native dict describing the outcome (``launched`` / ``skipped``). 
+ """ + try: + schedule = await self.schedule_repository.get(id=schedule_id) + except ItemDoesNotExist: + logger.warning( + "scheduled_run_schedule_not_found", + extra={"schedule_id": schedule_id, "fire_id": fire_id}, + ) + return { + "status": "skipped", + "reason": "schedule_not_found", + "schedule_id": schedule_id, + } + + if schedule.paused: + # Temporal pauses the schedule too, but a manual trigger can still + # fire a paused schedule — honor the stored paused state defensively. + return { + "status": "skipped", + "reason": "schedule_paused", + "schedule_id": schedule_id, + } + + use_case: AgentsACPUseCase = build_acp_use_case_for_principal( + self.global_dependencies, schedule.creator_principal + ) + + agent = await use_case.agent_repository.get(id=schedule.agent_id) + if agent.status == AgentStatus.DELETED: + return { + "status": "skipped", + "reason": "agent_deleted", + "schedule_id": schedule_id, + } + + method = infer_initial_input_method(agent.acp_type).value + + # Re-check the stored creator principal's permission at fire time, mirroring + # the JSON-RPC route's authorization order: agent.execute (the RPC endpoint + # gate) then task.create (the create check on the wildcard task resource). A revoked + # creator stops future fires instead of running under stale ownership. + # AuthorizationError (403) is a permanent denial → skip cleanly; transient + # authz errors propagate so Temporal retries. Under authz bypass (local / + # disabled) these are no-ops. + denied = await _authorize_or_skip( + use_case.authorization_service, + [ + ( + AgentexResource.agent(schedule.agent_id), + AuthorizedOperationType.execute, + ), + (AgentexResource.task("*"), AuthorizedOperationType.create), + ], + schedule_id=schedule_id, + ) + if denied is not None: + return denied + + task_name = f"scheduled-run:{schedule_id}:{fire_id}" + # Human-friendly label the UI renders for the task (it reads + # task_metadata.display_name, never the deterministic `name` above).
+ # Templated per fire so runs are distinguishable; placed first so a + # caller-supplied display_name in schedule.task_metadata overrides it. + fire_time = _format_fire_time(fire_id) + task_metadata = { + "display_name": f"Scheduled Message: {schedule.name} · {fire_time}", + **(schedule.task_metadata or {}), + "schedule_id": schedule_id, + "scheduled_fire_id": fire_id, + } + + # task/create — get-or-create by deterministic name, so a retry returns + # the same task. For async / agentic agents this also forwards the task + # to the ACP server; for sync agents it only persists the row. + task = await use_case.handle_rpc_request( + method=AgentRPCMethod.TASK_CREATE, + params=CreateTaskRequestEntity( + name=task_name, + params=schedule.task_params, + task_metadata=task_metadata, + ), + agent_id=schedule.agent_id, + ) + + # Duplicate-input guard: if this fire's task already carries the delivered + # marker, a prior attempt already delivered the initial input. + if task.task_metadata and task.task_metadata.get(_INPUT_DELIVERED_MARKER): + return { + "status": "skipped", + "reason": "input_already_delivered", + "task_id": task.id, + "schedule_id": schedule_id, + } + + # Mirror the route's per-method gate for event/send & message/send: + # update permission on the task before delivering the initial input. 
+ denied = await _authorize_or_skip( + use_case.authorization_service, + [(AgentexResource.task(task.id), AuthorizedOperationType.update)], + schedule_id=schedule_id, + task_id=task.id, + ) + if denied is not None: + return denied + + content = _build_initial_content(schedule.initial_input) + if method == InitialInputMethod.MESSAGE_SEND.value: + await use_case.handle_rpc_request( + method=AgentRPCMethod.MESSAGE_SEND, + params=SendMessageRequestEntity( + task_name=task_name, content=content, stream=False + ), + agent_id=schedule.agent_id, + ) + else: + await use_case.handle_rpc_request( + method=AgentRPCMethod.EVENT_SEND, + params=SendEventRequestEntity(task_name=task_name, content=content), + agent_id=schedule.agent_id, + ) + + # Best-effort delivered marker for the retry guard above. A crash between + # delivery and this update is the only window where a retry could + # re-deliver; deterministic task naming still prevents duplicate tasks. + task.task_metadata = { + **(task.task_metadata or {}), + _INPUT_DELIVERED_MARKER: True, + } + await use_case.task_service.update_task(task) + + logger.info( + "scheduled_run_launched", + extra={ + "schedule_id": schedule_id, + "task_id": task.id, + "method": method, + }, + ) + return { + "status": "launched", + "task_id": task.id, + "schedule_id": schedule_id, + "method": method, + } diff --git a/agentex/src/temporal/run_worker.py b/agentex/src/temporal/run_worker.py index de44cba6..cba6f634 100644 --- a/agentex/src/temporal/run_worker.py +++ b/agentex/src/temporal/run_worker.py @@ -27,12 +27,21 @@ from src.temporal.activities.retention_cleanup_activities import ( RetentionCleanupActivities, ) +from src.temporal.activities.scheduled_agent_run_activities import ( + ScheduledAgentRunActivities, +) +from src.temporal.scheduled_agent_run_factory import ( + build_agent_run_schedule_repository, +) from src.temporal.task_retention_factory import build_task_retention_use_case from src.temporal.workflows.healthcheck_workflow import 
HealthCheckWorkflow from src.temporal.workflows.retention_cleanup_workflow import ( RetentionCleanupSweepWorkflow, RetentionCleanupTaskWorkflow, ) +from src.temporal.workflows.scheduled_agent_run_workflow import ( + ScheduledAgentRunWorkflow, +) from src.utils.logging import make_logger logger = make_logger(__name__) @@ -161,6 +170,11 @@ def create_agentex_server_worker( use_case=retention_use_case, ) + scheduled_agent_run_activities = ScheduledAgentRunActivities( + global_dependencies=global_dependencies, + schedule_repository=build_agent_run_schedule_repository(global_dependencies), + ) + return asyncio.create_task( run_worker( task_queue=task_queue, @@ -168,6 +182,7 @@ def create_agentex_server_worker( HealthCheckWorkflow, RetentionCleanupSweepWorkflow, RetentionCleanupTaskWorkflow, + ScheduledAgentRunWorkflow, ], activities=[ health_check_activities.check_status_activity, @@ -176,6 +191,7 @@ def create_agentex_server_worker( retention_activities.find_cleanup_candidates, retention_activities.find_multi_agent_cleanup_candidates, retention_activities.clean_task, + scheduled_agent_run_activities.launch_scheduled_agent_run, ], max_workers=50, max_concurrent_activities=50, diff --git a/agentex/src/temporal/scheduled_agent_run_factory.py b/agentex/src/temporal/scheduled_agent_run_factory.py new file mode 100644 index 00000000..c9c0339a --- /dev/null +++ b/agentex/src/temporal/scheduled_agent_run_factory.py @@ -0,0 +1,141 @@ +""" +Wire the dependencies the scheduled-agent-run activity needs outside FastAPI's +Depends DI, for use inside the Temporal worker. Mirrors the manual-wiring pattern +in task_retention_factory.py. + +Each scheduled fire creates a fresh Agentex task and delivers the schedule's +configured initial input under the *stored creator principal* — +not as an agent identity. 
So the AgentsACPUseCase is rebuilt per fire with an +AuthorizationService whose principal_context is that fire's creator principal and +whose agent_identity is None, attributing task ownership and AuthZ checks to the +schedule's creator rather than to the worker's service identity. +""" + +from types import SimpleNamespace +from typing import Any + +from src.adapters.authorization.adapter_agentex_authz_proxy import ( + AgentexAuthorizationProxy, +) +from src.adapters.http.adapter_httpx import HttpxGateway +from src.adapters.streams.adapter_redis import RedisStreamRepository +from src.api.middleware_utils import resolve_authorization_enabled +from src.config.dependencies import ( + GlobalDependencies, + database_async_read_only_session_maker, + database_async_read_write_engine, + database_async_read_write_session_maker, + resolve_environment_variable_dependency, +) +from src.config.environment_variables import EnvironmentVariables, EnvVarKeys +from src.domain.repositories.agent_api_key_repository import AgentAPIKeyRepository +from src.domain.repositories.agent_repository import AgentRepository +from src.domain.repositories.agent_run_schedule_repository import ( + AgentRunScheduleRepository, +) +from src.domain.repositories.deployment_repository import DeploymentRepository +from src.domain.repositories.event_repository import EventRepository +from src.domain.repositories.task_message_repository import TaskMessageRepository +from src.domain.repositories.task_repository import TaskRepository +from src.domain.repositories.task_state_repository import TaskStateRepository +from src.domain.services.agent_acp_service import AgentACPService +from src.domain.services.authorization_service import AuthorizationService +from src.domain.services.task_message_service import TaskMessageService +from src.domain.services.task_service import AgentTaskService +from src.domain.use_cases.agents_acp_use_case import AgentsACPUseCase + + +class _ScheduledRunRequest: + """Minimal ``Request`` 
stand-in for worker-side AuthZ + ACP delegation. + + Carries the stored creator principal as ``state.principal_context`` with no + ``agent_identity`` (so AuthZ attributes ownership to the creator, not a + service) and no headers (so no live user credentials — cookies, API keys — + are forwarded downstream). ``build_delegation_headers`` returns an + empty mapping when there are no inbound credential headers, which is exactly + the intended behavior here. + """ + + def __init__(self, principal_context: dict[str, Any] | None): + self.state = SimpleNamespace( + principal_context=principal_context, + agent_identity=None, + ) + self.headers: dict[str, str] = {} + + +def build_agent_run_schedule_repository( + global_dependencies: GlobalDependencies, +) -> AgentRunScheduleRepository: + """Build the schedule repository from an already-loaded GlobalDependencies.""" + engine = database_async_read_write_engine() + rw_session_maker = database_async_read_write_session_maker(engine) + ro_session_maker = database_async_read_only_session_maker(engine) + return AgentRunScheduleRepository(rw_session_maker, ro_session_maker) + + +def build_acp_use_case_for_principal( + global_dependencies: GlobalDependencies, + creator_principal: dict[str, Any] | None, +) -> AgentsACPUseCase: + """Construct an AgentsACPUseCase bound to a specific creator principal. + + The returned use case routes task creation and initial-input delivery exactly + as the JSON-RPC path does (ACP-type validation, acp_url resolution, ownership + grant, get-or-create idempotency), but attributes everything to + *creator_principal* instead of the request principal. 
+    """
+    env = EnvironmentVariables.refresh()
+    engine = database_async_read_write_engine()
+    rw_session_maker = database_async_read_write_session_maker(engine)
+    # NOTE(review): the read-only session maker is built on the read-write
+    # engine — confirm this is intended rather than a dedicated read-only engine.
+    ro_session_maker = database_async_read_only_session_maker(engine)
+
+    # One synthetic request is shared by the authorization service and the ACP
+    # client below, so both see the same creator principal and empty headers.
+    request = _ScheduledRunRequest(creator_principal)
+
+    agent_repository = AgentRepository(rw_session_maker, ro_session_maker)
+    agent_api_key_repository = AgentAPIKeyRepository(rw_session_maker, ro_session_maker)
+    deployment_repository = DeploymentRepository(rw_session_maker, ro_session_maker)
+    task_repository = TaskRepository(rw_session_maker, ro_session_maker)
+    event_repository = EventRepository(rw_session_maker, ro_session_maker)
+
+    # Task state and task messages use the MongoDB handle from global_dependencies.
+    task_state_repository = TaskStateRepository(global_dependencies.mongodb_database)
+    task_message_repository = TaskMessageRepository(
+        global_dependencies.mongodb_database
+    )
+    task_message_service = TaskMessageService(
+        message_repository=task_message_repository
+    )
+
+    http_gateway = HttpxGateway(env)
+    stream_repository = RedisStreamRepository(env, global_dependencies.redis_pool)
+
+    # Whether AuthZ is enforced is derived from the configured auth URL.
+    auth_url = resolve_environment_variable_dependency(EnvVarKeys.AGENTEX_AUTH_URL)
+    authz_gateway = AgentexAuthorizationProxy(agentex_auth_url=auth_url)
+    authorization_service = AuthorizationService(
+        enabled=resolve_authorization_enabled(auth_url),
+        gateway=authz_gateway,
+        request=request,  # type: ignore[arg-type]
+    )
+
+    acp_client = AgentACPService(
+        agent_repository=agent_repository,
+        agent_api_key_repository=agent_api_key_repository,
+        http_gateway=http_gateway,
+        request=request,  # type: ignore[arg-type]
+    )
+    task_service = AgentTaskService(
+        acp_client=acp_client,
+        task_state_repository=task_state_repository,
+        task_repository=task_repository,
+        event_repository=event_repository,
+        stream_repository=stream_repository,
+        authorization_service=authorization_service,
+    )
+    return AgentsACPUseCase(
+        agent_repository=agent_repository,
+        deployment_repository=deployment_repository,
+        acp_client=acp_client,
+        task_service=task_service,
+        task_message_service=task_message_service,
+        authorization_service=authorization_service,
+    )
diff --git a/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py b/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py
new file mode 100644
index 00000000..b2ccc824
--- /dev/null
+++ b/agentex/src/temporal/workflows/scheduled_agent_run_workflow.py
@@ -0,0 +1,52 @@
+"""
+Scheduled agent run workflow.
+
+Started by a Temporal Schedule on each cron / interval fire. The workflow is
+deliberately thin: it passes only the schedule id and a per-fire token to a
+single activity and does no DB / API / ACP work itself, so it stays
+deterministic. All side effects live in ``launch_scheduled_agent_run``.
+
+The per-fire token is the workflow id, which Temporal makes unique per scheduled
+fire (it suffixes the configured workflow id with the nominal fire time) and
+keeps stable across activity retries within the same execution. The activity
+uses it to build a deterministic, idempotent task name.
+"""
+
+from datetime import timedelta
+from typing import Any
+
+from src.temporal.activities.scheduled_agent_run_activities import (
+    LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY,
+)
+from temporalio import workflow
+from temporalio.common import RetryPolicy
+
+
+@workflow.defn
+class ScheduledAgentRunWorkflow:
+    """Thin workflow that hands one scheduled fire to a single activity."""
+
+    @workflow.run
+    async def run(self, schedule_id: str) -> dict[str, Any]:
+        """Execute the launch activity for one fire of *schedule_id*.
+
+        Args:
+            schedule_id: Schedule identifier, forwarded unchanged as the
+                activity's first argument.
+
+        Returns:
+            The activity's result, returned unchanged.
+        """
+        # The workflow id serves as the per-fire token (see module docstring).
+        fire_id = workflow.info().workflow_id
+        return await workflow.execute_activity(
+            LAUNCH_SCHEDULED_AGENT_RUN_ACTIVITY,
+            args=[schedule_id, fire_id],
+            start_to_close_timeout=timedelta(seconds=120),
+            # At most 5 attempts; waits start at 2s and double after each failure.
+            retry_policy=RetryPolicy(
+                maximum_attempts=5,
+                initial_interval=timedelta(seconds=2),
+                backoff_coefficient=2.0,
+            ),
+        )
diff --git a/agentex/tests/integration/services/test_schedule_service_dual_write.py b/agentex/tests/integration/services/test_schedule_service_dual_write.py
deleted file mode 100644
index b030f439..00000000
--- a/agentex/tests/integration/services/test_schedule_service_dual_write.py
+++ /dev/null
@@ -1,251 +0,0 @@
-"""Integration tests for ScheduleService authorization writes.
-
-Schedules have no Postgres row: Temporal is the store and the auth selector is
-``{agent_id}--{schedule_name}``. The authorization-write sequencing therefore
-lives in ``ScheduleService`` next to the Temporal write:
-
-- Create registers the schedule in the authorization graph under parent=agent,
-  before the Temporal create.
-- Registration failure prevents the Temporal create.
-- A Temporal create failure after a successful registration triggers a
-  best-effort compensating deregister and re-raises the original Temporal error.
-- A post-create read-back failure does not deregister, because the schedule was
-  actually created.
-- Delete removes the Temporal schedule first, then deregisters best-effort.
-- No creator identity means the registration is skipped and the schedule still
-  lands in Temporal.
- -The tests mock the Temporal adapter and authorization service and stub the -post-create read-back; the behavior under test is the call sequencing inside -``ScheduleService``, not Temporal or the authorization service itself. -""" - -from __future__ import annotations - -from types import SimpleNamespace -from unittest.mock import AsyncMock, Mock - -import pytest -from src.api.schemas.authorization_types import AgentexResource, AgentexResourceType -from src.api.schemas.schedules import CreateScheduleRequest, ScheduleResponse -from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus -from src.domain.services.schedule_service import ScheduleService, build_schedule_id -from src.utils.ids import orm_id - - -def _principal( - user_id: str | None = None, service_account_id: str | None = None -) -> SimpleNamespace: - """Minimal stand-in for the auth principal context.""" - return SimpleNamespace( - user_id=user_id, service_account_id=service_account_id, account_id="acct-1" - ) - - -def _agent() -> AgentEntity: - agent_id = orm_id() - return AgentEntity( - id=agent_id, - name=f"agent-{agent_id[:8]}", - description="authorization-write test agent", - status=AgentStatus.READY, - acp_type=ACPType.SYNC, - acp_url="http://test-acp", - ) - - -def _request(name: str = "nightly") -> CreateScheduleRequest: - return CreateScheduleRequest( - name=name, - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - ) - - -def _build_service( - *, - principal: SimpleNamespace | None, - register_resource: AsyncMock | None = None, - deregister_resource: AsyncMock | None = None, - create_raises: Exception | None = None, - delete_raises: Exception | None = None, - get_schedule_raises: Exception | None = None, -) -> tuple[ScheduleService, Mock, Mock]: - temporal_adapter = Mock() - temporal_adapter.create_schedule = AsyncMock( - side_effect=create_raises, return_value=None - ) - temporal_adapter.delete_schedule = AsyncMock( - 
side_effect=delete_raises, return_value=None - ) - - authorization_service = Mock() - authorization_service.principal_context = principal - authorization_service.register_resource = register_resource or AsyncMock( - return_value=None - ) - authorization_service.deregister_resource = deregister_resource or AsyncMock( - return_value=None - ) - - service = ScheduleService( - temporal_adapter=temporal_adapter, - authorization_service=authorization_service, - ) - # Stub the post-create read-back so create_schedule doesn't hit - # describe_schedule; tests covering a read-back failure pass get_schedule_raises. - if get_schedule_raises is not None: - service.get_schedule = AsyncMock(side_effect=get_schedule_raises) - else: - service.get_schedule = AsyncMock(return_value=Mock(spec=ScheduleResponse)) - - return service, temporal_adapter, authorization_service - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_calls_register_resource_with_parent() -> None: - agent = _agent() - request = _request("nightly") - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - ) - - await service.create_schedule(agent, request) - - authorization_service.register_resource.assert_awaited_once() - registered_resource: AgentexResource = ( - authorization_service.register_resource.await_args.kwargs["resource"] - ) - assert registered_resource.type == AgentexResourceType.schedule - assert registered_resource.selector == build_schedule_id(agent.id, request.name) - registered_parent: AgentexResource = ( - authorization_service.register_resource.await_args.kwargs["parent"] - ) - # parent_agent is load-bearing: without it the authorization cascade from - # the owning agent fails closed for readers. 
- assert registered_parent is not None - assert registered_parent.type == AgentexResourceType.agent - assert registered_parent.selector == agent.id - temporal_adapter.create_schedule.assert_awaited_once() - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_delete_schedule_calls_deregister_resource() -> None: - agent = _agent() - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - ) - - await service.delete_schedule(agent.id, "nightly") - - schedule_id = build_schedule_id(agent.id, "nightly") - temporal_adapter.delete_schedule.assert_awaited_once_with(schedule_id) - authorization_service.deregister_resource.assert_awaited_once() - deregistered_resource: AgentexResource = ( - authorization_service.deregister_resource.await_args.kwargs["resource"] - ) - assert deregistered_resource.type == AgentexResourceType.schedule - assert deregistered_resource.selector == schedule_id - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_register_failure_prevents_temporal_create() -> None: - register = AsyncMock(side_effect=RuntimeError("authz unavailable")) - agent = _agent() - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - register_resource=register, - ) - - with pytest.raises(RuntimeError, match="authz unavailable"): - await service.create_schedule(agent, _request()) - - temporal_adapter.create_schedule.assert_not_awaited() - authorization_service.deregister_resource.assert_not_awaited() - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_temporal_failure_triggers_compensating_deregister() -> ( - None -): - agent = _agent() - request = _request("nightly") - service, _, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - create_raises=RuntimeError("temporal down"), - ) - - with pytest.raises(RuntimeError, match="temporal down"): - await 
service.create_schedule(agent, request) - - authorization_service.register_resource.assert_awaited_once() - # The schedule never landed in Temporal, so the auth entry is cleaned up. - authorization_service.deregister_resource.assert_awaited_once() - compensated: AgentexResource = ( - authorization_service.deregister_resource.await_args.kwargs["resource"] - ) - assert compensated.type == AgentexResourceType.schedule - assert compensated.selector == build_schedule_id(agent.id, request.name) - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_readback_failure_does_not_compensate() -> None: - # The Temporal create succeeded but the post-create describe failed. The - # schedule genuinely exists, so the auth entry must survive the read-back - # error. - agent = _agent() - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - get_schedule_raises=RuntimeError("describe transient error"), - ) - - with pytest.raises(RuntimeError, match="describe transient error"): - await service.create_schedule(agent, _request()) - - temporal_adapter.create_schedule.assert_awaited_once() - authorization_service.register_resource.assert_awaited_once() - authorization_service.deregister_resource.assert_not_awaited() - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_delete_schedule_deregister_failure_does_not_block_delete() -> None: - deregister = AsyncMock(side_effect=RuntimeError("authz unavailable")) - agent = _agent() - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id="user-A"), - deregister_resource=deregister, - ) - - # Best-effort cleanup: a deregister failure is swallowed after Temporal - # delete succeeds. 
- await service.delete_schedule(agent.id, "nightly") - - temporal_adapter.delete_schedule.assert_awaited_once_with( - build_schedule_id(agent.id, "nightly") - ) - authorization_service.deregister_resource.assert_awaited_once() - - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_create_schedule_no_creator_skips_auth_writes() -> None: - agent = _agent() - request = _request("nightly") - # Neither user_id nor service_account_id: internal paths still create the - # schedule, but there is no creator identity to register as owner. - service, temporal_adapter, authorization_service = _build_service( - principal=_principal(user_id=None, service_account_id=None), - ) - - await service.create_schedule(agent, request) - - authorization_service.register_resource.assert_not_awaited() - authorization_service.deregister_resource.assert_not_awaited() - temporal_adapter.create_schedule.assert_awaited_once() diff --git a/agentex/tests/unit/api/test_schedules_authz.py b/agentex/tests/unit/api/test_schedules_authz.py deleted file mode 100644 index 0a07a303..00000000 --- a/agentex/tests/unit/api/test_schedules_authz.py +++ /dev/null @@ -1,399 +0,0 @@ -"""Tests for the agent_schedule route migration to fine-grained authorization. - -Mirrors the structure of the agent_api_key and task route-authorization tests. -Covers: - - 1. The ``_check_schedule_or_collapse_to_404`` helper. - 2. ``DAuthorizedScheduleId`` builds the composite ``{agent_id}--{schedule_name}`` - selector, returns the schedule name when allowed, and preserves 403 for - denied operations on readable schedules. - 3. ``create_schedule`` enforces parent ``agent.update`` (the only route where - no schedule resource exists yet, so the authorization service can't - transitively gate it). - 4. ``ScheduleService.list_schedules`` filters to the authorized id set, with - ``None`` (bypass) returning everything and ``[]`` returning nothing. 
- -Cross-tenant and transitive-expansion checks belong in an end-to-end suite -gated on a live authorization-service cluster (the ``agent_schedule.update`` -permission transitively requires ``parent_agent->update`` in the authorization -policy, which this repo does not own). Here we only assert that the route layer -issues the correct ``check`` call with the correct operation. -""" - -from __future__ import annotations - -from types import SimpleNamespace -from unittest.mock import AsyncMock, MagicMock - -import pytest -from src.adapters.authorization.exceptions import AuthorizationError -from src.adapters.crud_store.exceptions import ItemDoesNotExist -from src.api.schemas.authorization_types import ( - AgentexResource, - AuthorizedOperationType, -) -from src.domain.services.schedule_service import ScheduleService, build_schedule_id -from src.utils.schedule_authorization import _check_schedule_or_collapse_to_404 - - -def _dep_callable(annotation): - """Pull the inner FastAPI dependency function out of an ``Annotated[str, Depends(...)]``.""" - return annotation.__metadata__[0].dependency - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestCheckScheduleOrCollapseTo404: - """The schedule-resource authz wrap hides unreadable schedules.""" - - async def test_allowed_check_returns_normally(self): - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - - await _check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.read, - ) - - authorization.check.assert_awaited_once() - called_kwargs = authorization.check.await_args.kwargs - assert called_kwargs["resource"] == AgentexResource.schedule("agent-1--nightly") - assert called_kwargs["operation"] == AuthorizedOperationType.read - - async def test_denied_read_collapses_to_not_found(self): - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - - with pytest.raises(ItemDoesNotExist): - await 
_check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.read, - ) - - authorization.check.assert_awaited_once() - - async def test_denied_non_read_collapses_to_not_found_when_read_denied(self): - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - - with pytest.raises(ItemDoesNotExist): - await _check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.delete, - ) - - assert authorization.check.await_count == 2 - first_call, second_call = authorization.check.await_args_list - assert first_call.kwargs["operation"] == AuthorizedOperationType.delete - assert second_call.kwargs["operation"] == AuthorizedOperationType.read - - async def test_denied_non_read_surfaces_authorization_error_when_read_allowed(self): - authorization = MagicMock() - operation_denied = AuthorizationError("denied") - authorization.check = AsyncMock(side_effect=[operation_denied, True]) - - with pytest.raises(AuthorizationError) as exc_info: - await _check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.delete, - ) - - assert exc_info.value is operation_denied - - async def test_forwards_operation_verbatim(self): - """The transitive expansion for ``update``/``delete`` in the - authorization policy is what bundles in the ``parent_agent->update`` - factor — the helper just needs to forward the operation.""" - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - - await _check_schedule_or_collapse_to_404( - authorization, - "agent-1--nightly", - AuthorizedOperationType.update, - ) - - called_kwargs = authorization.check.await_args.kwargs - assert called_kwargs["operation"] == AuthorizedOperationType.update - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestSingleResourceRouteAuthz: - """The single-resource routes (get/pause/unpause/trigger/delete) check the - schedule resource on 
the composite ``{agent_id}--{schedule_name}`` selector - inline, mirroring the agent_api_key name routes. - Verifies per-route operation routing and that a denial skips the use case.""" - - async def test_get_authorized_checks_read_and_calls_use_case(self): - from src.api.routes.schedules import get_schedule - - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - use_case = MagicMock() - use_case.get_schedule = AsyncMock(return_value=MagicMock()) - - await get_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - ) - - called = authorization.check.await_args.kwargs - assert called["resource"] == AgentexResource.schedule( - build_schedule_id("agent-1", "nightly") - ) - assert called["operation"] == AuthorizedOperationType.read - use_case.get_schedule.assert_awaited_once_with("agent-1", "nightly") - - async def test_get_denied_collapses_to_404_and_skips_use_case(self): - from src.api.routes.schedules import get_schedule - - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - use_case = MagicMock() - use_case.get_schedule = AsyncMock() - - with pytest.raises(ItemDoesNotExist): - await get_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - ) - # The check runs before the Temporal lookup, so a denial never reaches it. 
- use_case.get_schedule.assert_not_called() - - async def test_pause_uses_update_op(self): - from src.api.routes.schedules import pause_schedule - - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - use_case = MagicMock() - use_case.pause_schedule = AsyncMock(return_value=MagicMock()) - - await pause_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - request=None, - ) - - called = authorization.check.await_args.kwargs - assert called["resource"] == AgentexResource.schedule("agent-1--nightly") - assert called["operation"] == AuthorizedOperationType.update - - async def test_delete_uses_delete_op_and_denied_skips_delete(self): - from src.api.routes.schedules import delete_schedule - - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - use_case = MagicMock() - use_case.delete_schedule = AsyncMock() - - with pytest.raises(ItemDoesNotExist): - await delete_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - ) - use_case.delete_schedule.assert_not_called() - assert authorization.check.await_count == 2 - first_call, second_call = authorization.check.await_args_list - assert first_call.kwargs["operation"] == AuthorizedOperationType.delete - assert second_call.kwargs["operation"] == AuthorizedOperationType.read - - async def test_delete_denied_when_readable_surfaces_authorization_error(self): - from src.api.routes.schedules import delete_schedule - - authorization = MagicMock() - authorization.check = AsyncMock( - side_effect=[AuthorizationError("delete denied"), True] - ) - use_case = MagicMock() - use_case.delete_schedule = AsyncMock() - - with pytest.raises(AuthorizationError): - await delete_schedule( - agent_id="agent-1", - schedule_name="nightly", - schedules_use_case=use_case, - authorization=authorization, - ) - 
use_case.delete_schedule.assert_not_called() - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestCreateParentAgentCheck: - """``create_schedule`` is the only route where no schedule resource exists - yet, so the authorization service cannot transitively gate on it. The - route's ``agent_id`` guard MUST check ``agent.update`` on the parent, and a - denial collapses to 404 when the parent is unreadable. A caller who can - read the parent but not update it sees 403.""" - - @staticmethod - def _agent_id_dep(): - from src.api.routes.schedules import create_schedule - - return _dep_callable(create_schedule.__annotations__["agent_id"]) - - async def test_create_checks_parent_agent_update(self): - dep = self._agent_id_dep() - - authorization = MagicMock() - authorization.check = AsyncMock(return_value=True) - - # Repos are unused on the agent path; bind resource_id by keyword so an - # added repo dep fails loudly instead of silently mis-binding the id. - result = await dep( - authorization, MagicMock(), MagicMock(), MagicMock(), resource_id="agent-1" - ) - - assert result == "agent-1" - called_kwargs = authorization.check.await_args.kwargs - assert called_kwargs["resource"] == AgentexResource.agent("agent-1") - assert called_kwargs["operation"] == AuthorizedOperationType.update - - async def test_create_denied_collapses_to_404(self): - dep = self._agent_id_dep() - - authorization = MagicMock() - authorization.check = AsyncMock(side_effect=AuthorizationError("denied")) - - # Parent-agent denial collapses to 404 so creating a schedule under an - # agent in another tenant can't reveal that the agent exists. 
- with pytest.raises(ItemDoesNotExist): - await dep( - authorization, - MagicMock(), - MagicMock(), - MagicMock(), - resource_id="agent-1", - ) - - async def test_create_denied_when_parent_readable_surfaces_authorization_error( - self, - ): - dep = self._agent_id_dep() - - authorization = MagicMock() - authorization.check = AsyncMock( - side_effect=[AuthorizationError("update denied"), True] - ) - - with pytest.raises(AuthorizationError): - await dep( - authorization, - MagicMock(), - MagicMock(), - MagicMock(), - resource_id="agent-1", - ) - - -def _fake_schedule(schedule_id: str, *, paused: bool = False): - """Minimal stand-in for a Temporal schedule list entry. - - ``list_schedules`` reads ``.id`` and ``.info.{action,next_action_times, - paused}``; a non-``ScheduleActionStartWorkflow`` action yields - ``workflow_name=None`` and an empty ``next_action_times`` yields - ``next_action_time=None``, both valid for ``ScheduleListItem``. - """ - info = SimpleNamespace(action=None, next_action_times=[], paused=paused) - return SimpleNamespace(id=schedule_id, info=info) - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestListOwnershipFiltering: - """``ScheduleService.list_schedules`` filters the Temporal page to the - authorized id set. 
``None`` (authz bypass) returns everything; ``[]`` (caller - owns nothing) returns nothing — gating on ``is not None``, not truthiness.""" - - @staticmethod - def _service(): - temporal_adapter = MagicMock() - temporal_adapter.list_schedules = AsyncMock( - return_value=[ - _fake_schedule("agent-1--alpha"), - _fake_schedule("agent-1--beta"), - _fake_schedule("agent-2--gamma"), - ] - ) - return ScheduleService( - temporal_adapter=temporal_adapter, - authorization_service=MagicMock(), - ) - - async def test_none_returns_all_for_agent(self): - service = self._service() - - response = await service.list_schedules( - agent_id="agent-1", authorized_schedule_ids=None - ) - - ids = {item.schedule_id for item in response.schedules} - assert ids == {"agent-1--alpha", "agent-1--beta"} - - async def test_empty_list_returns_nothing(self): - service = self._service() - - response = await service.list_schedules( - agent_id="agent-1", authorized_schedule_ids=[] - ) - - assert response.schedules == [] - assert response.total == 0 - - async def test_subset_filters_to_authorized_ids(self): - service = self._service() - - response = await service.list_schedules( - agent_id="agent-1", authorized_schedule_ids=["agent-1--alpha"] - ) - - ids = {item.schedule_id for item in response.schedules} - assert ids == {"agent-1--alpha"} - - async def test_authorized_id_under_other_agent_is_excluded(self): - """The agent_id scope is applied first, so an authorized id belonging to - a different agent never leaks into this agent's listing.""" - service = self._service() - - response = await service.list_schedules( - agent_id="agent-1", authorized_schedule_ids=["agent-2--gamma"] - ) - - assert response.schedules == [] - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestUseCaseForwardsAuthorizedIds: - """The use case is a thin pass-through; it must forward the ownership filter - to the service unchanged.""" - - async def test_list_forwards_authorized_schedule_ids(self): - from 
src.domain.use_cases.schedules_use_case import SchedulesUseCase
-
-        schedule_service = MagicMock()
-        schedule_service.list_schedules = AsyncMock(return_value=MagicMock())
-        use_case = SchedulesUseCase(schedule_service=schedule_service)
-
-        await use_case.list_schedules(
-            "agent-1", page_size=50, authorized_schedule_ids=["agent-1--alpha"]
-        )
-
-        schedule_service.list_schedules.assert_awaited_once_with(
-            agent_id="agent-1",
-            page_size=50,
-            authorized_schedule_ids=["agent-1--alpha"],
-        )
diff --git a/agentex/tests/unit/config/test_agent_run_schedules_env.py b/agentex/tests/unit/config/test_agent_run_schedules_env.py
new file mode 100644
index 00000000..6842f9d5
--- /dev/null
+++ b/agentex/tests/unit/config/test_agent_run_schedules_env.py
@@ -0,0 +1,24 @@
+import pytest
+from src.config.environment_variables import EnvironmentVariables
+
+
+@pytest.mark.unit
+def test_agent_run_schedules_flag_parses_enabled(monkeypatch):
+    monkeypatch.setenv("ENABLE_AGENT_RUN_SCHEDULES", "true")
+
+    # NOTE(review): monkeypatch restores the env var after the test, but whatever
+    # refresh(force_refresh=True) caches presumably is not reset — confirm no
+    # later test depends on the default value.
+    env = EnvironmentVariables.refresh(force_refresh=True)
+
+    assert env.ENABLE_AGENT_RUN_SCHEDULES is True
+
+
+@pytest.mark.unit
+def test_agent_run_schedules_flag_defaults_disabled(monkeypatch):
+    monkeypatch.delenv("ENABLE_AGENT_RUN_SCHEDULES", raising=False)
+
+    env = EnvironmentVariables.refresh(force_refresh=True)
+
+    # Off by default — the API surface is absent unless an environment opts in.
+    assert env.ENABLE_AGENT_RUN_SCHEDULES is False
diff --git a/agentex/tests/unit/services/test_agent_run_schedule_service.py b/agentex/tests/unit/services/test_agent_run_schedule_service.py
new file mode 100644
index 00000000..e1b577b2
--- /dev/null
+++ b/agentex/tests/unit/services/test_agent_run_schedule_service.py
@@ -0,0 +1,331 @@
+from unittest.mock import AsyncMock, PropertyMock
+from uuid import uuid4
+
+import pytest
+from src.adapters.temporal.exceptions import TemporalScheduleNotFoundError
+from src.api.schemas.agent_run_schedules import (
+    CreateAgentRunScheduleRequest,
+    RunScheduleState,
+    ScheduleInitialInput,
+    UpdateAgentRunScheduleRequest,
+)
+from src.domain.entities.agent_run_schedules import AgentRunScheduleEntity
+from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus
+from src.domain.exceptions import ClientError
+from src.domain.services.agent_run_schedule_service import (
+    AgentRunScheduleService,
+    build_run_schedule_authz_selector,
+    build_run_schedule_temporal_id,
+)
+
+
+@pytest.fixture
+def agent():
+    """Return a READY agent with ``ACPType.ASYNC`` for the service tests."""
+    return AgentEntity(
+        id="agent-123",
+        name="test-agent",
+        description="A test agent",
+        status=AgentStatus.READY,
+        acp_type=ACPType.ASYNC,
+        acp_url="http://acp.example.com",
+    )
+
+
+@pytest.fixture
+def service():
+    """Build the service under test with every collaborator mocked."""
+    temporal_adapter = AsyncMock()
+    # describe_schedule failing keeps _to_response on the persisted-row path.
+    temporal_adapter.describe_schedule.side_effect = RuntimeError("not found yet")
+    authorization_service = AsyncMock()
+    # A PropertyMock only behaves as a property when attached to the mock's
+    # type, not to the mock instance itself.
+    type(authorization_service).principal_context = PropertyMock(
+        return_value={"user_id": "u1", "account_id": "a1"}
+    )
+    schedule_repository = AsyncMock()
+    agent_repository = AsyncMock()
+    return AgentRunScheduleService(
+        temporal_adapter=temporal_adapter,
+        authorization_service=authorization_service,
+        schedule_repository=schedule_repository,
+        agent_repository=agent_repository,
+    )
+
+
+def _request(**overrides) -> CreateAgentRunScheduleRequest:
+    """Build a create request from defaults, with *overrides* replacing fields."""
+    payload: dict = {
+        "name": "daily-summary",
+        "cron_expression": "0 17 * * MON-FRI",
+        "timezone": "America/New_York",
+        "initial_input": ScheduleInitialInput(content="hello"),
+    }
+    payload.update(overrides)
+    return CreateAgentRunScheduleRequest(**payload)
+
+
+def _persisted(agent_id: str, request: CreateAgentRunScheduleRequest):
+    """Build an AgentRunScheduleEntity mirroring *request*, with a fresh UUID id."""
+    return AgentRunScheduleEntity(
+        id=str(uuid4()),
+        agent_id=agent_id,
+        name=request.name,
+        cron_expression=request.cron_expression,
+        interval_seconds=request.interval_seconds,
+        timezone=request.timezone,
+        paused=request.paused,
+        creator_principal={"user_id": "u1", "account_id": "a1"},
+        task_params=request.task_params,
+        task_metadata=request.task_metadata,
+        initial_input=request.initial_input.to_dict(mode="json"),
+    )
+
+
+# NOTE(review): the other test classes visible in this file carry
+# ``@pytest.mark.unit``; this one does not — confirm that is intentional.
+class TestRunScheduleIdHelpers:
+    def test_temporal_id_prefix(self):
+        assert build_run_schedule_temporal_id("row-1") == "agent-run-schedule:row-1"
+
+    def test_authz_selector_distinct_from_bare_schedule(self):
+        # Bare schedules key the shared `schedule` resource as `{agent}--{name}`;
+        # run schedules must not collide with that namespace.
+        selector = build_run_schedule_authz_selector("agent-123", "daily-summary")
+        assert selector == "run-schedule::agent-123::daily-summary"
+        assert selector != "agent-123--daily-summary"
+
+
+@pytest.mark.unit
+@pytest.mark.asyncio
+class TestAgentRunScheduleServiceCreate:
+    async def test_create_persists_and_schedules(self, service, agent):
+        request = _request()
+        persisted = _persisted(agent.id, request)
+        service.schedule_repository.get_by_agent_id_and_name.return_value = None
+        service.schedule_repository.create.return_value = persisted
+
+        response = await service.create_schedule(agent, request, {"user_id": "u1"})
+
+        # Temporal schedule points at the run workflow with only the row id as arg
+        # and the cron timezone passed through (the task queue is not asserted here).
+        create_kwargs = service.temporal_adapter.create_schedule.call_args.kwargs
+        assert create_kwargs["workflow"] == "ScheduledAgentRunWorkflow"
+        assert create_kwargs["args"] == [persisted.id]
+        assert create_kwargs["schedule_id"] == build_run_schedule_temporal_id(
+            persisted.id
+        )
+        assert create_kwargs["time_zone_name"] == "America/New_York"
+
+        # Ownership is registered (order vs. the Temporal write is not asserted).
+        service.authorization_service.register_resource.assert_called_once()
+
+        assert response.name == "daily-summary"
+        assert response.initial_input_method == "event/send"  # async agent
+        assert response.state == RunScheduleState.ACTIVE
+        assert response.initial_input.content == "hello"
+
+    async def test_create_rejects_duplicate_name(self, service, agent):
+        request = _request()
+        service.schedule_repository.get_by_agent_id_and_name.return_value = _persisted(
+            agent.id, request
+        )
+
+        with pytest.raises(ClientError):
+            await service.create_schedule(agent, request, {"user_id": "u1"})
+
+        service.temporal_adapter.create_schedule.assert_not_called()
+
+    async def test_create_rolls_back_row_on_temporal_failure(self, service, agent):
+        request = _request()
+        persisted = _persisted(agent.id, request)
+        service.schedule_repository.get_by_agent_id_and_name.return_value = None
+        service.schedule_repository.create.return_value = persisted
+        service.temporal_adapter.create_schedule.side_effect = RuntimeError("boom")
+
+        with pytest.raises(RuntimeError):
+            await service.create_schedule(agent, request, {"user_id": "u1"})
+
+        # The orphaned row and auth entry are compensated.
+        service.schedule_repository.delete.assert_called_once_with(id=persisted.id)
+        service.authorization_service.deregister_resource.assert_called_once()
+
+    async def test_create_rolls_back_row_on_auth_registration_failure(
+        self, service, agent
+    ):
+        request = _request()
+        persisted = _persisted(agent.id, request)
+        service.schedule_repository.get_by_agent_id_and_name.return_value = None
+        service.schedule_repository.create.return_value = persisted
+        service.authorization_service.register_resource.side_effect = RuntimeError(
+            "authz down"
+        )
+
+        with pytest.raises(RuntimeError):
+            await service.create_schedule(agent, request, {"user_id": "u1"})
+
+        # Auth registration failing must still roll back the persisted row, and
+        # must not create a Temporal schedule.
+        service.schedule_repository.delete.assert_called_once_with(id=persisted.id)
+        service.temporal_adapter.create_schedule.assert_not_called()
+
+
+@pytest.mark.unit
+@pytest.mark.asyncio
+class TestAgentRunScheduleServiceList:
+    async def test_list_filters_by_authorized_selectors(self, service, agent):
+        req_a = _request(name="sched-a")
+        req_b = _request(name="sched-b")
+        rows = [_persisted(agent.id, req_a), _persisted(agent.id, req_b)]
+        service.schedule_repository.list_by_agent_id.return_value = rows
+        service.agent_repository.get.return_value = agent
+
+        # Authorize only sched-a's selector.
+        authorized = [build_run_schedule_authz_selector(agent.id, "sched-a")]
+        result = await service.list_schedules(
+            agent.id, authorized_schedule_ids=authorized
+        )
+
+        assert result.total == 1
+        assert result.run_schedules[0].name == "sched-a"
+
+    async def test_list_none_authorized_means_bypass(self, service, agent):
+        rows = [_persisted(agent.id, _request(name="sched-a"))]
+        service.schedule_repository.list_by_agent_id.return_value = rows
+        service.agent_repository.get.return_value = agent
+
+        result = await service.list_schedules(agent.id, authorized_schedule_ids=None)
+
+        assert result.total == 1
+
+    async def test_list_does_not_fan_out_to_temporal(self, service, agent):
+        # The list path must not issue a describe RPC per row (would scale list
+        # latency with the number of schedules). State comes from the row instead.
+ rows = [ + _persisted(agent.id, _request(name="sched-a")), + _persisted(agent.id, _request(name="sched-b")), + ] + service.schedule_repository.list_by_agent_id.return_value = rows + service.agent_repository.get.return_value = agent + + await service.list_schedules(agent.id, authorized_schedule_ids=None) + + service.temporal_adapter.describe_schedule.assert_not_called() + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestAgentRunScheduleServiceDelete: + async def test_delete_tolerates_missing_temporal_schedule(self, service, agent): + # A prior partial delete (Temporal gone, row survived) must still be + # cleanable: a missing Temporal schedule is treated as success. + row = _persisted(agent.id, _request()) + service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row + service.temporal_adapter.delete_schedule.side_effect = ( + TemporalScheduleNotFoundError(message="gone", detail="gone") + ) + + result = await service.delete_schedule(agent.id, row.name) + + assert result == row.id + service.schedule_repository.delete.assert_called_once_with(id=row.id) + service.authorization_service.deregister_resource.assert_called_once() + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestAgentRunScheduleServicePauseResume: + async def test_pause_tolerates_missing_temporal_schedule(self, service, agent): + row = _persisted(agent.id, _request()) + service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row + service.schedule_repository.update.return_value = row + service.agent_repository.get.return_value = agent + service.temporal_adapter.pause_schedule.side_effect = ( + TemporalScheduleNotFoundError(message="gone", detail="gone") + ) + + response = await service.pause_schedule(agent.id, row.name) + + # The persisted paused flag is still flipped even though the clock is gone. 
+ assert row.paused is True + assert response.paused is True + service.schedule_repository.update.assert_called_once() + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestAgentRunScheduleServiceUpdate: + async def test_update_swaps_cron_for_interval(self, service, agent): + row = _persisted(agent.id, _request()) # cron-based + service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row + service.schedule_repository.update.return_value = row + service.agent_repository.get.return_value = agent + + await service.update_schedule( + agent.id, row.name, UpdateAgentRunScheduleRequest(interval_seconds=120) + ) + + # Setting interval clears cron, and the new cadence is pushed to Temporal. + assert row.cron_expression is None + assert row.interval_seconds == 120 + update_kwargs = service.temporal_adapter.update_schedule.call_args.kwargs + assert update_kwargs["interval_seconds"] == 120 + assert update_kwargs["cron_expressions"] is None + + async def test_update_rejects_clearing_all_cadences(self, service, agent): + row = _persisted(agent.id, _request()) # cron-based, no interval + service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row + + # Explicitly nulling cron without supplying an interval leaves no cadence. 
+ with pytest.raises(ClientError): + await service.update_schedule( + agent.id, row.name, UpdateAgentRunScheduleRequest(cron_expression=None) + ) + + service.temporal_adapter.update_schedule.assert_not_called() + + async def test_update_tolerates_missing_temporal_schedule(self, service, agent): + row = _persisted(agent.id, _request()) + service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row + service.schedule_repository.update.return_value = row + service.agent_repository.get.return_value = agent + service.temporal_adapter.update_schedule.side_effect = ( + TemporalScheduleNotFoundError(message="gone", detail="gone") + ) + + response = await service.update_schedule( + agent.id, row.name, UpdateAgentRunScheduleRequest(description="new") + ) + + assert response.description == "new" + + async def test_update_does_not_commit_row_on_temporal_failure(self, service, agent): + # A non-NotFound Temporal failure (rejected cron/timezone or a transient + # outage) must abort before the row is persisted, so the DB can never + # diverge from the clock. Unlike NotFound, this error propagates. 
+ row = _persisted(agent.id, _request()) + service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row + service.temporal_adapter.update_schedule.side_effect = RuntimeError("boom") + + with pytest.raises(RuntimeError): + await service.update_schedule( + agent.id, row.name, UpdateAgentRunScheduleRequest(description="new") + ) + + service.schedule_repository.update.assert_not_called() + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestAgentRunScheduleServiceTrigger: + async def test_trigger_calls_temporal(self, service, agent): + row = _persisted(agent.id, _request()) + service.schedule_repository.get_by_agent_id_and_name_or_raise.return_value = row + service.agent_repository.get.return_value = agent + + await service.trigger_schedule(agent.id, row.name) + + service.temporal_adapter.trigger_schedule.assert_called_once_with( + build_run_schedule_temporal_id(row.id) + ) diff --git a/agentex/tests/unit/services/test_schedule_service.py b/agentex/tests/unit/services/test_schedule_service.py deleted file mode 100644 index 0e0f4a07..00000000 --- a/agentex/tests/unit/services/test_schedule_service.py +++ /dev/null @@ -1,806 +0,0 @@ -from datetime import UTC, datetime, timedelta -from types import SimpleNamespace -from unittest.mock import AsyncMock, MagicMock, Mock -from uuid import uuid4 - -import pytest -from src.adapters.temporal.exceptions import ( - TemporalScheduleAlreadyExistsError, - TemporalScheduleError, - TemporalScheduleNotFoundError, -) -from src.api.schemas.schedules import ( - CreateScheduleRequest, - ScheduleListResponse, - ScheduleResponse, - ScheduleState, -) -from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus -from src.domain.services.schedule_service import ( - SCHEDULE_ID_SEPARATOR, - ScheduleService, - build_schedule_id, - parse_schedule_id, -) -from temporalio.client import ( - Schedule, - ScheduleActionStartWorkflow, - ScheduleDescription, - ScheduleInfo, - ScheduleIntervalSpec, - ScheduleSpec, -) -from 
temporalio.client import ( - ScheduleState as TemporalScheduleState, -) - - -@pytest.fixture -def mock_temporal_adapter(): - """Mock Temporal adapter for testing schedule service""" - mock = AsyncMock() - mock.create_schedule = AsyncMock() - mock.describe_schedule = AsyncMock() - mock.list_schedules = AsyncMock() - mock.pause_schedule = AsyncMock() - mock.unpause_schedule = AsyncMock() - mock.trigger_schedule = AsyncMock() - mock.delete_schedule = AsyncMock() - return mock - - -@pytest.fixture -def mock_authorization_service(): - """Mock authorization service with a resolvable creator principal.""" - mock = Mock() - mock.principal_context = SimpleNamespace( - user_id="user-test", service_account_id=None, account_id="acct-test" - ) - mock.register_resource = AsyncMock(return_value=None) - mock.deregister_resource = AsyncMock(return_value=None) - return mock - - -@pytest.fixture -def schedule_service(mock_temporal_adapter, mock_authorization_service): - """Create ScheduleService instance with mocked temporal adapter""" - return ScheduleService( - temporal_adapter=mock_temporal_adapter, - authorization_service=mock_authorization_service, - ) - - -@pytest.fixture -def sample_agent(): - """Sample agent entity for testing""" - return AgentEntity( - id=str(uuid4()), - name="test-agent", - description="A test agent for schedule testing", - status=AgentStatus.READY, - acp_type=ACPType.ASYNC, - acp_url="http://test-acp.example.com", - ) - - -@pytest.fixture -def sample_create_schedule_request(): - """Sample schedule creation request""" - return CreateScheduleRequest( - name="weekly-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * 0", - workflow_params={"key": "value"}, - ) - - -@pytest.fixture -def sample_create_schedule_request_interval(): - """Sample schedule creation request with interval""" - return CreateScheduleRequest( - name="interval-task", - workflow_name="test-workflow", - task_queue="test-queue", - 
interval_seconds=3600, - workflow_params={"key": "value"}, - ) - - -def create_mock_schedule_description( - schedule_id: str, - workflow_name: str = "test-workflow", - task_queue: str = "test-queue", - paused: bool = False, - cron_expressions: list[str] | None = None, - intervals: list[ScheduleIntervalSpec] | None = None, -) -> ScheduleDescription: - """Helper to create a mock ScheduleDescription""" - # Create mock action - mock_action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_action.workflow = workflow_name - mock_action.id = f"{schedule_id}-run" - mock_action.task_queue = task_queue - mock_action.args = None - - # Create mock spec - mock_spec = MagicMock(spec=ScheduleSpec) - mock_spec.cron_expressions = cron_expressions or [] - mock_spec.intervals = intervals or [] - mock_spec.start_at = None - mock_spec.end_at = None - - # Create mock state - mock_state = MagicMock(spec=TemporalScheduleState) - mock_state.paused = paused - - # Create mock schedule - mock_schedule = MagicMock(spec=Schedule) - mock_schedule.action = mock_action - mock_schedule.spec = mock_spec - mock_schedule.state = mock_state - - # Create mock info - mock_info = MagicMock(spec=ScheduleInfo) - mock_info.num_actions = 5 - mock_info.num_actions_missed_catchup_window = 0 - mock_info.next_action_times = [datetime.now(UTC) + timedelta(hours=1)] - mock_info.recent_actions = [] - mock_info.create_time = datetime.now(UTC) - - # Create mock description - mock_description = MagicMock(spec=ScheduleDescription) - mock_description.schedule = mock_schedule - mock_description.info = mock_info - - return mock_description - - -@pytest.mark.unit -class TestScheduleIdHelpers: - """Test suite for schedule ID helper functions""" - - def test_build_schedule_id(self): - """Test building schedule ID from agent ID and schedule name""" - agent_id = "agent-123" - schedule_name = "weekly-task" - - result = build_schedule_id(agent_id, schedule_name) - - assert result == 
f"agent-123{SCHEDULE_ID_SEPARATOR}weekly-task" - assert SCHEDULE_ID_SEPARATOR in result - - def test_parse_schedule_id(self): - """Test parsing schedule ID into agent ID and schedule name""" - schedule_id = f"agent-123{SCHEDULE_ID_SEPARATOR}weekly-task" - - agent_id, schedule_name = parse_schedule_id(schedule_id) - - assert agent_id == "agent-123" - assert schedule_name == "weekly-task" - - def test_parse_schedule_id_invalid_format(self): - """Test parsing invalid schedule ID""" - schedule_id = "invalid-id-without-separator" - - agent_id, schedule_name = parse_schedule_id(schedule_id) - - assert agent_id == schedule_id - assert schedule_name == "" - - def test_build_and_parse_roundtrip(self): - """Test that build and parse are inverse operations""" - original_agent_id = "agent-uuid-12345" - original_schedule_name = "my-schedule" - - schedule_id = build_schedule_id(original_agent_id, original_schedule_name) - parsed_agent_id, parsed_schedule_name = parse_schedule_id(schedule_id) - - assert parsed_agent_id == original_agent_id - assert parsed_schedule_name == original_schedule_name - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestScheduleService: - """Test suite for ScheduleService""" - - async def test_create_schedule_with_cron( - self, - schedule_service, - mock_temporal_adapter, - sample_agent, - sample_create_schedule_request, - ): - """Test creating a schedule with cron expression""" - # Given - expected_schedule_id = build_schedule_id( - sample_agent.id, sample_create_schedule_request.name - ) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - workflow_name=sample_create_schedule_request.workflow_name, - task_queue=sample_create_schedule_request.task_queue, - cron_expressions=[sample_create_schedule_request.cron_expression], - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule( - sample_agent, sample_create_schedule_request - ) 
- - # Then - assert result is not None - assert isinstance(result, ScheduleResponse) - assert result.schedule_id == expected_schedule_id - assert result.agent_id == sample_agent.id - assert result.name == sample_create_schedule_request.name - assert ( - result.action.workflow_name == sample_create_schedule_request.workflow_name - ) - assert result.action.task_queue == sample_create_schedule_request.task_queue - - # Verify temporal adapter was called - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs = mock_temporal_adapter.create_schedule.call_args[1] - assert call_kwargs["schedule_id"] == expected_schedule_id - assert call_kwargs["workflow"] == sample_create_schedule_request.workflow_name - assert call_kwargs["task_queue"] == sample_create_schedule_request.task_queue - assert call_kwargs["cron_expressions"] == [ - sample_create_schedule_request.cron_expression - ] - - async def test_create_schedule_with_interval( - self, - schedule_service, - mock_temporal_adapter, - sample_agent, - sample_create_schedule_request_interval, - ): - """Test creating a schedule with interval""" - # Given - expected_schedule_id = build_schedule_id( - sample_agent.id, sample_create_schedule_request_interval.name - ) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - workflow_name=sample_create_schedule_request_interval.workflow_name, - task_queue=sample_create_schedule_request_interval.task_queue, - intervals=[ScheduleIntervalSpec(every=timedelta(seconds=3600))], - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule( - sample_agent, sample_create_schedule_request_interval - ) - - # Then - assert result is not None - assert isinstance(result, ScheduleResponse) - assert result.schedule_id == expected_schedule_id - - # Verify temporal adapter was called with interval - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs 
= mock_temporal_adapter.create_schedule.call_args[1] - assert ( - call_kwargs["interval_seconds"] - == sample_create_schedule_request_interval.interval_seconds - ) - - async def test_create_schedule_with_execution_timeout( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test creating a schedule with execution timeout""" - # Given - request = CreateScheduleRequest( - name="timeout-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - execution_timeout_seconds=3600, - ) - expected_schedule_id = build_schedule_id(sample_agent.id, request.name) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - workflow_name=request.workflow_name, - task_queue=request.task_queue, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule(sample_agent, request) - - # Then - assert result is not None - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs = mock_temporal_adapter.create_schedule.call_args[1] - assert call_kwargs["execution_timeout"] == timedelta(seconds=3600) - - async def test_create_schedule_paused( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test creating a schedule in paused state""" - # Given - request = CreateScheduleRequest( - name="paused-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - paused=True, - ) - expected_schedule_id = build_schedule_id(sample_agent.id, request.name) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - workflow_name=request.workflow_name, - task_queue=request.task_queue, - paused=True, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.state == 
ScheduleState.PAUSED - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs = mock_temporal_adapter.create_schedule.call_args[1] - assert call_kwargs["paused"] is True - - async def test_get_schedule( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test getting a schedule by name""" - # Given - schedule_name = "test-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - workflow_name="test-workflow", - task_queue="test-queue", - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.get_schedule(sample_agent.id, schedule_name) - - # Then - assert result is not None - assert isinstance(result, ScheduleResponse) - assert result.schedule_id == schedule_id - assert result.name == schedule_name - assert result.agent_id == sample_agent.id - mock_temporal_adapter.describe_schedule.assert_called_once_with(schedule_id) - - async def test_get_schedule_paused( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test getting a paused schedule""" - # Given - schedule_name = "paused-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - workflow_name="test-workflow", - task_queue="test-queue", - paused=True, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.get_schedule(sample_agent.id, schedule_name) - - # Then - assert result.state == ScheduleState.PAUSED - - async def test_list_schedules_for_agent( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test listing schedules for a specific agent""" - # Given - schedule_id_1 = build_schedule_id(sample_agent.id, "schedule-1") - schedule_id_2 = build_schedule_id(sample_agent.id, "schedule-2") - 
schedule_id_other = build_schedule_id("other-agent", "schedule-3") - - mock_schedule_1 = MagicMock() - mock_schedule_1.id = schedule_id_1 - mock_schedule_1.info = MagicMock() - mock_schedule_1.info.action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_schedule_1.info.action.workflow = "workflow-1" - mock_schedule_1.info.next_action_times = [datetime.now(UTC)] - mock_schedule_1.info.paused = False - - mock_schedule_2 = MagicMock() - mock_schedule_2.id = schedule_id_2 - mock_schedule_2.info = MagicMock() - mock_schedule_2.info.action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_schedule_2.info.action.workflow = "workflow-2" - mock_schedule_2.info.next_action_times = [] - mock_schedule_2.info.paused = True - - mock_schedule_other = MagicMock() - mock_schedule_other.id = schedule_id_other - mock_schedule_other.info = MagicMock() - mock_schedule_other.info.action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_schedule_other.info.action.workflow = "workflow-3" - mock_schedule_other.info.next_action_times = [] - mock_schedule_other.info.paused = False - - mock_temporal_adapter.list_schedules.return_value = [ - mock_schedule_1, - mock_schedule_2, - mock_schedule_other, - ] - - # When - result = await schedule_service.list_schedules(agent_id=sample_agent.id) - - # Then - assert result is not None - assert isinstance(result, ScheduleListResponse) - assert result.total == 2 # Only schedules for this agent - assert len(result.schedules) == 2 - - schedule_names = [s.name for s in result.schedules] - assert "schedule-1" in schedule_names - assert "schedule-2" in schedule_names - - async def test_list_schedules_all(self, schedule_service, mock_temporal_adapter): - """Test listing all schedules without agent filter""" - # Given - mock_schedule = MagicMock() - mock_schedule.id = "agent-1--schedule-1" - mock_schedule.info = MagicMock() - mock_schedule.info.action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_schedule.info.action.workflow = "workflow-1" - 
mock_schedule.info.next_action_times = [] - mock_schedule.info.paused = False - - mock_temporal_adapter.list_schedules.return_value = [mock_schedule] - - # When - result = await schedule_service.list_schedules(agent_id=None) - - # Then - assert result is not None - assert result.total == 1 - - async def test_pause_schedule( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test pausing a schedule""" - # Given - schedule_name = "active-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - paused=True, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.pause_schedule( - sample_agent.id, schedule_name, note="Maintenance" - ) - - # Then - assert result is not None - assert result.state == ScheduleState.PAUSED - mock_temporal_adapter.pause_schedule.assert_called_once_with( - schedule_id, note="Maintenance" - ) - - async def test_pause_schedule_without_note( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test pausing a schedule without a note""" - # Given - schedule_name = "active-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - paused=True, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.pause_schedule(sample_agent.id, schedule_name) - - # Then - assert result is not None - mock_temporal_adapter.pause_schedule.assert_called_once_with( - schedule_id, note=None - ) - - async def test_unpause_schedule( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test unpausing a schedule""" - # Given - schedule_name = "paused-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - 
schedule_id=schedule_id, - paused=False, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.unpause_schedule( - sample_agent.id, schedule_name, note="Resuming operations" - ) - - # Then - assert result is not None - assert result.state == ScheduleState.ACTIVE - mock_temporal_adapter.unpause_schedule.assert_called_once_with( - schedule_id, note="Resuming operations" - ) - - async def test_unpause_schedule_without_note( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test unpausing a schedule without a note""" - # Given - schedule_name = "paused-schedule" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - paused=False, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.unpause_schedule(sample_agent.id, schedule_name) - - # Then - assert result is not None - mock_temporal_adapter.unpause_schedule.assert_called_once_with( - schedule_id, note=None - ) - - async def test_trigger_schedule( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test triggering a schedule immediately""" - # Given - schedule_name = "scheduled-task" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.trigger_schedule(sample_agent.id, schedule_name) - - # Then - assert result is not None - mock_temporal_adapter.trigger_schedule.assert_called_once_with(schedule_id) - - async def test_delete_schedule( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test deleting a schedule""" - # Given - schedule_name = "schedule-to-delete" - schedule_id = build_schedule_id(sample_agent.id, 
schedule_name) - - # When - await schedule_service.delete_schedule(sample_agent.id, schedule_name) - - # Then - mock_temporal_adapter.delete_schedule.assert_called_once_with(schedule_id) - - async def test_description_to_response_with_workflow_params( - self, schedule_service, sample_agent - ): - """Test converting schedule description with workflow params""" - # Given - schedule_name = "task-with-params" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - - # Create mock with args - mock_action = MagicMock(spec=ScheduleActionStartWorkflow) - mock_action.workflow = "test-workflow" - mock_action.id = f"{schedule_id}-run" - mock_action.task_queue = "test-queue" - - # Mock args with data attribute (simulating Temporal payload) - mock_arg = MagicMock() - mock_arg.data = b'{"key": "value"}' - mock_action.args = [mock_arg] - - mock_spec = MagicMock(spec=ScheduleSpec) - mock_spec.cron_expressions = ["0 0 * * *"] - mock_spec.intervals = [] - mock_spec.start_at = None - mock_spec.end_at = None - - mock_state = MagicMock(spec=TemporalScheduleState) - mock_state.paused = False - - mock_schedule = MagicMock(spec=Schedule) - mock_schedule.action = mock_action - mock_schedule.spec = mock_spec - mock_schedule.state = mock_state - - mock_info = MagicMock(spec=ScheduleInfo) - mock_info.num_actions = 10 - mock_info.num_actions_missed_catchup_window = 1 - mock_info.next_action_times = [] - mock_info.recent_actions = [] - mock_info.create_time = datetime.now(UTC) - - mock_description = MagicMock(spec=ScheduleDescription) - mock_description.schedule = mock_schedule - mock_description.info = mock_info - - # When - result = schedule_service._description_to_response( - schedule_id, mock_description - ) - - # Then - assert result.schedule_id == schedule_id - assert result.name == schedule_name - assert result.agent_id == sample_agent.id - assert result.action.workflow_name == "test-workflow" - assert result.num_actions_taken == 10 - assert result.num_actions_missed == 1 - 
assert result.action.workflow_params == [{"key": "value"}] - - async def test_description_to_response_with_intervals( - self, schedule_service, sample_agent - ): - """Test converting schedule description with interval spec""" - # Given - schedule_name = "interval-task" - schedule_id = build_schedule_id(sample_agent.id, schedule_name) - - mock_description = create_mock_schedule_description( - schedule_id=schedule_id, - intervals=[ - ScheduleIntervalSpec(every=timedelta(seconds=3600)), - ScheduleIntervalSpec(every=timedelta(seconds=7200)), - ], - ) - - # When - result = schedule_service._description_to_response( - schedule_id, mock_description - ) - - # Then - assert result.spec.intervals_seconds == [3600, 7200] - - async def test_create_schedule_with_start_and_end_dates( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test creating a schedule with start and end dates""" - # Given - start_at = datetime.now(UTC) + timedelta(days=1) - end_at = datetime.now(UTC) + timedelta(days=30) - request = CreateScheduleRequest( - name="bounded-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - start_at=start_at, - end_at=end_at, - ) - expected_schedule_id = build_schedule_id(sample_agent.id, request.name) - mock_description = create_mock_schedule_description( - schedule_id=expected_schedule_id, - ) - mock_temporal_adapter.describe_schedule.return_value = mock_description - - # When - result = await schedule_service.create_schedule(sample_agent, request) - - # Then - assert result is not None - mock_temporal_adapter.create_schedule.assert_called_once() - call_kwargs = mock_temporal_adapter.create_schedule.call_args[1] - assert call_kwargs["start_at"] == start_at - assert call_kwargs["end_at"] == end_at - - async def test_create_schedule_already_exists_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that schedule already exists error propagates""" - # Given - request = 
CreateScheduleRequest( - name="existing-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - ) - mock_temporal_adapter.create_schedule.side_effect = ( - TemporalScheduleAlreadyExistsError( - message="Schedule already exists", - detail="Schedule 'existing-task' already exists", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleAlreadyExistsError): - await schedule_service.create_schedule(sample_agent, request) - - async def test_get_schedule_not_found_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that schedule not found error propagates""" - # Given - mock_temporal_adapter.describe_schedule.side_effect = ( - TemporalScheduleNotFoundError( - message="Schedule not found", - detail="Schedule 'nonexistent' not found", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleNotFoundError): - await schedule_service.get_schedule(sample_agent.id, "nonexistent") - - async def test_pause_schedule_not_found_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that pause schedule not found error propagates""" - # Given - mock_temporal_adapter.pause_schedule.side_effect = ( - TemporalScheduleNotFoundError( - message="Schedule not found", - detail="Schedule 'nonexistent' not found", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleNotFoundError): - await schedule_service.pause_schedule(sample_agent.id, "nonexistent") - - async def test_delete_schedule_not_found_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that delete schedule not found error propagates""" - # Given - mock_temporal_adapter.delete_schedule.side_effect = ( - TemporalScheduleNotFoundError( - message="Schedule not found", - detail="Schedule 'nonexistent' not found", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleNotFoundError): - await schedule_service.delete_schedule(sample_agent.id, "nonexistent") - - async def 
test_trigger_schedule_not_found_error( - self, schedule_service, mock_temporal_adapter, sample_agent - ): - """Test that trigger schedule not found error propagates""" - # Given - mock_temporal_adapter.trigger_schedule.side_effect = ( - TemporalScheduleNotFoundError( - message="Schedule not found", - detail="Schedule 'nonexistent' not found", - ) - ) - - # When/Then - with pytest.raises(TemporalScheduleNotFoundError): - await schedule_service.trigger_schedule(sample_agent.id, "nonexistent") - - async def test_list_schedules_error(self, schedule_service, mock_temporal_adapter): - """Test that list schedules error propagates""" - # Given - mock_temporal_adapter.list_schedules.side_effect = TemporalScheduleError( - message="Failed to list schedules", - detail="Temporal connection error", - ) - - # When/Then - with pytest.raises(TemporalScheduleError): - await schedule_service.list_schedules() diff --git a/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py new file mode 100644 index 00000000..666c27ac --- /dev/null +++ b/agentex/tests/unit/temporal/test_scheduled_agent_run_activity.py @@ -0,0 +1,243 @@ +from unittest.mock import AsyncMock, MagicMock +from uuid import uuid4 + +import pytest +import src.temporal.activities.scheduled_agent_run_activities as activities_module +from src.adapters.crud_store.exceptions import ItemDoesNotExist +from src.api.routes.agent_run_schedules import _extract_creator_principal +from src.domain.entities.agent_run_schedules import AgentRunScheduleEntity +from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus +from src.domain.entities.agents_rpc import AgentRPCMethod +from src.domain.entities.task_messages import MessageAuthor, TextContentEntity +from src.domain.entities.tasks import TaskEntity +from src.temporal.activities.scheduled_agent_run_activities import ( + ScheduledAgentRunActivities, + _build_initial_content, +) + + +def 
_agent(acp_type=ACPType.ASYNC, status=AgentStatus.READY): + return AgentEntity( + id="agent-1", + name="test-agent", + description="A test agent", + status=status, + acp_type=acp_type, + acp_url="http://acp.example.com", + ) + + +def _schedule(**overrides) -> AgentRunScheduleEntity: + payload: dict = { + "id": str(uuid4()), + "agent_id": "agent-1", + "name": "daily-summary", + "cron_expression": "0 17 * * *", + "creator_principal": {"user_id": "u1", "account_id": "a1"}, + "initial_input": {"type": "text", "author": "user", "content": "hello"}, + } + payload.update(overrides) + return AgentRunScheduleEntity(**payload) + + +def _fake_use_case(agent, created_task): + use_case = MagicMock() + use_case.agent_repository = AsyncMock() + use_case.agent_repository.get.return_value = agent + use_case.handle_rpc_request = AsyncMock(return_value=created_task) + use_case.task_service = AsyncMock() + # AuthZ check succeeds by default (no-op / allowed). + use_case.authorization_service = AsyncMock() + use_case.authorization_service.check = AsyncMock(return_value=True) + return use_case + + +@pytest.fixture +def activity_instance(monkeypatch): + instance = ScheduledAgentRunActivities( + global_dependencies=MagicMock(), + schedule_repository=AsyncMock(), + ) + return instance + + +def _patch_use_case(monkeypatch, use_case): + monkeypatch.setattr( + activities_module, + "build_acp_use_case_for_principal", + lambda *args, **kwargs: use_case, + ) + + +class TestBuildInitialContent: + def test_builds_text_content(self): + content = _build_initial_content( + {"type": "text", "author": "user", "content": "hi there"} + ) + assert isinstance(content, TextContentEntity) + assert content.content == "hi there" + assert content.author == MessageAuthor.USER + + +class TestExtractCreatorPrincipal: + def test_strips_to_safe_subset(self): + principal = { + "user_id": "u1", + "account_id": "a1", + "principal_type": "user", + # credentials that must never be persisted: + "cookie": "session=abc", + 
"api_key": "sk-123", + "authorization": "Bearer xyz", + } + result = _extract_creator_principal(principal) + assert result == { + "user_id": "u1", + "account_id": "a1", + "principal_type": "user", + } + assert "cookie" not in result + assert "api_key" not in result + assert "authorization" not in result + + def test_none_principal_yields_empty(self): + assert _extract_creator_principal(None) == {} + + +@pytest.mark.unit +@pytest.mark.asyncio +class TestLaunchScheduledAgentRun: + async def test_skips_when_schedule_missing(self, activity_instance): + activity_instance.schedule_repository.get.side_effect = ItemDoesNotExist("x") + + result = await activity_instance.launch_scheduled_agent_run("sched-1", "fire-1") + + assert result["status"] == "skipped" + assert result["reason"] == "schedule_not_found" + + async def test_skips_when_paused(self, activity_instance): + activity_instance.schedule_repository.get.return_value = _schedule(paused=True) + + result = await activity_instance.launch_scheduled_agent_run("sched-1", "fire-1") + + assert result["status"] == "skipped" + assert result["reason"] == "schedule_paused" + + async def test_async_agent_delivers_via_event_send( + self, activity_instance, monkeypatch + ): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + task = TaskEntity(id="task-1", task_metadata={"schedule_id": schedule.id}) + use_case = _fake_use_case(_agent(ACPType.ASYNC), task) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["status"] == "launched" + assert result["method"] == "event/send" + methods = [ + call.kwargs["method"] for call in use_case.handle_rpc_request.call_args_list + ] + assert methods == [AgentRPCMethod.TASK_CREATE, AgentRPCMethod.EVENT_SEND] + # Deterministic task name embeds schedule id + fire id. 
+ create_params = use_case.handle_rpc_request.call_args_list[0].kwargs["params"] + assert create_params.name == f"scheduled-run:{schedule.id}:fire-1" + use_case.task_service.update_task.assert_awaited_once() + # Fire-time authz mirrors the RPC route: agent.execute, then task.create, + # then task.update on the created task — in that order. + from src.api.schemas.authorization_types import ( + AgentexResourceType, + AuthorizedOperationType, + ) + + checks = [ + (c.kwargs["resource"].type, c.kwargs["operation"]) + for c in use_case.authorization_service.check.call_args_list + ] + assert checks == [ + (AgentexResourceType.agent, AuthorizedOperationType.execute), + (AgentexResourceType.task, AuthorizedOperationType.create), + (AgentexResourceType.task, AuthorizedOperationType.update), + ] + + async def test_sync_agent_delivers_via_message_send( + self, activity_instance, monkeypatch + ): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + task = TaskEntity(id="task-1") + use_case = _fake_use_case(_agent(ACPType.SYNC), task) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["method"] == "message/send" + methods = [ + call.kwargs["method"] for call in use_case.handle_rpc_request.call_args_list + ] + assert methods == [AgentRPCMethod.TASK_CREATE, AgentRPCMethod.MESSAGE_SEND] + + async def test_skips_delivery_when_already_delivered( + self, activity_instance, monkeypatch + ): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + # Retry case: the deterministic task already carries the delivered marker. 
+ task = TaskEntity( + id="task-1", task_metadata={"scheduled_input_delivered": True} + ) + use_case = _fake_use_case(_agent(ACPType.ASYNC), task) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["status"] == "skipped" + assert result["reason"] == "input_already_delivered" + # Only task/create ran; no second delivery call. + assert use_case.handle_rpc_request.call_count == 1 + use_case.task_service.update_task.assert_not_awaited() + + async def test_skips_when_creator_permission_revoked( + self, activity_instance, monkeypatch + ): + from src.adapters.authorization.exceptions import AuthorizationError + + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + use_case = _fake_use_case(_agent(ACPType.ASYNC), TaskEntity(id="t")) + # Creator's create permission was revoked since the schedule was made. + use_case.authorization_service.check = AsyncMock( + side_effect=AuthorizationError(message="forbidden") + ) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["status"] == "skipped" + assert result["reason"] == "permission_denied" + # Denied before any task creation. 
+ use_case.handle_rpc_request.assert_not_called() + + async def test_skips_when_agent_deleted(self, activity_instance, monkeypatch): + schedule = _schedule() + activity_instance.schedule_repository.get.return_value = schedule + use_case = _fake_use_case( + _agent(ACPType.ASYNC, status=AgentStatus.DELETED), TaskEntity(id="t") + ) + _patch_use_case(monkeypatch, use_case) + + result = await activity_instance.launch_scheduled_agent_run( + schedule.id, "fire-1" + ) + + assert result["status"] == "skipped" + assert result["reason"] == "agent_deleted" diff --git a/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py b/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py new file mode 100644 index 00000000..bf4b088d --- /dev/null +++ b/agentex/tests/unit/use_cases/test_agent_run_schedules_use_case.py @@ -0,0 +1,118 @@ +from unittest.mock import AsyncMock +from uuid import uuid4 + +import pytest +from src.api.schemas.agent_run_schedules import ( + CreateAgentRunScheduleRequest, + ScheduleInitialInput, + UpdateAgentRunScheduleRequest, +) +from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus +from src.domain.exceptions import ClientError +from src.domain.use_cases.agent_run_schedules_use_case import ( + AgentRunSchedulesUseCase, +) + + +@pytest.fixture +def mock_service(): + mock = AsyncMock() + return mock + + +@pytest.fixture +def use_case(mock_service): + return AgentRunSchedulesUseCase(run_schedule_service=mock_service) + + +@pytest.fixture +def agent(): + return AgentEntity( + id=str(uuid4()), + name="test-agent", + description="A test agent", + status=AgentStatus.READY, + acp_type=ACPType.ASYNC, + acp_url="http://acp.example.com", + ) + + +def _request(**overrides) -> CreateAgentRunScheduleRequest: + payload: dict = { + "name": "daily-summary", + "cron_expression": "0 17 * * MON-FRI", + "initial_input": ScheduleInitialInput(content="hello"), + } + payload.update(overrides) + return CreateAgentRunScheduleRequest(**payload) + 
+ +@pytest.mark.unit +@pytest.mark.asyncio +class TestAgentRunSchedulesUseCase: + async def test_create_with_cron_delegates(self, use_case, mock_service, agent): + request = _request() + mock_service.create_schedule.return_value = "ok" + creator = {"user_id": "u1", "account_id": "a1"} + + result = await use_case.create_schedule(agent, request, creator) + + assert result == "ok" + mock_service.create_schedule.assert_called_once_with(agent, request, creator) + + async def test_create_with_interval_delegates(self, use_case, mock_service, agent): + request = _request(cron_expression=None, interval_seconds=30) + mock_service.create_schedule.return_value = "ok" + + await use_case.create_schedule(agent, request, {"user_id": "u1"}) + + mock_service.create_schedule.assert_called_once() + + async def test_create_requires_a_cadence(self, use_case, agent): + request = _request(cron_expression=None, interval_seconds=None) + + with pytest.raises(ClientError) as exc: + await use_case.create_schedule(agent, request, {"user_id": "u1"}) + + assert "cron_expression or interval_seconds" in str(exc.value) + + async def test_create_rejects_both_cadences(self, use_case, agent): + request = _request(cron_expression="0 0 * * *", interval_seconds=30) + + with pytest.raises(ClientError) as exc: + await use_case.create_schedule(agent, request, {"user_id": "u1"}) + + assert "only one" in str(exc.value) + + async def test_pause_resume_delete_delegate(self, use_case, mock_service, agent): + await use_case.pause_schedule(agent.id, "daily-summary", note="n") + mock_service.pause_schedule.assert_called_once_with( + agent.id, "daily-summary", note="n" + ) + + await use_case.resume_schedule(agent.id, "daily-summary") + mock_service.resume_schedule.assert_called_once_with( + agent.id, "daily-summary", note=None + ) + + await use_case.delete_schedule(agent.id, "daily-summary") + mock_service.delete_schedule.assert_called_once_with(agent.id, "daily-summary") + + async def test_update_delegates(self, 
use_case, mock_service, agent): + request = UpdateAgentRunScheduleRequest(interval_seconds=120) + await use_case.update_schedule(agent.id, "daily-summary", request) + mock_service.update_schedule.assert_called_once_with( + agent.id, "daily-summary", request + ) + + async def test_update_rejects_both_cadences(self, use_case, agent): + request = UpdateAgentRunScheduleRequest( + cron_expression="0 0 * * *", interval_seconds=30 + ) + with pytest.raises(ClientError) as exc: + await use_case.update_schedule(agent.id, "daily-summary", request) + assert "only one" in str(exc.value) + + async def test_trigger_delegates(self, use_case, mock_service, agent): + await use_case.trigger_schedule(agent.id, "daily-summary") + mock_service.trigger_schedule.assert_called_once_with(agent.id, "daily-summary") diff --git a/agentex/tests/unit/use_cases/test_schedules_use_case.py b/agentex/tests/unit/use_cases/test_schedules_use_case.py deleted file mode 100644 index 5812c7cf..00000000 --- a/agentex/tests/unit/use_cases/test_schedules_use_case.py +++ /dev/null @@ -1,624 +0,0 @@ -from datetime import UTC, datetime, timedelta -from unittest.mock import AsyncMock -from uuid import uuid4 - -import pytest -from src.api.schemas.schedules import ( - CreateScheduleRequest, - ScheduleActionInfo, - ScheduleListItem, - ScheduleListResponse, - ScheduleResponse, - ScheduleSpecInfo, - ScheduleState, -) -from src.domain.entities.agents import ACPType, AgentEntity, AgentStatus -from src.domain.exceptions import ClientError -from src.domain.use_cases.schedules_use_case import SchedulesUseCase - - -@pytest.fixture -def mock_schedule_service(): - """Mock schedule service for testing use case""" - mock = AsyncMock() - mock.create_schedule = AsyncMock() - mock.get_schedule = AsyncMock() - mock.list_schedules = AsyncMock() - mock.pause_schedule = AsyncMock() - mock.unpause_schedule = AsyncMock() - mock.trigger_schedule = AsyncMock() - mock.delete_schedule = AsyncMock() - return mock - - -@pytest.fixture -def 
schedules_use_case(mock_schedule_service): - """Create SchedulesUseCase instance with mocked service""" - return SchedulesUseCase(schedule_service=mock_schedule_service) - - -@pytest.fixture -def sample_agent(): - """Sample agent entity for testing""" - return AgentEntity( - id=str(uuid4()), - name="test-agent", - description="A test agent for use case testing", - status=AgentStatus.READY, - acp_type=ACPType.ASYNC, - acp_url="http://test-acp.example.com", - ) - - -@pytest.fixture -def sample_schedule_response(sample_agent): - """Sample schedule response for testing""" - return ScheduleResponse( - schedule_id=f"{sample_agent.id}--weekly-task", - name="weekly-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--weekly-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * 0"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - num_actions_taken=5, - num_actions_missed=0, - next_action_times=[datetime.now(UTC) + timedelta(hours=1)], - last_action_time=datetime.now(UTC) - timedelta(days=1), - created_at=datetime.now(UTC) - timedelta(days=7), - ) - - -@pytest.mark.unit -@pytest.mark.asyncio -class TestSchedulesUseCase: - """Test suite for SchedulesUseCase""" - - async def test_create_schedule_with_cron( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test creating a schedule with cron expression""" - # Given - request = CreateScheduleRequest( - name="weekly-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * 0", - ) - mock_schedule_service.create_schedule.return_value = sample_schedule_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert isinstance(result, ScheduleResponse) - assert 
result.name == "weekly-task" - assert result.state == ScheduleState.ACTIVE - mock_schedule_service.create_schedule.assert_called_once_with( - sample_agent, request - ) - - async def test_create_schedule_with_interval( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule with interval""" - # Given - request = CreateScheduleRequest( - name="interval-task", - workflow_name="test-workflow", - task_queue="test-queue", - interval_seconds=3600, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--interval-task", - name="interval-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--interval-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=[], - intervals_seconds=[3600], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = expected_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.spec.intervals_seconds == [3600] - mock_schedule_service.create_schedule.assert_called_once() - - async def test_create_schedule_validation_error_no_schedule_spec( - self, schedules_use_case, sample_agent - ): - """Test that creating a schedule without cron or interval raises error""" - # Given - request = CreateScheduleRequest( - name="invalid-task", - workflow_name="test-workflow", - task_queue="test-queue", - # Neither cron_expression nor interval_seconds provided - ) - - # When/Then - with pytest.raises(ClientError) as exc_info: - await schedules_use_case.create_schedule(sample_agent, request) - - assert "Either cron_expression or interval_seconds must be provided" in str( - exc_info.value - ) - - async def test_create_schedule_with_both_cron_and_interval( - self, schedules_use_case, mock_schedule_service, 
sample_agent - ): - """Test creating a schedule with both cron and interval (should succeed)""" - # Given - having both is valid, cron takes precedence - request = CreateScheduleRequest( - name="combined-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - interval_seconds=3600, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--combined-task", - name="combined-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--combined-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * *"], - intervals_seconds=[3600], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = expected_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - mock_schedule_service.create_schedule.assert_called_once() - - async def test_create_schedule_with_workflow_params( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule with workflow parameters""" - # Given - workflow_params = { - "input_data": "test", - "config": {"timeout": 300, "retries": 3}, - } - request = CreateScheduleRequest( - name="params-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - workflow_params=workflow_params, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--params-task", - name="params-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--params-task-run", - task_queue="test-queue", - workflow_params=[workflow_params], - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * *"], - intervals_seconds=[], - 
start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = expected_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.action.workflow_params == [workflow_params] - - async def test_get_schedule( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test getting a schedule by name""" - # Given - mock_schedule_service.get_schedule.return_value = sample_schedule_response - - # When - result = await schedules_use_case.get_schedule(sample_agent.id, "weekly-task") - - # Then - assert result is not None - assert result.name == "weekly-task" - assert result.agent_id == sample_agent.id - mock_schedule_service.get_schedule.assert_called_once_with( - sample_agent.id, "weekly-task" - ) - - async def test_list_schedules( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test listing schedules for an agent""" - # Given - expected_response = ScheduleListResponse( - schedules=[ - ScheduleListItem( - schedule_id=f"{sample_agent.id}--schedule-1", - name="schedule-1", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - workflow_name="workflow-1", - next_action_time=datetime.now(UTC), - ), - ScheduleListItem( - schedule_id=f"{sample_agent.id}--schedule-2", - name="schedule-2", - agent_id=sample_agent.id, - state=ScheduleState.PAUSED, - workflow_name="workflow-2", - next_action_time=None, - ), - ], - total=2, - ) - mock_schedule_service.list_schedules.return_value = expected_response - - # When - result = await schedules_use_case.list_schedules(sample_agent.id) - - # Then - assert result is not None - assert result.total == 2 - assert len(result.schedules) == 2 - mock_schedule_service.list_schedules.assert_called_once_with( - agent_id=sample_agent.id, page_size=100, authorized_schedule_ids=None - ) - - async def test_list_schedules_with_page_size( - self, 
schedules_use_case, mock_schedule_service, sample_agent - ): - """Test listing schedules with custom page size""" - # Given - expected_response = ScheduleListResponse(schedules=[], total=0) - mock_schedule_service.list_schedules.return_value = expected_response - - # When - result = await schedules_use_case.list_schedules(sample_agent.id, page_size=50) - - # Then - assert result is not None - mock_schedule_service.list_schedules.assert_called_once_with( - agent_id=sample_agent.id, page_size=50, authorized_schedule_ids=None - ) - - async def test_pause_schedule( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test pausing a schedule""" - # Given - paused_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--weekly-task", - name="weekly-task", - agent_id=sample_agent.id, - state=ScheduleState.PAUSED, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--weekly-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * 0"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.pause_schedule.return_value = paused_response - - # When - result = await schedules_use_case.pause_schedule( - sample_agent.id, "weekly-task", note="Maintenance window" - ) - - # Then - assert result is not None - assert result.state == ScheduleState.PAUSED - mock_schedule_service.pause_schedule.assert_called_once_with( - sample_agent.id, "weekly-task", note="Maintenance window" - ) - - async def test_pause_schedule_without_note( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test pausing a schedule without a note""" - # Given - paused_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--weekly-task", - name="weekly-task", - agent_id=sample_agent.id, - state=ScheduleState.PAUSED, - action=ScheduleActionInfo( - workflow_name="test-workflow", - 
workflow_id_prefix=f"{sample_agent.id}--weekly-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * 0"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.pause_schedule.return_value = paused_response - - # When - result = await schedules_use_case.pause_schedule(sample_agent.id, "weekly-task") - - # Then - assert result is not None - mock_schedule_service.pause_schedule.assert_called_once_with( - sample_agent.id, "weekly-task", note=None - ) - - async def test_unpause_schedule( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test unpausing a schedule""" - # Given - mock_schedule_service.unpause_schedule.return_value = sample_schedule_response - - # When - result = await schedules_use_case.unpause_schedule( - sample_agent.id, "weekly-task", note="Maintenance complete" - ) - - # Then - assert result is not None - assert result.state == ScheduleState.ACTIVE - mock_schedule_service.unpause_schedule.assert_called_once_with( - sample_agent.id, "weekly-task", note="Maintenance complete" - ) - - async def test_unpause_schedule_without_note( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test unpausing a schedule without a note""" - # Given - mock_schedule_service.unpause_schedule.return_value = sample_schedule_response - - # When - result = await schedules_use_case.unpause_schedule( - sample_agent.id, "weekly-task" - ) - - # Then - assert result is not None - mock_schedule_service.unpause_schedule.assert_called_once_with( - sample_agent.id, "weekly-task", note=None - ) - - async def test_trigger_schedule( - self, - schedules_use_case, - mock_schedule_service, - sample_agent, - sample_schedule_response, - ): - """Test triggering a schedule immediately""" - # Given - mock_schedule_service.trigger_schedule.return_value = sample_schedule_response - - 
# When - result = await schedules_use_case.trigger_schedule( - sample_agent.id, "weekly-task" - ) - - # Then - assert result is not None - mock_schedule_service.trigger_schedule.assert_called_once_with( - sample_agent.id, "weekly-task" - ) - - async def test_delete_schedule( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test deleting a schedule""" - # Given - mock_schedule_service.delete_schedule.return_value = None - - # When - await schedules_use_case.delete_schedule(sample_agent.id, "weekly-task") - - # Then - mock_schedule_service.delete_schedule.assert_called_once_with( - sample_agent.id, "weekly-task" - ) - - async def test_create_schedule_paused( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule in paused state""" - # Given - request = CreateScheduleRequest( - name="paused-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - paused=True, - ) - paused_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--paused-task", - name="paused-task", - agent_id=sample_agent.id, - state=ScheduleState.PAUSED, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--paused-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * *"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = paused_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.state == ScheduleState.PAUSED - - async def test_create_schedule_with_execution_timeout( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule with execution timeout""" - # Given - request = CreateScheduleRequest( - name="timeout-task", - workflow_name="test-workflow", - 
task_queue="test-queue", - cron_expression="0 0 * * *", - execution_timeout_seconds=7200, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--timeout-task", - name="timeout-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--timeout-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * *"], - intervals_seconds=[], - start_at=None, - end_at=None, - ), - ) - mock_schedule_service.create_schedule.return_value = expected_response - - # When - result = await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - # Verify the request was passed through to the service - call_args = mock_schedule_service.create_schedule.call_args - assert call_args[0][1].execution_timeout_seconds == 7200 - - async def test_create_schedule_with_time_bounds( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test creating a schedule with start and end times""" - # Given - start_at = datetime.now(UTC) + timedelta(days=1) - end_at = datetime.now(UTC) + timedelta(days=30) - request = CreateScheduleRequest( - name="bounded-task", - workflow_name="test-workflow", - task_queue="test-queue", - cron_expression="0 0 * * *", - start_at=start_at, - end_at=end_at, - ) - expected_response = ScheduleResponse( - schedule_id=f"{sample_agent.id}--bounded-task", - name="bounded-task", - agent_id=sample_agent.id, - state=ScheduleState.ACTIVE, - action=ScheduleActionInfo( - workflow_name="test-workflow", - workflow_id_prefix=f"{sample_agent.id}--bounded-task-run", - task_queue="test-queue", - workflow_params=None, - ), - spec=ScheduleSpecInfo( - cron_expressions=["0 0 * * *"], - intervals_seconds=[], - start_at=start_at, - end_at=end_at, - ), - ) - mock_schedule_service.create_schedule.return_value = expected_response - - # When - result 
= await schedules_use_case.create_schedule(sample_agent, request) - - # Then - assert result is not None - assert result.spec.start_at == start_at - assert result.spec.end_at == end_at - - async def test_list_schedules_empty( - self, schedules_use_case, mock_schedule_service, sample_agent - ): - """Test listing schedules when none exist""" - # Given - expected_response = ScheduleListResponse(schedules=[], total=0) - mock_schedule_service.list_schedules.return_value = expected_response - - # When - result = await schedules_use_case.list_schedules(sample_agent.id) - - # Then - assert result is not None - assert result.total == 0 - assert len(result.schedules) == 0