diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 1459df29473..da3a3c30da8 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -7,6 +7,7 @@ ### CLI ### Bundles +* Add a `job_runs` resource (direct engine only) that triggers a job run as part of `bundle deploy`. Any change to the run configuration re-triggers the run; otherwise it is considered up to date ([#5603](https://github.com/databricks/cli/pull/5603)). ### Dependency updates diff --git a/acceptance/bundle/invariant/configs/job_run.yml.tmpl b/acceptance/bundle/invariant/configs/job_run.yml.tmpl new file mode 100644 index 00000000000..a09987da02a --- /dev/null +++ b/acceptance/bundle/invariant/configs/job_run.yml.tmpl @@ -0,0 +1,20 @@ +bundle: + name: test-bundle-$UNIQUE_NAME + +resources: + jobs: + foo: + name: test-job-$UNIQUE_NAME + tasks: + - task_key: only_task + notebook_task: + notebook_path: /Shared/notebook + new_cluster: + spark_version: $DEFAULT_SPARK_VERSION + node_type_id: $NODE_TYPE_ID + instance_pool_id: $TEST_INSTANCE_POOL_ID + num_workers: 1 + + job_runs: + foo_run: + job_id: ${resources.jobs.foo.id} diff --git a/acceptance/bundle/invariant/continue_293/out.test.toml b/acceptance/bundle/invariant/continue_293/out.test.toml index 5c601542bce..28d7ba3cfeb 100644 --- a/acceptance/bundle/invariant/continue_293/out.test.toml +++ b/acceptance/bundle/invariant/continue_293/out.test.toml @@ -19,6 +19,7 @@ EnvMatrix.INPUT_CONFIG = [ "job_pydabs_1000_tasks.yml.tmpl", "job_cross_resource_ref.yml.tmpl", "job_permission_ref.yml.tmpl", + "job_run.yml.tmpl", "job_run_job_ref.yml.tmpl", "job_with_depends_on.yml.tmpl", "job_with_permissions.yml.tmpl", diff --git a/acceptance/bundle/invariant/continue_293/test.toml b/acceptance/bundle/invariant/continue_293/test.toml index 6887ada9a71..9c2022aa118 100644 --- a/acceptance/bundle/invariant/continue_293/test.toml +++ b/acceptance/bundle/invariant/continue_293/test.toml @@ -12,6 +12,9 @@ EnvMatrixExclude.no_vector_search_endpoint = ["INPUT_CONFIG=vector_search_endpoi # genie_spaces resource is not supported on v0.293.0 EnvMatrixExclude.no_genie_space = ["INPUT_CONFIG=genie_space.yml.tmpl"] +# job_runs resource is not supported on v0.293.0 +EnvMatrixExclude.no_job_run = ["INPUT_CONFIG=job_run.yml.tmpl"] + # Dotted pipeline configuration keys are not supported on v0.293.0 EnvMatrixExclude.no_pipeline_config_dots = ["INPUT_CONFIG=pipeline_config_dots.yml.tmpl"] diff --git a/acceptance/bundle/invariant/migrate/out.test.toml b/acceptance/bundle/invariant/migrate/out.test.toml index 5c601542bce..28d7ba3cfeb 100644 --- a/acceptance/bundle/invariant/migrate/out.test.toml +++ b/acceptance/bundle/invariant/migrate/out.test.toml @@ -19,6 +19,7 @@ EnvMatrix.INPUT_CONFIG = [ "job_pydabs_1000_tasks.yml.tmpl", "job_cross_resource_ref.yml.tmpl", "job_permission_ref.yml.tmpl", + "job_run.yml.tmpl", "job_run_job_ref.yml.tmpl", "job_with_depends_on.yml.tmpl", "job_with_permissions.yml.tmpl", diff --git a/acceptance/bundle/invariant/migrate/test.toml b/acceptance/bundle/invariant/migrate/test.toml index a121cbf6475..af25cbb62dd 100644 --- a/acceptance/bundle/invariant/migrate/test.toml +++ b/acceptance/bundle/invariant/migrate/test.toml @@ -2,6 +2,10 @@ EnvMatrixExclude.no_vector_search_endpoint = ["INPUT_CONFIG=vector_search_endpoint.yml.tmpl"] EnvMatrixExclude.no_vector_search_index = ["INPUT_CONFIG=vector_search_index.yml.tmpl"] +# job_runs is a direct-only resource with no terraform converter, so the +# terraform deploy that seeds the migration fails for it. +EnvMatrixExclude.no_job_run = ["INPUT_CONFIG=job_run.yml.tmpl"] + # Error: Catalog resources are only supported with direct deployment mode EnvMatrixExclude.no_catalog = ["INPUT_CONFIG=catalog.yml.tmpl"] EnvMatrixExclude.no_external_location = ["INPUT_CONFIG=external_location.yml.tmpl"] diff --git a/acceptance/bundle/invariant/no_drift/out.test.toml b/acceptance/bundle/invariant/no_drift/out.test.toml index e39444592d5..0eb53a24f1c 100644 --- a/acceptance/bundle/invariant/no_drift/out.test.toml +++ b/acceptance/bundle/invariant/no_drift/out.test.toml @@ -19,6 +19,7 @@ EnvMatrix.INPUT_CONFIG = [ "job_pydabs_1000_tasks.yml.tmpl", "job_cross_resource_ref.yml.tmpl", "job_permission_ref.yml.tmpl", + "job_run.yml.tmpl", "job_run_job_ref.yml.tmpl", "job_with_depends_on.yml.tmpl", "job_with_permissions.yml.tmpl", diff --git a/acceptance/bundle/invariant/test.toml b/acceptance/bundle/invariant/test.toml index 6a4f46c5f17..df473eaa82e 100644 --- a/acceptance/bundle/invariant/test.toml +++ b/acceptance/bundle/invariant/test.toml @@ -37,6 +37,7 @@ EnvMatrix.INPUT_CONFIG = [ "job_pydabs_1000_tasks.yml.tmpl", "job_cross_resource_ref.yml.tmpl", "job_permission_ref.yml.tmpl", + "job_run.yml.tmpl", "job_run_job_ref.yml.tmpl", "job_with_depends_on.yml.tmpl", "job_with_permissions.yml.tmpl", diff --git a/acceptance/bundle/refschema/out.fields.txt b/acceptance/bundle/refschema/out.fields.txt index 7d7b4913c9d..c3d17ac674f 100644 --- a/acceptance/bundle/refschema/out.fields.txt +++ b/acceptance/bundle/refschema/out.fields.txt @@ -756,6 +756,45 @@ resources.genie_spaces.*.permissions[*].group_name string ALL resources.genie_spaces.*.permissions[*].level iam.PermissionLevel ALL resources.genie_spaces.*.permissions[*].service_principal_name string ALL resources.genie_spaces.*.permissions[*].user_name string ALL +resources.job_runs.*.dbt_commands []string ALL +resources.job_runs.*.dbt_commands[*] string ALL +resources.job_runs.*.id string INPUT +resources.job_runs.*.idempotency_token string ALL +resources.job_runs.*.jar_params []string ALL +resources.job_runs.*.jar_params[*] string ALL +resources.job_runs.*.job_id int64 ALL +resources.job_runs.*.job_parameters map[string]string ALL +resources.job_runs.*.job_parameters.* string ALL +resources.job_runs.*.lifecycle resources.Lifecycle INPUT +resources.job_runs.*.lifecycle.prevent_destroy bool INPUT +resources.job_runs.*.modified_status string INPUT +resources.job_runs.*.notebook_params map[string]string ALL +resources.job_runs.*.notebook_params.* string ALL +resources.job_runs.*.only []string ALL +resources.job_runs.*.only[*] string ALL +resources.job_runs.*.performance_target jobs.PerformanceTarget ALL +resources.job_runs.*.pipeline_params *jobs.PipelineParams ALL +resources.job_runs.*.pipeline_params.full_refresh bool ALL +resources.job_runs.*.pipeline_params.full_refresh_selection []string ALL +resources.job_runs.*.pipeline_params.full_refresh_selection[*] string ALL +resources.job_runs.*.pipeline_params.refresh_flow_selection []string ALL +resources.job_runs.*.pipeline_params.refresh_flow_selection[*] string ALL +resources.job_runs.*.pipeline_params.refresh_selection []string ALL +resources.job_runs.*.pipeline_params.refresh_selection[*] string ALL +resources.job_runs.*.pipeline_params.reset_checkpoint_selection []string ALL +resources.job_runs.*.pipeline_params.reset_checkpoint_selection[*] string ALL +resources.job_runs.*.python_named_params map[string]string ALL +resources.job_runs.*.python_named_params.* string ALL +resources.job_runs.*.python_params []string ALL +resources.job_runs.*.python_params[*] string ALL +resources.job_runs.*.queue *jobs.QueueSettings ALL +resources.job_runs.*.queue.enabled bool ALL +resources.job_runs.*.run_id int64 REMOTE +resources.job_runs.*.spark_submit_params []string ALL +resources.job_runs.*.spark_submit_params[*] string ALL +resources.job_runs.*.sql_params map[string]string ALL +resources.job_runs.*.sql_params.* string ALL +resources.job_runs.*.url string INPUT resources.jobs.*.budget_policy_id string ALL resources.jobs.*.continuous *jobs.Continuous ALL resources.jobs.*.continuous.pause_status jobs.PauseStatus ALL diff --git a/acceptance/bundle/resources/job_runs/basic/databricks.yml b/acceptance/bundle/resources/job_runs/basic/databricks.yml new file mode 100644 index 00000000000..5ea02d60dc2 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/basic/databricks.yml @@ -0,0 +1,15 @@ +bundle: + name: job-runs-basic + +resources: + jobs: + my_job: + name: my-job + tasks: + - task_key: main + notebook_task: + notebook_path: /Workspace/test + + job_runs: + my_run: + job_id: ${resources.jobs.my_job.id} diff --git a/acceptance/bundle/resources/job_runs/basic/out.test.toml b/acceptance/bundle/resources/job_runs/basic/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/resources/job_runs/basic/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/job_runs/basic/output.txt b/acceptance/bundle/resources/job_runs/basic/output.txt new file mode 100644 index 00000000000..d9618704d01 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/basic/output.txt @@ -0,0 +1,90 @@ + +=== deploy triggers exactly one run +>>> [CLI] bundle validate +Name: job-runs-basic +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/job-runs-basic/default + +Validation OK! + +>>> [CLI] bundle plan +create job_runs.my_run +create jobs.my_job + +Plan: 2 to add, 0 to change, 0 to delete, 0 unchanged + +>>> [CLI] bundle summary +Name: job-runs-basic +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/job-runs-basic/default +Resources: + Job Runs: + my_run: + Name: + URL: (not deployed) + Jobs: + my_job: + Name: my-job + URL: (not deployed) + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/job-runs-basic/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/job-runs-basic/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle plan +Plan: 0 to add, 0 to change, 0 to delete, 2 unchanged + +>>> [CLI] bundle summary +Name: job-runs-basic +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/job-runs-basic/default +Resources: + Job Runs: + my_run: + Name: + URL: [DATABRICKS_URL]/jobs/[MY_JOB_ID]/runs/[MY_RUN_ID]?w=[NUMID] + Jobs: + my_job: + Name: my-job + URL: [DATABRICKS_URL]/jobs/[MY_JOB_ID]?w=[NUMID] + +=== exactly one run-now request was made +>>> print_requests.py //jobs/run-now +{ + "method": "POST", + "path": "/api/2.2/jobs/run-now", + "body": { + "job_id": [MY_JOB_ID] + } +} + +=== the triggered run id is stored in state +>>> read_id.py my_run +[MY_RUN_ID] + +>>> read_id.py my_job +[MY_JOB_ID] + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.job_runs.my_run + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/job-runs-basic/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/resources/job_runs/basic/script b/acceptance/bundle/resources/job_runs/basic/script new file mode 100644 index 00000000000..e9bdc54d706 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/basic/script @@ -0,0 +1,24 @@ +cleanup() { + trace $CLI bundle destroy --auto-approve + rm out.requests.txt +} +trap cleanup EXIT + +title "deploy triggers exactly one run" +trace $CLI bundle validate +trace $CLI bundle plan +trace $CLI bundle summary +trace $CLI bundle deploy + +# confirm that redeploy does not trigger a second run +trace $CLI bundle deploy + +trace $CLI bundle plan +trace $CLI bundle summary + +title "exactly one run-now request was made" +trace print_requests.py //jobs/run-now + +title "the triggered run id is stored in state" +trace read_id.py my_run +trace read_id.py my_job diff --git a/acceptance/bundle/resources/job_runs/basic/test.toml b/acceptance/bundle/resources/job_runs/basic/test.toml new file mode 100644 index 00000000000..4b94d8b58e9 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/basic/test.toml @@ -0,0 +1,4 @@ +# job_runs is a direct-engine-only resource; the Terraform provider has no +# equivalent, so restrict the matrix to direct. +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] +RecordRequests = true diff --git a/acceptance/bundle/resources/job_runs/job_parameters/databricks.yml b/acceptance/bundle/resources/job_runs/job_parameters/databricks.yml new file mode 100644 index 00000000000..fb6970b9286 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/job_parameters/databricks.yml @@ -0,0 +1,26 @@ +bundle: + name: job-runs-job-parameters + +resources: + jobs: + my_job: + name: my-job + parameters: + - name: env + default: dev + - name: region + default: us + tasks: + - task_key: main + notebook_task: + notebook_path: /Workspace/test + + job_runs: + my_run: + job_id: ${resources.jobs.my_job.id} + # Override only one of the job's two parameters. GetRun resolves the full + # parameter set (including the un-overridden `region` default), so without + # job_parameters in ignore_remote_changes this drift would recreate the run + # on every plan. + job_parameters: + env: prod diff --git a/acceptance/bundle/resources/job_runs/job_parameters/out.test.toml b/acceptance/bundle/resources/job_runs/job_parameters/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/resources/job_runs/job_parameters/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/job_runs/job_parameters/output.txt b/acceptance/bundle/resources/job_runs/job_parameters/output.txt new file mode 100644 index 00000000000..bcf3f21e017 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/job_parameters/output.txt @@ -0,0 +1,49 @@ + +=== deploy triggers the run with only the overridden parameter +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/job-runs-job-parameters/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> print_requests.py //jobs/run-now +{ + "method": "POST", + "path": "/api/2.2/jobs/run-now", + "body": { + "job_id": [NUMID], + "job_parameters": { + "env": "prod" + } + } +} + +=== plan is stable: the resolved job default does not look like drift +>>> [CLI] bundle plan +Plan: 0 to add, 0 to change, 0 to delete, 2 unchanged + +>>> [CLI] bundle summary +Name: job-runs-job-parameters +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/job-runs-job-parameters/default +Resources: + Job Runs: + my_run: + Name: + URL: [DATABRICKS_URL]/jobs/[NUMID]/runs/[NUMID]?w=[NUMID] + Jobs: + my_job: + Name: my-job + URL: [DATABRICKS_URL]/jobs/[NUMID]?w=[NUMID] + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.job_runs.my_run + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/job-runs-job-parameters/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/resources/job_runs/job_parameters/script b/acceptance/bundle/resources/job_runs/job_parameters/script new file mode 100644 index 00000000000..7128b53ef68 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/job_parameters/script @@ -0,0 +1,13 @@ +cleanup() { + trace $CLI bundle destroy --auto-approve + rm out.requests.txt +} +trap cleanup EXIT + +title "deploy triggers the run with only the overridden parameter" +trace $CLI bundle deploy +trace print_requests.py //jobs/run-now + +title "plan is stable: the resolved job default does not look like drift" +trace $CLI bundle plan +trace $CLI bundle summary diff --git a/acceptance/bundle/resources/job_runs/job_parameters/test.toml b/acceptance/bundle/resources/job_runs/job_parameters/test.toml new file mode 100644 index 00000000000..4b94d8b58e9 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/job_parameters/test.toml @@ -0,0 +1,4 @@ +# job_runs is a direct-engine-only resource; the Terraform provider has no +# equivalent, so restrict the matrix to direct. +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] +RecordRequests = true diff --git a/acceptance/bundle/resources/job_runs/redeploy/databricks.yml b/acceptance/bundle/resources/job_runs/redeploy/databricks.yml new file mode 100644 index 00000000000..370829ce20c --- /dev/null +++ b/acceptance/bundle/resources/job_runs/redeploy/databricks.yml @@ -0,0 +1,17 @@ +bundle: + name: job-runs-redeploy + +resources: + jobs: + my_job: + name: my-job + tasks: + - task_key: main + notebook_task: + notebook_path: /Workspace/test + + job_runs: + my_run: + job_id: ${resources.jobs.my_job.id} + job_parameters: + env: dev diff --git a/acceptance/bundle/resources/job_runs/redeploy/out.test.toml b/acceptance/bundle/resources/job_runs/redeploy/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/resources/job_runs/redeploy/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/job_runs/redeploy/output.txt b/acceptance/bundle/resources/job_runs/redeploy/output.txt new file mode 100644 index 00000000000..d36561488b2 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/redeploy/output.txt @@ -0,0 +1,97 @@ + +=== initial deploy triggers the first run +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/job-runs-redeploy/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle plan +Plan: 0 to add, 0 to change, 0 to delete, 2 unchanged + +>>> [CLI] bundle summary +Name: job-runs-redeploy +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/job-runs-redeploy/default +Resources: + Job Runs: + my_run: + Name: + URL: [DATABRICKS_URL]/jobs/[MY_JOB_ID]/runs/[NUMID]?w=[NUMID] + Jobs: + my_job: + Name: my-job + URL: [DATABRICKS_URL]/jobs/[MY_JOB_ID]?w=[NUMID] + +>>> read_id.py my_job +[MY_JOB_ID] + +>>> print_requests.py //jobs/run-now +{ + "method": "POST", + "path": "/api/2.2/jobs/run-now", + "body": { + "job_id": [MY_JOB_ID], + "job_parameters": { + "env": "dev" + } + } +} + +=== change the run configuration and redeploy +>>> update_file.py databricks.yml env: dev env: prod + +>>> [CLI] bundle plan +recreate job_runs.my_run + +Plan: 1 to add, 0 to change, 1 to delete, 1 unchanged + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/job-runs-redeploy/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle plan +Plan: 0 to add, 0 to change, 0 to delete, 2 unchanged + +>>> [CLI] bundle summary +Name: job-runs-redeploy +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/job-runs-redeploy/default +Resources: + Job Runs: + my_run: + Name: + URL: [DATABRICKS_URL]/jobs/[MY_JOB_ID]/runs/[NUMID]?w=[NUMID] + Jobs: + my_job: + Name: my-job + URL: [DATABRICKS_URL]/jobs/[MY_JOB_ID]?w=[NUMID] + +=== the config change triggered a second, different run +>>> print_requests.py //jobs/run-now +{ + "method": "POST", + "path": "/api/2.2/jobs/run-now", + "body": { + "job_id": [MY_JOB_ID], + "job_parameters": { + "env": "prod" + } + } +} + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.job_runs.my_run + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/job-runs-redeploy/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/resources/job_runs/redeploy/script b/acceptance/bundle/resources/job_runs/redeploy/script new file mode 100644 index 00000000000..0c153800ad6 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/redeploy/script @@ -0,0 +1,22 @@ +cleanup() { + trace $CLI bundle destroy --auto-approve + rm out.requests.txt +} +trap cleanup EXIT + +title "initial deploy triggers the first run" +trace $CLI bundle deploy +trace $CLI bundle plan +trace $CLI bundle summary +trace read_id.py my_job +trace print_requests.py //jobs/run-now + +title "change the run configuration and redeploy" +trace update_file.py databricks.yml "env: dev" "env: prod" +trace $CLI bundle plan +trace $CLI bundle deploy +trace $CLI bundle plan +trace $CLI bundle summary + +title "the config change triggered a second, different run" +trace print_requests.py //jobs/run-now diff --git a/acceptance/bundle/resources/job_runs/redeploy/test.toml b/acceptance/bundle/resources/job_runs/redeploy/test.toml new file mode 100644 index 00000000000..4b94d8b58e9 --- /dev/null +++ b/acceptance/bundle/resources/job_runs/redeploy/test.toml @@ -0,0 +1,4 @@ +# job_runs is a direct-engine-only resource; the Terraform provider has no +# equivalent, so restrict the matrix to direct. +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] +RecordRequests = true diff --git a/bundle/config/mutator/resourcemutator/apply_bundle_permissions_test.go b/bundle/config/mutator/resourcemutator/apply_bundle_permissions_test.go index 2fbe07b270a..41686b326ac 100644 --- a/bundle/config/mutator/resourcemutator/apply_bundle_permissions_test.go +++ b/bundle/config/mutator/resourcemutator/apply_bundle_permissions_test.go @@ -33,6 +33,7 @@ var unsupportedResources = []string{ "postgres_roles", "postgres_synced_tables", "vector_search_indexes", + "job_runs", } func TestApplyBundlePermissions(t *testing.T) { diff --git a/bundle/config/mutator/resourcemutator/apply_target_mode_test.go b/bundle/config/mutator/resourcemutator/apply_target_mode_test.go index bc3b44cbdca..da9fcab9770 100644 --- a/bundle/config/mutator/resourcemutator/apply_target_mode_test.go +++ b/bundle/config/mutator/resourcemutator/apply_target_mode_test.go @@ -92,6 +92,9 @@ func mockBundle(mode config.Mode) *bundle.Bundle { }, }, }, + JobRuns: map[string]*resources.JobRun{ + "job_run1": {RunNow: jobs.RunNow{JobId: 1234}}, + }, Pipelines: map[string]*resources.Pipeline{ "pipeline1": {CreatePipeline: pipelines.CreatePipeline{Name: "pipeline1", Continuous: true}}, }, diff --git a/bundle/config/mutator/resourcemutator/run_as.go b/bundle/config/mutator/resourcemutator/run_as.go index 4f5e3ce9036..132f0d1dfa5 100644 --- a/bundle/config/mutator/resourcemutator/run_as.go +++ b/bundle/config/mutator/resourcemutator/run_as.go @@ -126,6 +126,17 @@ func validateRunAs(b *bundle.Bundle) diag.Diagnostics { )) } + // Job runs execute under the triggered job's own identity; the RunNow API + // has no run_as field, so a differing bundle run_as cannot be honored. + if len(b.Config.Resources.JobRuns) > 0 { + diags = diags.Extend(reportRunAsNotSupported( + "job_runs", + b.Config.GetLocation("resources.job_runs"), + b.Config.Workspace.CurrentUser.UserName, + identity, + )) + } + return diags } diff --git a/bundle/config/mutator/resourcemutator/run_as_test.go b/bundle/config/mutator/resourcemutator/run_as_test.go index 5f98190ecb3..01da5d753f7 100644 --- a/bundle/config/mutator/resourcemutator/run_as_test.go +++ b/bundle/config/mutator/resourcemutator/run_as_test.go @@ -42,6 +42,7 @@ func allResourceTypes(t *testing.T) []string { "experiments", "external_locations", "genie_spaces", + "job_runs", "jobs", "model_serving_endpoints", "models", diff --git a/bundle/config/resources.go b/bundle/config/resources.go index 8d294aef103..cfbfbe9fff5 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -12,6 +12,7 @@ import ( // Resources defines Databricks resources associated with the bundle. type Resources struct { Jobs map[string]*resources.Job `json:"jobs,omitempty"` + JobRuns map[string]*resources.JobRun `json:"job_runs,omitempty"` Pipelines map[string]*resources.Pipeline `json:"pipelines,omitempty"` Models map[string]*resources.MlflowModel `json:"models,omitempty"` @@ -96,6 +97,7 @@ func (r *Resources) AllResources() []ResourceGroup { descriptions := SupportedResources() return []ResourceGroup{ collectResourceMap(descriptions["jobs"], r.Jobs), + collectResourceMap(descriptions["job_runs"], r.JobRuns), collectResourceMap(descriptions["pipelines"], r.Pipelines), collectResourceMap(descriptions["models"], r.Models), collectResourceMap(descriptions["experiments"], r.Experiments), @@ -157,6 +159,7 @@ func (r *Resources) FindResourceByConfigKey(key string) (ConfigResource, error) func SupportedResources() map[string]resources.ResourceDescription { return map[string]resources.ResourceDescription{ "jobs": (&resources.Job{}).ResourceDescription(), + "job_runs": (&resources.JobRun{}).ResourceDescription(), "pipelines": (&resources.Pipeline{}).ResourceDescription(), "models": (&resources.MlflowModel{}).ResourceDescription(), "experiments": (&resources.MlflowExperiment{}).ResourceDescription(), diff --git a/bundle/config/resources/job_run.go b/bundle/config/resources/job_run.go new file mode 100644 index 00000000000..1e618ff15b6 --- /dev/null +++ b/bundle/config/resources/job_run.go @@ -0,0 +1,84 @@ +package resources + +import ( + "context" + "net/url" + "strconv" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/workspaceurls" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/marshal" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +// JobRun is the bundle config for a triggered job run. The run is described by +// the same fields as the Jobs RunNow API request, so we embed jobs.RunNow +// directly instead of re-declaring them. The run re-triggers only when its own +// RunNow config changes; edits to the targeted job (addressed by the stable +// job_id) do not re-trigger it. +type JobRun struct { + BaseResource + jobs.RunNow +} + +func (r *JobRun) UnmarshalJSON(b []byte) error { + return marshal.Unmarshal(b, r) +} + +func (r JobRun) MarshalJSON() ([]byte, error) { + return marshal.Marshal(r) +} + +// Exists reports whether the run identified by id (a numeric run id) still +// exists in the workspace. A run is the unit of existence here: once RunNow has +// been called, the run is retrievable via GetRun for as long as the workspace +// retains its history. +func (r *JobRun) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { + runID, err := strconv.ParseInt(id, 10, 64) + if err != nil { + return false, err + } + _, err = w.Jobs.GetRun(ctx, jobs.GetRunRequest{ + RunId: runID, + }) + if err != nil { + log.Debugf(ctx, "job run %s does not exist: %v", id, err) + if apierr.IsMissing(err) { + return false, nil + } + return false, err + } + return true, nil +} + +func (r *JobRun) ResourceDescription() ResourceDescription { + return ResourceDescription{ + SingularName: "job_run", + PluralName: "job_runs", + SingularTitle: "Job Run", + PluralTitle: "Job Runs", + } +} + +// GetName returns the in-product name. A run has no user-assigned name, so this +// is empty. +func (r *JobRun) GetName() string { + return "" +} + +func (r *JobRun) GetURL() string { + return r.URL +} + +// InitializeURL sets the run's workspace URL once both IDs that address it are +// known. Before deploy neither is populated: the run id is backfilled from +// state and the job id may still be an unresolved ${resources.jobs.*.id} +// reference, so we skip rather than emit a broken jobs/0 URL. +func (r *JobRun) InitializeURL(baseURL url.URL) { + if r.ID == "" || r.JobId == 0 { + return + } + r.URL = workspaceurls.JobRunURL(baseURL, strconv.FormatInt(r.JobId, 10), r.ID) +} diff --git a/bundle/config/resources_test.go b/bundle/config/resources_test.go index c5f32267531..978d5799f45 100644 --- a/bundle/config/resources_test.go +++ b/bundle/config/resources_test.go @@ -125,6 +125,10 @@ func TestBundleResourcePluralNamesResolveInWorkspaceURLs(t *testing.T) { // Resources that intentionally have no workspace URL. noURL := map[string]bool{ "external_locations": true, + // A job run does have a workspace URL, but it's addressed by two IDs + // (job + run) so it can't be expressed as a single-ID pattern here; it's + // built in JobRun.InitializeURL via workspaceurls.JobRunURL instead. + "job_runs": true, "postgres_branches": true, "postgres_databases": true, "postgres_endpoints": true, @@ -159,6 +163,11 @@ func TestResourcesBindSupport(t *testing.T) { JobSettings: jobs.JobSettings{}, }, }, + JobRuns: map[string]*resources.JobRun{ + "my_job_run": { + RunNow: jobs.RunNow{}, + }, + }, Pipelines: map[string]*resources.Pipeline{ "my_pipeline": { CreatePipeline: pipelines.CreatePipeline{}, @@ -336,6 +345,7 @@ func TestResourcesBindSupport(t *testing.T) { ctx := t.Context() m := mocks.NewMockWorkspaceClient(t) m.GetMockJobsAPI().EXPECT().Get(mock.Anything, mock.Anything).Return(nil, nil) + m.GetMockJobsAPI().EXPECT().GetRun(mock.Anything, mock.Anything).Return(nil, nil) m.GetMockPipelinesAPI().EXPECT().Get(mock.Anything, mock.Anything).Return(nil, nil) m.GetMockExperimentsAPI().EXPECT().GetExperiment(mock.Anything, mock.Anything).Return(nil, nil) m.GetMockRegisteredModelsAPI().EXPECT().Get(mock.Anything, mock.Anything).Return(nil, nil) diff --git a/bundle/deploy/terraform/lifecycle_test.go b/bundle/deploy/terraform/lifecycle_test.go index b60bff612c7..248a66c21a9 100644 --- a/bundle/deploy/terraform/lifecycle_test.go +++ b/bundle/deploy/terraform/lifecycle_test.go @@ -18,6 +18,7 @@ func TestConvertLifecycleForAllResources(t *testing.T) { "catalogs", "external_locations", "genie_spaces", + "job_runs", "vector_search_endpoints", "vector_search_indexes", } diff --git a/bundle/direct/dresources/all.go b/bundle/direct/dresources/all.go index 4df9dfe58b8..9856e6f979d 100644 --- a/bundle/direct/dresources/all.go +++ b/bundle/direct/dresources/all.go @@ -8,6 +8,7 @@ import ( var SupportedResources = map[string]any{ "jobs": (*ResourceJob)(nil), + "job_runs": (*ResourceJobRun)(nil), "pipelines": (*ResourcePipeline)(nil), "experiments": (*ResourceExperiment)(nil), "catalogs": (*ResourceCatalog)(nil), diff --git a/bundle/direct/dresources/all_test.go b/bundle/direct/dresources/all_test.go index 8994fa328fc..02c34ea912e 100644 --- a/bundle/direct/dresources/all_test.go +++ b/bundle/direct/dresources/all_test.go @@ -322,6 +322,30 @@ var testDeps = map[string]prepareWorkspace{ return testConfig["vector_search_indexes"], nil }, + "job_runs": func(ctx context.Context, client *databricks.WorkspaceClient) (any, error) { + // A run can only be triggered against an existing job, so create one first. + resp, err := client.Jobs.Create(ctx, jobs.CreateJob{ + Name: "job-for-run", + Tasks: []jobs.Task{ + { + TaskKey: "t", + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "/Workspace/Users/user@example.com/notebook", + }, + }, + }, + }) + if err != nil { + return nil, err + } + + return &resources.JobRun{ + RunNow: jobs.RunNow{ + JobId: resp.JobId, + }, + }, nil + }, + "jobs.permissions": func(ctx context.Context, client *databricks.WorkspaceClient) (any, error) { resp, err := client.Jobs.Create(ctx, jobs.CreateJob{ Name: "job-permissions", @@ -1032,7 +1056,9 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W require.NoError(t, err) } - deleteIsNoop := strings.HasSuffix(group, "permissions") || strings.HasSuffix(group, "grants") + // job_runs has a no-op DoDelete (a triggered run cannot be "undeployed"), so + // the run remains readable after delete, like permissions and grants. + deleteIsNoop := strings.HasSuffix(group, "permissions") || strings.HasSuffix(group, "grants") || group == "job_runs" remoteAfterDelete, err := adapter.DoRead(ctx, createdID) if deleteIsNoop { @@ -1126,27 +1152,40 @@ func TestNoUpdateResourcesCoverAllFields(t *testing.T) { // provided_id_fields, or ignore_local_changes; output-only fields are // covered by ignore_remote_changes since the user never sets them. covered := map[string]bool{} + // A root rule (nil Field) matches every path via HasPatternPrefix, so it + // covers all fields at once. Coverage here is by exact path string, which + // a root rule (Field.String() == "") would never hit, so track it apart. + rootCovered := false + markCovered := func(r FieldRule) { + if r.Field.IsRoot() { + rootCovered = true + } + covered[r.Field.String()] = true + } for _, cfg := range []*ResourceLifecycleConfig{adapter.ResourceConfig(), adapter.GeneratedResourceConfig()} { if cfg == nil { continue } for _, r := range cfg.RecreateOnChanges { - covered[r.Field.String()] = true + markCovered(r) } for _, r := range cfg.ProvidedIDFields { - covered[r.Field.String()] = true + markCovered(r) } for _, r := range cfg.IgnoreLocalChanges { - covered[r.Field.String()] = true + markCovered(r) } for _, r := range cfg.IgnoreRemoteChanges { if strings.HasSuffix(r.Reason, "output_only") { - covered[r.Field.String()] = true + markCovered(r) } } } t.Run(resourceType, func(t *testing.T) { + if rootCovered { + return + } err := structwalk.WalkType(adapter.StateType(), func(path *structpath.PatternNode, typ reflect.Type, _ *reflect.StructField) bool { if path.IsRoot() { return true diff --git a/bundle/direct/dresources/apitypes.generated.yml b/bundle/direct/dresources/apitypes.generated.yml index 8fb3f0117c4..8fc2eec34a0 100644 --- a/bundle/direct/dresources/apitypes.generated.yml +++ b/bundle/direct/dresources/apitypes.generated.yml @@ -20,6 +20,8 @@ external_locations: catalog.CreateExternalLocation genie_spaces: dashboards.GenieUpdateSpaceRequest +job_runs: jobs.RunNow + jobs: jobs.JobSettings model_serving_endpoints: serving.CreateServingEndpoint diff --git a/bundle/direct/dresources/config_test.go b/bundle/direct/dresources/config_test.go index 706c100fde1..5db93913c28 100644 --- a/bundle/direct/dresources/config_test.go +++ b/bundle/direct/dresources/config_test.go @@ -4,7 +4,10 @@ import ( "testing" "github.com/databricks/cli/libs/structs/structaccess" + "github.com/databricks/cli/libs/structs/structpath" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.yaml.in/yaml/v3" ) func TestMustLoadConfig(t *testing.T) { @@ -17,6 +20,28 @@ func TestGetResourceConfig(t *testing.T) { assert.Empty(t, GetResourceConfig("nonexistent").RecreateOnChanges) } +// TestFieldRuleEmptyStringVsOmitted documents how a FieldRule's pattern is +// matched and guards the empty-string footgun. The matcher +// (path.HasPatternPrefix) treats only a nil pattern as root (matches every +// field). The two ways one might try to express "all fields" differ: +// - field: "" would parse to a non-nil zero PatternNode (an empty component +// that matches nothing), so it is rejected at unmarshal time. +// - omitting field leaves the pointer nil, which IS root and matches all. +// +// So to cover all fields, omit the field key entirely; a literal "" is an error. +func TestFieldRuleEmptyStringVsOmitted(t *testing.T) { + someField := structpath.MustParsePath("dbt_commands") + + var emptyString FieldRule + err := yaml.Unmarshal([]byte("field: \"\"\nreason: input_only\n"), &emptyString) + assert.ErrorContains(t, err, "empty path string", "field: \"\" should be rejected") + + var omitted FieldRule + require.NoError(t, yaml.Unmarshal([]byte("reason: input_only\n"), &omitted)) + assert.True(t, omitted.Field.IsRoot(), "omitting field should be root") + assert.True(t, someField.HasPatternPrefix(omitted.Field), "omitting field should match every field") +} + // categoryRules projects ResourceLifecycleConfig's categories onto a // uniform [name, []FieldRule] shape so the redundancy check can iterate them. func categoryRules(c ResourceLifecycleConfig) []struct { diff --git a/bundle/direct/dresources/job_run.go b/bundle/direct/dresources/job_run.go new file mode 100644 index 00000000000..92ca174f134 --- /dev/null +++ b/bundle/direct/dresources/job_run.go @@ -0,0 +1,112 @@ +package dresources + +import ( + "context" + "fmt" + "strconv" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/marshal" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +// JobRunState is what we persist for a triggered run: the RunNow request that +// launched it. +type JobRunState struct { + jobs.RunNow +} + +func (s *JobRunState) UnmarshalJSON(b []byte) error { + return marshal.Unmarshal(b, s) +} + +func (s JobRunState) MarshalJSON() ([]byte, error) { + return marshal.Marshal(s) +} + +type ResourceJobRun struct { + client *databricks.WorkspaceClient +} + +func (*ResourceJobRun) New(client *databricks.WorkspaceClient) *ResourceJobRun { + return &ResourceJobRun{ + client: client, + } +} + +func (*ResourceJobRun) PrepareState(input *resources.JobRun) *JobRunState { + return &JobRunState{ + RunNow: input.RunNow, + } +} + +// DoRead returns the run's remote state. We call GetRun both to read back the +// run's job_id and to confirm the run still exists: a run can be deleted +// out-of-band or age out of the workspace's run-history retention, and a +// not-found result lets the planner re-trigger it. RemoteType is the same as +// StateType: we don't track any remote-only fields, so no RemapState is needed. +func (r *ResourceJobRun) DoRead(ctx context.Context, id string) (*JobRunState, error) { + runID, err := parseRunID(id) + if err != nil { + return nil, err + } + run, err := r.client.Jobs.GetRun(ctx, jobs.GetRunRequest{ + RunId: runID, + IncludeHistory: false, + IncludeResolvedValues: false, + PageToken: "", + ForceSendFields: nil, + }) + if err != nil { + return nil, err + } + // A run is immutable and fire-once: nothing about it changes on the backend + // after launch. In this milestone a run is re-triggered solely by a local + // change to its own RunNow config (every settable input is marked + // recreate_on_changes in resources.yml and ignored for remote changes); + // re-triggering on other signals (every deploy, file or referenced-value + // changes) is a later milestone. We therefore record only the run's identity + // (job_id) to confirm it still targets the expected job. Reading the run's + // overriding parameters back here would only feed a remote diff we then have + // to suppress, so we don't. + return &JobRunState{RunNow: jobs.RunNow{JobId: run.JobId}}, nil +} + +func (r *ResourceJobRun) DoCreate(ctx context.Context, config *JobRunState) (string, *JobRunState, error) { + // RunNow returns immediately with the new run id; waiting for completion is + // a later milestone. + wait, err := r.client.Jobs.RunNow(ctx, config.RunNow) + if err != nil { + return "", nil, err + } + // RunNow's response carries only the run id. We don't track remote-only + // fields, so the faithful record of what we created is the config we sent; + // echo it back as the remote state (RemoteType == StateType). + remote := &JobRunState{RunNow: config.RunNow} + return strconv.FormatInt(wait.RunId, 10), remote, nil +} + +// DoUpdate is intentionally not implemented: there is no API to modify a run in +// place. Every request field is marked recreate_on_changes in resources.yml, so +// any config change goes through delete + create (a fresh RunNow). + +// DoDelete is a no-op in this milestone. The project plan defines all changes as +// "recreate" (no-op delete + a fresh RunNow), with the run considered deployed as +// soon as it is triggered; waiting for completion is the next milestone. We also +// can't usefully call jobs/runs/delete here: it only deletes a non-active run and +// errors on an active one, and because we don't wait for completion the run is +// typically still active when DoDelete runs (including on the recreate path, +// which calls DoDelete before DoCreate). Real deletion can be revisited once runs +// are awaited. +func (r *ResourceJobRun) DoDelete(ctx context.Context, id string, _ *JobRunState) error { + return nil +} + +func parseRunID(id string) (int64, error) { + result, err := strconv.ParseInt(id, 10, 64) + if err != nil { + return 0, fmt.Errorf("internal error: run id is not integer: %q: %w", id, err) + } + return result, nil +} diff --git a/bundle/direct/dresources/resources.generated.yml b/bundle/direct/dresources/resources.generated.yml index d1dab12b17d..e67e7a3282f 100644 --- a/bundle/direct/dresources/resources.generated.yml +++ b/bundle/direct/dresources/resources.generated.yml @@ -176,6 +176,8 @@ resources: - field: etag reason: spec:output_only + # job_runs: no api field behaviors + # jobs: no api field behaviors # model_serving_endpoints: no api field behaviors diff --git a/bundle/direct/dresources/resources.yml b/bundle/direct/dresources/resources.yml index 2001d47dc4f..c44d9ad3d48 100644 --- a/bundle/direct/dresources/resources.yml +++ b/bundle/direct/dresources/resources.yml @@ -116,6 +116,52 @@ resources: - field: tasks[*].new_cluster.data_security_mode - field: job_clusters[*].new_cluster.data_security_mode + job_runs: + # A run is immutable and fire-once. In this milestone it is re-triggered + # solely by a local change to its own config, never from remote drift; + # re-triggering on other signals (every deploy, file or referenced-value + # changes) is a later milestone. Every settable run input is therefore + # ignored for remote changes here (ignore_remote_changes only suppresses a + # purely-remote difference, i.e. when local state still equals config), while + # a local change to any of them re-triggers the run via the recreate_on_changes + # list below. DoRead records only the run's identity (job_id), which it + # confirms but does not re-trigger on. + ignore_remote_changes: + - field: dbt_commands + reason: input_only + - field: idempotency_token + reason: input_only + - field: jar_params + reason: input_only + - field: job_parameters + reason: input_only + - field: notebook_params + reason: input_only + - field: only + reason: input_only + - field: performance_target + reason: input_only + - field: pipeline_params + reason: input_only + - field: python_named_params + reason: input_only + - field: python_params + reason: input_only + - field: queue + reason: input_only + - field: spark_submit_params + reason: input_only + - field: sql_params + reason: input_only + # There is no API to modify a run in place, so any change to the request must + # re-trigger a fresh run. With no DoUpdate, recreate (no-op delete + RunNow) + # is the only path; every request field is immutable. Omitting `field` makes + # the rule match the root, i.e. every field, so any change recreates the run. + # (A literal `field: ""` would NOT do this: it parses to an empty component + # that matches nothing -- see TestFieldRuleEmptyStringVsOmitted.) + recreate_on_changes: + - reason: immutable + pipelines: recreate_on_changes: - field: storage diff --git a/bundle/direct/dstate/state.go b/bundle/direct/dstate/state.go index eb41217f61d..e604938f6a5 100644 --- a/bundle/direct/dstate/state.go +++ b/bundle/direct/dstate/state.go @@ -10,9 +10,9 @@ import ( "io/fs" "os" "path/filepath" - "strings" "sync" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/statemgmt/resourcestate" "github.com/databricks/cli/internal/build" @@ -462,6 +462,11 @@ func (db *DeploymentState) AssertOpenedForWrite() { func ExportStateFromData(data Database) resourcestate.ExportedResourcesMap { result := make(resourcestate.ExportedResourcesMap) for key, entry := range data.State { + // Match on the exact resource type rather than substring-matching the + // key, so a sub-resource entry like resources...permissions + // (type ".permissions") is not mistaken for the resource itself. + resourceType := config.GetResourceTypeFromKey(key) + var etag string // Extract etag for resources that use it for drift detection // (dashboards and genie_spaces). Both follow the same pattern of @@ -470,7 +475,7 @@ func ExportStateFromData(data Database) resourcestate.ExportedResourcesMap { // covered by test cases: // - bundle/deploy/dashboard/detect-change // - bundle/resources/genie_spaces/simple - if (strings.Contains(key, ".dashboards.") || strings.Contains(key, ".genie_spaces.")) && len(entry.State) > 0 { + if (resourceType == "dashboards" || resourceType == "genie_spaces") && len(entry.State) > 0 { var holder struct { Etag string `json:"etag"` } @@ -479,9 +484,24 @@ func ExportStateFromData(data Database) resourcestate.ExportedResourcesMap { } } + // Extract the resolved job_id for job runs. A run's URL needs its parent + // job id, which in config is a ${resources.jobs.*.id} reference that is + // only resolved at deploy; restoring the deployed value lets read-only + // commands build the run URL. + var jobID int64 + if resourceType == "job_runs" && len(entry.State) > 0 { + var holder struct { + JobID int64 `json:"job_id"` + } + if err := json.Unmarshal(entry.State, &holder); err == nil { + jobID = holder.JobID + } + } + result[key] = resourcestate.ResourceState{ ID: entry.ID, ETag: etag, + JobID: jobID, StateSizeBytes: len(entry.State), } } diff --git a/bundle/direct/dstate/state_test.go b/bundle/direct/dstate/state_test.go index b52f2bf4d82..2906f475fad 100644 --- a/bundle/direct/dstate/state_test.go +++ b/bundle/direct/dstate/state_test.go @@ -44,6 +44,51 @@ func TestFinalizeWithNoEntriesDoesNotWriteStateFile(t *testing.T) { assert.ErrorIs(t, err, os.ErrNotExist) } +func TestExportStateFromDataJobRunJobID(t *testing.T) { + data := Database{ + State: map[string]ResourceEntry{ + "resources.job_runs.my_run": { + ID: "456", + State: json.RawMessage(`{"job_id": 123}`), + }, + // A permissions sub-resource entry shares the ".job_runs." infix but + // is not a run. Its state even carries a job_id to prove we match on + // the exact resource type, not a substring of the key. + "resources.job_runs.my_run.permissions": { + ID: "456", + State: json.RawMessage(`{"job_id": 999}`), + }, + }, + } + + result := ExportStateFromData(data) + + assert.Equal(t, int64(123), result["resources.job_runs.my_run"].JobID) + assert.Equal(t, int64(0), result["resources.job_runs.my_run.permissions"].JobID) +} + +func TestExportStateFromDataDashboardEtag(t *testing.T) { + data := Database{ + State: map[string]ResourceEntry{ + "resources.dashboards.my_dash": { + ID: "abc", + State: json.RawMessage(`{"etag": "v1"}`), + }, + // A permissions sub-resource entry shares the ".dashboards." infix but + // is not a dashboard; its etag must not be lifted onto it. + "resources.dashboards.my_dash.permissions": { + ID: "abc", + State: json.RawMessage(`{"etag": "v2"}`), + }, + }, + } + + result := ExportStateFromData(data) + + assert.Equal(t, "v1", result["resources.dashboards.my_dash"].ETag) + assert.Empty(t, result["resources.dashboards.my_dash.permissions"].ETag) +} + func TestPanicOnDoubleOpen(t *testing.T) { path := filepath.Join(t.TempDir(), "state.json") diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index c8a8353012f..a730ce91d5a 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -914,6 +914,16 @@ resources: "warehouse_id": "description": |- ID of the SQL warehouse used to run queries for this Genie space. + "job_runs": + "description": |- + The job run definitions for the bundle, where each key is the name of the job run. Each job run triggers a run of an existing job as part of bundle deployment. + "$fields": + "lifecycle": + "description": |- + Settings that control the deployment lifecycle of the resource, such as preventing it from being destroyed. + "python_named_params": + "description": |- + PLACEHOLDER "jobs": "description": |- The job definitions for the bundle, where each key is the name of the job. diff --git a/bundle/internal/validation/generated/enum_fields.go b/bundle/internal/validation/generated/enum_fields.go index d5f62ac45b8..0509372f157 100644 --- a/bundle/internal/validation/generated/enum_fields.go +++ b/bundle/internal/validation/generated/enum_fields.go @@ -64,6 +64,8 @@ var EnumFields = map[string][]string{ "resources.genie_spaces.*.permissions[*].level": {"CAN_ATTACH_TO", "CAN_BIND", "CAN_CREATE", "CAN_CREATE_APP", "CAN_EDIT", "CAN_EDIT_METADATA", "CAN_MANAGE", "CAN_MANAGE_PRODUCTION_VERSIONS", "CAN_MANAGE_RUN", "CAN_MANAGE_STAGING_VERSIONS", "CAN_MONITOR", "CAN_MONITOR_ONLY", "CAN_QUERY", "CAN_READ", "CAN_RESTART", "CAN_RUN", "CAN_USE", "CAN_VIEW", "CAN_VIEW_METADATA", "IS_OWNER"}, + "resources.job_runs.*.performance_target": {"PERFORMANCE_OPTIMIZED", "STANDARD"}, + "resources.jobs.*.continuous.pause_status": {"PAUSED", "UNPAUSED"}, "resources.jobs.*.continuous.task_retry_mode": {"NEVER", "ON_FAILURE"}, "resources.jobs.*.deployment.kind": {"BUNDLE", "SYSTEM_MANAGED"}, diff --git a/bundle/internal/validation/generated/required_fields.go b/bundle/internal/validation/generated/required_fields.go index 46862290d10..c80a433422c 100644 --- a/bundle/internal/validation/generated/required_fields.go +++ b/bundle/internal/validation/generated/required_fields.go @@ -66,6 +66,9 @@ var RequiredFields = map[string][]string{ "resources.genie_spaces.*.permissions[*]": {"level"}, + "resources.job_runs.*": {"job_id"}, + "resources.job_runs.*.queue": {"enabled"}, + "resources.jobs.*.deployment": {"kind"}, "resources.jobs.*.environments[*]": {"environment_key"}, "resources.jobs.*.git_source": {"git_provider", "git_url"}, diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index dc03d3c9d0b..39b5b08f374 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -1026,6 +1026,111 @@ } ] }, + "resources.JobRun": { + "oneOf": [ + { + "type": "object", + "properties": { + "dbt_commands": { + "description": "[Private Preview] An array of commands to execute for jobs with the dbt task, for example `\"dbt_commands\": [\"dbt deps\", \"dbt seed\", \"dbt deps\", \"dbt seed\", \"dbt run\"]`\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.", + "$ref": "#/$defs/slice/string", + "x-databricks-launch-stage": "PRIVATE_PREVIEW", + "deprecationMessage": "This field is deprecated", + "doNotSuggest": true, + "deprecated": true + }, + "idempotency_token": { + "description": "An optional token to guarantee the idempotency of job run requests. If a run with the provided token already exists,\nthe request does not create a new run but returns the ID of the existing run instead. If a run with the provided token is deleted,\nan error is returned.\n\nIf you specify the idempotency token, upon failure you can retry until the request succeeds. Databricks guarantees that exactly one run\nis launched with that idempotency token.\n\nThis token must have at most 64 characters.\n\nFor more information, see [How to ensure idempotency for jobs](https://kb.databricks.com/jobs/jobs-idempotency.html).", + "$ref": "#/$defs/string" + }, + "jar_params": { + "description": "[Private Preview] A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", \"35\"]`.\nThe parameters are used to invoke the main function of the main class specified in the Spark JAR task.\nIf not specified upon `run-now`, it defaults to an empty list.\njar_params cannot be specified in conjunction with notebook_params.\nThe JSON representation of this field (for example `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.", + "$ref": "#/$defs/slice/string", + "x-databricks-launch-stage": "PRIVATE_PREVIEW", + "deprecationMessage": "This field is deprecated", + "doNotSuggest": true, + "deprecated": true + }, + "job_id": { + "description": "The ID of the job to be executed", + "$ref": "#/$defs/int64" + }, + "job_parameters": { + "description": "Job-level parameters used in the run. for example `\"param\": \"overriding_val\"`", + "$ref": "#/$defs/map/string" + }, + "lifecycle": { + "description": "Settings that control the deployment lifecycle of the resource, such as preventing it from being destroyed.", + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle" + }, + "notebook_params": { + "description": "[Private Preview] A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": \"john doe\", \"age\": \"35\"}`.\nThe map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function.\n\nIf not specified upon `run-now`, the triggered run uses the job’s base parameters.\n\nnotebook_params cannot be specified in conjunction with jar_params.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.\n\nThe JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes.", + "$ref": "#/$defs/map/string", + "x-databricks-launch-stage": "PRIVATE_PREVIEW", + "deprecationMessage": "This field is deprecated", + "doNotSuggest": true, + "deprecated": true + }, + "only": { + "description": "A list of task keys to run inside of the job. If this field is not provided, all tasks in the job will be run.", + "$ref": "#/$defs/slice/string" + }, + "performance_target": { + "description": "The performance mode on a serverless job. The performance target determines the level of compute performance or cost-efficiency for the run. This field overrides the performance target defined on the job level.\n\n* `STANDARD`: Enables cost-efficient execution of serverless workloads.\n* `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PerformanceTarget" + }, + "pipeline_params": { + "description": "Controls whether the pipeline should perform a full refresh", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PipelineParams" + }, + "python_named_params": { + "description": "[Private Preview]", + "$ref": "#/$defs/map/string", + "x-databricks-launch-stage": "PRIVATE_PREVIEW", + "deprecationMessage": "This field is deprecated", + "doNotSuggest": true, + "deprecated": true + }, + "python_params": { + "description": "[Private Preview] A list of parameters for jobs with Python tasks, for example `\"python_params\": [\"john doe\", \"35\"]`.\nThe parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite\nthe parameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", + "$ref": "#/$defs/slice/string", + "x-databricks-launch-stage": "PRIVATE_PREVIEW", + "deprecationMessage": "This field is deprecated", + "doNotSuggest": true, + "deprecated": true + }, + "queue": { + "description": "The queue settings of the run.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.QueueSettings" + }, + "spark_submit_params": { + "description": "[Private Preview] A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": [\"--class\", \"org.apache.spark.examples.SparkPi\"]`.\nThe parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the\nparameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", + "$ref": "#/$defs/slice/string", + "x-databricks-launch-stage": "PRIVATE_PREVIEW", + "deprecationMessage": "This field is deprecated", + "doNotSuggest": true, + "deprecated": true + }, + "sql_params": { + "description": "[Private Preview] A map from keys to values for jobs with SQL task, for example `\"sql_params\": {\"name\": \"john doe\", \"age\": \"35\"}`. The SQL alert task does not support custom parameters.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.", + "$ref": "#/$defs/map/string", + "x-databricks-launch-stage": "PRIVATE_PREVIEW", + "deprecationMessage": "This field is deprecated", + "doNotSuggest": true, + "deprecated": true + } + }, + "additionalProperties": false, + "required": [ + "job_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.\\p{L}+([-_]*[\\p{L}\\p{N}]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "resources.Lifecycle": { "oneOf": [ { @@ -2966,6 +3071,10 @@ "genie_spaces": { "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.GenieSpace" }, + "job_runs": { + "description": "The job run definitions for the bundle, where each key is the name of the job run. Each job run triggers a run of an existing job as part of bundle deployment.", + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.JobRun" + }, "jobs": { "description": "The job definitions for the bundle, where each key is the name of the job.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Job", @@ -13342,6 +13451,20 @@ } ] }, + "resources.JobRun": { + "oneOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.JobRun" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.\\p{L}+([-_]*[\\p{L}\\p{N}]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "resources.MlflowExperiment": { "oneOf": [ { diff --git a/bundle/statemgmt/resourcestate/resourcestate.go b/bundle/statemgmt/resourcestate/resourcestate.go index ec98f1bc827..4bd5d4eb15e 100644 --- a/bundle/statemgmt/resourcestate/resourcestate.go +++ b/bundle/statemgmt/resourcestate/resourcestate.go @@ -8,6 +8,13 @@ type ResourceState struct { // For dashboards ETag string + // For job_runs: the resolved job_id the run was launched against. A run's + // URL needs its parent job id, but in config that comes from a + // ${resources.jobs.*.id} reference that is only resolved at deploy, so at + // read time (e.g. bundle summary) it reads 0. We carry the deployed value + // here so it can be restored into config at load time. + JobID int64 + // Size in bytes of the resource's serialized state blob. Populated by the // direct engine (len of the JSON stored in resources.json) for deploy // telemetry; left zero by the terraform path. diff --git a/bundle/statemgmt/state_load.go b/bundle/statemgmt/state_load.go index 573c69126c2..02f5664021d 100644 --- a/bundle/statemgmt/state_load.go +++ b/bundle/statemgmt/state_load.go @@ -131,6 +131,16 @@ func StateToBundle(ctx context.Context, state ExportedResourcesMap, config *conf if err != nil { return dyn.InvalidValue, err } + + // A run's job_id in config is a ${resources.jobs.*.id} reference + // that is only resolved at deploy, so at read time it is 0. + // Restore the deployed value so the run URL can be built. + if groupName == "job_runs" && attrs.JobID != 0 { + v, err = dyn.SetByPath(v, dyn.Path{dyn.Key("resources"), dyn.Key(groupName), dyn.Key(resourceName), dyn.Key("job_id")}, dyn.V(attrs.JobID)) + if err != nil { + return dyn.InvalidValue, err + } + } } } diff --git a/bundle/statemgmt/state_load_test.go b/bundle/statemgmt/state_load_test.go index 86d7f76f806..313348f30d2 100644 --- a/bundle/statemgmt/state_load_test.go +++ b/bundle/statemgmt/state_load_test.go @@ -27,6 +27,7 @@ func TestStateToBundleEmptyLocalResources(t *testing.T) { state := ExportedResourcesMap{ "resources.jobs.test_job": {ID: "1"}, + "resources.job_runs.test_job_run": {ID: "1"}, "resources.pipelines.test_pipeline": {ID: "1"}, "resources.models.test_mlflow_model": {ID: "1"}, "resources.experiments.test_mlflow_experiment": {ID: "1"}, @@ -63,6 +64,9 @@ func TestStateToBundleEmptyLocalResources(t *testing.T) { assert.Equal(t, "1", config.Resources.Jobs["test_job"].ID) assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Jobs["test_job"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.JobRuns["test_job_run"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.JobRuns["test_job_run"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.Pipelines["test_pipeline"].ID) assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Pipelines["test_pipeline"].ModifiedStatus) @@ -158,6 +162,13 @@ func TestStateToBundleEmptyRemoteResources(t *testing.T) { }, }, }, + JobRuns: map[string]*resources.JobRun{ + "test_job_run": { + RunNow: jobs.RunNow{ + JobId: 1234, + }, + }, + }, Pipelines: map[string]*resources.Pipeline{ "test_pipeline": { CreatePipeline: pipelines.CreatePipeline{ @@ -376,6 +387,9 @@ func TestStateToBundleEmptyRemoteResources(t *testing.T) { assert.Empty(t, config.Resources.Jobs["test_job"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Jobs["test_job"].ModifiedStatus) + assert.Empty(t, config.Resources.JobRuns["test_job_run"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.JobRuns["test_job_run"].ModifiedStatus) + assert.Empty(t, config.Resources.Pipelines["test_pipeline"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Pipelines["test_pipeline"].ModifiedStatus) @@ -481,6 +495,18 @@ func TestStateToBundleModifiedResources(t *testing.T) { }, }, }, + JobRuns: map[string]*resources.JobRun{ + "test_job_run": { + RunNow: jobs.RunNow{ + JobId: 1234, + }, + }, + "test_job_run_new": { + RunNow: jobs.RunNow{ + JobId: 5678, + }, + }, + }, Pipelines: map[string]*resources.Pipeline{ "test_pipeline": { CreatePipeline: pipelines.CreatePipeline{ @@ -835,6 +861,8 @@ func TestStateToBundleModifiedResources(t *testing.T) { state := ExportedResourcesMap{ "resources.jobs.test_job": {ID: "1"}, "resources.jobs.test_job_old": {ID: "2"}, + "resources.job_runs.test_job_run": {ID: "1"}, + "resources.job_runs.test_job_run_old": {ID: "2"}, "resources.pipelines.test_pipeline": {ID: "1"}, "resources.pipelines.test_pipeline_old": {ID: "2"}, "resources.models.test_mlflow_model": {ID: "1"}, @@ -896,6 +924,13 @@ func TestStateToBundleModifiedResources(t *testing.T) { assert.Empty(t, config.Resources.Jobs["test_job_new"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Jobs["test_job_new"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.JobRuns["test_job_run"].ID) + assert.Empty(t, config.Resources.JobRuns["test_job_run"].ModifiedStatus) + assert.Equal(t, "2", config.Resources.JobRuns["test_job_run_old"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.JobRuns["test_job_run_old"].ModifiedStatus) + assert.Empty(t, config.Resources.JobRuns["test_job_run_new"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.JobRuns["test_job_run_new"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.Pipelines["test_pipeline"].ID) assert.Empty(t, config.Resources.Pipelines["test_pipeline"].ModifiedStatus) assert.Equal(t, "2", config.Resources.Pipelines["test_pipeline_old"].ID) diff --git a/libs/structs/structpath/path.go b/libs/structs/structpath/path.go index 5ae81019ee4..edc426503e2 100644 --- a/libs/structs/structpath/path.go +++ b/libs/structs/structpath/path.go @@ -891,13 +891,16 @@ func (p *PathNode) UnmarshalYAML(unmarshal func(any) error) error { if err := unmarshal(&s); err != nil { return err } + // An explicit empty string parses to the zero node (an empty component that + // matches nothing), which reads like a wildcard but isn't. The root path is + // expressed by omitting the field entirely, leaving the pointer nil. + if s == "" { + return errors.New("empty path string; omit the field to match the root") + } parsed, err := parse(s, false) if err != nil { return err } - if parsed == nil { - return nil - } *p = *(*PathNode)(parsed) return nil } @@ -914,13 +917,17 @@ func (p *PatternNode) UnmarshalYAML(unmarshal func(any) error) error { if err := unmarshal(&s); err != nil { return err } + // An explicit empty string parses to the zero node (an empty component that + // matches nothing), which reads like a wildcard but isn't. The root pattern + // (match everything) is expressed by omitting the field entirely, leaving + // the pointer nil. + if s == "" { + return errors.New("empty path string; omit the field to match the root") + } parsed, err := parse(s, true) if err != nil { return err } - if parsed == nil { - return nil - } *p = *parsed return nil } diff --git a/libs/structs/structpath/path_test.go b/libs/structs/structpath/path_test.go index 2f8fe4bf6be..e6394230490 100644 --- a/libs/structs/structpath/path_test.go +++ b/libs/structs/structpath/path_test.go @@ -1105,14 +1105,12 @@ func TestPathNodeYAMLNullAndEmpty(t *testing.T) { require.NoError(t, err) assert.Nil(t, config.Path) - // Empty string results in allocated pointer with zero-value PathNode. - // The zero value has index=0, which represents "[0]" (array index 0). - // This is a quirk - in practice, use null for "no path" in YAML configs. + // An explicit empty string is rejected: it would otherwise parse to a + // zero-value PathNode (index=0, i.e. "[0]") that silently matches nothing. + // Use null / omit the field for "no path" instead. var config2 Config err = yaml.Unmarshal([]byte("path: ''"), &config2) - require.NoError(t, err) - require.NotNil(t, config2.Path) - assert.Equal(t, "[0]", config2.Path.String()) + require.ErrorContains(t, err, "empty path string") } func TestPathNodeYAMLUnmarshalErrors(t *testing.T) { diff --git a/libs/structs/structwalk/walktype_test.go b/libs/structs/structwalk/walktype_test.go index 78ccc6c3f61..3b20a1b1dc8 100644 --- a/libs/structs/structwalk/walktype_test.go +++ b/libs/structs/structwalk/walktype_test.go @@ -136,7 +136,7 @@ func TestTypeJobSettings(t *testing.T) { func TestTypeRoot(t *testing.T) { testStruct(t, reflect.TypeFor[config.Root](), - 5000, 6000, // 5813 after genie_space resource added + 5000, 8000, // 6027 with the job_runs resource (RunNow request fields only) map[string]any{ "bundle.target": "", `variables.*.lookup.dashboard`: "", diff --git a/libs/testserver/jobs.go b/libs/testserver/jobs.go index 9f42055f079..fb1c025956f 100644 --- a/libs/testserver/jobs.go +++ b/libs/testserver/jobs.go @@ -326,18 +326,72 @@ func (s *FakeWorkspace) JobsRunNow(req Request) Response { } s.JobRuns[runId] = jobs.Run{ - RunId: runId, - JobId: request.JobId, - State: &jobs.RunState{LifeCycleState: jobs.RunLifeCycleStateRunning}, - RunPageUrl: fmt.Sprintf("%s/?o=900800700600#job/%d/run/%d", s.url, request.JobId, runId), - RunType: jobs.RunTypeJobRun, - RunName: runName, - Tasks: tasks, + RunId: runId, + JobId: request.JobId, + State: &jobs.RunState{LifeCycleState: jobs.RunLifeCycleStateRunning}, + RunPageUrl: fmt.Sprintf("%s/?o=900800700600#job/%d/run/%d", s.url, request.JobId, runId), + RunType: jobs.RunTypeJobRun, + RunName: runName, + Tasks: tasks, + JobParameters: runJobParameters(job.Settings, request.JobParameters), + OverridingParameters: runOverridingParameters(request), } return Response{Body: jobs.RunNowResponse{RunId: runId}} } +// runJobParameters mirrors how GetRun resolves job-level parameters: it returns +// every parameter the job defines with its value, applying the run's overrides +// on top of the job's defaults. This is the cloud behavior that lets a run +// surface a job parameter it never explicitly overrode. Sorted by name for +// deterministic output. +func runJobParameters(settings *jobs.JobSettings, overrides map[string]string) []jobs.JobParameter { + resolved := map[string]jobs.JobParameter{} + if settings != nil { + for _, p := range settings.Parameters { + resolved[p.Name] = jobs.JobParameter{Name: p.Name, Default: p.Default, Value: p.Default} + } + } + for name, value := range overrides { + p := resolved[name] + p.Name = name + p.Value = value + resolved[name] = p + } + if len(resolved) == 0 { + return nil + } + result := make([]jobs.JobParameter, 0, len(resolved)) + for _, p := range resolved { + result = append(result, p) + } + slices.SortFunc(result, func(a, b jobs.JobParameter) int { + return cmp.Compare(a.Name, b.Name) + }) + return result +} + +// runOverridingParameters mirrors how GetRun echoes the run's overriding +// parameters. Returns nil when the request set none. +func runOverridingParameters(request jobs.RunNow) *jobs.RunParameters { + p := jobs.RunParameters{ + DbtCommands: request.DbtCommands, + JarParams: request.JarParams, + NotebookParams: request.NotebookParams, + PipelineParams: request.PipelineParams, + PythonNamedParams: request.PythonNamedParams, + PythonParams: request.PythonParams, + SparkSubmitParams: request.SparkSubmitParams, + SqlParams: request.SqlParams, + } + if len(p.DbtCommands) == 0 && len(p.JarParams) == 0 && len(p.NotebookParams) == 0 && + p.PipelineParams == nil && len(p.PythonNamedParams) == 0 && len(p.PythonParams) == 0 && + len(p.SparkSubmitParams) == 0 && len(p.SqlParams) == 0 { + return nil + } + return &p +} + // executePythonWheelTask runs a python wheel task locally using uv. // For tasks using existing_cluster_id, the venv is cached per cluster to match // cloud behavior where libraries are cached on running clusters. diff --git a/libs/workspaceurls/urls.go b/libs/workspaceurls/urls.go index f599139e340..54b6da9bda1 100644 --- a/libs/workspaceurls/urls.go +++ b/libs/workspaceurls/urls.go @@ -81,6 +81,15 @@ func JobRunPath(jobID, runID string) string { return fmt.Sprintf("jobs/%s/runs/%s", jobID, runID) } +// JobRunURL constructs a workspace URL for a job run from a base URL, job ID, +// and run ID. Unlike ResourceURL it takes two IDs because a run is addressed by +// both its parent job and the run itself, so it can't be expressed as a single +// entry in resourceURLPatterns. +func JobRunURL(baseURL url.URL, jobID, runID string) string { + baseURL.Path = JobRunPath(jobID, runID) + return baseURL.String() +} + // ResourceURL constructs a workspace URL for a named resource type and ID. func ResourceURL(baseURL url.URL, resourceType, id string) string { resourceType = resolveAlias(resourceType) diff --git a/libs/workspaceurls/urls_test.go b/libs/workspaceurls/urls_test.go index 683b9c470e7..e39d28d9aaf 100644 --- a/libs/workspaceurls/urls_test.go +++ b/libs/workspaceurls/urls_test.go @@ -35,6 +35,14 @@ func TestJobRunPath(t *testing.T) { assert.Equal(t, "jobs/123/runs/456", JobRunPath("123", "456")) } +func TestJobRunURL(t *testing.T) { + base := url.URL{Scheme: "https", Host: "host.com"} + assert.Equal(t, "https://host.com/jobs/123/runs/456", JobRunURL(base, "123", "456")) + + withWorkspace := url.URL{Scheme: "https", Host: "host.com", RawQuery: "w=789"} + assert.Equal(t, "https://host.com/jobs/123/runs/456?w=789", JobRunURL(withWorkspace, "123", "456")) +} + func TestResourceTypes(t *testing.T) { types := ResourceTypes() assert.NotEmpty(t, types)