diff --git a/acceptance/bundle/generate/python_job_and_deploy/databricks.yml b/acceptance/bundle/generate/python_job_and_deploy/databricks.yml
new file mode 100644
index 00000000000..d7f9b4f9454
--- /dev/null
+++ b/acceptance/bundle/generate/python_job_and_deploy/databricks.yml
@@ -0,0 +1,2 @@
+bundle:
+  name: python_job_and_deploy
diff --git a/acceptance/bundle/generate/python_job_and_deploy/out.test.toml b/acceptance/bundle/generate/python_job_and_deploy/out.test.toml
new file mode 100644
index 00000000000..bbc7fcfd1bd
--- /dev/null
+++ b/acceptance/bundle/generate/python_job_and_deploy/out.test.toml
@@ -0,0 +1,3 @@
+Local = true
+Cloud = true
+EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/bundle/generate/python_job_and_deploy/output.txt b/acceptance/bundle/generate/python_job_and_deploy/output.txt
new file mode 100644
index 00000000000..418e008c535
--- /dev/null
+++ b/acceptance/bundle/generate/python_job_and_deploy/output.txt
@@ -0,0 +1,29 @@
+
+=== Upload notebook to a workspace path
+>>> [CLI] workspace import /Workspace/Users/[USERNAME]/test_notebook.py --file test_notebook.py --format AUTO --overwrite
+
+=== Create a job that references the notebookCreated job
+
+=== Generate bundle config from the job
+>>> [CLI] bundle generate job --existing-job-id [JOB_ID] --key out --config-dir resources --source-dir src --force
+File successfully saved to src/test_notebook.py
+Job configuration successfully saved to resources/out.job.yml
+
+=== Verify generated yaml has expected fields
+=== Deploy the generated bundle
+>>> [CLI] bundle deploy
+Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/python_job_and_deploy/default/files...
+Deploying resources...
+Deployment complete!
+
+=== Destroy the deployed bundle
+>>> [CLI] bundle destroy --auto-approve
+All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/python_job_and_deploy/default
+
+Deleting files...
+Destroy complete!
+
+=== Cleanup: delete the original job and notebook
+>>> errcode [CLI] jobs delete [JOB_ID]
+
+>>> errcode [CLI] workspace delete /Workspace/Users/[USERNAME]/test_notebook
diff --git a/acceptance/bundle/generate/python_job_and_deploy/script b/acceptance/bundle/generate/python_job_and_deploy/script
new file mode 100644
index 00000000000..bf40ed318dc
--- /dev/null
+++ b/acceptance/bundle/generate/python_job_and_deploy/script
@@ -0,0 +1,40 @@
+title "Upload notebook to a workspace path"
+trace $CLI workspace import "/Workspace/Users/${CURRENT_USER_NAME}/test_notebook.py" --file test_notebook.py --format AUTO --overwrite
+
+title "Create a job that references the notebook"
+JOB_ID=$($CLI jobs create --json '{
+  "name": "test-job",
+  "max_concurrent_runs": 1,
+  "queue": {"enabled": true},
+  "tasks": [
+    {
+      "task_key": "test_task",
+      "notebook_task": {
+        "notebook_path": "/Workspace/Users/'${CURRENT_USER_NAME}'/test_notebook"
+      }
+    }
+  ]
+}' | jq -r '.job_id')
+echo "Created job"
+# Disable MSYS_NO_PATHCONV when invoking python scripts: with it set, Git Bash on Windows
+# fails to translate the script path so the python interpreter can't find the file.
+env -u MSYS_NO_PATHCONV add_repl.py "$JOB_ID" JOB_ID
+
+cleanup() {
+  title "Cleanup: delete the original job and notebook"
+  trace errcode $CLI jobs delete "$JOB_ID"
+  trace errcode $CLI workspace delete "/Workspace/Users/${CURRENT_USER_NAME}/test_notebook"
+}
+trap cleanup EXIT
+
+title "Generate bundle config from the job"
+trace $CLI bundle generate job --existing-job-id "$JOB_ID" --key out --config-dir resources --source-dir src --force
+
+title "Verify generated yaml has expected fields"
+cat resources/out.job.yml | env -u MSYS_NO_PATHCONV contains.py "task_key: test_task" "notebook_task:" "notebook_path: ../src/test_notebook.py" > /dev/null
+
+title "Deploy the generated bundle"
+trace $CLI bundle deploy
+
+title "Destroy the deployed bundle"
+trace $CLI bundle destroy --auto-approve
diff --git a/acceptance/bundle/generate/python_job_and_deploy/test.toml b/acceptance/bundle/generate/python_job_and_deploy/test.toml
new file mode 100644
index 00000000000..3eba85b404a
--- /dev/null
+++ b/acceptance/bundle/generate/python_job_and_deploy/test.toml
@@ -0,0 +1,13 @@
+Local = true
+Cloud = true
+
+Ignore = [
+  "databricks.yml",
+  "resources/*",
+  "src/*",
+  ".databricks",
+]
+
+[Env]
+# MSYS2 automatically converts absolute paths on Windows; disable for the workspace path.
+MSYS_NO_PATHCONV = "1"
diff --git a/acceptance/bundle/generate/python_job_and_deploy/test_notebook.py b/acceptance/bundle/generate/python_job_and_deploy/test_notebook.py
new file mode 100644
index 00000000000..38d86b79c70
--- /dev/null
+++ b/acceptance/bundle/generate/python_job_and_deploy/test_notebook.py
@@ -0,0 +1,2 @@
+# Databricks notebook source
+print("Hello, World!")
diff --git a/integration/bundle/generate_job_test.go b/integration/bundle/generate_job_test.go
deleted file mode 100644
index 8c51a55d407..00000000000
--- a/integration/bundle/generate_job_test.go
+++ /dev/null
@@ -1,116 +0,0 @@
-package bundle_test
-
-import (
-	"context"
-	"os"
-	"path"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"testing"
-
-	"github.com/databricks/cli/integration/internal/acc"
-	"github.com/databricks/cli/internal/testcli"
-	"github.com/databricks/cli/internal/testutil"
-	"github.com/databricks/cli/libs/env"
-	"github.com/databricks/cli/libs/filer"
-	"github.com/databricks/databricks-sdk-go"
-	"github.com/databricks/databricks-sdk-go/service/compute"
-	"github.com/databricks/databricks-sdk-go/service/jobs"
-	"github.com/google/uuid"
-	"github.com/stretchr/testify/require"
-)
-
-func TestGenerateFromExistingJobAndDeploy(t *testing.T) {
-	ctx, wt := acc.WorkspaceTest(t)
-	gt := &generateJobTest{T: wt, w: wt.W}
-
-	uniqueId := uuid.New().String()
-	bundleRoot := initTestTemplate(t, ctx, "with_includes", map[string]any{
-		"unique_id": uniqueId,
-	})
-
-	jobId := gt.createTestJob(ctx)
-	t.Cleanup(func() {
-		gt.destroyJob(context.WithoutCancel(ctx), jobId)
-	})
-
-	ctx = env.Set(ctx, "BUNDLE_ROOT", bundleRoot)
-	c := testcli.NewRunner(t, ctx, "bundle", "generate", "job",
-		"--existing-job-id", strconv.FormatInt(jobId, 10),
-		"--config-dir", filepath.Join(bundleRoot, "resources"),
-		"--source-dir", filepath.Join(bundleRoot, "src"))
-	_, _, err := c.Run()
-	require.NoError(t, err)
-
-	_, err = os.Stat(filepath.Join(bundleRoot, "src", "test.py"))
-	require.NoError(t, err)
-
-	matches, err := filepath.Glob(filepath.Join(bundleRoot, "resources", "generated_job_*.yml"))
-	require.NoError(t, err)
-	require.Len(t, matches, 1)
-
-	// check the content of generated yaml
-	data, err := os.ReadFile(matches[0])
-	require.NoError(t, err)
-	generatedYaml := string(data)
-	require.Contains(t, generatedYaml, "notebook_task:")
-	require.Contains(t, generatedYaml, "notebook_path: "+filepath.Join("..", "src", "test.py"))
-	require.Contains(t, generatedYaml, "task_key: test")
-	require.Contains(t, generatedYaml, "new_cluster:")
-	require.Contains(t, generatedYaml, "spark_version: 13.3.x-scala2.12")
-	require.Contains(t, generatedYaml, "num_workers: 1")
-
-	deployBundle(t, ctx, bundleRoot)
-
-	destroyBundle(t, ctx, bundleRoot)
-}
-
-type generateJobTest struct {
-	T *acc.WorkspaceT
-	w *databricks.WorkspaceClient
-}
-
-func (gt *generateJobTest) createTestJob(ctx context.Context) int64 {
-	t := gt.T
-	w := gt.w
-
-	tmpdir := acc.TemporaryWorkspaceDir(t, "generate-job-")
-	f, err := filer.NewWorkspaceFilesClient(w, tmpdir)
-	require.NoError(t, err)
-
-	err = f.Write(ctx, "test.py", strings.NewReader("# Databricks notebook source\nprint('Hello world!'))"))
-	require.NoError(t, err)
-
-	resp, err := w.Jobs.Create(ctx, jobs.CreateJob{
-		Name: testutil.RandomName("generated-job-"),
-		Tasks: []jobs.Task{
-			{
-				TaskKey: "test",
-				NewCluster: &compute.ClusterSpec{
-					SparkVersion: "13.3.x-scala2.12",
-					NumWorkers:   1,
-					NodeTypeId:   testutil.GetCloud(t).NodeTypeID(),
-					SparkConf: map[string]string{
-						"spark.databricks.enableWsfs":                         "true",
-						"spark.databricks.hive.metastore.glueCatalog.enabled": "true",
-						"spark.databricks.pip.ignoreSSL":                      "true",
-					},
-				},
-				NotebookTask: &jobs.NotebookTask{
-					NotebookPath: path.Join(tmpdir, "test"),
-				},
-			},
-		},
-	})
-	require.NoError(t, err)
-
-	return resp.JobId
-}
-
-func (gt *generateJobTest) destroyJob(ctx context.Context, jobId int64) {
-	err := gt.w.Jobs.Delete(ctx, jobs.DeleteJob{
-		JobId: jobId,
-	})
-	require.NoError(gt.T, err)
-}
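
For reference, the generated resources/out.job.yml is not recorded by this test (it falls under the Ignore list in test.toml), so its exact contents are not pinned. Based on the job created by the script and the three fields asserted via contains.py, it should look roughly like the sketch below; the resource key "out" follows from the --key out flag, and field ordering as well as any additional defaults emitted by bundle generate may differ.

    resources:
      jobs:
        out:
          name: test-job
          max_concurrent_runs: 1
          queue:
            enabled: true
          tasks:
            - task_key: test_task
              notebook_task:
                notebook_path: ../src/test_notebook.py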