Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

### Bundles

* direct: Cluster resize now falls back to regular update if resize fails due to `INVALID_STATE` ([#5716](https://github.com/databricks/cli/pull/5716)).

### Dependency updates

### API Changes
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
bundle:
name: test-bundle

workspace:
root_path: ~/.bundle/$UNIQUE_NAME

resources:
clusters:
test_cluster:
cluster_name: test-cluster-$UNIQUE_NAME
spark_version: $DEFAULT_SPARK_VERSION
node_type_id: $NODE_TYPE_ID
instance_pool_id: $TEST_INSTANCE_POOL_ID
num_workers: 2

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!

=== Create a plan while cluster is running: should show resize

>>> [CLI] bundle plan -o json

=== Terminate the cluster before applying the saved plan

>>> [CLI] clusters get [CLUSTER_ID]
{
"cluster_name": "test-cluster-[UNIQUE_NAME]",
"num_workers": 2,
"state": "TERMINATED"
}

=== Apply saved plan: resize fails with INVALID_STATE, falls back to edit

>>> [CLI] bundle deploy --plan plan.json
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!

>>> print_requests.py //clusters/resize //clusters/edit
{
"method": "POST",
"path": "/api/2.1/clusters/resize",
"body": {
"cluster_id": "[CLUSTER_ID]",
"num_workers": 3
}
}
{
"method": "POST",
"path": "/api/2.1/clusters/edit",
"body": {
"autotermination_minutes": 60,
"cluster_id": "[CLUSTER_ID]",
"cluster_name": "test-cluster-[UNIQUE_NAME]",
"instance_pool_id": "[TEST_INSTANCE_POOL_ID]",
"num_workers": 3,
"spark_version": "13.3.x-snapshot-scala2.12"
}
}

=== Cluster should have new num_workers

>>> [CLI] clusters get [CLUSTER_ID]
{
"cluster_name": "test-cluster-[UNIQUE_NAME]",
"num_workers": 3
}

>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete resources.clusters.test_cluster

All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]

Deleting files...
Destroy complete!
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Acceptance test: a saved plan that contains a cluster "resize" action is
# applied after the cluster has been terminated. The resize request is expected
# to fail with INVALID_STATE and the deploy is expected to fall back to
# clusters/edit (see the section titles below).

# Render the bundle config from its template, substituting environment variables.
envsubst < databricks.yml.tmpl > databricks.yml

# On exit (success or failure), destroy the bundle and remove the recorded
# requests file.
cleanup() {
trace $CLI bundle destroy --auto-approve
rm -f out.requests.txt
}
trap cleanup EXIT

trace $CLI bundle deploy

# Look up the deployed cluster's ID from the bundle summary.
CLUSTER_ID=$($CLI bundle summary -o json | jq -r '.resources.clusters.test_cluster.id')
# Presumably registers an output replacement so the ID is shown as [CLUSTER_ID]
# in the recorded output -- confirm against the test runner.
echo "$CLUSTER_ID:CLUSTER_ID" >> ACC_REPLS

title "Create a plan while cluster is running: should show resize\n"
# Change only num_workers, then save the plan JSON to plan.json (stdout is
# redirected, so the plan body is not part of the recorded output).
update_file.py databricks.yml "num_workers: 2" "num_workers: 3"
trace $CLI bundle plan -o json > plan.json

title "Terminate the cluster before applying the saved plan\n"
# `clusters delete` output is discarded; the following `clusters get` shows the
# resulting cluster state.
$CLI clusters delete "$CLUSTER_ID" > /dev/null
trace $CLI clusters get "$CLUSTER_ID" | jq '{cluster_name,num_workers,state}'

title "Apply saved plan: resize fails with INVALID_STATE, falls back to edit\n"
trace $CLI bundle deploy --plan plan.json
# Print the recorded requests whose path matches clusters/resize or clusters/edit.
trace print_requests.py //clusters/resize //clusters/edit

title "Cluster should have new num_workers\n"
trace $CLI clusters get "$CLUSTER_ID" | jq '{cluster_name,num_workers}'
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Local = true
Cloud = true
RecordRequests = true

Ignore = [".databricks", "databricks.yml", "plan.json"]

[EnvMatrix]
DATABRICKS_BUNDLE_ENGINE = ["direct"]
6 changes: 3 additions & 3 deletions bundle/direct/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState,
case deployplan.UpdateWithID:
return d.UpdateWithID(ctx, db, oldID, newState)
case deployplan.Resize:
return d.Resize(ctx, db, oldID, newState)
return d.Resize(ctx, db, oldID, newState, planEntry)
default:
return fmt.Errorf("internal error: unexpected actionType: %#v", actionType)
}
Expand Down Expand Up @@ -246,8 +246,8 @@ func (d *DeploymentUnit) Delete(ctx context.Context, db *dstate.DeploymentState,
return nil
}

func (d *DeploymentUnit) Resize(ctx context.Context, db *dstate.DeploymentState, id string, newState any) error {
err := retryOnTransientErr(ctx, func() error { return d.Adapter.DoResize(ctx, id, newState) })
func (d *DeploymentUnit) Resize(ctx context.Context, db *dstate.DeploymentState, id string, newState any, entry *deployplan.PlanEntry) error {
err := retryOnTransientErr(ctx, func() error { return d.Adapter.DoResize(ctx, id, newState, entry) })
if err != nil {
return fmt.Errorf("resizing id=%s: %w", id, err)
}
Expand Down
6 changes: 3 additions & 3 deletions bundle/direct/dresources/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ type IResource interface {
DoUpdateWithID(ctx context.Context, id string, newState any) (newID string, remoteState any, e error)

// [Optional] DoResize resizes the resource. Only supported by clusters
DoResize(ctx context.Context, id string, newState any) error
DoResize(ctx context.Context, id string, newState any, entry *PlanEntry) error

// [Optional] WaitAfterCreate waits for the resource to become ready after creation. Returns optionally updated remote state.
// TODO: wait status should be persisted in the state.
Expand Down Expand Up @@ -494,12 +494,12 @@ func (a *Adapter) DoUpdateWithID(ctx context.Context, oldID string, newState any
return id, remoteState, nil
}

func (a *Adapter) DoResize(ctx context.Context, id string, newState any) error {
func (a *Adapter) DoResize(ctx context.Context, id string, newState any, entry *PlanEntry) error {
if a.doResize == nil {
return errors.New("internal error: DoResize not found")
}

_, err := a.doResize.Call(ctx, id, newState)
_, err := a.doResize.Call(ctx, id, newState, entry)
return err
}

Expand Down
15 changes: 14 additions & 1 deletion bundle/direct/dresources/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/bundle/deployplan"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/structs/structpath"
"github.com/databricks/cli/libs/utils"
"github.com/databricks/databricks-sdk-go"
Expand Down Expand Up @@ -241,13 +242,25 @@ func (r *ResourceCluster) WaitAfterCreate(ctx context.Context, id string, config
return nil, nil
}

func (r *ResourceCluster) DoResize(ctx context.Context, id string, config *ClusterState) error {
func (r *ResourceCluster) DoResize(ctx context.Context, id string, config *ClusterState, entry *PlanEntry) error {
_, err := r.client.Clusters.Resize(ctx, compute.ResizeCluster{
ClusterId: id,
NumWorkers: config.NumWorkers,
Autoscale: config.Autoscale,
ForceSendFields: utils.FilterFields[compute.ResizeCluster](config.ForceSendFields),
})
if err == nil {
return nil
}

apiErr, ok := errors.AsType[*apierr.APIError](err)
if !ok || apiErr.ErrorCode != "INVALID_STATE" {
return err
}

// Cluster is not running; fall back to the full clusters/edit path.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's log (at info / debug level) that resize failed and that we fall back; it might be useful later.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea, added.

log.Debugf(ctx, "cluster %s: resize returned INVALID_STATE (%s), falling back to edit", id, err)
_, err = r.DoUpdate(ctx, id, config, entry)
return err
}

Expand Down
8 changes: 8 additions & 0 deletions libs/testserver/clusters.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ func (s *FakeWorkspace) ClustersResize(req Request) any {
return Response{StatusCode: 404}
}

// Only running clusters can be resized; match the real API behavior.
if cluster.State != compute.StateRunning {
return Response{
StatusCode: 400,
Body: map[string]string{"error_code": "INVALID_STATE", "message": "Cluster is not running"},
}
}

cluster.NumWorkers = request.NumWorkers
cluster.Autoscale = request.Autoscale
s.Clusters[request.ClusterId] = cluster
Expand Down
Loading