Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions acceptance/experimental/air/run/invalid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
experiment_name: bad.name
command: x
compute:
accelerator_type: GPU_8xH100
num_accelerators: 3
3 changes: 3 additions & 0 deletions acceptance/experimental/air/run/out.test.toml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 39 additions & 0 deletions acceptance/experimental/air/run/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

=== dry-run (text)
>>> [CLI] experimental air run -f valid.yaml --dry-run
Dry run: configuration for "smoke-test" is valid; not submitting.

=== dry-run (json)
>>> [CLI] experimental air run -f valid.yaml --dry-run -o json
{
"v": 1,
"ts": "[TIMESTAMP]",
"data": {
"status": "DRY_RUN_OK",
"dry_run": true
}
}

=== override not yet supported
>>> [CLI] experimental air run -f valid.yaml --dry-run --override a=b
Error: --override is not yet supported

Exit code: 1

=== watch not yet supported
>>> [CLI] experimental air run -f valid.yaml --dry-run --watch
Error: --watch is not yet supported

Exit code: 1

=== invalid config is rejected
>>> [CLI] experimental air run -f invalid.yaml --dry-run
Error: invalid experiment_name "bad.name": only alphanumeric characters, hyphens (-), and underscores (_) are allowed

Exit code: 1

=== missing --file
>>> [CLI] experimental air run --dry-run
Error: required flag(s) "file" not set

Exit code: 1
17 changes: 17 additions & 0 deletions acceptance/experimental/air/run/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
title "dry-run (text)"
trace $CLI experimental air run -f valid.yaml --dry-run

title "dry-run (json)"
trace $CLI experimental air run -f valid.yaml --dry-run -o json

title "override not yet supported"
errcode trace $CLI experimental air run -f valid.yaml --dry-run --override a=b

title "watch not yet supported"
errcode trace $CLI experimental air run -f valid.yaml --dry-run --watch

title "invalid config is rejected"
errcode trace $CLI experimental air run -f invalid.yaml --dry-run

title "missing --file"
errcode trace $CLI experimental air run --dry-run
4 changes: 4 additions & 0 deletions acceptance/experimental/air/run/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# `air run --dry-run` validates the config locally and makes no workspace calls,
# so no engine matrix or server stubs are needed.
[EnvMatrix]
DATABRICKS_BUNDLE_ENGINE = []
5 changes: 5 additions & 0 deletions acceptance/experimental/air/run/valid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
experiment_name: smoke-test
command: python train.py
compute:
accelerator_type: GPU_1xH100
num_accelerators: 1
6 changes: 0 additions & 6 deletions acceptance/experimental/air/unimplemented/output.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@

=== run
>>> [CLI] experimental air run
Error: `air run` is not implemented yet

Exit code: 1

=== logs
>>> [CLI] experimental air logs 123
Error: `air logs` is not implemented yet
Expand Down
3 changes: 0 additions & 3 deletions acceptance/experimental/air/unimplemented/script
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# Each stub must fail with "not implemented"; errcode records the exit code.

title "run"
errcode trace $CLI experimental air run

title "logs"
errcode trace $CLI experimental air logs 123

Expand Down
68 changes: 65 additions & 3 deletions experimental/air/cmd/run.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
package aircmd

import (
"errors"
"fmt"
"strconv"

"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/cmdctx"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/flags"
"github.com/spf13/cobra"
)

// runResult is the JSON payload for `air run`.
// In this file it is handed to renderEnvelope when the output type is not text.
// Fields tagged omitempty are left out of the JSON when they hold their zero
// value.
type runResult struct {
// Status is the outcome label: "DRY_RUN_OK" on the dry-run path, "SUBMITTED"
// after a successful submission.
Status string `json:"status"`
// DryRun is set to true only on the dry-run path.
DryRun bool `json:"dry_run,omitempty"`
// RunID is the submitted run's ID formatted as a base-10 string.
RunID string `json:"run_id,omitempty"`
// DashboardURL is the dashboard link returned by submitWorkload.
DashboardURL string `json:"dashboard_url,omitempty"`
}

func newRunCommand() *cobra.Command {
var (
file string
Expand All @@ -21,16 +36,63 @@ func newRunCommand() *cobra.Command {
Long: `Submit a training workload to Databricks serverless GPU compute.

The workload is described by a YAML config file (see --file).`,
RunE: func(cmd *cobra.Command, args []string) error {
return notImplemented("run")
},
}

cmd.Flags().StringVarP(&file, "file", "f", "", "Path to the workload YAML config")
cmd.Flags().BoolVar(&watch, "watch", false, "Stream logs until the run completes")
cmd.Flags().StringArrayVar(&overrides, "override", nil, "Override a YAML field, e.g. compute.num_accelerators=8 (repeatable)")
cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Validate the config without submitting")
cmd.Flags().StringVar(&idempotencyKey, "idempotency-key", "", "Return the existing run if this key was already used")
_ = cmd.MarkFlagRequired("file")

// --dry-run only validates the config locally, so it needs no workspace.
// Submission requires an authenticated client.
cmd.PreRunE = func(cmd *cobra.Command, args []string) error {
if dryRun {
return nil
}
return root.MustWorkspaceClient(cmd, args)
}

// RunE rejects the flags that are not supported yet, loads the config named by
// --file, and then either reports a successful dry run or submits the workload
// and prints the resulting run ID and dashboard URL. Text output is written via
// cmdio.LogString; every other output type goes through renderEnvelope.
cmd.RunE = func(cmd *cobra.Command, args []string) error {
ctx := cmd.Context()

// These flags' pipelines are not ported yet; reject rather than silently
// ignore them.
if len(overrides) > 0 {
return errors.New("--override is not yet supported")
}
if watch {
return errors.New("--watch is not yet supported")
}

// Load the config before branching on --dry-run so that a bad config fails
// both paths; the error from loadRunConfig is returned unchanged.
cfg, err := loadRunConfig(file)
if err != nil {
return err
}

// Dry run stops here: it reports success and never reaches the workspace
// client below.
if dryRun {
if root.OutputType(cmd) == flags.OutputText {
cmdio.LogString(ctx, fmt.Sprintf("Dry run: configuration for %q is valid; not submitting.", cfg.ExperimentName))
return nil
}
return renderEnvelope(ctx, runResult{Status: "DRY_RUN_OK", DryRun: true})
}

// NOTE(review): the client is read from the context; presumably it was placed
// there by root.MustWorkspaceClient in PreRunE, which runs for every
// non-dry-run invocation — confirm.
w := cmdctx.WorkspaceClient(ctx)
runID, dashboardURL, err := submitWorkload(ctx, w, cfg, file, idempotencyKey)
if err != nil {
return err
}

// The run ID is rendered as a decimal string for both output formats.
runIDStr := strconv.FormatInt(runID, 10)
if root.OutputType(cmd) == flags.OutputText {
cmdio.LogString(ctx, "Submitted run "+runIDStr)
cmdio.LogString(ctx, "View at: "+dashboardURL)
return nil
}
return renderEnvelope(ctx, runResult{Status: "SUBMITTED", RunID: runIDStr, DashboardURL: dashboardURL})
}

return cmd
}
62 changes: 62 additions & 0 deletions experimental/air/cmd/runconfig_launch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package aircmd

// This file flattens the validated runConfig schema into the derived values the
// launch path consumes, replacing the Python CLI's _convert_to_run_config step.
// There is no separate internal config type: handle_run reads runConfig directly,
// using these accessors for the values that need computing rather than a plain
// field read.

// defaultMaxRetries is the retry count maxRetries reports when
// runConfig.MaxRetries is nil.
const defaultMaxRetries = 3

// timeoutSeconds reports the configured timeout in seconds, derived from
// timeout_minutes. A result of zero means no timeout was configured, in which
// case the backend default applies.
func (c *runConfig) timeoutSeconds() int {
	if minutes := c.TimeoutMinutes; minutes != nil {
		return *minutes * 60
	}
	return 0
}

// maxRetries reports how many retries to allow. An explicitly configured value
// (including zero) is returned as-is; only an unset value falls back to
// defaultMaxRetries.
func (c *runConfig) maxRetries() int {
	if retries := c.MaxRetries; retries != nil {
		return *retries
	}
	return defaultMaxRetries
}

// dockerImageURL reports the URL of the custom docker image. It yields "" when
// either the environment section or its docker image entry is absent.
func (c *runConfig) dockerImageURL() string {
	env := c.Environment
	if env == nil || env.DockerImage == nil {
		return ""
	}
	return env.DockerImage.URL
}

// requirementsFile reports the requirements-file path given by
// environment.dependencies when that field holds a string. The boolean is false
// (and the path empty) when there is no environment, no dependencies were set,
// or the dependencies are an inline list.
func (c *runConfig) requirementsFile() (string, bool) {
	if env := c.Environment; env != nil {
		if deps := &env.Dependencies; deps.set && !deps.isList {
			return deps.path, true
		}
	}
	return "", false
}

// inlineDependencies reports the package list given by
// environment.dependencies when that field holds a list. The boolean is false
// (and the slice nil) when there is no environment, no dependencies were set,
// or the dependencies point at a requirements file.
func (c *runConfig) inlineDependencies() ([]string, bool) {
	if env := c.Environment; env != nil {
		if deps := &env.Dependencies; deps.set && deps.isList {
			return deps.list, true
		}
	}
	return nil, false
}

// runtimeVersion reports the client image version taken from
// environment.version, and whether one was set. When dependencies come from a
// requirements file the version lives in that file and is resolved at launch,
// so it is not reported here.
func (c *runConfig) runtimeVersion() (string, bool) {
	if env := c.Environment; env != nil && env.Version.set {
		return env.Version.raw, true
	}
	return "", false
}
80 changes: 80 additions & 0 deletions experimental/air/cmd/runconfig_launch_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package aircmd

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestRunConfigTimeoutSeconds checks that an unset timeout yields zero and that
// a configured number of minutes is converted to seconds.
func TestRunConfigTimeoutSeconds(t *testing.T) {
	var cfg runConfig
	assert.Equal(t, 0, cfg.timeoutSeconds())

	cfg.TimeoutMinutes = new(2)
	assert.Equal(t, 120, cfg.timeoutSeconds())
}

// TestRunConfigMaxRetries checks that the default applies only when MaxRetries
// is nil: an explicit zero and an explicit positive count are returned as-is.
func TestRunConfigMaxRetries(t *testing.T) {
	var cfg runConfig
	assert.Equal(t, defaultMaxRetries, cfg.maxRetries())

	for _, want := range []int{0, 7} {
		cfg.MaxRetries = new(want)
		assert.Equal(t, want, cfg.maxRetries())
	}
}

// TestRunConfigDockerImageURL walks the three states of the docker image
// setting: no environment, an environment without an image, and an image URL.
func TestRunConfigDockerImageURL(t *testing.T) {
	const imageURL = "org/repo:tag"

	cfg := new(runConfig)
	assert.Empty(t, cfg.dockerImageURL())

	cfg.Environment = new(environmentConfig)
	assert.Empty(t, cfg.dockerImageURL())

	cfg.Environment.DockerImage = &dockerImageConfig{URL: imageURL}
	assert.Equal(t, imageURL, cfg.dockerImageURL())
}

func TestRunConfigDependencies(t *testing.T) {
t.Run("unset", func(t *testing.T) {
c := &runConfig{}
_, ok := c.requirementsFile()
assert.False(t, ok)
_, ok = c.inlineDependencies()
assert.False(t, ok)
})

t.Run("file path", func(t *testing.T) {
c := &runConfig{Environment: &environmentConfig{
Dependencies: dependencies{set: true, isList: false, path: "req.yaml"},
}}
path, ok := c.requirementsFile()
assert.True(t, ok)
assert.Equal(t, "req.yaml", path)
_, ok = c.inlineDependencies()
assert.False(t, ok)
})

t.Run("inline list", func(t *testing.T) {
c := &runConfig{Environment: &environmentConfig{
Dependencies: dependencies{set: true, isList: true, list: []string{"torch", "numpy"}},
}}
list, ok := c.inlineDependencies()
assert.True(t, ok)
assert.Equal(t, []string{"torch", "numpy"}, list)
_, ok = c.requirementsFile()
assert.False(t, ok)
})
}

// TestRunConfigRuntimeVersion checks that no version is reported without an
// environment, and that a set environment.version is returned verbatim.
func TestRunConfigRuntimeVersion(t *testing.T) {
	cfg := new(runConfig)
	_, found := cfg.runtimeVersion()
	assert.False(t, found)

	cfg.Environment = &environmentConfig{Version: stringOrInt{set: true, raw: "5"}}
	version, found := cfg.runtimeVersion()
	assert.True(t, found)
	assert.Equal(t, "5", version)
}
Loading
Loading