Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/actions/setup-build-environment/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ inputs:
cache-key:
description: 'Cache key identifier for Go cache'
required: true
is-fork:
description: 'Whether this is a fork PR (uses pre-warmed Go module cache instead of JFrog)'
required: false
default: 'false'

runs:
using: 'composite'
Expand All @@ -13,20 +17,58 @@ runs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Setup JFrog
if: inputs.is-fork == 'false'
uses: ./.github/actions/setup-jfrog

# Fork path: restore the pre-warmed Go module cache instead of using JFrog.
# The cache is populated by the warmGoCache workflow and contains modules
# downloaded through JFrog on the main branch. See: .github/workflows/warmGoCache.yml
- name: Restore Go module cache (fork)
if: inputs.is-fork == 'true'
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: ~/go/pkg/mod/cache/download
key: go-modules-${{ runner.os }}-${{ hashFiles('go.sum') }}
restore-keys: go-modules-${{ runner.os }}-

- name: Configure Python to use PyPI (fork)
if: inputs.is-fork == 'true'
shell: bash
run: |
echo "UV_INDEX_URL=https://pypi.org/simple" >> "$GITHUB_ENV"
echo "PIP_INDEX_URL=https://pypi.org/simple" >> "$GITHUB_ENV"

- name: Create cache identifier
run: echo "${{ inputs.cache-key }}" > cache.txt
shell: bash

# Non-fork: use setup-go with built-in cache (backed by JFrog proxy)
- name: Setup Go
if: inputs.is-fork == 'false'
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
cache.txt

# Fork: setup-go without built-in cache; the warmer workflow manages the cache
- name: Setup Go (fork)
if: inputs.is-fork == 'true'
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
go-version-file: go.mod
cache: false

# Point Go at the restored module cache so no network access to JFrog is needed.
# Go's file:// proxy reads the same proxy-format layout that 'go mod download' writes.
- name: Configure Go to use pre-warmed module cache (fork)
if: inputs.is-fork == 'true'
shell: bash
run: |
echo "GOPROXY=file://$(go env GOMODCACHE)/cache/download,off" >> "$GITHUB_ENV"
echo "GONOSUMDB=*" >> "$GITHUB_ENV"

- name: Setup Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
Expand Down
157 changes: 149 additions & 8 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,27 @@ env:
GOTESTSUM_FORMAT: github-actions

jobs:
fork-check:
runs-on: ubuntu-latest
outputs:
is_fork: ${{ steps.check.outputs.is_fork }}
steps:
- name: Detect fork PR
id: check
run: |
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe can be simplied? Claude recommended:

  env:
    IS_FORK: ${{ github.event.pull_request.head.repo.fork }}
  run: echo "is_fork=${IS_FORK:-false}" >> "$GITHUB_OUTPUT"

if [ "${{ github.event.pull_request.head.repo.full_name }}" != "" ] && \
[ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]; then
echo "is_fork=true" >> "$GITHUB_OUTPUT"
else
echo "is_fork=false" >> "$GITHUB_OUTPUT"
fi

cleanups:
# Skip for fork PRs: the deco runner group is inaccessible to forks and
# cache cleanup is only meaningful on scheduled runs from the main branch.
if: >-
github.event_name != 'pull_request' ||
!github.event.pull_request.head.repo.fork
runs-on:
group: databricks-deco-testing-runner-group
labels: ubuntu-latest-deco
Expand Down Expand Up @@ -74,9 +94,11 @@ jobs:
needs:
- cleanups
- testmask
- fork-check

# Only run if the target is in the list of targets from testmask
if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test') }}
# Only run if the target is in the list of targets from testmask, and not a fork PR
# (fork PRs use the test-fork job below with public runners and no JFrog auth)
if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test') && needs.fork-check.outputs.is_fork != 'true' }}
name: "make test (${{matrix.os.name}}, ${{matrix.deployment}})"
runs-on: ${{ matrix.os.runner }}

Expand Down Expand Up @@ -161,9 +183,10 @@ jobs:
needs:
- cleanups
- testmask
- fork-check

# Only run if the target is in the list of targets from testmask
if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test-exp-aitools') }}
# Only run if the target is in the list of targets from testmask, and not a fork PR
if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test-exp-aitools') && needs.fork-check.outputs.is_fork != 'true' }}
name: "make test-exp-aitools (${{matrix.os.name}})"
runs-on: ${{ matrix.os.runner }}

Expand Down Expand Up @@ -207,9 +230,10 @@ jobs:
needs:
- cleanups
- testmask
- fork-check

# Only run if the target is in the list of targets from testmask
if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test-exp-ssh') }}
# Only run if the target is in the list of targets from testmask, and not a fork PR
if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test-exp-ssh') && needs.fork-check.outputs.is_fork != 'true' }}
name: "make test-exp-ssh (${{matrix.os.name}})"
runs-on: ${{ matrix.os.runner }}

Expand Down Expand Up @@ -252,9 +276,10 @@ jobs:
needs:
- cleanups
- testmask
- fork-check

# Only run if the target is in the list of targets from testmask
if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test-pipelines') }}
# Only run if the target is in the list of targets from testmask, and not a fork PR
if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test-pipelines') && needs.fork-check.outputs.is_fork != 'true' }}
name: "make test-pipelines (${{matrix.os.name}})"
runs-on: ${{ matrix.os.runner }}

Expand Down Expand Up @@ -293,6 +318,118 @@ jobs:
run: |
make test-pipelines

# Fork PR variants: run on ubuntu-latest with public Go/Python proxies.
# Protected runner groups and JFrog OIDC auth are not available for fork PRs.
# Each job mirrors its non-fork counterpart but uses a single OS and is-fork: 'true'.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

single OS

what single OS? why?


test-fork:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question - Can we reuse the same jobs as regular bit parametrize runner?

Ideally we would not have to maintain separate jobs, especially given that they are hard to test.

needs:
- fork-check
- testmask

if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test') && needs.fork-check.outputs.is_fork == 'true' }}
name: "make test (fork, ${{matrix.deployment}})"
runs-on: ubuntu-latest

strategy:
fail-fast: false
matrix:
deployment:
- "terraform"
- "direct"

steps:
- name: Checkout repository and submodules
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Setup build environment
uses: ./.github/actions/setup-build-environment
with:
cache-key: test-fork-${{ matrix.deployment }}
is-fork: 'true'

- name: Run tests without coverage
env:
ENVFILTER: DATABRICKS_BUNDLE_ENGINE=${{ matrix.deployment }}
run: make test

- name: Analyze slow tests
run: make slowest

- name: Check out.test.toml files are up to date
shell: bash
run: |
if ! git diff --exit-code; then
echo "ERROR: detected changed files in the repository; Most likely you have out.test.toml files that are out of date. Run 'make generate-out-test-toml' to update."
exit 1
fi

test-exp-aitools-fork:
needs:
- fork-check
- testmask

if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test-exp-aitools') && needs.fork-check.outputs.is_fork == 'true' }}
name: "make test-exp-aitools (fork)"
runs-on: ubuntu-latest

steps:
- name: Checkout repository and submodules
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Setup build environment
uses: ./.github/actions/setup-build-environment
with:
cache-key: test-exp-aitools-fork
is-fork: 'true'

- name: Run tests
run: make test-exp-aitools

test-exp-ssh-fork:
needs:
- fork-check
- testmask

if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test-exp-ssh') && needs.fork-check.outputs.is_fork == 'true' }}
name: "make test-exp-ssh (fork)"
runs-on: ubuntu-latest

steps:
- name: Checkout repository and submodules
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Setup build environment
uses: ./.github/actions/setup-build-environment
with:
cache-key: test-exp-ssh-fork
is-fork: 'true'

- name: Run tests
run: make test-exp-ssh

test-pipelines-fork:
needs:
- fork-check
- testmask

if: ${{ contains(fromJSON(needs.testmask.outputs.targets), 'test-pipelines') && needs.fork-check.outputs.is_fork == 'true' }}
name: "make test-pipelines (fork)"
runs-on: ubuntu-latest

steps:
- name: Checkout repository and submodules
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Setup build environment
uses: ./.github/actions/setup-build-environment
with:
cache-key: test-pipelines-fork
is-fork: 'true'

- name: Run tests
run: make test-pipelines

# This job groups the result of all the above test jobs.
# It is a required check, so it blocks auto-merge and the merge queue.
#
Expand All @@ -305,9 +442,13 @@ jobs:
test-result:
needs:
- test
- test-fork
- test-exp-aitools
- test-exp-aitools-fork
- test-exp-ssh
- test-exp-ssh-fork
- test-pipelines
- test-pipelines-fork

if: ${{ always() }}
name: test-result
Expand Down
85 changes: 85 additions & 0 deletions .github/workflows/warmGoCache.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
name: Warm Go Module Cache

# Pre-warms the Go module download cache using JFrog as the proxy.
# Fork PRs cannot mint OIDC tokens, so they cannot authenticate to JFrog directly.
# Instead, they restore this pre-warmed cache and use Go's file:// proxy in offline
# mode, which reads the same proxy-format layout that 'go mod download' writes.
#
# Cache is OS-scoped in GitHub Actions, so this job runs on Linux only because
# fork test jobs also run on Linux (ubuntu-latest).
#
# Gotcha — fork PR with new Go dependency:
# CI fails because the new module is not in the cache.
# Fix: Actions → Warm Go Module Cache → Run workflow → pr_number = <N>
# The warmer fetches go.mod/go.sum from the fork (no source code) and rebuilds
# the cache. Re-run the fork PR's CI after the warmer completes.

on:
push:
branches: [main]
paths:
- go.mod
- go.sum
schedule:
- cron: '0 6 * * *' # Daily — prevents 7-day GitHub cache eviction
workflow_dispatch:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this workflow require an approval from us? Claude flagged that historically there have been arbitary code execution vectors via things like this (e.g. if you use cgo, and then use compile time macros)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is triggered on main barcnh only so can't be executed from PR code

inputs:
pr_number:
description: 'Fork PR number to fetch go.mod/go.sum from. Leave empty to use main.'
required: false

permissions:
id-token: write
contents: read
pull-requests: read

jobs:
warm-cache:
runs-on:
group: databricks-protected-runner-group-large
labels: linux-ubuntu-latest-large

steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

# Sparse-checkout of go.mod/go.sum from the fork PR (no source code).
# This lets the warmer download any new dependencies the fork PR introduces
# without executing untrusted code from the fork.
- name: Fetch go.mod and go.sum from fork PR
if: inputs.pr_number != ''
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
PR_JSON=$(gh pr view ${{ inputs.pr_number }} \
--json headRepositoryOwner,headRepository,headRefName)
OWNER=$(echo "$PR_JSON" | jq -r '.headRepositoryOwner.login')
REPO=$(echo "$PR_JSON" | jq -r '.headRepository.name')
BRANCH=$(echo "$PR_JSON" | jq -r '.headRefName')
git fetch "https://github.com/${OWNER}/${REPO}.git" "${BRANCH}"
git checkout FETCH_HEAD -- go.mod go.sum

- name: Setup JFrog
uses: ./.github/actions/setup-jfrog

# Disable setup-go's built-in caching — the warmer is the sole cache writer.
- name: Setup Go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
go-version-file: go.mod
cache: false

# Download all modules (direct + indirect + test deps) through JFrog.
# This populates ~/go/pkg/mod/cache/download in Go's proxy wire format,
# which is what GOPROXY=file:// reads in fork PR runs.
- name: Download all Go module dependencies via JFrog
run: go mod download all

- name: Save Go module cache
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: ~/go/pkg/mod/cache/download
# Timestamp suffix makes the key unique so GitHub never rejects it as
# an immutable duplicate. restore-keys in fork runs prefix-match to
# find the latest cache for the current go.sum hash.
key: go-modules-${{ runner.os }}-${{ hashFiles('go.sum') }}-${{ github.run_id }}