-
Notifications
You must be signed in to change notification settings - Fork 14
222 lines (192 loc) · 7.39 KB
/
wheel.yml
File metadata and controls
222 lines (192 loc) · 7.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# Builds the CUDA wheels, deploys a test app to Modal, runs the Modal test
# suites against it, and drafts a GitHub release from the built wheels.
name: Build Wheels

on:
  workflow_dispatch:
  push:
    branches:
      - master
      - "release-*"
      - dev
  pull_request:
    branches:
      - master
      - dev

# Least-privilege default for GITHUB_TOKEN. A job that needs more (e.g. one
# that creates a release) must declare its own `permissions` block.
permissions:
  contents: read

jobs:
wheel:
  # Builds one wheel per matrix entry inside the matching NVIDIA CUDA devel
  # image, repairs it with auditwheel and uploads it as `wheel-<wheel_tag>`.
  name: Build wheel (CUDA ${{ matrix.cuda_version }})
  runs-on: ubuntu-24.04
  container:
    image: ${{ matrix.cuda_image }}
  strategy:
    matrix:
      include:
        - cuda_version: "12.8"
          cuda_image: "nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04"
          wheel_tag: "cu128"
          archs: "89;90;100;120"
          runtime_deps: |
            nvidia-cuda-runtime-cu12>=12.8
            nvidia-cudnn-cu12>=9.0
            nvidia-cufile-cu12>=1.10
            nvidia-cublas-cu12>=12.8
            nvidia-nccl-cu12>=2.0
        - cuda_version: "13.0"
          cuda_image: "nvidia/cuda:13.0.1-cudnn-devel-ubuntu22.04"
          wheel_tag: "cu130"
          archs: "89;90;100f;120f"
          runtime_deps: |
            cuda-toolkit[cudart,cublas,cufile]>=13.0
            nvidia-cudnn-cu13>=9.0
            nvidia-nccl-cu13>=2.0
  env:
    CMAKE_GENERATOR: Ninja
    CMAKE_C_COMPILER: gcc-12
    CMAKE_CXX_COMPILER: g++-12
    # Route all three compilers through ccache (restored by the ccache step).
    CMAKE_C_COMPILER_LAUNCHER: ccache
    CMAKE_CXX_COMPILER_LAUNCHER: ccache
    CMAKE_CUDA_COMPILER_LAUNCHER: ccache
    CUDAARCHS: ${{ matrix.archs }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # apt-get rather than apt: apt's CLI is not meant for use in scripts.
    - name: Install deps
      run: apt-get update && apt-get install -y git g++-12

    - name: ccache
      uses: hendrikmuhs/ccache-action@v1.2
      with:
        key: wheel-${{ matrix.wheel_tag }}

    - name: Install the latest version of uv
      uses: astral-sh/setup-uv@v7

    # We cannot statically declare the CUDA dependencies in pyproject.toml,
    # so we have to patch them in here.
    # The matrix values are handed over through the environment instead of
    # being pasted into the script text, so the multi-line dependency list
    # reaches the script as one intact argument regardless of its contents.
    - name: Add CUDA runtime dependencies
      env:
        RUNTIME_DEPS: ${{ matrix.runtime_deps }}
        WHEEL_TAG: ${{ matrix.wheel_tag }}
      run: uv run --no-project --with tomlkit python3 .github/scripts/add_cuda_deps.py "$RUNTIME_DEPS" "$WHEEL_TAG"

    - name: Build wheel
      run: uv build --wheel

    # The --exclude list names shared libraries that auditwheel must not
    # bundle into the repaired wheel.
    - name: Repair wheel with auditwheel
      run: |
        uv run --no-project --with auditwheel --with patchelf \
          auditwheel repair dist/*.whl -w wheelhouse/ \
          --exclude libcuda.so.1 \
          --exclude libcudart.so.12 \
          --exclude libcudart.so.13 \
          --exclude libcudnn.so.9 \
          --exclude libcufile.so.0 \
          --exclude libnccl.so.2 \
          --exclude libcublasLt.so.12 \
          --exclude libcublasLt.so.13 \
          --exclude libnvidia-ml.so.1
        # Remove non-repaired wheel
        rm dist/*-linux_*.whl

    - name: Upload wheel
      uses: actions/upload-artifact@v4
      with:
        name: wheel-${{ matrix.wheel_tag }}
        path: wheelhouse/pyllmq*.whl
deploy-modal:
  # Deploys the Modal test app after the wheel build has finished.
  name: Deploy to Modal
  needs: wheel
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Only the `wheel-cu128` artifact is fetched for the deployment.
    - name: Download wheel artifact
      uses: actions/download-artifact@v4
      with:
        name: wheel-cu128
        path: wheelhouse/

    - name: Install Modal
      run: pip install modal

    # The secrets are passed through the step environment and expanded as
    # quoted shell variables, so they are never pasted into the script text
    # and cannot be word-split or glob-expanded by the shell.
    - name: Set Modal token
      env:
        MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
        MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
      run: modal token set --token-id "$MODAL_TOKEN_ID" --token-secret "$MODAL_TOKEN_SECRET"

    - name: Deploy to Modal
      run: modal deploy scripts/modal_test_app.py
test-modal-recompute:
  # These tests verify that recomputation options do not change the results
  # at all. Every recompute entry is run once per dtype entry.
  name: Recompute - ${{ matrix.recompute.name }} - ${{ matrix.dtype.name }}
  needs: deploy-modal
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    max-parallel: 3
    matrix:
      recompute:
        - name: "Determinism test"
          args: ""
        - name: "Full Block"
          args: "--recompute-block --offload-residual --use-cuda-graphs"
        - name: "Blockwise"
          args: "--recompute-att --recompute-ffn"
        - name: "Non-linearities"
          args: "--recompute-swiglu --recompute-norm"
        - name: "Offload Opt"
          args: "--offload-opt-m --offload-opt-v --offload-master"
        - name: "Offload Gradient"
          args: "--shard-gradients --offload-grads"
        # While not strictly a recomputation, chunked attention should be
        # bitwise identical, too.
        - name: "Chunked attention"
          args: "--recompute-att --attn-bwd-chunks=2"
      dtype:
        - name: "BF16"
          args: "--matmul-dtype=bf16"
        - name: "FP8"
          args: "--matmul-dtype=e4m3"
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Note: No need to download the wheel again, it's already in the
    # deployed image.
    - name: Install Modal
      run: pip install modal

    # The secrets are passed through the step environment and expanded as
    # quoted shell variables, so they are never pasted into the script text
    # and cannot be word-split or glob-expanded by the shell.
    - name: Set Modal token
      env:
        MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
        MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
      run: modal token set --token-id "$MODAL_TOKEN_ID" --token-secret "$MODAL_TOKEN_SECRET"

    # The matrix args are left unquoted on purpose: each entry holds several
    # space-separated flags that must reach the script as separate arguments.
    - name: Run test on Modal
      run: python3 scripts/modal_test_ci.py recompute ${{ matrix.recompute.args }} ${{ matrix.dtype.args }}
test-modal-fixed:
  # These tests run a few steps and compare the resulting losses and norms
  # against a known, fixed reference. A failure in these tests doesn't
  # necessarily mean that the code is broken, but it indicates that the
  # changes need to be *carefully* reviewed, and tested end-to-end, before
  # the reference is updated.
  name: Test fixed - ${{ matrix.config.name }}
  needs: deploy-modal
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    max-parallel: 3
    matrix:
      config:
        - name: "LLMQ BF16"
          args: "fixed bf16"
        - name: "LLMQ FP8"
          args: "fixed e4m3"
        - name: "LLMQ E5M2"
          args: "fixed e5m2"
        - name: "Torch BF16 ga=1"
          args: "torch-step --grad-accum 1 --model-dtype bf16 --matmul-dtype bf16"
        - name: "Torch BF16 ga=4"
          args: "torch-step --grad-accum 4 --model-dtype bf16 --matmul-dtype bf16"
        - name: "Torch FP32"
          args: "torch-step --grad-accum 2 --model-dtype fp32 --matmul-dtype fp32"
        - name: "Torch AMP"
          args: "torch-step --grad-accum 2 --model-dtype fp32 --matmul-dtype bf16"
        - name: "Torch Chunking"
          args: "torch-step --grad-accum 4 --model-dtype bf16 --matmul-dtype bf16 --lmhead-chunks 4"
        - name: "Torch BF16 Custom Matmul"
          args: "torch-step --grad-accum 1 --model-dtype bf16 --matmul-dtype bf16 --custom-matmul"
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install Modal
      run: pip install modal

    # The secrets are passed through the step environment and expanded as
    # quoted shell variables, so they are never pasted into the script text
    # and cannot be word-split or glob-expanded by the shell.
    - name: Set Modal token
      env:
        MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
        MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
      run: modal token set --token-id "$MODAL_TOKEN_ID" --token-secret "$MODAL_TOKEN_SECRET"

    # The matrix args are left unquoted on purpose: each entry holds a
    # sub-command plus flags that must reach the script as separate arguments.
    - name: Run test on Modal
      run: python3 scripts/modal_test_ci.py ${{ matrix.config.args }}
release:
  # Drafts a GitHub release containing every built wheel. Runs only for
  # manual dispatches, `release-*` branches or tag refs, and only after both
  # Modal test jobs have succeeded.
  if: >-
    github.event_name == 'workflow_dispatch'
    || startsWith(github.ref, 'refs/heads/release-')
    || startsWith(github.ref, 'refs/tags/')
  needs:
    - test-modal-recompute
    - test-modal-fixed
  runs-on: ubuntu-latest
  permissions:
    # Needed to create the release and attach assets to it.
    contents: write
  steps:
    # Collect all `wheel-*` artifacts into one flat directory.
    - name: Download all wheels
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-*
        path: wheelhouse/
        merge-multiple: true

    # v2 is the maintained major of this action; the inputs used here are
    # unchanged from v1.
    # NOTE(review): tag_name is the triggering ref's short name, which is a
    # branch name on branch or manual runs. Confirm that is intended.
    - name: Create Release
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ github.ref_name }}
        files: wheelhouse/*.whl
        generate_release_notes: true
        draft: true