26 changes: 20 additions & 6 deletions pyproject.toml
@@ -46,8 +46,9 @@ megatron = [
     "transformer-engine==2.11.0",
     "transformer-engine-cu12==2.11.0",
     "transformer-engine-torch==2.11.0",
-    "megatron-core==0.15.2",
-    "megatron-bridge==0.2.0rc6",
+    "megatron-core==0.16.0rc0",
+    "pybind11>=2.13.6",
+    "megatron-bridge",
     "nvidia-ml-py==13.580.82",
     "ml-dtypes>=0.5.0 ; python_full_version < '3.13'",
 ]
@@ -122,14 +123,26 @@ required-version = ">=0.6.15"
 # Override numpy to <2.0 for compatibility with megatron-core in the training
 # environment. vLLM 0.15.1 pulls opencv-python-headless>=4.13 which wants
 # numpy>=2 on Python 3.9+, but megatron-core requires numpy<2.
-override-dependencies = ["transformer-engine>=2.11.0", "numpy<2"]
-# Keep apex build isolation enabled so uv can inject torch from
-# `extra-build-dependencies` during lock/sync on non-GPU client machines.
-no-build-isolation-package = ["transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
+override-dependencies = [
+    "transformer-engine==2.11.0",
+    "numpy<2",
+    # 0.5.0 only ships manylinux_2_39 wheels (no sdist), which fails on
+    # manylinux_2_35 hosts used by some dev/CI environments.
+    "nvidia-resiliency-ext<0.5",
+    # Keep flashinfer aligned with vLLM; Megatron's dev extra pins <0.6, but ART
+    # does not use flashinfer through Megatron runtime paths.
+    "flashinfer-python==0.6.1",
+    # Override unsloth's overly strict constraint on transformers; v5.x is
+    # confirmed working per unsloth's February 2026 release notes.
+    "transformers==5.2.0",
+]
+exclude-dependencies = ["pynvml", "emerging-optimizers"]
+no-build-isolation-package = ["apex", "transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
 
 [tool.uv.extra-build-dependencies]
 apex = ["torch>=2.8.0"]
 transformer-engine-torch = ["torch>=2.8.0"]
+megatron-core = ["pybind11"]
 
 [tool.uv.extra-build-variables]
 apex = { APEX_CPP_EXT = "1", APEX_CUDA_EXT = "1", APEX_FAST_LAYER_NORM = "1", APEX_PARALLEL_BUILD = "16", NVCC_APPEND_FLAGS = "--threads 4" }
@@ -210,3 +223,4 @@ dev = [
 [tool.uv.sources]
 panza = { git = "https://github.com/corbt/panza.git" }
 apex = { git = "https://github.com/NVIDIA/apex.git", branch = "25.09" }
+megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "75f2c5ad4afb702b57b4781a00f5291a66bcf183" }
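
With these overrides in place, the synced environment should resolve to the exact pins above. A minimal post-sync sanity check, assuming the distribution names taken from the override list (standard library only; anything not installed simply reports as missing):

```python
# Sanity check for the pins declared in override-dependencies above.
# Distribution names are copied from the override list; run this inside
# the environment produced by `uv sync`.
import importlib.metadata as md

PINS = {
    "numpy": "<2",
    "transformer-engine": "==2.11.0",
    "flashinfer-python": "==0.6.1",
    "transformers": "==5.2.0",
}

for dist, expected in PINS.items():
    try:
        print(f"{dist}: installed {md.version(dist)} (expected {expected})")
    except md.PackageNotFoundError:
        print(f"{dist}: not installed")
```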
1 change: 1 addition & 0 deletions src/art/megatron/provider.py
@@ -64,4 +64,5 @@ def _flex_attention_layer_spec(
     provider.moe_router_dtype = "fp32"
     if provider.tensor_model_parallel_size > 1:
         provider.sequence_parallel = True
+    provider.finalize()
     return provider
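
The added `provider.finalize()` call follows a configure-then-finalize pattern: fields are mutated one at a time, and a single finalize pass afterwards validates the final values before the provider is used. A self-contained sketch of that pattern, with toy names rather than Megatron-Bridge's real API (only the call ordering mirrors the change above):

```python
# Illustrative configure-then-finalize pattern; ToyProvider is a stand-in,
# not Megatron-Bridge's provider class.
from dataclasses import dataclass, field


@dataclass
class ToyProvider:
    tensor_model_parallel_size: int = 1
    sequence_parallel: bool = False
    _finalized: bool = field(default=False, repr=False)

    def finalize(self) -> None:
        # Validate settings that depend on the final field values, which is
        # why finalize() must come after the last assignment.
        if self.sequence_parallel and self.tensor_model_parallel_size == 1:
            raise ValueError("sequence_parallel requires tensor parallelism")
        self._finalized = True


provider = ToyProvider(tensor_model_parallel_size=2)
provider.sequence_parallel = True  # mutate after construction, as in the diff
provider.finalize()                # last step before returning the provider
```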