Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions ci/tools/run-tests
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -20,6 +20,14 @@ fi

test_module=${1}

FREE_THREADING=""
PYTEST_PARALLEL_ARGS=()
if python -c 'import sys; assert not sys._is_gil_enabled()' 2> /dev/null; then
FREE_THREADING="-ft"
PYTEST_PARALLEL_ARGS=(--parallel-threads=4)
pip install pytest-run-parallel
fi

# For standard modes, install pathfinder up front (it is a direct dependency
# of bindings, and a transitive dependency of core). Nightly modes install
# all wheels together in a single pip call further below.
Expand All @@ -36,7 +44,7 @@ if [[ "${test_module}" == "pathfinder" ]]; then
"LD:${CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS} " \
"FH:${CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS} " \
"BC:${CUDA_PATHFINDER_TEST_FIND_NVIDIA_BITCODE_LIB_STRICTNESS}"
pytest -ra -s -v --durations=0 tests/ |& tee /tmp/pathfinder_test_log.txt
pytest -ra -s -v --durations=0 "${PYTEST_PARALLEL_ARGS[@]}" tests/ |& tee /tmp/pathfinder_test_log.txt
# Report the number of "INFO test_" lines (including zero)
# to support quick validations based on GHA log archives.
line_count=$(awk '/^INFO test_/ {count++} END {print count+0}' /tmp/pathfinder_test_log.txt)
Expand All @@ -51,21 +59,16 @@ elif [[ "${test_module}" == "bindings" ]]; then
pip install $(ls "${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl)[all] --group test
fi
echo "Running bindings tests"
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize "${PYTEST_PARALLEL_ARGS[@]}" tests/
if [[ "${SKIP_CYTHON_TEST}" == 0 ]]; then
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/cython
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize "${PYTEST_PARALLEL_ARGS[@]}" tests/cython
fi
popd
elif [[ "${test_module}" == "core" || "${test_module}" == nightly-* ]]; then
# Shared setup for core and nightly modes.
TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${CUDA_VER})"
CUDA_VER_MINOR="$(cut -d '.' -f 1-2 <<< "${CUDA_VER}")"

FREE_THREADING=""
if python -c 'import sys; assert not sys._is_gil_enabled()' 2> /dev/null; then
FREE_THREADING+="-ft"
fi

# Resolve bindings based on BINDINGS_SOURCE (set by env-vars):
# main/backport → local wheel from artifacts dir
# published → install from PyPI by version
Expand Down Expand Up @@ -106,11 +109,11 @@ elif [[ "${test_module}" == "core" || "${test_module}" == nightly-* ]]; then
echo "Installed packages before core tests:"
pip list
echo "Running core tests"
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize "${PYTEST_PARALLEL_ARGS[@]}" tests/
# Currently our CI always installs the latest bindings (from either major version).
# This is not compatible with the test requirements.
if [[ "${SKIP_CYTHON_TEST}" == 0 ]]; then
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/cython
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize "${PYTEST_PARALLEL_ARGS[@]}" tests/cython
fi
else
# Nightly optional-dependency testing.
Expand Down
83 changes: 82 additions & 1 deletion cuda_bindings/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import functools
import inspect
import pathlib
import sys
from contextlib import contextmanager
from importlib.metadata import PackageNotFoundError, distribution

import pytest
Expand All @@ -25,6 +28,84 @@
sys.path.insert(0, test_helpers_root)


def _parallel_threads_enabled(config):
parallel_threads = getattr(config.option, "parallel_threads", 0)
if parallel_threads == "auto":
return True
return parallel_threads is not None and int(parallel_threads) > 0


def pytest_configure(config):
    """Register the parallel-run plugin when parallel threads are requested.

    Nothing is registered when ``_parallel_threads_enabled(config)`` is false.
    """
    if not _parallel_threads_enabled(config):
        return
    plugin = _CudaBindingsParallelPlugin()
    config.pluginmanager.register(plugin, name="_cuda_bindings_parallel_plugin")


@contextmanager
def _thread_context():
    """Make a CUDA context available to the calling thread for a ``with`` block.

    Yields:
        A ``(device, ctx)`` pair. When ``cuCtxGetCurrent`` already reports a
        non-null context, that context is reused and the pair is
        ``(None, existing_ctx)``; nothing is created or destroyed in that case.
        Otherwise a new context is created on device 0 and destroyed when the
        block exits.

    Raises:
        AssertionError: if any of the driver calls on the creation/teardown
            path returns something other than ``CUDA_SUCCESS``.
    """
    # Defensive: if this worker thread already has an active context (e.g. from
    # double-wrapping), reuse it rather than pushing another one.
    # Note: fixtures never run on the test thread; this is purely a safety net.
    status, current = cuda.cuCtxGetCurrent()
    if status == cuda.CUresult.CUDA_SUCCESS and current and int(current) != 0:
        yield None, current
        return

    # cuInit(0) is idempotent; safe to call even if cuda_driver fixture already ran.
    (status,) = cuda.cuInit(0)
    assert status == cuda.CUresult.CUDA_SUCCESS

    status, dev = cuda.cuDeviceGet(0)
    assert status == cuda.CUresult.CUDA_SUCCESS

    status, new_ctx = cuda.cuCtxCreate(None, 0, dev)
    assert status == cuda.CUresult.CUDA_SUCCESS

    try:
        yield dev, new_ctx
    finally:
        # Always tear down the context created above, even if the body raised.
        (status,) = cuda.cuCtxDestroy(new_ctx)
        assert status == cuda.CUresult.CUDA_SUCCESS


def _wrap_worker_cuda_test(func):
if getattr(func, "_cuda_bindings_worker_cuda_wrapped", False):
return func

sig = inspect.signature(func)
wants_device = "device" in sig.parameters
wants_ctx = "ctx" in sig.parameters

@functools.wraps(func)
def wrapper(*args, **kwargs):
with _thread_context() as (device, ctx):
# device is None when reusing an existing context (defensive path);
# keep whatever the fixture provided in kwargs as-is.
if wants_device and device is not None:
kwargs["device"] = device
if wants_ctx:
kwargs["ctx"] = ctx
return func(*args, **kwargs)

wrapper._cuda_bindings_worker_cuda_wrapped = True
return wrapper


def _item_needs_thread_ctx(item):
fixturenames = getattr(item, "fixturenames", ())
# 'device' is present when the module-level ctx(device) autouse chain is
# active (test_cuda.py, test_kernelParams.py, nvml tests, …).
# 'driver' is present for test_cufile.py tests that use the local driver
# fixture; their local ctx() shadows the parent ctx(device) so 'device'
# does not appear in their fixture chain, but they still need a per-thread
# CUDA context for cuMemAlloc and similar calls made inside the test.
return "device" in fixturenames or "driver" in fixturenames


class _CudaBindingsParallelPlugin:
    """Pytest plugin that wraps selected test callables in a per-thread CUDA context."""

    @pytest.hookimpl(tryfirst=True)
    def pytest_collection_modifyitems(self, config, items):
        """Replace ``item.obj`` with its wrapped form for items that need it.

        Only items accepted by ``_item_needs_thread_ctx`` are touched; the
        others are left as collected. ``config`` is unused here.
        """
        # filter() is lazy, so each item is checked and then wrapped in turn.
        for test_item in filter(_item_needs_thread_ctx, items):
            test_item.obj = _wrap_worker_cuda_test(test_item.obj)


@pytest.fixture(scope="module")
def cuda_driver():
(err,) = cuda.cuInit(0)
Expand Down
1 change: 1 addition & 0 deletions cuda_bindings/tests/nvml/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def get_architecture_name(arch):


@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
@pytest.mark.thread_unsafe(reason="nvml init affects other threads")
def test_init_ref_count():
"""
Verifies that we can call NVML shutdown and init(2) multiple times, and that ref counting works
Expand Down
1 change: 1 addition & 0 deletions cuda_bindings/tests/test_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,7 @@ def test_cuda_mem_range_attr(device):


@pytest.mark.skipif(driverVersionLessThan(11040) or not supportsMemoryPool(), reason="Mempool for graphs not supported")
@pytest.mark.thread_unsafe(reason="used high memory can be higher if threaded.")
def test_cuda_graphMem_attr(device):
err, stream = cuda.cuStreamCreate(0)
assert err == cuda.CUresult.CUDA_SUCCESS
Expand Down
Loading
Loading