Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/build-wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,16 @@ jobs:
path: ${{ env.CUDA_CORE_CYTHON_TESTS_DIR }}/test_*${{ env.PY_EXT_SUFFIX }}
if-no-files-found: error

- name: Build cuda.core test binaries
run: bash ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/build_test_binaries.sh

- name: Upload cuda.core test binaries
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries
path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o
if-no-files-found: error

# Note: This overwrites CUDA_PATH etc
- name: Set up mini CTK
uses: ./.github/actions/fetch_ctk
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/test-wheel-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,19 @@ jobs:
pwd
ls -lahR $CUDA_CORE_CYTHON_TESTS_DIR

- name: Download cuda.core test binaries
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries
path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}
run-id: ${{ inputs.run-id || github.run_id }}
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: Display structure of downloaded cuda.core test binaries
run: |
pwd
ls -lahR $CUDA_CORE_TEST_BINARIES_DIR

- name: Set up Python ${{ matrix.PY_VER }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/test-wheel-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,19 @@ jobs:
Get-Location
Get-ChildItem -Recurse -Force $env:CUDA_CORE_CYTHON_TESTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName

- name: Download cuda.core test binaries
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries
path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}
run-id: ${{ inputs.run-id || github.run_id }}
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: Display structure of downloaded cuda.core test binaries
run: |
Get-Location
Get-ChildItem -Recurse -Force $env:CUDA_CORE_TEST_BINARIES_DIR | Select-Object Mode, LastWriteTime, Length, FullName

- name: Set up Python ${{ matrix.PY_VER }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ cache_driver
cache_runtime
cache_nvrtc

# cuda.core test object fixtures built locally / downloaded as CI artifacts
cuda_core/tests/test_binaries/*.o

# CUDA Python specific (auto-generated)
cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd
cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx
Expand Down
1 change: 1 addition & 0 deletions ci/tools/env-vars
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${HOST_
echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${SHA}"
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "${REPO_DIR}/cuda_core/dist")"
echo "CUDA_CORE_CYTHON_TESTS_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/cython")"
echo "CUDA_CORE_TEST_BINARIES_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/test_binaries")"
echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}"
} >> $GITHUB_ENV

Expand Down
14 changes: 14 additions & 0 deletions cuda_core/tests/test_binaries/build_test_binaries.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

# Build .o test fixtures. Invoked at CI build stage

SCRIPTPATH=$(dirname "$(realpath "$0")")

nvcc -dc -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu"

ls -lah "${SCRIPTPATH}/saxpy.o"
16 changes: 16 additions & 0 deletions cuda_core/tests/test_binaries/saxpy.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

#include <cuda/std/cstddef>

__device__ float saxpy_step(float a, float x, float y) {
return a * x + y;
}

template<typename T>
__global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) {
const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x;
for (size_t i = tid; i < N; i += gridDim.x * blockDim.x) {
out[tid] = a * x[tid] + y[tid];
}
}
93 changes: 92 additions & 1 deletion cuda_core/tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pytest

import cuda.core
from cuda.core import Device, Kernel, ObjectCode, Program, ProgramOptions
from cuda.core import Device, Kernel, Linker, LinkerOptions, ObjectCode, Program, ProgramOptions
from cuda.core._program import _can_load_generated_ptx
from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return
from cuda.core._utils.version import binding_version, driver_version
Expand Down Expand Up @@ -172,6 +172,36 @@ def get_saxpy_fatbin(init_cuda):
return bytes(fatbin), sym_map


@pytest.fixture(scope="module")
def get_saxpy_object():
"""Read the pre-built saxpy.o.

In CI: produced by build stage into a test wheel file.
In local dev: auto-built on demand if nvcc is available; if you edit
saxpy.cu, remove the stale saxpy.o to force a rebuild.
"""
import subprocess
from pathlib import Path

from cuda.pathfinder import find_nvidia_binary_utility

binaries_dir = Path(__file__).parent / "test_binaries"
obj_path = binaries_dir / "saxpy.o"

if not obj_path.is_file():
if find_nvidia_binary_utility("nvcc") is None:
pytest.skip(
f"saxpy.o not found at {obj_path} and nvcc is unavailable. "
"In CI this is downloaded from the build stage."
)
subprocess.run( # noqa: S603
["bash", str(binaries_dir / "build_test_binaries.sh")], # noqa: S607
check=True,
)

return obj_path.read_bytes()


def test_get_kernel(init_cuda):
kernel = """extern "C" __global__ void ABC() { }"""

Expand Down Expand Up @@ -330,6 +360,67 @@ def test_object_code_load_fatbin_from_file(get_saxpy_fatbin, tmp_path, convert_p
mod_obj.get_kernel("saxpy<double>") # force loading


def test_object_code_load_object(get_saxpy_object):
obj = get_saxpy_object
assert isinstance(obj, bytes)
mod_obj = ObjectCode.from_object(obj)
assert mod_obj.code == obj
assert mod_obj.code_type == "object"
with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'):
mod_obj.get_kernel("saxpy<float>")


def test_object_code_load_object_from_file(get_saxpy_object, tmp_path):
obj_file = tmp_path / "test.o"
obj_file.write_bytes(get_saxpy_object)
arg = str(obj_file)
mod_obj = ObjectCode.from_object(arg)
assert mod_obj.code == arg
assert mod_obj.code_type == "object"


def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda):
arch = "sm_" + "".join(f"{i}" for i in init_cuda.compute_capability)
kernel_ptx = Program(
r"""
extern __device__ float saxpy_step(float a, float x, float y);
extern "C" __global__ void linked_kernel(float a, float x, float y, float* out) {
if (threadIdx.x == 0 && blockIdx.x == 0) *out = saxpy_step(a, x, y);
}
""",
"c++",
ProgramOptions(relocatable_device_code=True, arch=arch),
).compile("ptx")
linked = Linker(
kernel_ptx,
ObjectCode.from_object(get_saxpy_object),
options=LinkerOptions(arch=arch),
).link("cubin")
kernel = linked.get_kernel("linked_kernel")

import numpy as np

stream = init_cuda.create_stream()
host_buf = cuda.core.LegacyPinnedMemoryResource().allocate(4)
result = np.from_dlpack(host_buf).view(np.float32)
result[:] = 0.0
dev_buf = init_cuda.memory_resource.allocate(4, stream=init_cuda.default_stream)

cuda.core.launch(
stream,
cuda.core.LaunchConfig(grid=1, block=1),
kernel,
np.float32(2.0),
np.float32(3.0),
np.float32(4.0),
dev_buf,
)
dev_buf.copy_to(host_buf, stream=stream)
stream.sync()

assert result[0] == 10.0


def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check):
krn, _ = get_saxpy_kernel_cubin

Expand Down
Loading