Skip to content

Commit 33b06cd

Browse files
committed
New approach: let the LLVM auto-upgrade mechanism take care of selecting the correct barrier intrinsic.
1 parent 9499444 commit 33b06cd

File tree

3 files changed

+18
-13
lines changed

3 files changed

+18
-13
lines changed

cmake/FindHalide_LLVM.cmake

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ if (LLVM_FOUND)
4040
# fallback locations.
4141
find_package(
4242
Clang
43+
${LLVM_PACKAGE_VERSION}
4344
HINTS
4445
"${LLVM_INSTALL_PREFIX}" # Same root as the LLVM we found
4546
"${LLVM_DIR}/../clang" # LLVM found in $ROOT/lib/cmake/llvm
@@ -51,7 +52,9 @@ if (LLVM_FOUND)
5152
set(Halide_LLVM_${comp}_FOUND 0)
5253

5354
find_package(
54-
LLD HINTS
55+
LLD
56+
${LLVM_PACKAGE_VERSION}
57+
HINTS
5558
"${LLVM_INSTALL_PREFIX}"
5659
# Homebrew split the LLVM and LLD packages as of version 19, so
5760
# having multiple LLVM versions installed leads to the newest

src/CodeGen_PTX_Dev.cpp

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -262,16 +262,9 @@ void CodeGen_PTX_Dev::visit(const Call *op) {
262262
auto fence_type_ptr = as_const_int(op->args[0]);
263263
internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n";
264264

265-
llvm::Function *barrier;
266-
if ((barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all"))) {
267-
// LLVM 20 and above: https://github.com/llvm/llvm-project/pull/140615
268-
builder->CreateCall(barrier, builder->getInt32(0));
269-
} else if ((barrier = module->getFunction("llvm.nvvm.barrier0"))) {
270-
// LLVM 19: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 19
271-
builder->CreateCall(barrier);
272-
} else {
273-
internal_error << "Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n";
274-
}
265+
llvm::Function *barrier = module->getFunction("halide_ptx_barrier0");
266+
internal_assert(barrier) << "Could not find PTX barrier intrinsic: halide_ptx_barrier0";
267+
builder->CreateCall(barrier);
275268
value = ConstantInt::get(i32_t, 0);
276269
return;
277270
}

src/runtime/ptx_dev.ll

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
declare void @llvm.nvvm.barrier0()
2-
declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32)
31
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
42
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
53
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
@@ -18,6 +16,17 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
1816
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
1917
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
2018

19+
20+
; We define our own wrapper function that calls the intrinsic declared below.
21+
; Thanks to this wrapper, the call is automatically upgraded by newer LLVM versions to the
22+
; new name of the intrinsic.
23+
; Once we drop support for LLVM 20.1.5, we can call the new intrinsic directly instead of going through this wrapper.
24+
declare void @llvm.nvvm.barrier0()
25+
define weak_odr void @halide_ptx_barrier0() nounwind uwtable readnone alwaysinline {
26+
call void @llvm.nvvm.barrier0()
27+
ret void
28+
}
29+
2130
; Remove these two once the minimum required llvm version is 9.0
2231
declare float @llvm.nvvm.atomic.load.add.f32.p0f32(float*, float)
2332
declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double *, double)

0 commit comments

Comments
 (0)