Skip to content

Commit f1aab5c

Browse files
committed
New approach: let the LLVM auto-upgrade mechanism take care of selecting the correct barrier intrinsic.
1 parent 5cd35d5 commit f1aab5c

File tree

3 files changed

+17
-13
lines changed

3 files changed

+17
-13
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,9 @@ xcuserdata
240240
# NeoVim + clangd
241241
.cache
242242

243+
# CCLS
244+
.ccls-cache/
245+
243246
# Emacs
244247
tags
245248
TAGS

src/CodeGen_PTX_Dev.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -262,17 +262,9 @@ void CodeGen_PTX_Dev::visit(const Call *op) {
262262
auto fence_type_ptr = as_const_int(op->args[0]);
263263
internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n";
264264

265-
llvm::Function *barrier0 = module->getFunction("llvm.nvvm.barrier0");
266-
if (barrier0) {
267-
internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier0)\n";
268-
269-
builder->CreateCall(barrier0);
270-
} else {
271-
// Changed in LLVM 20.
272-
barrier0 = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all");
273-
internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier.cta.sync.aligned.all)\n";
274-
builder->CreateCall(barrier0, builder->getInt32(0));
275-
}
265+
llvm::Function *barrier = module->getFunction("halide_ptx_barrier0");
266+
internal_assert(barrier) << "Could not find PTX barrier intrinsic: halide_ptx_barrier0";
267+
builder->CreateCall(barrier);
276268
value = ConstantInt::get(i32_t, 0);
277269
return;
278270
}

src/runtime/ptx_dev.ll

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
declare void @llvm.nvvm.barrier0()
2-
declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32)
31
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
42
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
53
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
@@ -18,6 +16,17 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
1816
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
1917
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
2018

19+
20+
; We define our own wrapper function that calls the legacy barrier intrinsic declared below.
21+
; Thanks to this wrapper, it's automatically upgraded in the new LLVM to the
22+
; new name of the intrinsic.
23+
; Once we drop support for LLVM 20.1.5, we can call the new intrinsic directly.
24+
declare void @llvm.nvvm.barrier0()
25+
; Wrapper around the legacy llvm.nvvm.barrier0 intrinsic. Takes no arguments
; and returns nothing; its only effect is the barrier call itself.
;
; The wrapper is marked convergent and is deliberately NOT marked readnone:
; a barrier is a synchronization point, so declaring the function free of
; memory effects would permit the optimizer to reorder memory accesses across
; the call or to discard the call altogether.
define weak_odr void @halide_ptx_barrier0() nounwind uwtable convergent alwaysinline {
    call void @llvm.nvvm.barrier0()
    ret void
}
29+
2130
; Remove these two once the minimum required llvm version is 9.0
2231
declare float @llvm.nvvm.atomic.load.add.f32.p0f32(float*, float)
2332
declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double *, double)

0 commit comments

Comments
 (0)