File tree Expand file tree Collapse file tree 3 files changed +17
-13
lines changed
Expand file tree Collapse file tree 3 files changed +17
-13
lines changed Original file line number Diff line number Diff line change @@ -240,6 +240,9 @@ xcuserdata
240240# NeoVim + clangd
241241.cache
242242
243+ # CCLS
244+ .ccls-cache/
245+
243246# Emacs
244247tags
245248TAGS
Original file line number Diff line number Diff line change @@ -262,17 +262,9 @@ void CodeGen_PTX_Dev::visit(const Call *op) {
262262 auto fence_type_ptr = as_const_int (op->args [0 ]);
263263 internal_assert (fence_type_ptr) << " gpu_thread_barrier() parameter is not a constant integer.\n " ;
264264
265- llvm::Function *barrier0 = module ->getFunction (" llvm.nvvm.barrier0" );
266- if (barrier0) {
267- internal_assert (barrier0) << " Could not find PTX barrier intrinsic (llvm.nvvm.barrier0)\n " ;
268-
269- builder->CreateCall (barrier0);
270- } else {
271- // Changed in LLVM 20.
272- barrier0 = module ->getFunction (" llvm.nvvm.barrier.cta.sync.aligned.all" );
273- internal_assert (barrier0) << " Could not find PTX barrier intrinsic (llvm.nvvm.barrier.cta.sync.aligned.all)\n " ;
274- builder->CreateCall (barrier0, builder->getInt32 (0 ));
275- }
265+ llvm::Function *barrier = module ->getFunction (" halide_ptx_barrier0" );
266+ internal_assert (barrier) << " Could not find PTX barrier intrinsic: halide_ptx_barrier0" ;
267+ builder->CreateCall (barrier);
276268 value = ConstantInt::get (i32_t , 0 );
277269 return ;
278270 }
Original file line number Diff line number Diff line change 1- declare void @llvm.nvvm.barrier0 ()
2- declare void @llvm.nvvm.barrier.cta.sync.aligned.all (i32 )
31declare i32 @llvm.nvvm.read.ptx.sreg.tid.x ()
42declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x ()
53declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x ()
@@ -18,6 +16,17 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
1816declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w ()
1917declare i32 @llvm.nvvm.read.ptx.sreg.warpsize ()
2018
19+
20+ ; Wrapper around the PTX barrier intrinsic declared just below. Calling the old
21+ ; name through this wrapper lets newer LLVM auto-upgrade it to the renamed intrinsic.
22+ ; It is convergent and not readnone: a barrier has side effects and must not be moved or dropped.
23+ ; Once we drop support for LLVM 20.1.5, we can call the new intrinsic directly ourselves.
24+ declare void @llvm.nvvm.barrier0 ()
25+ define weak_odr void @halide_ptx_barrier0 () nounwind uwtable alwaysinline convergent {
26+ call void @llvm.nvvm.barrier0 ()
27+ ret void
28+ }
29+
2130; Remove these two once the minimum required llvm version is 9.0
2231declare float @llvm.nvvm.atomic.load.add.f32.p0f32 (float *, float )
2332declare double @llvm.nvvm.atomic.load.add.f64.p0f64 (double *, double )
You can’t perform that action at this time.
0 commit comments