From 09291e659904ca67566c819c90422b1b1e2432a8 Mon Sep 17 00:00:00 2001 From: Martijn Courteaux Date: Thu, 22 May 2025 21:46:01 +0200 Subject: [PATCH 1/8] Attempt to fix the barrier0 intrinsic in LLVM 20. --- src/CodeGen_PTX_Dev.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 14e5234d2038..7d2bf42445d6 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -263,9 +263,17 @@ void CodeGen_PTX_Dev::visit(const Call *op) { internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n"; llvm::Function *barrier0 = module->getFunction("llvm.nvvm.barrier0"); - internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier0)\n"; - builder->CreateCall(barrier0); - value = ConstantInt::get(i32_t, 0); + if (barrier0) { + internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier0)\n"; + + builder->CreateCall(barrier0); + value = ConstantInt::get(i32_t, 0); + } else { + // Changed in LLVM 20. + barrier0 = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all"); + internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier.cta.sync.aligned.all)\n"; + builder->CreateCall(barrier0, builder->getInt32(0)); + } return; } From 98b2f011d4e3053d9b29b34811e16faf4ecc96c4 Mon Sep 17 00:00:00 2001 From: Martijn Courteaux Date: Thu, 22 May 2025 22:50:15 +0200 Subject: [PATCH 2/8] Attempt to fix LLVM barrier for PTX. 
--- src/runtime/ptx_dev.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/ptx_dev.ll b/src/runtime/ptx_dev.ll index 9cefaa53ec5b..8e179d4d6b12 100644 --- a/src/runtime/ptx_dev.ll +++ b/src/runtime/ptx_dev.ll @@ -1,4 +1,5 @@ declare void @llvm.nvvm.barrier0() +declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32) declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() From 5cd35d5b5f2dc092ffc08fb46fcaf8136ad9e9e6 Mon Sep 17 00:00:00 2001 From: Martijn Courteaux Date: Thu, 22 May 2025 23:07:21 +0200 Subject: [PATCH 3/8] Small fix. --- src/CodeGen_PTX_Dev.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 7d2bf42445d6..dfc8d1a2eb91 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -267,13 +267,13 @@ void CodeGen_PTX_Dev::visit(const Call *op) { internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier0)\n"; builder->CreateCall(barrier0); - value = ConstantInt::get(i32_t, 0); } else { // Changed in LLVM 20. 
barrier0 = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all"); internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier.cta.sync.aligned.all)\n"; builder->CreateCall(barrier0, builder->getInt32(0)); } + value = ConstantInt::get(i32_t, 0); return; } From 9cd8ddab8ecc2be0a761370740703429d8bb2f5f Mon Sep 17 00:00:00 2001 From: Martijn Courteaux Date: Thu, 22 May 2025 23:31:07 +0200 Subject: [PATCH 4/8] Cleanup --- src/CodeGen_PTX_Dev.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index dfc8d1a2eb91..108b574d2665 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -262,15 +262,13 @@ void CodeGen_PTX_Dev::visit(const Call *op) { auto fence_type_ptr = as_const_int(op->args[0]); internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n"; - llvm::Function *barrier0 = module->getFunction("llvm.nvvm.barrier0"); - if (barrier0) { - internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier0)\n"; - + llvm::Function *barrier = module->getFunction("llvm.nvvm.barrier0"); + if (barrier) { builder->CreateCall(barrier0); } else { - // Changed in LLVM 20. 
- barrier0 = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all"); - internal_assert(barrier0) << "Could not find PTX barrier intrinsic (llvm.nvvm.barrier.cta.sync.aligned.all)\n"; + // Changed in LLVM 20: https://github.com/llvm/llvm-project/pull/140615 + barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all"); + internal_assert(barrier) << "Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n"; builder->CreateCall(barrier0, builder->getInt32(0)); } value = ConstantInt::get(i32_t, 0); From c9649201ac7489c0e6c72d6b849e2c2cd6272e68 Mon Sep 17 00:00:00 2001 From: Martijn Courteaux Date: Thu, 22 May 2025 23:46:57 +0200 Subject: [PATCH 5/8] Clearer code flow, and deprecation note for the future. --- src/CodeGen_PTX_Dev.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 108b574d2665..2199513664aa 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -262,14 +262,15 @@ void CodeGen_PTX_Dev::visit(const Call *op) { auto fence_type_ptr = as_const_int(op->args[0]); internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n"; - llvm::Function *barrier = module->getFunction("llvm.nvvm.barrier0"); - if (barrier) { + llvm::Function *barrier; + if (barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all")) { + // LLVM 20 and above: https://github.com/llvm/llvm-project/pull/140615 + builder->CreateCall(barrier0, builder->getInt32(0)); + } else if (barrier = module->getFunction("llvm.nvvm.barrier0")) { + // LLVM 19: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 19 builder->CreateCall(barrier0); } else { - // Changed in LLVM 20: https://github.com/llvm/llvm-project/pull/140615 - barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all"); - internal_assert(barrier) << "Could not find PTX barrier intrinsic 
llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n"; - builder->CreateCall(barrier0, builder->getInt32(0)); + internal_error << "Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n"; } value = ConstantInt::get(i32_t, 0); return; From c84e8d3c65e47073e487c05826b83827e6070511 Mon Sep 17 00:00:00 2001 From: Martijn Courteaux Date: Thu, 22 May 2025 23:52:38 +0200 Subject: [PATCH 6/8] Fix typo --- src/CodeGen_PTX_Dev.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 2199513664aa..e44439413528 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -265,10 +265,10 @@ void CodeGen_PTX_Dev::visit(const Call *op) { llvm::Function *barrier; if (barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all")) { // LLVM 20 and above: https://github.com/llvm/llvm-project/pull/140615 - builder->CreateCall(barrier0, builder->getInt32(0)); + builder->CreateCall(barrier, builder->getInt32(0)); } else if (barrier = module->getFunction("llvm.nvvm.barrier0")) { // LLVM 19: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 19 - builder->CreateCall(barrier0); + builder->CreateCall(barrier); } else { internal_error << "Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n"; } From 9499444beccc344c5716365d54407346687ed2f0 Mon Sep 17 00:00:00 2001 From: Martijn Courteaux Date: Fri, 23 May 2025 01:17:07 +0200 Subject: [PATCH 7/8] Please the compiler. 
--- src/CodeGen_PTX_Dev.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index e44439413528..77c1bd765e42 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -263,10 +263,10 @@ void CodeGen_PTX_Dev::visit(const Call *op) { internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n"; llvm::Function *barrier; - if (barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all")) { + if ((barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all"))) { // LLVM 20 and above: https://github.com/llvm/llvm-project/pull/140615 builder->CreateCall(barrier, builder->getInt32(0)); - } else if (barrier = module->getFunction("llvm.nvvm.barrier0")) { + } else if ((barrier = module->getFunction("llvm.nvvm.barrier0"))) { // LLVM 19: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 19 builder->CreateCall(barrier); } else { From 3daccd7af8b597a4d29d755dbcbf8b5de3150ab6 Mon Sep 17 00:00:00 2001 From: Martijn Courteaux Date: Sat, 24 May 2025 11:24:21 +0200 Subject: [PATCH 8/8] Use getIntrinsicID to test which one is actually the working one. 
--- src/CodeGen_PTX_Dev.cpp | 8 ++++---- src/runtime/ptx_dev.ll | 10 ++++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/CodeGen_PTX_Dev.cpp b/src/CodeGen_PTX_Dev.cpp index 77c1bd765e42..17f9a5a34c79 100644 --- a/src/CodeGen_PTX_Dev.cpp +++ b/src/CodeGen_PTX_Dev.cpp @@ -263,11 +263,11 @@ void CodeGen_PTX_Dev::visit(const Call *op) { internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n"; llvm::Function *barrier; - if ((barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all"))) { - // LLVM 20 and above: https://github.com/llvm/llvm-project/pull/140615 + if ((barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all")) && barrier->getIntrinsicID() != 0) { + // LLVM 20.1.6 and above: https://github.com/llvm/llvm-project/pull/140615 builder->CreateCall(barrier, builder->getInt32(0)); - } else if ((barrier = module->getFunction("llvm.nvvm.barrier0"))) { - // LLVM 19: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 19 + } else if ((barrier = module->getFunction("llvm.nvvm.barrier0")) && barrier->getIntrinsicID() != 0) { + // LLVM 20.1.5 and below: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 20 builder->CreateCall(barrier); } else { internal_error << "Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n"; diff --git a/src/runtime/ptx_dev.ll b/src/runtime/ptx_dev.ll index 8e179d4d6b12..e29574c74e91 100644 --- a/src/runtime/ptx_dev.ll +++ b/src/runtime/ptx_dev.ll @@ -1,5 +1,11 @@ -declare void @llvm.nvvm.barrier0() -declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32) +; The two forward declared intrinsics below refer to the same thing. 
+; LLVM 20.1.6 introduced a new naming scheme for these intrinsics. +; We have to declare both, so that we can access them from the Module's +; getFunction(), but one of those will map to an intrinsic, which we +; will use to determine which intrinsic is supported by LLVM. +declare void @llvm.nvvm.barrier0() ; LLVM <=20.1.5 +declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32) ; LLVM >=20.1.6 + declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()