From 0bafbf50bf25639408fb2f52d3fc737603ad5915 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Tue, 3 Mar 2026 13:47:55 -0800 Subject: [PATCH 01/12] first attempt --- test/WaveOps/WaveActiveAllEqual.Wave128.test | 351 +++++++++++++++++++ test/WaveOps/WaveActiveAllEqual.Wave32.test | 261 ++++++++++++++ test/WaveOps/WaveActiveAllEqual.Wave64.test | 291 +++++++++++++++ 3 files changed, 903 insertions(+) create mode 100644 test/WaveOps/WaveActiveAllEqual.Wave128.test create mode 100644 test/WaveOps/WaveActiveAllEqual.Wave32.test create mode 100644 test/WaveOps/WaveActiveAllEqual.Wave64.test diff --git a/test/WaveOps/WaveActiveAllEqual.Wave128.test b/test/WaveOps/WaveActiveAllEqual.Wave128.test new file mode 100644 index 000000000..f2983ef78 --- /dev/null +++ b/test/WaveOps/WaveActiveAllEqual.Wave128.test @@ -0,0 +1,351 @@ +#--- source.hlsl +StructuredBuffer In1 : register(t0); +StructuredBuffer In2 : register(t1); +StructuredBuffer InVec1 : register(t2); +StructuredBuffer InVec2 : register(t3); +RWStructuredBuffer Out1 : register(u4); +RWStructuredBuffer Out2 : register(u5); +RWStructuredBuffer Out3 : register(u6); +RWStructuredBuffer Out4 : register(u7); +RWStructuredBuffer Out5 : register(u8); +RWStructuredBuffer Out6 : register(u9); + +[WaveSize(128)] +[numthreads(128, 1, 1)] +void main(uint3 TID : SV_DispatchThreadID) { + // Test 1: only even threads are active at the call site and they all read + // the value 1, so expect true on even threads; odd threads keep false. + bool Result1 = false; + if (TID.x % 2 == 0) + Result1 = WaveActiveAllEqual(In1[TID.x]); + + Out1[TID.x] = Result1; + + // Test 2: same as test 1, but In2[0] is 2 while the other even slots hold 1, + // so the active values differ. Expect false on every thread. + bool Result2 = false; + if (TID.x % 2 == 0) + Result2 = WaveActiveAllEqual(In2[TID.x]); + + Out2[TID.x] = Result2; + + // Test 3: same as test 2, but thread 0 (the only one reading the 2) is + // excluded. Expect true on even threads other than thread 0. 
+ bool Result3 = false; + if (TID.x % 2 == 0 && TID.x != 0) + Result3 = WaveActiveAllEqual(In2[TID.x]); + + Out3[TID.x] = Result3; + + // Test 4: every thread reads the identical vector (1, 0) from InVec1. + // Expect [true, true] on every thread. + bool2 Result4 = WaveActiveAllEqual(InVec1[TID.x]); + Out4[TID.x] = Result4; + + // Test 5: non-identical vectors are compared per component. + // Expect [false, true] on every thread. + // Thread 0 reads (2, 0) from InVec2 while the rest read (1, 0): the 1st + // components differ, but the 2nd component is identical everywhere, so + // only the 2nd component of the bool2 result is true. + bool2 Result5 = WaveActiveAllEqual(InVec2[TID.x]); + Out5[TID.x] = Result5; + + // Test 6: with thread 0 excluded, the active threads all read (1, 0). + // Expect [true, true] on even threads other than thread 0; the rest keep false. + bool2 Result6 = false; + if (TID.x %2 == 0 && TID.x != 0) + Result6 = WaveActiveAllEqual(InVec2[TID.x]); + + Out6[TID.x] = Result6; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In1 + Format: Int32 + Stride: 4 + Data: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: In2 + Format: Int32 + Stride: 4 + Data: [2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: InVec1 + Format: Int32 + Stride: 8 + Data: [1, 0, 1, 0, 1, 0, 1, 0, 
1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: InVec2 + Format: Int32 + Stride: 8 + Data: [2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: Out1 + Format: Bool + Stride: 4 + FillSize: 512 + - Name: Out2 + Format: Bool + Stride: 4 + FillSize: 512 + - Name: Out3 + Format: Bool + Stride: 4 + FillSize: 512 + - Name: Out4 + Format: Bool + Stride: 8 + FillSize: 1024 + - Name: Out5 + Format: Bool + Stride: 8 + FillSize: 1024 + - Name: Out6 + Format: Bool + Stride: 8 + FillSize: 1024 + - Name: ExpectedOut1 
+ Format: Bool + Stride: 4 + Data: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: ExpectedOut2 + Format: Bool + Stride: 4 + Data: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + - Name: ExpectedOut3 + Format: Bool + Stride: 4 + Data: [0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: ExpectedOut4 + Format: Bool + Stride: 8 + Data: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + - Name: ExpectedOut5 + Format: Bool + Stride: 8 + Data: [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1] + - Name: ExpectedOut6 + Format: Bool + Stride: 8 + Data: [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] + + +Results: + - Result: Test1 + Rule: BufferExact + Actual: Out1 + Expected: 
ExpectedOut1 + - Result: Test2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: Test3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: Test4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: Test5 + Rule: BufferExact + Actual: Out5 + Expected: ExpectedOut5 + - Result: Test6 + Rule: BufferExact + Actual: Out6 + Expected: ExpectedOut6 + +DescriptorSets: + - Resources: + - Name: In1 + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: In2 + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: InVec1 + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: InVec2 + Kind: StructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: Out5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: Out6 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 +... 
+#--- end + +# REQUIRES: WaveSize_128 + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_6 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file diff --git a/test/WaveOps/WaveActiveAllEqual.Wave32.test b/test/WaveOps/WaveActiveAllEqual.Wave32.test new file mode 100644 index 000000000..adfa37195 --- /dev/null +++ b/test/WaveOps/WaveActiveAllEqual.Wave32.test @@ -0,0 +1,261 @@ +#--- source.hlsl +StructuredBuffer In1 : register(t0); +StructuredBuffer In2 : register(t1); +StructuredBuffer InVec1 : register(t2); +StructuredBuffer InVec2 : register(t3); +RWStructuredBuffer Out1 : register(u4); +RWStructuredBuffer Out2 : register(u5); +RWStructuredBuffer Out3 : register(u6); +RWStructuredBuffer Out4 : register(u7); +RWStructuredBuffer Out5 : register(u8); +RWStructuredBuffer Out6 : register(u9); + +[WaveSize(32)] +[numthreads(32, 1, 1)] +void main(uint3 TID : SV_DispatchThreadID) { + // Test 1: only even threads are active at the call site and they all read + // the value 1, so expect true on even threads; odd threads keep false. + bool Result1 = false; + if (TID.x % 2 == 0) + Result1 = WaveActiveAllEqual(In1[TID.x]); + + Out1[TID.x] = Result1; + + // Test 2: same as test 1, but In2[0] is 2 while the other even slots hold 1, + // so the active values differ. Expect false on every thread. + bool Result2 = false; + if (TID.x % 2 == 0) + Result2 = WaveActiveAllEqual(In2[TID.x]); + + Out2[TID.x] = Result2; + + // Test 3: same as test 2, but thread 0 (the only one reading the 2) is + // excluded. Expect true on even threads other than thread 0. + bool Result3 = false; + if (TID.x % 2 == 0 && TID.x != 0) + Result3 = WaveActiveAllEqual(In2[TID.x]); + + Out3[TID.x] = Result3; + + // Test 4: every thread reads the identical vector (1, 0) from InVec1. + // Expect [true, true] on every thread. + bool2 Result4 = WaveActiveAllEqual(InVec1[TID.x]); + Out4[TID.x] = Result4; + + // Test 5: non-identical vectors are compared per component. + // Expect [false, true] on every thread. 
+ // Thread 0 reads (2, 0) from InVec2 while the rest read (1, 0): the 1st + // components differ, but the 2nd component is identical everywhere, so + // only the 2nd component of the bool2 result is true. + bool2 Result5 = WaveActiveAllEqual(InVec2[TID.x]); + Out5[TID.x] = Result5; + + // Test 6: with thread 0 excluded, the active threads all read (1, 0). + // Expect [true, true] on even threads other than thread 0; the rest keep false. + bool2 Result6 = false; + if (TID.x %2 == 0 && TID.x != 0) + Result6 = WaveActiveAllEqual(InVec2[TID.x]); + + Out6[TID.x] = Result6; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In1 + Format: Int32 + Stride: 4 + Data: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: In2 + Format: Int32 + Stride: 4 + Data: [2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: InVec1 + Format: Int32 + Stride: 8 + Data: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: InVec2 + Format: Int32 + Stride: 8 + Data: [2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: Out1 + Format: Bool + Stride: 4 + FillSize: 128 + - Name: Out2 + Format: Bool + Stride: 4 + FillSize: 128 + - Name: Out3 + Format: Bool + Stride: 4 + FillSize: 128 + - Name: Out4 + Format: Bool + Stride: 8 + FillSize: 256 + - Name: Out5 + Format: Bool + Stride: 8 + FillSize: 256 + - Name: Out6 + Format: Bool + Stride: 8 + FillSize: 256 + - Name: ExpectedOut1 + Format: Bool + Stride: 4 + Data: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: ExpectedOut2 + Format: Bool + Stride: 4 + Data: [0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + - Name: ExpectedOut3 + Format: Bool + Stride: 4 + Data: [0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: ExpectedOut4 + Format: Bool + Stride: 8 + Data: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + - Name: ExpectedOut5 + Format: Bool + Stride: 8 + Data: [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1] + - Name: ExpectedOut6 + Format: Bool + Stride: 8 + Data: [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] + + +Results: + - Result: Test1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: Test2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: Test3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: Test4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: Test5 + Rule: BufferExact + Actual: Out5 + Expected: ExpectedOut5 + - Result: Test6 + Rule: BufferExact + Actual: Out6 + Expected: ExpectedOut6 + +DescriptorSets: + - Resources: + - Name: In1 + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: In2 + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: InVec1 + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: InVec2 + Kind: StructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out1 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: Out5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: Out6 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 +... +#--- end + +# REQUIRES: WaveSize_32 + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_6 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file diff --git a/test/WaveOps/WaveActiveAllEqual.Wave64.test b/test/WaveOps/WaveActiveAllEqual.Wave64.test new file mode 100644 index 000000000..46c859f81 --- /dev/null +++ b/test/WaveOps/WaveActiveAllEqual.Wave64.test @@ -0,0 +1,291 @@ +#--- source.hlsl +StructuredBuffer In1 : register(t0); +StructuredBuffer In2 : register(t1); +StructuredBuffer InVec1 : register(t2); +StructuredBuffer InVec2 : register(t3); +RWStructuredBuffer Out1 : register(u4); +RWStructuredBuffer Out2 : register(u5); +RWStructuredBuffer Out3 : register(u6); +RWStructuredBuffer Out4 : register(u7); +RWStructuredBuffer Out5 : register(u8); +RWStructuredBuffer Out6 : register(u9); + +[WaveSize(64)] +[numthreads(64, 1, 1)] +void main(uint3 TID : SV_DispatchThreadID) { + // Test 1: only even threads are active at the call site and they all read + // the value 1, so expect true on even threads; odd threads keep false. + bool Result1 = false; + if (TID.x % 2 == 0) + Result1 = WaveActiveAllEqual(In1[TID.x]); + + Out1[TID.x] = Result1; + + // Test 2: same as test 1, but In2[0] is 2 while the other even slots hold 1, + // so the active values differ. Expect false on every thread. 
+ bool Result2 = false; + if (TID.x % 2 == 0) + Result2 = WaveActiveAllEqual(In2[TID.x]); + + Out2[TID.x] = Result2; + + // Test 3: same as test 2, but thread 0 (the only one reading the 2) is + // excluded. Expect true on even threads other than thread 0. + bool Result3 = false; + if (TID.x % 2 == 0 && TID.x != 0) + Result3 = WaveActiveAllEqual(In2[TID.x]); + + Out3[TID.x] = Result3; + + // Test 4: every thread reads the identical vector (1, 0) from InVec1. + // Expect [true, true] on every thread. + bool2 Result4 = WaveActiveAllEqual(InVec1[TID.x]); + Out4[TID.x] = Result4; + + // Test 5: non-identical vectors are compared per component. + // Expect [false, true] on every thread. + // Thread 0 reads (2, 0) from InVec2 while the rest read (1, 0): the 1st + // components differ, but the 2nd component is identical everywhere, so + // only the 2nd component of the bool2 result is true. + bool2 Result5 = WaveActiveAllEqual(InVec2[TID.x]); + Out5[TID.x] = Result5; + + // Test 6: with thread 0 excluded, the active threads all read (1, 0). + // Expect [true, true] on even threads other than thread 0; the rest keep false. 
+ bool2 Result6 = false; + if (TID.x %2 == 0 && TID.x != 0) + Result6 = WaveActiveAllEqual(InVec2[TID.x]); + + Out6[TID.x] = Result6; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In1 + Format: Int32 + Stride: 4 + Data: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: In2 + Format: Int32 + Stride: 4 + Data: [2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: InVec1 + Format: Int32 + Stride: 8 + Data: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: InVec2 + Format: Int32 + Stride: 8 + Data: [2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: Out1 + Format: Bool + Stride: 4 + FillSize: 256 + - Name: Out2 + Format: Bool + Stride: 4 + FillSize: 256 + - Name: Out3 + Format: Bool + Stride: 4 + FillSize: 256 + - Name: Out4 + Format: Bool + Stride: 8 + FillSize: 512 + - Name: Out5 + Format: Bool + Stride: 8 + FillSize: 512 + - Name: Out6 + Format: Bool + Stride: 8 + FillSize: 512 + - 
Name: ExpectedOut1 + Format: Bool + Stride: 4 + Data: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: ExpectedOut2 + Format: Bool + Stride: 4 + Data: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + - Name: ExpectedOut3 + Format: Bool + Stride: 4 + Data: [0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + - Name: ExpectedOut4 + Format: Bool + Stride: 8 + Data: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + - Name: ExpectedOut5 + Format: Bool + Stride: 8 + Data: [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1] + - Name: ExpectedOut6 + Format: Bool + Stride: 8 + Data: [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 
0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] + + +Results: + - Result: Test1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: Test2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: Test3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: Test4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: Test5 + Rule: BufferExact + Actual: Out5 + Expected: ExpectedOut5 + - Result: Test6 + Rule: BufferExact + Actual: Out6 + Expected: ExpectedOut6 + +DescriptorSets: + - Resources: + - Name: In1 + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: In2 + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: InVec1 + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: InVec2 + Kind: StructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: Out5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: Out6 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 +... 
+#--- end + +# REQUIRES: WaveSize_64 + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_6 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file From 84645468b67d9ee83a69230cffdc1f6256fd7cfa Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 4 Mar 2026 15:48:16 -0800 Subject: [PATCH 02/12] address Finn --- test/WaveOps/WaveActiveAllEqual.32.test | 317 +++++++++++++++++++ test/WaveOps/WaveActiveAllEqual.Wave128.test | 2 +- test/WaveOps/WaveActiveAllEqual.Wave32.test | 2 +- test/WaveOps/WaveActiveAllEqual.Wave64.test | 2 +- test/WaveOps/WaveActiveAllEqual.fp16.test | 139 ++++++++ test/WaveOps/WaveActiveAllEqual.fp64.test | 138 ++++++++ test/WaveOps/WaveActiveAllEqual.int16.test | 223 +++++++++++++ test/WaveOps/WaveActiveAllEqual.int64.test | 223 +++++++++++++ 8 files changed, 1043 insertions(+), 3 deletions(-) create mode 100644 test/WaveOps/WaveActiveAllEqual.32.test create mode 100644 test/WaveOps/WaveActiveAllEqual.fp16.test create mode 100644 test/WaveOps/WaveActiveAllEqual.fp64.test create mode 100644 test/WaveOps/WaveActiveAllEqual.int16.test create mode 100644 test/WaveOps/WaveActiveAllEqual.int64.test diff --git a/test/WaveOps/WaveActiveAllEqual.32.test b/test/WaveOps/WaveActiveAllEqual.32.test new file mode 100644 index 000000000..b09eba905 --- /dev/null +++ b/test/WaveOps/WaveActiveAllEqual.32.test @@ -0,0 +1,317 @@ +#--- source.hlsl +StructuredBuffer In : register(t0); +StructuredBuffer In2 : register(t1); +StructuredBuffer In3 : register(t2); +StructuredBuffer In4 : register(t3); + +StructuredBuffer UIn : register(t4); +StructuredBuffer UIn2 : register(t5); +StructuredBuffer UIn3 : register(t6); +StructuredBuffer UIn4 : register(t7); + +StructuredBuffer FIn : register(t8); +StructuredBuffer FIn2 : register(t9); +StructuredBuffer FIn3 : register(t10); +StructuredBuffer FIn4 : register(t11); + +RWStructuredBuffer Out : register(u12); +RWStructuredBuffer UOut : register(u13); +RWStructuredBuffer FOut : 
register(u14); + +// Every input buffer holds identical elements, so each WaveActiveAllEqual +// call below is expected to yield true in every component. +// Output buffers are pre-filled with true and each lane ANDs its +// result into the matching slot (index/uindex/findex walk the slots in order). +// Because every result should be true, the output buffers +// must still be all true at the end. + +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) +{ + int index = 0; + Out[index++] &= WaveActiveAllEqual(In[TID.x]); + + bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); + Out[index++] &= Result2.x; + Out[index++] &= Result2.y; + + bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); + Out[index++] &= Result3.x; + Out[index++] &= Result3.y; + Out[index++] &= Result3.z; + + bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); + Out[index++] &= Result4.x; + Out[index++] &= Result4.y; + Out[index++] &= Result4.z; + Out[index++] &= Result4.w; + + // constant folding case: a uniform literal is trivially equal on all lanes + bool4 ResultCF = WaveActiveAllEqual(int4(1,1,1,1)); + Out[index++] &= ResultCF.x; + Out[index++] &= ResultCF.y; + Out[index++] &= ResultCF.z; + Out[index++] &= ResultCF.w; + + int uindex = 0; + UOut[uindex++] &= WaveActiveAllEqual(UIn[TID.x]); + + bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); + UOut[uindex++] &= UResult2.x; + UOut[uindex++] &= UResult2.y; + + bool3 UResult3 = WaveActiveAllEqual(UIn3[TID.x]); + UOut[uindex++] &= UResult3.x; + UOut[uindex++] &= UResult3.y; + UOut[uindex++] &= UResult3.z; + + bool4 UResult4 = WaveActiveAllEqual(UIn4[TID.x]); + UOut[uindex++] &= UResult4.x; + UOut[uindex++] &= UResult4.y; + UOut[uindex++] &= UResult4.z; + UOut[uindex++] &= UResult4.w; + + // constant folding case: a uniform literal is trivially equal on all lanes + bool4 UResultCF = WaveActiveAllEqual(uint4(1,1,1,1)); + UOut[uindex++] &= UResultCF.x; + UOut[uindex++] &= UResultCF.y; + UOut[uindex++] &= UResultCF.z; + UOut[uindex++] &= UResultCF.w; + + int findex = 0; + FOut[findex++] &= WaveActiveAllEqual(FIn[TID.x]); + + bool2 FResult2 = WaveActiveAllEqual(FIn2[TID.x]); + FOut[findex++] &= FResult2.x; + FOut[findex++] &= FResult2.y; + + bool3 FResult3 = 
WaveActiveAllEqual(FIn3[TID.x]); + FOut[findex++] &= FResult3.x; + FOut[findex++] &= FResult3.y; + FOut[findex++] &= FResult3.z; + + bool4 FResult4 = WaveActiveAllEqual(FIn4[TID.x]); + FOut[findex++] &= FResult4.x; + FOut[findex++] &= FResult4.y; + FOut[findex++] &= FResult4.z; + FOut[findex++] &= FResult4.w; + + // constant folding case: a uniform literal is trivially equal on all lanes + bool4 FResultCF = WaveActiveAllEqual(float4(1,1,1,1)); + FOut[findex++] &= FResultCF.x; + FOut[findex++] &= FResultCF.y; + FOut[findex++] &= FResultCF.z; + FOut[findex++] &= FResultCF.w; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Int32 + Stride: 4 + Data: [ -1, -1, -1, -1] + - Name: In2 + Format: Int32 + Stride: 8 + Data: [ -1, -1, -1, -1, -1, -1, -1, -1] + - Name: In3 + Format: Int32 + Stride: 12 + Data: [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1] + - Name: In4 + Format: Int32 + Stride: 16 + Data: [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1] + - Name: UIn + Format: UInt32 + Stride: 4 + Data: [ 1, 1, 1, 1] + - Name: UIn2 + Format: UInt32 + Stride: 8 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1] + - Name: UIn3 + Format: UInt32 + Stride: 12 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + - Name: UIn4 + Format: UInt32 + Stride: 16 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + + - Name: FIn + Format: Float32 + Stride: 4 + Data: [ 1.0, 1.0, 1.0, 1.0 ] + - Name: FIn2 + Format: Float32 + Stride: 8 + Data: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + - Name: FIn3 + Format: Float32 + Stride: 12 + Data: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + - Name: FIn4 + Format: Float32 + Stride: 16 + Data: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + + - Name: Out + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: FOut + Format: Bool + 
Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + + - Name: ExpectedOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UExpectedOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: FExpectedOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + +Results: + - Result: ExpectedOut + Rule: BufferExact + Actual: Out + Expected: ExpectedOut + - Result: UExpectedOut + Rule: BufferExact + Actual: UOut + Expected: UExpectedOut + - Result: FExpectedOut + Rule: BufferExact + Actual: FOut + Expected: FExpectedOut + +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: In2 + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: In3 + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: In4 + Kind: StructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: UIn + Kind: StructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: UIn2 + Kind: StructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: UIn3 + Kind: StructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: UIn4 + Kind: StructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: FIn + Kind: StructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: FIn2 + Kind: StructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 + - Name: FIn3 + Kind: StructuredBuffer + DirectXBinding: + Register: 10 + Space: 0 + VulkanBinding: + Binding: 10 + - Name: FIn4 + Kind: StructuredBuffer + DirectXBinding: + Register: 11 + Space: 0 + 
VulkanBinding: + Binding: 11 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 12 + Space: 0 + VulkanBinding: + Binding: 12 + - Name: UOut + Kind: RWStructuredBuffer + DirectXBinding: + Register: 13 + Space: 0 + VulkanBinding: + Binding: 13 + - Name: FOut + Kind: RWStructuredBuffer + DirectXBinding: + Register: 14 + Space: 0 + VulkanBinding: + Binding: 14 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveActiveAllEqual.Wave128.test b/test/WaveOps/WaveActiveAllEqual.Wave128.test index f2983ef78..1cb370abb 100644 --- a/test/WaveOps/WaveActiveAllEqual.Wave128.test +++ b/test/WaveOps/WaveActiveAllEqual.Wave128.test @@ -348,4 +348,4 @@ DescriptorSets: # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_6 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveActiveAllEqual.Wave32.test b/test/WaveOps/WaveActiveAllEqual.Wave32.test index adfa37195..235c720af 100644 --- a/test/WaveOps/WaveActiveAllEqual.Wave32.test +++ b/test/WaveOps/WaveActiveAllEqual.Wave32.test @@ -258,4 +258,4 @@ DescriptorSets: # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_6 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveActiveAllEqual.Wave64.test b/test/WaveOps/WaveActiveAllEqual.Wave64.test index 46c859f81..6b4f6b326 100644 --- a/test/WaveOps/WaveActiveAllEqual.Wave64.test +++ b/test/WaveOps/WaveActiveAllEqual.Wave64.test @@ -288,4 +288,4 @@ DescriptorSets: # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_6 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveActiveAllEqual.fp16.test b/test/WaveOps/WaveActiveAllEqual.fp16.test 
new file mode 100644 index 000000000..47e7bae27 --- /dev/null +++ b/test/WaveOps/WaveActiveAllEqual.fp16.test @@ -0,0 +1,139 @@ +#--- source.hlsl +StructuredBuffer In : register(t0); +StructuredBuffer In2 : register(t1); +StructuredBuffer In3 : register(t2); +StructuredBuffer In4 : register(t3); + +RWStructuredBuffer Out : register(u4); + +// Expect all trues, all elements will be the same. +// Output buffers start off as true, and each +// lane writes its result value anded with the existing +// result in the output buffer. +// Since we expect all results to be true, output buffers +// should remain all true at the end. + +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) +{ + int index = 0; + Out[index++] &= WaveActiveAllEqual(In[TID.x]); + + bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); + Out[index++] &= Result2.x; + Out[index++] &= Result2.y; + + bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); + Out[index++] &= Result3.x; + Out[index++] &= Result3.y; + Out[index++] &= Result3.z; + + bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); + Out[index++] &= Result4.x; + Out[index++] &= Result4.y; + Out[index++] &= Result4.z; + Out[index++] &= Result4.w; + + // constant folding case + bool4 ResultCF = WaveActiveSum(half4(1,1,1,1)); + Out[index++] &= ResultCF.x; + Out[index++] &= ResultCF.y; + Out[index++] &= ResultCF.z; + Out[index++] &= ResultCF.w; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + # Everything is 0xbc00, which is -1.0 + - Name: In + Format: Float16 + Stride: 2 + Data: [ 0xbc00, 0xbc00, 0xbc00, 0xbc00] + - Name: In2 + Format: Float16 + Stride: 4 + Data: [ 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00] + - Name: In3 + Format: Float16 + Stride: 6 + Data: [ 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00] + - Name: In4 + Format: Float16 + Stride: 8 + Data: [ 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 
0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00] + + - Name: Out + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + + - Name: ExpectedOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UExpectedOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + +Results: + - Result: ExpectedOut + Rule: BufferExact + Actual: Out + Expected: ExpectedOut + +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: In2 + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: In3 + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: In4 + Kind: StructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + +... +#--- end + +# REQUIRES Half +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveActiveAllEqual.fp64.test b/test/WaveOps/WaveActiveAllEqual.fp64.test new file mode 100644 index 000000000..50739f4e0 --- /dev/null +++ b/test/WaveOps/WaveActiveAllEqual.fp64.test @@ -0,0 +1,138 @@ +#--- source.hlsl +StructuredBuffer In : register(t0); +StructuredBuffer In2 : register(t1); +StructuredBuffer In3 : register(t2); +StructuredBuffer In4 : register(t3); + +RWStructuredBuffer Out : register(u4); + +// Expect all trues, all elements will be the same. +// Output buffers start off as true, and each +// lane writes its result value anded with the existing +// result in the output buffer. 
+// Since we expect all results to be true, output buffers +// should remain all true at the end. + +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) +{ + int index = 0; + Out[index++] &= WaveActiveAllEqual(In[TID.x]); + + bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); + Out[index++] &= Result2.x; + Out[index++] &= Result2.y; + + bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); + Out[index++] &= Result3.x; + Out[index++] &= Result3.y; + Out[index++] &= Result3.z; + + bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); + Out[index++] &= Result4.x; + Out[index++] &= Result4.y; + Out[index++] &= Result4.z; + Out[index++] &= Result4.w; + + // constant folding case + bool4 ResultCF = WaveActiveSum(double4(1,1,1,1)); + Out[index++] &= ResultCF.x; + Out[index++] &= ResultCF.y; + Out[index++] &= ResultCF.z; + Out[index++] &= ResultCF.w; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Float64 + Stride: 8 + Data: [ -1.0, -1.0, -1.0, -1.0] + - Name: In2 + Format: Float64 + Stride: 16 + Data: [ -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] + - Name: In3 + Format: Float64 + Stride: 24 + Data: [ -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] + - Name: In4 + Format: Float64 + Stride: 32 + Data: [ -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] + + - Name: Out + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + + - Name: ExpectedOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UExpectedOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + +Results: + - Result: ExpectedOut + Rule: BufferExact + Actual: Out + Expected: ExpectedOut + +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: In2 + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: In3 + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: In4 + Kind: StructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + +... +#--- end + +# REQUIRES double +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveActiveAllEqual.int16.test b/test/WaveOps/WaveActiveAllEqual.int16.test new file mode 100644 index 000000000..e3c258504 --- /dev/null +++ b/test/WaveOps/WaveActiveAllEqual.int16.test @@ -0,0 +1,223 @@ +#--- source.hlsl +StructuredBuffer In : register(t0); +StructuredBuffer In2 : register(t1); +StructuredBuffer In3 : register(t2); +StructuredBuffer In4 : register(t3); + +StructuredBuffer UIn : register(t4); +StructuredBuffer UIn2 : register(t5); +StructuredBuffer UIn3 : register(t6); +StructuredBuffer UIn4 : register(t7); + +RWStructuredBuffer Out : register(u8); +RWStructuredBuffer UOut : register(u9); + +// Expect all trues, all elements will be the same. +// Output buffers start off as true, and each +// lane writes its result value anded with the existing +// result in the output buffer. +// Since we expect all results to be true, output buffers +// should remain all true at the end. 
+ +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) +{ + int index = 0; + Out[index++] &= WaveActiveAllEqual(In[TID.x]); + + bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); + Out[index++] &= Result2.x; + Out[index++] &= Result2.y; + + bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); + Out[index++] &= Result3.x; + Out[index++] &= Result3.y; + Out[index++] &= Result3.z; + + bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); + Out[index++] &= Result4.x; + Out[index++] &= Result4.y; + Out[index++] &= Result4.z; + Out[index++] &= Result4.w; + + // constant folding case + bool4 ResultCF = WaveActiveSum(int4(1,1,1,1)); + Out[index++] &= ResultCF.x; + Out[index++] &= ResultCF.y; + Out[index++] &= ResultCF.z; + Out[index++] &= ResultCF.w; + + int uindex = 0; + UOut[uindex++] &= WaveActiveAllEqual(UIn[TID.x]); + + bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); + UOut[uindex++] &= UResult2.x; + UOut[uindex++] &= UResult2.y; + + bool3 UResult3 = WaveActiveAllEqual(UIn3[TID.x]); + UOut[uindex++] &= UResult3.x; + UOut[uindex++] &= UResult3.y; + UOut[uindex++] &= UResult3.z; + + bool4 UResult4 = WaveActiveAllEqual(UIn4[TID.x]); + UOut[uindex++] &= UResult4.x; + UOut[uindex++] &= UResult4.y; + UOut[uindex++] &= UResult4.z; + UOut[uindex++] &= UResult4.w; + + // constant folding case + bool4 UResultCF = WaveActiveSum(uint4(1,1,1,1)); + UOut[uindex++] &= UResultCF.x; + UOut[uindex++] &= UResultCF.y; + UOut[uindex++] &= UResultCF.z; + UOut[uindex++] &= UResultCF.w; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Int16 + Stride: 2 + Data: [ -1, -1, -1, -1] + - Name: In2 + Format: Int16 + Stride: 4 + Data: [ -1, -1, -1, -1, -1, -1, -1, -1] + - Name: In3 + Format: Int16 + Stride: 6 + Data: [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1] + - Name: In4 + Format: Int16 + Stride: 8 + Data: [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1] + - Name: UIn + Format: UInt16 + 
Stride: 2 + Data: [ 1, 1, 1, 1] + - Name: UIn2 + Format: UInt16 + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1] + - Name: UIn3 + Format: UInt16 + Stride: 6 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + - Name: UIn4 + Format: UInt16 + Stride: 8 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + + - Name: Out + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + + - Name: ExpectedOut + Format: Int32 + Stride: 8 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UExpectedOut + Format: Bool + Stride: 8 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + +Results: + - Result: ExpectedOut + Rule: BufferExact + Actual: Out + Expected: ExpectedOut + - Result: UExpectedOut + Rule: BufferExact + Actual: UOut + Expected: UExpectedOut + +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: In2 + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: In3 + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: In4 + Kind: StructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: UIn + Kind: StructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: UIn2 + Kind: StructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: UIn3 + Kind: StructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: UIn4 + Kind: StructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: UOut + Kind: RWStructuredBuffer + DirectXBinding: + Register: 
9 + Space: 0 + VulkanBinding: + Binding: 9 +... +#--- end + +# REQUIRES Int16 +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveActiveAllEqual.int64.test b/test/WaveOps/WaveActiveAllEqual.int64.test new file mode 100644 index 000000000..f6e3c9859 --- /dev/null +++ b/test/WaveOps/WaveActiveAllEqual.int64.test @@ -0,0 +1,223 @@ +#--- source.hlsl +StructuredBuffer In : register(t0); +StructuredBuffer In2 : register(t1); +StructuredBuffer In3 : register(t2); +StructuredBuffer In4 : register(t3); + +StructuredBuffer UIn : register(t4); +StructuredBuffer UIn2 : register(t5); +StructuredBuffer UIn3 : register(t6); +StructuredBuffer UIn4 : register(t7); + +RWStructuredBuffer Out : register(u8); +RWStructuredBuffer UOut : register(u9); + +// Expect all trues, all elements will be the same. +// Output buffers start off as true, and each +// lane writes its result value anded with the existing +// result in the output buffer. +// Since we expect all results to be true, output buffers +// should remain all true at the end. 
+ +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) +{ + int index = 0; + Out[index++] &= WaveActiveAllEqual(In[TID.x]); + + bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); + Out[index++] &= Result2.x; + Out[index++] &= Result2.y; + + bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); + Out[index++] &= Result3.x; + Out[index++] &= Result3.y; + Out[index++] &= Result3.z; + + bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); + Out[index++] &= Result4.x; + Out[index++] &= Result4.y; + Out[index++] &= Result4.z; + Out[index++] &= Result4.w; + + // constant folding case + bool4 ResultCF = WaveActiveSum(int4(1,1,1,1)); + Out[index++] &= ResultCF.x; + Out[index++] &= ResultCF.y; + Out[index++] &= ResultCF.z; + Out[index++] &= ResultCF.w; + + int uindex = 0; + UOut[uindex++] &= WaveActiveAllEqual(UIn[TID.x]); + + bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); + UOut[uindex++] &= UResult2.x; + UOut[uindex++] &= UResult2.y; + + bool3 UResult3 = WaveActiveAllEqual(UIn3[TID.x]); + UOut[uindex++] &= UResult3.x; + UOut[uindex++] &= UResult3.y; + UOut[uindex++] &= UResult3.z; + + bool4 UResult4 = WaveActiveAllEqual(UIn4[TID.x]); + UOut[uindex++] &= UResult4.x; + UOut[uindex++] &= UResult4.y; + UOut[uindex++] &= UResult4.z; + UOut[uindex++] &= UResult4.w; + + // constant folding case + bool4 UResultCF = WaveActiveSum(uint4(1,1,1,1)); + UOut[uindex++] &= UResultCF.x; + UOut[uindex++] &= UResultCF.y; + UOut[uindex++] &= UResultCF.z; + UOut[uindex++] &= UResultCF.w; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Int64 + Stride: 8 + Data: [ -1, -1, -1, -1] + - Name: In2 + Format: Int64 + Stride: 16 + Data: [ -1, -1, -1, -1, -1, -1, -1, -1] + - Name: In3 + Format: Int64 + Stride: 24 + Data: [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1] + - Name: In4 + Format: Int64 + Stride: 32 + Data: [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1] + - Name: UIn + Format: UInt64 + 
Stride: 8 + Data: [ 1, 1, 1, 1] + - Name: UIn2 + Format: UInt64 + Stride: 16 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1] + - Name: UIn3 + Format: UInt64 + Stride: 24 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + - Name: UIn4 + Format: UInt64 + Stride: 32 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + + - Name: Out + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + + - Name: ExpectedOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + - Name: UExpectedOut + Format: Bool + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + +Results: + - Result: ExpectedOut + Rule: BufferExact + Actual: Out + Expected: ExpectedOut + - Result: UExpectedOut + Rule: BufferExact + Actual: UOut + Expected: UExpectedOut + +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: In2 + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: In3 + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: In4 + Kind: StructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: UIn + Kind: StructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: UIn2 + Kind: StructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: UIn3 + Kind: StructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: UIn4 + Kind: StructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: UOut + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 +... +#--- end + +# REQUIRES Int64 +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o From 505caf1d9f3900ea0f0ce9f3badd6ff39887d40d Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 4 Mar 2026 16:03:10 -0800 Subject: [PATCH 03/12] prevent implicit conversion warnings on mac --- test/WaveOps/WaveActiveAllEqual.32.test | 6 +++--- test/WaveOps/WaveActiveAllEqual.fp16.test | 2 +- test/WaveOps/WaveActiveAllEqual.fp64.test | 2 +- test/WaveOps/WaveActiveAllEqual.int16.test | 4 ++-- test/WaveOps/WaveActiveAllEqual.int64.test | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/test/WaveOps/WaveActiveAllEqual.32.test b/test/WaveOps/WaveActiveAllEqual.32.test index b09eba905..e007dfc39 100644 --- a/test/WaveOps/WaveActiveAllEqual.32.test +++ b/test/WaveOps/WaveActiveAllEqual.32.test @@ -28,7 +28,7 @@ RWStructuredBuffer FOut : register(u14); [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { - int index = 0; + unsigned int index = 0; Out[index++] &= WaveActiveAllEqual(In[TID.x]); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); @@ -53,7 +53,7 @@ void main(uint3 TID : SV_GroupThreadID) Out[index++] &= ResultCF.z; Out[index++] &= ResultCF.w; - int uindex = 0; + unsigned int uindex = 0; UOut[uindex++] &= WaveActiveAllEqual(UIn[TID.x]); bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); @@ -78,7 +78,7 @@ void main(uint3 TID : SV_GroupThreadID) UOut[uindex++] &= UResultCF.z; UOut[uindex++] &= UResultCF.w; - int findex = 0; + unsigned int findex = 0; FOut[findex++] &= WaveActiveAllEqual(FIn[TID.x]); bool2 FResult2 = WaveActiveAllEqual(FIn2[TID.x]); diff --git a/test/WaveOps/WaveActiveAllEqual.fp16.test b/test/WaveOps/WaveActiveAllEqual.fp16.test index 47e7bae27..5179504d1 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp16.test +++ b/test/WaveOps/WaveActiveAllEqual.fp16.test @@ -16,7 +16,7 @@ RWStructuredBuffer 
Out : register(u4); [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { - int index = 0; + unsigned int index = 0; Out[index++] &= WaveActiveAllEqual(In[TID.x]); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); diff --git a/test/WaveOps/WaveActiveAllEqual.fp64.test b/test/WaveOps/WaveActiveAllEqual.fp64.test index 50739f4e0..b269d9909 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp64.test +++ b/test/WaveOps/WaveActiveAllEqual.fp64.test @@ -16,7 +16,7 @@ RWStructuredBuffer Out : register(u4); [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { - int index = 0; + unsigned int index = 0; Out[index++] &= WaveActiveAllEqual(In[TID.x]); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); diff --git a/test/WaveOps/WaveActiveAllEqual.int16.test b/test/WaveOps/WaveActiveAllEqual.int16.test index e3c258504..8f9997c41 100644 --- a/test/WaveOps/WaveActiveAllEqual.int16.test +++ b/test/WaveOps/WaveActiveAllEqual.int16.test @@ -22,7 +22,7 @@ RWStructuredBuffer UOut : register(u9); [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { - int index = 0; + unsigned int index = 0; Out[index++] &= WaveActiveAllEqual(In[TID.x]); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); @@ -47,7 +47,7 @@ void main(uint3 TID : SV_GroupThreadID) Out[index++] &= ResultCF.z; Out[index++] &= ResultCF.w; - int uindex = 0; + unsigned int uindex = 0; UOut[uindex++] &= WaveActiveAllEqual(UIn[TID.x]); bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); diff --git a/test/WaveOps/WaveActiveAllEqual.int64.test b/test/WaveOps/WaveActiveAllEqual.int64.test index f6e3c9859..73534ef13 100644 --- a/test/WaveOps/WaveActiveAllEqual.int64.test +++ b/test/WaveOps/WaveActiveAllEqual.int64.test @@ -22,7 +22,7 @@ RWStructuredBuffer UOut : register(u9); [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { - int index = 0; + unsigned int index = 0; Out[index++] &= WaveActiveAllEqual(In[TID.x]); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); @@ -47,7 +47,7 @@ void main(uint3 TID : 
SV_GroupThreadID) Out[index++] &= ResultCF.z; Out[index++] &= ResultCF.w; - int uindex = 0; + unsigned int uindex = 0; UOut[uindex++] &= WaveActiveAllEqual(UIn[TID.x]); bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); From 5752421181cc61bf82723a1552edfa03b64fbe69 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Mar 2026 12:18:41 -0800 Subject: [PATCH 04/12] address Tex --- test/WaveOps/WaveActiveAllEqual.32.test | 152 ++++++++++----------- test/WaveOps/WaveActiveAllEqual.fp16.test | 49 +++---- test/WaveOps/WaveActiveAllEqual.fp64.test | 49 +++---- test/WaveOps/WaveActiveAllEqual.int16.test | 86 ++++++------ test/WaveOps/WaveActiveAllEqual.int64.test | 84 ++++++------ 5 files changed, 213 insertions(+), 207 deletions(-) diff --git a/test/WaveOps/WaveActiveAllEqual.32.test b/test/WaveOps/WaveActiveAllEqual.32.test index e007dfc39..792066f49 100644 --- a/test/WaveOps/WaveActiveAllEqual.32.test +++ b/test/WaveOps/WaveActiveAllEqual.32.test @@ -1,107 +1,107 @@ #--- source.hlsl StructuredBuffer In : register(t0); -StructuredBuffer In2 : register(t1); -StructuredBuffer In3 : register(t2); -StructuredBuffer In4 : register(t3); +StructuredBuffer In2 : register(t1); +StructuredBuffer In3 : register(t2); +StructuredBuffer In4 : register(t3); StructuredBuffer UIn : register(t4); -StructuredBuffer UIn2 : register(t5); -StructuredBuffer UIn3 : register(t6); -StructuredBuffer UIn4 : register(t7); +StructuredBuffer UIn2 : register(t5); +StructuredBuffer UIn3 : register(t6); +StructuredBuffer UIn4 : register(t7); StructuredBuffer FIn : register(t8); -StructuredBuffer FIn2 : register(t9); -StructuredBuffer FIn3 : register(t10); -StructuredBuffer FIn4 : register(t11); +StructuredBuffer FIn2 : register(t9); +StructuredBuffer FIn3 : register(t10); +StructuredBuffer FIn4 : register(t11); -RWStructuredBuffer Out : register(u12); -RWStructuredBuffer UOut : register(u13); -RWStructuredBuffer FOut : register(u14); +RWStructuredBuffer Out : register(u12); +RWStructuredBuffer UOut 
: register(u13); +RWStructuredBuffer FOut : register(u14); // Expect all trues, all elements will be the same. -// Output buffers start off as true, and each -// lane writes its result value anded with the existing -// result in the output buffer. // Since we expect all results to be true, output buffers -// should remain all true at the end. +// should result in 4 at the end, since there are 4 threads. [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { unsigned int index = 0; - Out[index++] &= WaveActiveAllEqual(In[TID.x]); + bool Result = WaveActiveAllEqual(In[TID.x]); + InterlockedAdd(Out[index++], (unsigned int)Result); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - Out[index++] &= Result2.x; - Out[index++] &= Result2.y; + InterlockedAdd(Out[index++], (unsigned int)Result2.x); + InterlockedAdd(Out[index++], (unsigned int)Result2.y); bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - Out[index++] &= Result3.x; - Out[index++] &= Result3.y; - Out[index++] &= Result3.z; + InterlockedAdd(Out[index++], (unsigned int)Result3.x); + InterlockedAdd(Out[index++], (unsigned int)Result3.y); + InterlockedAdd(Out[index++], (unsigned int)Result3.z); bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - Out[index++] &= Result4.x; - Out[index++] &= Result4.y; - Out[index++] &= Result4.z; - Out[index++] &= Result4.w; + InterlockedAdd(Out[index++], (unsigned int)Result4.x); + InterlockedAdd(Out[index++], (unsigned int)Result4.y); + InterlockedAdd(Out[index++], (unsigned int)Result4.z); + InterlockedAdd(Out[index++], (unsigned int)Result4.w); // constant folding case - bool4 ResultCF = WaveActiveSum(int4(1,1,1,1)); - Out[index++] &= ResultCF.x; - Out[index++] &= ResultCF.y; - Out[index++] &= ResultCF.z; - Out[index++] &= ResultCF.w; + bool4 ResultCF = WaveActiveAllEqual(int4(1,1,1,1)); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); + 
InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); unsigned int uindex = 0; - UOut[uindex++] &= WaveActiveAllEqual(UIn[TID.x]); - + bool UResult = WaveActiveAllEqual(UIn[TID.x]); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult); + bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); - UOut[uindex++] &= UResult2.x; - UOut[uindex++] &= UResult2.y; + InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.y); bool3 UResult3 = WaveActiveAllEqual(UIn3[TID.x]); - UOut[uindex++] &= UResult3.x; - UOut[uindex++] &= UResult3.y; - UOut[uindex++] &= UResult3.z; + InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.y); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.z); bool4 UResult4 = WaveActiveAllEqual(UIn4[TID.x]); - UOut[uindex++] &= UResult4.x; - UOut[uindex++] &= UResult4.y; - UOut[uindex++] &= UResult4.z; - UOut[uindex++] &= UResult4.w; + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.y); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.z); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.w); // constant folding case - bool4 UResultCF = WaveActiveSum(uint4(1,1,1,1)); - UOut[uindex++] &= UResultCF.x; - UOut[uindex++] &= UResultCF.y; - UOut[uindex++] &= UResultCF.z; - UOut[uindex++] &= UResultCF.w; + bool4 UResultCF = WaveActiveAllEqual(uint4(1,1,1,1)); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.y); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.z); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.w); unsigned int findex = 0; - FOut[findex++] &= WaveActiveAllEqual(FIn[TID.x]); + bool FResult = WaveActiveAllEqual(FIn[TID.x]); + InterlockedAdd(FOut[findex++], (unsigned int)FResult); bool2 FResult2 = WaveActiveAllEqual(FIn2[TID.x]); - FOut[findex++] &= FResult2.x; - 
FOut[findex++] &= FResult2.y; + InterlockedAdd(FOut[findex++], (unsigned int)FResult2.x); + InterlockedAdd(FOut[findex++], (unsigned int)FResult2.y); bool3 FResult3 = WaveActiveAllEqual(FIn3[TID.x]); - FOut[findex++] &= FResult3.x; - FOut[findex++] &= FResult3.y; - FOut[findex++] &= FResult3.z; + InterlockedAdd(FOut[findex++], (unsigned int)FResult3.x); + InterlockedAdd(FOut[findex++], (unsigned int)FResult3.y); + InterlockedAdd(FOut[findex++], (unsigned int)FResult3.z); bool4 FResult4 = WaveActiveAllEqual(FIn4[TID.x]); - FOut[findex++] &= FResult4.x; - FOut[findex++] &= FResult4.y; - FOut[findex++] &= FResult4.z; - FOut[findex++] &= FResult4.w; + InterlockedAdd(FOut[findex++], (unsigned int)FResult4.x); + InterlockedAdd(FOut[findex++], (unsigned int)FResult4.y); + InterlockedAdd(FOut[findex++], (unsigned int)FResult4.z); + InterlockedAdd(FOut[findex++], (unsigned int)FResult4.w); // constant folding case - bool4 FResultCF = WaveActiveSum(float4(1,1,1,1)); - FOut[findex++] &= FResultCF.x; - FOut[findex++] &= FResultCF.y; - FOut[findex++] &= FResultCF.z; - FOut[findex++] &= FResultCF.w; + bool4 FResultCF = WaveActiveAllEqual(float4(1,1,1,1)); + InterlockedAdd(FOut[findex++], (unsigned int)FResultCF.x); + InterlockedAdd(FOut[findex++], (unsigned int)FResultCF.y); + InterlockedAdd(FOut[findex++], (unsigned int)FResultCF.z); + InterlockedAdd(FOut[findex++], (unsigned int)FResultCF.w); } //--- pipeline.yaml @@ -163,30 +163,30 @@ Buffers: Data: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] - Name: Out - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: UOut - Format: Bool - Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Format: Int32 + Stride: 0 + Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: FOut - Format: Bool - Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Format: Int32 + Stride: 0 
+ Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: ExpectedOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] - Name: UExpectedOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] - Name: FExpectedOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] Results: - Result: ExpectedOut diff --git a/test/WaveOps/WaveActiveAllEqual.fp16.test b/test/WaveOps/WaveActiveAllEqual.fp16.test index 5179504d1..090d1b37c 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp16.test +++ b/test/WaveOps/WaveActiveAllEqual.fp16.test @@ -4,7 +4,7 @@ StructuredBuffer In2 : register(t1); StructuredBuffer In3 : register(t2); StructuredBuffer In4 : register(t3); -RWStructuredBuffer Out : register(u4); +RWStructuredBuffer Out : register(u4); // Expect all trues, all elements will be the same. 
// Output buffers start off as true, and each @@ -17,29 +17,30 @@ RWStructuredBuffer Out : register(u4); void main(uint3 TID : SV_GroupThreadID) { unsigned int index = 0; - Out[index++] &= WaveActiveAllEqual(In[TID.x]); + bool Result = WaveActiveAllEqual(In[TID.x]); + InterlockedAdd(Out[index++], (unsigned int)Result); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - Out[index++] &= Result2.x; - Out[index++] &= Result2.y; + InterlockedAdd(Out[index++], (unsigned int)Result2.x); + InterlockedAdd(Out[index++], (unsigned int)Result2.y); bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - Out[index++] &= Result3.x; - Out[index++] &= Result3.y; - Out[index++] &= Result3.z; + InterlockedAdd(Out[index++], (unsigned int)Result3.x); + InterlockedAdd(Out[index++], (unsigned int)Result3.y); + InterlockedAdd(Out[index++], (unsigned int)Result3.z); bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - Out[index++] &= Result4.x; - Out[index++] &= Result4.y; - Out[index++] &= Result4.z; - Out[index++] &= Result4.w; + InterlockedAdd(Out[index++], (unsigned int)Result4.x); + InterlockedAdd(Out[index++], (unsigned int)Result4.y); + InterlockedAdd(Out[index++], (unsigned int)Result4.z); + InterlockedAdd(Out[index++], (unsigned int)Result4.w); // constant folding case - bool4 ResultCF = WaveActiveSum(half4(1,1,1,1)); - Out[index++] &= ResultCF.x; - Out[index++] &= ResultCF.y; - Out[index++] &= ResultCF.z; - Out[index++] &= ResultCF.w; + bool4 ResultCF = WaveActiveAllEqual(half4(1,1,1,1)); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); } //--- pipeline.yaml @@ -69,22 +70,22 @@ Buffers: Data: [ 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00, 0xbc00] - Name: Out - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1 ] + Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: UOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: ExpectedOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] - Name: UExpectedOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] Results: - Result: ExpectedOut diff --git a/test/WaveOps/WaveActiveAllEqual.fp64.test b/test/WaveOps/WaveActiveAllEqual.fp64.test index b269d9909..e6e6a1f9a 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp64.test +++ b/test/WaveOps/WaveActiveAllEqual.fp64.test @@ -4,7 +4,7 @@ StructuredBuffer In2 : register(t1); StructuredBuffer In3 : register(t2); StructuredBuffer In4 : register(t3); -RWStructuredBuffer Out : register(u4); +RWStructuredBuffer Out : register(u4); // Expect all trues, all elements will be the same. 
// Output buffers start off as true, and each @@ -17,29 +17,30 @@ RWStructuredBuffer Out : register(u4); void main(uint3 TID : SV_GroupThreadID) { unsigned int index = 0; - Out[index++] &= WaveActiveAllEqual(In[TID.x]); + bool Result = WaveActiveAllEqual(In[TID.x]); + InterlockedAdd(Out[index++], (unsigned int)Result); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - Out[index++] &= Result2.x; - Out[index++] &= Result2.y; + InterlockedAdd(Out[index++], (unsigned int)Result2.x); + InterlockedAdd(Out[index++], (unsigned int)Result2.y); bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - Out[index++] &= Result3.x; - Out[index++] &= Result3.y; - Out[index++] &= Result3.z; + InterlockedAdd(Out[index++], (unsigned int)Result3.x); + InterlockedAdd(Out[index++], (unsigned int)Result3.y); + InterlockedAdd(Out[index++], (unsigned int)Result3.z); bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - Out[index++] &= Result4.x; - Out[index++] &= Result4.y; - Out[index++] &= Result4.z; - Out[index++] &= Result4.w; + InterlockedAdd(Out[index++], (unsigned int)Result4.x); + InterlockedAdd(Out[index++], (unsigned int)Result4.y); + InterlockedAdd(Out[index++], (unsigned int)Result4.z); + InterlockedAdd(Out[index++], (unsigned int)Result4.w); // constant folding case - bool4 ResultCF = WaveActiveSum(double4(1,1,1,1)); - Out[index++] &= ResultCF.x; - Out[index++] &= ResultCF.y; - Out[index++] &= ResultCF.z; - Out[index++] &= ResultCF.w; + bool4 ResultCF = WaveActiveAllEqual(double4(1,1,1,1)); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); } //--- pipeline.yaml @@ -68,22 +69,22 @@ Buffers: Data: [ -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] - Name: Out - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: UOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: ExpectedOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] - Name: UExpectedOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] Results: - Result: ExpectedOut diff --git a/test/WaveOps/WaveActiveAllEqual.int16.test b/test/WaveOps/WaveActiveAllEqual.int16.test index 8f9997c41..d044e9548 100644 --- a/test/WaveOps/WaveActiveAllEqual.int16.test +++ b/test/WaveOps/WaveActiveAllEqual.int16.test @@ -9,8 +9,8 @@ StructuredBuffer UIn2 : register(t5); StructuredBuffer UIn3 : register(t6); StructuredBuffer UIn4 : register(t7); -RWStructuredBuffer Out : register(u8); -RWStructuredBuffer UOut : register(u9); +RWStructuredBuffer Out : register(u8); +RWStructuredBuffer UOut : register(u9); // Expect all trues, all elements will be the same. 
// Output buffers start off as true, and each @@ -23,54 +23,56 @@ RWStructuredBuffer UOut : register(u9); void main(uint3 TID : SV_GroupThreadID) { unsigned int index = 0; - Out[index++] &= WaveActiveAllEqual(In[TID.x]); + bool Result = WaveActiveAllEqual(In[TID.x]); + InterlockedAdd(Out[index++], (unsigned int)Result); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - Out[index++] &= Result2.x; - Out[index++] &= Result2.y; + InterlockedAdd(Out[index++], (unsigned int)Result2.x); + InterlockedAdd(Out[index++], (unsigned int)Result2.y); bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - Out[index++] &= Result3.x; - Out[index++] &= Result3.y; - Out[index++] &= Result3.z; + InterlockedAdd(Out[index++], (unsigned int)Result3.x); + InterlockedAdd(Out[index++], (unsigned int)Result3.y); + InterlockedAdd(Out[index++], (unsigned int)Result3.z); bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - Out[index++] &= Result4.x; - Out[index++] &= Result4.y; - Out[index++] &= Result4.z; - Out[index++] &= Result4.w; + InterlockedAdd(Out[index++], (unsigned int)Result4.x); + InterlockedAdd(Out[index++], (unsigned int)Result4.y); + InterlockedAdd(Out[index++], (unsigned int)Result4.z); + InterlockedAdd(Out[index++], (unsigned int)Result4.w); // constant folding case - bool4 ResultCF = WaveActiveSum(int4(1,1,1,1)); - Out[index++] &= ResultCF.x; - Out[index++] &= ResultCF.y; - Out[index++] &= ResultCF.z; - Out[index++] &= ResultCF.w; + bool4 ResultCF = WaveActiveAllEqual(int16_t4(1,1,1,1)); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); unsigned int uindex = 0; - UOut[uindex++] &= WaveActiveAllEqual(UIn[TID.x]); - + bool UResult = WaveActiveAllEqual(UIn[TID.x]); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult); + bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); - UOut[uindex++] &= UResult2.x; - 
UOut[uindex++] &= UResult2.y; + InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.y); bool3 UResult3 = WaveActiveAllEqual(UIn3[TID.x]); - UOut[uindex++] &= UResult3.x; - UOut[uindex++] &= UResult3.y; - UOut[uindex++] &= UResult3.z; + InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.y); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.z); bool4 UResult4 = WaveActiveAllEqual(UIn4[TID.x]); - UOut[uindex++] &= UResult4.x; - UOut[uindex++] &= UResult4.y; - UOut[uindex++] &= UResult4.z; - UOut[uindex++] &= UResult4.w; + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.y); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.z); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.w); // constant folding case - bool4 UResultCF = WaveActiveSum(uint4(1,1,1,1)); - UOut[uindex++] &= UResultCF.x; - UOut[uindex++] &= UResultCF.y; - UOut[uindex++] &= UResultCF.z; - UOut[uindex++] &= UResultCF.w; + bool4 UResultCF = WaveActiveAllEqual(uint16_t4(1,1,1,1)); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.y); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.z); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.w); } //--- pipeline.yaml @@ -115,22 +117,22 @@ Buffers: Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] - Name: Out - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: UOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: ExpectedOut Format: Int32 - Stride: 8 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Stride: 4 + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] 
- Name: UExpectedOut - Format: Bool - Stride: 8 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Format: Int32 + Stride: 4 + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] Results: - Result: ExpectedOut diff --git a/test/WaveOps/WaveActiveAllEqual.int64.test b/test/WaveOps/WaveActiveAllEqual.int64.test index 73534ef13..0b7d433a1 100644 --- a/test/WaveOps/WaveActiveAllEqual.int64.test +++ b/test/WaveOps/WaveActiveAllEqual.int64.test @@ -9,8 +9,8 @@ StructuredBuffer UIn2 : register(t5); StructuredBuffer UIn3 : register(t6); StructuredBuffer UIn4 : register(t7); -RWStructuredBuffer Out : register(u8); -RWStructuredBuffer UOut : register(u9); +RWStructuredBuffer Out : register(u8); +RWStructuredBuffer UOut : register(u9); // Expect all trues, all elements will be the same. // Output buffers start off as true, and each @@ -23,54 +23,56 @@ RWStructuredBuffer UOut : register(u9); void main(uint3 TID : SV_GroupThreadID) { unsigned int index = 0; - Out[index++] &= WaveActiveAllEqual(In[TID.x]); + bool Result = WaveActiveAllEqual(In[TID.x]); + InterlockedAdd(Out[index++], (unsigned int)Result); bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - Out[index++] &= Result2.x; - Out[index++] &= Result2.y; + InterlockedAdd(Out[index++], (unsigned int)Result2.x); + InterlockedAdd(Out[index++], (unsigned int)Result2.y); bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - Out[index++] &= Result3.x; - Out[index++] &= Result3.y; - Out[index++] &= Result3.z; + InterlockedAdd(Out[index++], (unsigned int)Result3.x); + InterlockedAdd(Out[index++], (unsigned int)Result3.y); + InterlockedAdd(Out[index++], (unsigned int)Result3.z); bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - Out[index++] &= Result4.x; - Out[index++] &= Result4.y; - Out[index++] &= Result4.z; - Out[index++] &= Result4.w; + InterlockedAdd(Out[index++], (unsigned int)Result4.x); + InterlockedAdd(Out[index++], (unsigned int)Result4.y); + InterlockedAdd(Out[index++], (unsigned int)Result4.z); + 
InterlockedAdd(Out[index++], (unsigned int)Result4.w); // constant folding case - bool4 ResultCF = WaveActiveSum(int4(1,1,1,1)); - Out[index++] &= ResultCF.x; - Out[index++] &= ResultCF.y; - Out[index++] &= ResultCF.z; - Out[index++] &= ResultCF.w; + bool4 ResultCF = WaveActiveAllEqual(int64_t4(1,1,1,1)); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); + InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); unsigned int uindex = 0; - UOut[uindex++] &= WaveActiveAllEqual(UIn[TID.x]); - + bool UResult = WaveActiveAllEqual(UIn[TID.x]); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult); + bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); - UOut[uindex++] &= UResult2.x; - UOut[uindex++] &= UResult2.y; + InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.y); bool3 UResult3 = WaveActiveAllEqual(UIn3[TID.x]); - UOut[uindex++] &= UResult3.x; - UOut[uindex++] &= UResult3.y; - UOut[uindex++] &= UResult3.z; + InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.y); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.z); bool4 UResult4 = WaveActiveAllEqual(UIn4[TID.x]); - UOut[uindex++] &= UResult4.x; - UOut[uindex++] &= UResult4.y; - UOut[uindex++] &= UResult4.z; - UOut[uindex++] &= UResult4.w; + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.y); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.z); + InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.w); // constant folding case - bool4 UResultCF = WaveActiveSum(uint4(1,1,1,1)); - UOut[uindex++] &= UResultCF.x; - UOut[uindex++] &= UResultCF.y; - UOut[uindex++] &= UResultCF.z; - UOut[uindex++] &= UResultCF.w; + bool4 UResultCF = WaveActiveAllEqual(uint64_t4(1,1,1,1)); + 
InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.x); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.y); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.z); + InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.w); } //--- pipeline.yaml @@ -115,22 +117,22 @@ Buffers: Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] - Name: Out - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: UOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] - Name: ExpectedOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] - Name: UExpectedOut - Format: Bool + Format: Int32 Stride: 4 - Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] + Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] Results: - Result: ExpectedOut From e7bb423db7e72ed818f6c0ec751998fec2fabacc Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Mar 2026 13:47:41 -0800 Subject: [PATCH 05/12] address one more thing --- test/WaveOps/WaveActiveAllEqual.Wave128.test | 2 +- test/WaveOps/WaveActiveAllEqual.Wave32.test | 2 +- test/WaveOps/WaveActiveAllEqual.Wave64.test | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/WaveOps/WaveActiveAllEqual.Wave128.test b/test/WaveOps/WaveActiveAllEqual.Wave128.test index 1cb370abb..020a63e11 100644 --- a/test/WaveOps/WaveActiveAllEqual.Wave128.test +++ b/test/WaveOps/WaveActiveAllEqual.Wave128.test @@ -53,7 +53,7 @@ void main(uint3 TID : SV_DispatchThreadID) { // Sixth and finally, test that identical vectors pass // in the right conditions. Expect [true, true] for relevant threads. 
bool2 Result6 = false; - if (TID.x %2 == 0 && TID.x != 0) + if (TID.x != 0) Result6 = WaveActiveAllEqual(InVec2[TID.x]); Out6[TID.x] = Result6; diff --git a/test/WaveOps/WaveActiveAllEqual.Wave32.test b/test/WaveOps/WaveActiveAllEqual.Wave32.test index 235c720af..c561d6bb0 100644 --- a/test/WaveOps/WaveActiveAllEqual.Wave32.test +++ b/test/WaveOps/WaveActiveAllEqual.Wave32.test @@ -53,7 +53,7 @@ void main(uint3 TID : SV_DispatchThreadID) { // Sixth and finally, test that identical vectors pass // in the right conditions. Expect [true, true] for relevant threads. bool2 Result6 = false; - if (TID.x %2 == 0 && TID.x != 0) + if (TID.x != 0) Result6 = WaveActiveAllEqual(InVec2[TID.x]); Out6[TID.x] = Result6; diff --git a/test/WaveOps/WaveActiveAllEqual.Wave64.test b/test/WaveOps/WaveActiveAllEqual.Wave64.test index 6b4f6b326..0be50863d 100644 --- a/test/WaveOps/WaveActiveAllEqual.Wave64.test +++ b/test/WaveOps/WaveActiveAllEqual.Wave64.test @@ -53,7 +53,7 @@ void main(uint3 TID : SV_DispatchThreadID) { // Sixth and finally, test that identical vectors pass // in the right conditions. Expect [true, true] for relevant threads. 
bool2 Result6 = false; - if (TID.x %2 == 0 && TID.x != 0) + if (TID.x != 0) Result6 = WaveActiveAllEqual(InVec2[TID.x]); Out6[TID.x] = Result6; From 9b68b6cf28a5b70fffee67a4dc36811c1ba95b45 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Mar 2026 16:01:32 -0800 Subject: [PATCH 06/12] update expected results --- test/WaveOps/WaveActiveAllEqual.Wave128.test | 32 ++++++++++---------- test/WaveOps/WaveActiveAllEqual.Wave32.test | 8 ++--- test/WaveOps/WaveActiveAllEqual.Wave64.test | 16 +++++----- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/test/WaveOps/WaveActiveAllEqual.Wave128.test b/test/WaveOps/WaveActiveAllEqual.Wave128.test index 020a63e11..654b9692b 100644 --- a/test/WaveOps/WaveActiveAllEqual.Wave128.test +++ b/test/WaveOps/WaveActiveAllEqual.Wave128.test @@ -225,22 +225,22 @@ Buffers: - Name: ExpectedOut6 Format: Bool Stride: 8 - Data: [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] + Data: [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] Results: diff --git a/test/WaveOps/WaveActiveAllEqual.Wave32.test b/test/WaveOps/WaveActiveAllEqual.Wave32.test index c561d6bb0..1944f915b 100644 --- a/test/WaveOps/WaveActiveAllEqual.Wave32.test +++ b/test/WaveOps/WaveActiveAllEqual.Wave32.test @@ -147,10 +147,10 @@ Buffers: - Name: ExpectedOut6 Format: Bool Stride: 8 - Data: [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] + Data: [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] Results: diff --git a/test/WaveOps/WaveActiveAllEqual.Wave64.test b/test/WaveOps/WaveActiveAllEqual.Wave64.test index 0be50863d..9fcfe3334 100644 --- a/test/WaveOps/WaveActiveAllEqual.Wave64.test +++ b/test/WaveOps/WaveActiveAllEqual.Wave64.test @@ -173,14 +173,14 @@ Buffers: - Name: ExpectedOut6 Format: Bool Stride: 8 - Data: [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, - 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] + Data: [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] Results: From 19b9fa518ced8cba6fd5b1d612eb7335a8ec0716 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Mar 2026 16:16:08 -0800 Subject: [PATCH 07/12] add appropriate xfails --- test/WaveOps/WaveActiveAllEqual.32.test | 4 ++++ test/WaveOps/WaveActiveAllEqual.fp16.test | 4 ++++ test/WaveOps/WaveActiveAllEqual.fp64.test | 8 ++++++++ test/WaveOps/WaveActiveAllEqual.int16.test | 4 ++++ test/WaveOps/WaveActiveAllEqual.int64.test | 8 ++++++++ 5 files changed, 28 insertions(+) diff --git a/test/WaveOps/WaveActiveAllEqual.32.test b/test/WaveOps/WaveActiveAllEqual.32.test index 792066f49..8aa5ec51a 100644 --- a/test/WaveOps/WaveActiveAllEqual.32.test +++ b/test/WaveOps/WaveActiveAllEqual.32.test @@ -312,6 +312,10 @@ DescriptorSets: ... #--- end +# Unsupported: InterlockedAdd isn't supported in clang yet +# https://github.com/llvm/llvm-project/issues/99122 +# XFAIL: Clang + # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveActiveAllEqual.fp16.test b/test/WaveOps/WaveActiveAllEqual.fp16.test index 090d1b37c..203d4c815 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp16.test +++ b/test/WaveOps/WaveActiveAllEqual.fp16.test @@ -134,6 +134,10 @@ DescriptorSets: ... 
#--- end +# Unsupported: InterlockedAdd isn't supported in clang yet +# https://github.com/llvm/llvm-project/issues/99122 +# XFAIL: Clang + # REQUIRES Half # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl diff --git a/test/WaveOps/WaveActiveAllEqual.fp64.test b/test/WaveOps/WaveActiveAllEqual.fp64.test index e6e6a1f9a..1bbeb4164 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp64.test +++ b/test/WaveOps/WaveActiveAllEqual.fp64.test @@ -133,6 +133,14 @@ DescriptorSets: ... #--- end +# Unsupported: InterlockedAdd isn't supported in clang yet +# https://github.com/llvm/llvm-project/issues/99122 +# XFAIL: Clang + +# Metal doesn't support 64-bit wave operations +# Bug: https://github.com/llvm/offload-test-suite/issues/355 +# XFAIL: Metal + # REQUIRES double # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl diff --git a/test/WaveOps/WaveActiveAllEqual.int16.test b/test/WaveOps/WaveActiveAllEqual.int16.test index d044e9548..7fce6c520 100644 --- a/test/WaveOps/WaveActiveAllEqual.int16.test +++ b/test/WaveOps/WaveActiveAllEqual.int16.test @@ -219,6 +219,10 @@ DescriptorSets: ... #--- end +# Unsupported: InterlockedAdd isn't supported in clang yet +# https://github.com/llvm/llvm-project/issues/99122 +# XFAIL: Clang + # REQUIRES Int16 # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl diff --git a/test/WaveOps/WaveActiveAllEqual.int64.test b/test/WaveOps/WaveActiveAllEqual.int64.test index 0b7d433a1..43c12f921 100644 --- a/test/WaveOps/WaveActiveAllEqual.int64.test +++ b/test/WaveOps/WaveActiveAllEqual.int64.test @@ -219,6 +219,14 @@ DescriptorSets: ... 
#--- end +# Unsupported: InterlockedAdd isn't supported in clang yet +# https://github.com/llvm/llvm-project/issues/99122 +# XFAIL: Clang + +# Metal doesn't support 64-bit wave operations +# Bug: https://github.com/llvm/offload-test-suite/issues/355 +# XFAIL: Metal + # REQUIRES Int64 # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl From ab93dfdbc3b1768c9534566fb5f71e9c7503b9c5 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 6 Mar 2026 13:32:31 -0800 Subject: [PATCH 08/12] add XFAILS --- test/WaveOps/WaveActiveAllEqual.32.test | 7 ++++--- test/WaveOps/WaveActiveAllEqual.fp16.test | 10 ++++------ test/WaveOps/WaveActiveAllEqual.fp64.test | 13 +++++++------ test/WaveOps/WaveActiveAllEqual.int16.test | 4 ++-- test/WaveOps/WaveActiveAllEqual.int64.test | 13 +++++++------ 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/test/WaveOps/WaveActiveAllEqual.32.test b/test/WaveOps/WaveActiveAllEqual.32.test index 8aa5ec51a..97d5ec927 100644 --- a/test/WaveOps/WaveActiveAllEqual.32.test +++ b/test/WaveOps/WaveActiveAllEqual.32.test @@ -18,9 +18,7 @@ RWStructuredBuffer Out : register(u12); RWStructuredBuffer UOut : register(u13); RWStructuredBuffer FOut : register(u14); -// Expect all trues, all elements will be the same. -// Since we expect all results to be true, output buffers -// should result in 4 at the end, since there are 4 threads. +// Expect all 4s, all elements will be the same. [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) @@ -312,6 +310,9 @@ DescriptorSets: ... 
#--- end +# Bug: https://github.com/llvm/offload-test-suite/issues/943 +# XFAIL: Intel && Vulkan + # Unsupported: InterlockedAdd isn't supported in clang yet # https://github.com/llvm/llvm-project/issues/99122 # XFAIL: Clang diff --git a/test/WaveOps/WaveActiveAllEqual.fp16.test b/test/WaveOps/WaveActiveAllEqual.fp16.test index 203d4c815..60a341861 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp16.test +++ b/test/WaveOps/WaveActiveAllEqual.fp16.test @@ -6,12 +6,7 @@ StructuredBuffer In4 : register(t3); RWStructuredBuffer Out : register(u4); -// Expect all trues, all elements will be the same. -// Output buffers start off as true, and each -// lane writes its result value anded with the existing -// result in the output buffer. -// Since we expect all results to be true, output buffers -// should remain all true at the end. +// Expect all 4s, all elements will be the same. [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) @@ -134,6 +129,9 @@ DescriptorSets: ... #--- end +# Bug: https://github.com/llvm/offload-test-suite/issues/943 +# XFAIL: Intel && Vulkan + # Unsupported: InterlockedAdd isn't supported in clang yet # https://github.com/llvm/llvm-project/issues/99122 # XFAIL: Clang diff --git a/test/WaveOps/WaveActiveAllEqual.fp64.test b/test/WaveOps/WaveActiveAllEqual.fp64.test index 1bbeb4164..05a3658b7 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp64.test +++ b/test/WaveOps/WaveActiveAllEqual.fp64.test @@ -6,12 +6,7 @@ StructuredBuffer In4 : register(t3); RWStructuredBuffer Out : register(u4); -// Expect all trues, all elements will be the same. -// Output buffers start off as true, and each -// lane writes its result value anded with the existing -// result in the output buffer. -// Since we expect all results to be true, output buffers -// should remain all true at the end. +// Expect all 4s, all elements will be the same. [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) @@ -133,6 +128,12 @@ DescriptorSets: ... 
#--- end +# Bug: https://github.com/llvm/offload-test-suite/issues/944 +# XFAIL: Intel && DirectX + +# Bug: https://github.com/llvm/offload-test-suite/issues/943 +# XFAIL: Intel && Vulkan + # Unsupported: InterlockedAdd isn't supported in clang yet # https://github.com/llvm/llvm-project/issues/99122 # XFAIL: Clang diff --git a/test/WaveOps/WaveActiveAllEqual.int16.test b/test/WaveOps/WaveActiveAllEqual.int16.test index 7fce6c520..409e529d6 100644 --- a/test/WaveOps/WaveActiveAllEqual.int16.test +++ b/test/WaveOps/WaveActiveAllEqual.int16.test @@ -219,9 +219,9 @@ DescriptorSets: ... #--- end -# Unsupported: InterlockedAdd isn't supported in clang yet +# InterlockedAdd isn't supported in clang yet # https://github.com/llvm/llvm-project/issues/99122 -# XFAIL: Clang +# UNSUPPORTED: Clang # REQUIRES Int16 # RUN: split-file %s %t diff --git a/test/WaveOps/WaveActiveAllEqual.int64.test b/test/WaveOps/WaveActiveAllEqual.int64.test index 43c12f921..f2a15ac1e 100644 --- a/test/WaveOps/WaveActiveAllEqual.int64.test +++ b/test/WaveOps/WaveActiveAllEqual.int64.test @@ -12,12 +12,7 @@ StructuredBuffer UIn4 : register(t7); RWStructuredBuffer Out : register(u8); RWStructuredBuffer UOut : register(u9); -// Expect all trues, all elements will be the same. -// Output buffers start off as true, and each -// lane writes its result value anded with the existing -// result in the output buffer. -// Since we expect all results to be true, output buffers -// should remain all true at the end. +// Expect all 4s, all elements will be the same. [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) @@ -219,6 +214,12 @@ DescriptorSets: ... 
#--- end +# Bug: https://github.com/llvm/offload-test-suite/issues/944 +# XFAIL: Intel && DirectX + +# Bug: https://github.com/llvm/offload-test-suite/issues/943 +# XFAIL: Intel && Vulkan + # Unsupported: InterlockedAdd isn't supported in clang yet # https://github.com/llvm/llvm-project/issues/99122 # XFAIL: Clang From bdd7f64ff3a7718284a753385e6490bf1406f4ce Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 6 Mar 2026 16:26:03 -0800 Subject: [PATCH 09/12] somehow forgot one --- test/WaveOps/WaveActiveAllEqual.int16.test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/WaveOps/WaveActiveAllEqual.int16.test b/test/WaveOps/WaveActiveAllEqual.int16.test index 409e529d6..dc5146684 100644 --- a/test/WaveOps/WaveActiveAllEqual.int16.test +++ b/test/WaveOps/WaveActiveAllEqual.int16.test @@ -219,6 +219,9 @@ DescriptorSets: ... #--- end +# Bug: https://github.com/llvm/offload-test-suite/issues/943 +# XFAIL: Intel && Vulkan + # InterlockedAdd isn't supported in clang yet # https://github.com/llvm/llvm-project/issues/99122 # UNSUPPORTED: Clang From 516e1f1c359856f84d4a051e698bd0bd05ecce49 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Mon, 16 Mar 2026 13:21:51 -0700 Subject: [PATCH 10/12] address tex --- test/WaveOps/WaveActiveAllEqual.32.test | 156 ++++++++++++++------- test/WaveOps/WaveActiveAllEqual.fp16.test | 66 +++++---- test/WaveOps/WaveActiveAllEqual.fp64.test | 68 +++++---- test/WaveOps/WaveActiveAllEqual.int16.test | 119 ++++++++++------ test/WaveOps/WaveActiveAllEqual.int64.test | 113 ++++++++++----- 5 files changed, 342 insertions(+), 180 deletions(-) diff --git a/test/WaveOps/WaveActiveAllEqual.32.test b/test/WaveOps/WaveActiveAllEqual.32.test index 97d5ec927..bc1b327c7 100644 --- a/test/WaveOps/WaveActiveAllEqual.32.test +++ b/test/WaveOps/WaveActiveAllEqual.32.test @@ -25,81 +25,124 @@ void main(uint3 TID : SV_GroupThreadID) { unsigned int index = 0; bool Result = WaveActiveAllEqual(In[TID.x]); - InterlockedAdd(Out[index++], (unsigned 
int)Result); + Out[index + TID.x] = Result; + index += 4; bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result2.x); - InterlockedAdd(Out[index++], (unsigned int)Result2.y); + Out[index + TID.x] = Result2.x; + index += 4; + Out[index + TID.x] = Result2.y; + index += 4; bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result3.x); - InterlockedAdd(Out[index++], (unsigned int)Result3.y); - InterlockedAdd(Out[index++], (unsigned int)Result3.z); + Out[index + TID.x] = Result3.x; + index += 4; + Out[index + TID.x] = Result3.y; + index += 4; + Out[index + TID.x] = Result3.z; + index += 4; bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result4.x); - InterlockedAdd(Out[index++], (unsigned int)Result4.y); - InterlockedAdd(Out[index++], (unsigned int)Result4.z); - InterlockedAdd(Out[index++], (unsigned int)Result4.w); + Out[index + TID.x] = Result4.x; + index += 4; + Out[index + TID.x] = Result4.y; + index += 4; + Out[index + TID.x] = Result4.z; + index += 4; + Out[index + TID.x] = Result4.w; + index += 4; + // constant folding case bool4 ResultCF = WaveActiveAllEqual(int4(1,1,1,1)); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); + Out[index + TID.x] = ResultCF.x; + index += 4; + Out[index + TID.x] = ResultCF.y; + index += 4; + Out[index + TID.x] = ResultCF.z; + index += 4; + Out[index + TID.x] = ResultCF.w; + index += 4; unsigned int uindex = 0; bool UResult = WaveActiveAllEqual(UIn[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult); + UOut[uindex + TID.x] = UResult; + uindex += 4; bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.x); - InterlockedAdd(UOut[uindex++], (unsigned 
int)UResult2.y); + UOut[uindex + TID.x] = UResult2.x; + uindex += 4; + UOut[uindex + TID.x] = UResult2.y; + uindex += 4; bool3 UResult3 = WaveActiveAllEqual(UIn3[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.y); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.z); + UOut[uindex + TID.x] = UResult3.x; + uindex += 4; + UOut[uindex + TID.x] = UResult3.y; + uindex += 4; + UOut[uindex + TID.x] = UResult3.z; + uindex += 4; bool4 UResult4 = WaveActiveAllEqual(UIn4[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.y); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.z); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.w); + UOut[uindex + TID.x] = UResult4.x; + uindex += 4; + UOut[uindex + TID.x] = UResult4.y; + uindex += 4; + UOut[uindex + TID.x] = UResult4.z; + uindex += 4; + UOut[uindex + TID.x] = UResult4.w; + uindex += 4; // constant folding case bool4 UResultCF = WaveActiveAllEqual(uint4(1,1,1,1)); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.y); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.z); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.w); + UOut[uindex + TID.x] = UResultCF.x; + uindex += 4; + UOut[uindex + TID.x] = UResultCF.y; + uindex += 4; + UOut[uindex + TID.x] = UResultCF.z; + uindex += 4; + UOut[uindex + TID.x] = UResultCF.w; + uindex += 4; unsigned int findex = 0; bool FResult = WaveActiveAllEqual(FIn[TID.x]); - InterlockedAdd(FOut[findex++], (unsigned int)FResult); + FOut[findex + TID.x] = FResult; + findex += 4; bool2 FResult2 = WaveActiveAllEqual(FIn2[TID.x]); - InterlockedAdd(FOut[findex++], (unsigned int)FResult2.x); - InterlockedAdd(FOut[findex++], (unsigned int)FResult2.y); + FOut[findex + TID.x] = FResult2.x; + findex += 4; + FOut[findex + TID.x] = FResult2.y; + findex += 4; bool3 
FResult3 = WaveActiveAllEqual(FIn3[TID.x]); - InterlockedAdd(FOut[findex++], (unsigned int)FResult3.x); - InterlockedAdd(FOut[findex++], (unsigned int)FResult3.y); - InterlockedAdd(FOut[findex++], (unsigned int)FResult3.z); + FOut[findex + TID.x] = FResult3.x; + findex += 4; + FOut[findex + TID.x] = FResult3.y; + findex += 4; + FOut[findex + TID.x] = FResult3.z; + findex += 4; bool4 FResult4 = WaveActiveAllEqual(FIn4[TID.x]); - InterlockedAdd(FOut[findex++], (unsigned int)FResult4.x); - InterlockedAdd(FOut[findex++], (unsigned int)FResult4.y); - InterlockedAdd(FOut[findex++], (unsigned int)FResult4.z); - InterlockedAdd(FOut[findex++], (unsigned int)FResult4.w); + FOut[findex + TID.x] = FResult4.x; + findex += 4; + FOut[findex + TID.x] = FResult4.y; + findex += 4; + FOut[findex + TID.x] = FResult4.z; + findex += 4; + FOut[findex + TID.x] = FResult4.w; + findex += 4; // constant folding case bool4 FResultCF = WaveActiveAllEqual(float4(1,1,1,1)); - InterlockedAdd(FOut[findex++], (unsigned int)FResultCF.x); - InterlockedAdd(FOut[findex++], (unsigned int)FResultCF.y); - InterlockedAdd(FOut[findex++], (unsigned int)FResultCF.z); - InterlockedAdd(FOut[findex++], (unsigned int)FResultCF.w); + FOut[findex + TID.x] = FResultCF.x; + findex += 4; + FOut[findex + TID.x] = FResultCF.y; + findex += 4; + FOut[findex + TID.x] = FResultCF.z; + findex += 4; + FOut[findex + TID.x] = FResultCF.w; + findex += 4; } //--- pipeline.yaml @@ -142,7 +185,6 @@ Buffers: Format: UInt32 Stride: 16 Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] - - Name: FIn Format: Float32 Stride: 4 @@ -163,28 +205,37 @@ Buffers: - Name: Out Format: Int32 Stride: 4 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + FillSize: 224 - Name: UOut Format: Int32 - Stride: 0 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + Stride: 4 + FillSize: 224 - Name: FOut Format: Int32 - Stride: 0 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + Stride: 4 + FillSize: 224 - Name: ExpectedOut Format: Int32
Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] - Name: UExpectedOut Format: Int32 Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] - Name: FExpectedOut Format: Int32 Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] Results: - Result: ExpectedOut @@ -313,8 +364,7 @@ DescriptorSets: # Bug: https://github.com/llvm/offload-test-suite/issues/943 # XFAIL: Intel && Vulkan -# Unsupported: InterlockedAdd isn't supported in clang yet -# https://github.com/llvm/llvm-project/issues/99122 +# Bug: https://github.com/llvm/offload-test-suite/issues/981 # XFAIL: Clang # RUN: split-file %s %t diff --git a/test/WaveOps/WaveActiveAllEqual.fp16.test b/test/WaveOps/WaveActiveAllEqual.fp16.test index 60a341861..63f3c3617 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp16.test +++ b/test/WaveOps/WaveActiveAllEqual.fp16.test @@ -6,36 +6,49 @@ StructuredBuffer In4 : register(t3); RWStructuredBuffer Out : register(u4); -// Expect all 4s, all elements will be the same. 
- [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { unsigned int index = 0; bool Result = WaveActiveAllEqual(In[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result); + Out[index + TID.x] = Result; + index += 4; bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result2.x); - InterlockedAdd(Out[index++], (unsigned int)Result2.y); + Out[index + TID.x] = Result2.x; + index += 4; + Out[index + TID.x] = Result2.y; + index += 4; bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result3.x); - InterlockedAdd(Out[index++], (unsigned int)Result3.y); - InterlockedAdd(Out[index++], (unsigned int)Result3.z); + Out[index + TID.x] = Result3.x; + index += 4; + Out[index + TID.x] = Result3.y; + index += 4; + Out[index + TID.x] = Result3.z; + index += 4; bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result4.x); - InterlockedAdd(Out[index++], (unsigned int)Result4.y); - InterlockedAdd(Out[index++], (unsigned int)Result4.z); - InterlockedAdd(Out[index++], (unsigned int)Result4.w); + Out[index + TID.x] = Result4.x; + index += 4; + Out[index + TID.x] = Result4.y; + index += 4; + Out[index + TID.x] = Result4.z; + index += 4; + Out[index + TID.x] = Result4.w; + index += 4; + // constant folding case bool4 ResultCF = WaveActiveAllEqual(half4(1,1,1,1)); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); + Out[index + TID.x] = ResultCF.x; + index += 4; + Out[index + TID.x] = ResultCF.y; + index += 4; + Out[index + TID.x] = ResultCF.z; + index += 4; + Out[index + TID.x] = ResultCF.w; + index += 4; } //--- pipeline.yaml @@ -67,20 +80,26 @@ Buffers: - Name: Out Format: Int32 Stride: 4 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + FillSize: 224 - Name: UOut 
Format: Int32 Stride: 4 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + FillSize: 224 - Name: ExpectedOut Format: Int32 - Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] - Name: UExpectedOut Format: Int32 - Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] Results: - Result: ExpectedOut @@ -132,8 +151,7 @@ DescriptorSets: # Bug: https://github.com/llvm/offload-test-suite/issues/943 # XFAIL: Intel && Vulkan -# Unsupported: InterlockedAdd isn't supported in clang yet -# https://github.com/llvm/llvm-project/issues/99122 +# Bug: https://github.com/llvm/offload-test-suite/issues/981 # XFAIL: Clang # REQUIRES Half diff --git a/test/WaveOps/WaveActiveAllEqual.fp64.test b/test/WaveOps/WaveActiveAllEqual.fp64.test index 05a3658b7..5786810b7 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp64.test +++ b/test/WaveOps/WaveActiveAllEqual.fp64.test @@ -13,29 +13,44 @@ void main(uint3 TID : SV_GroupThreadID) { unsigned int index = 0; bool Result = WaveActiveAllEqual(In[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result); + Out[index + TID.x] = Result; + index += 4; bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result2.x); - InterlockedAdd(Out[index++], (unsigned int)Result2.y); + Out[index + TID.x] = Result2.x; + index += 4; + Out[index + TID.x] = Result2.y; + index += 4; bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result3.x); - InterlockedAdd(Out[index++], (unsigned int)Result3.y); - InterlockedAdd(Out[index++], (unsigned int)Result3.z); + Out[index +
TID.x] = Result3.x; + index += 4; + Out[index + TID.x] = Result3.y; + index += 4; + Out[index + TID.x] = Result3.z; + index += 4; bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result4.x); - InterlockedAdd(Out[index++], (unsigned int)Result4.y); - InterlockedAdd(Out[index++], (unsigned int)Result4.z); - InterlockedAdd(Out[index++], (unsigned int)Result4.w); + Out[index + TID.x] = Result4.x; + index += 4; + Out[index + TID.x] = Result4.y; + index += 4; + Out[index + TID.x] = Result4.z; + index += 4; + Out[index + TID.x] = Result4.w; + index += 4; + // constant folding case bool4 ResultCF = WaveActiveAllEqual(double4(1,1,1,1)); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); + Out[index + TID.x] = ResultCF.x; + index += 4; + Out[index + TID.x] = ResultCF.y; + index += 4; + Out[index + TID.x] = ResultCF.z; + index += 4; + Out[index + TID.x] = ResultCF.w; + index += 4; } //--- pipeline.yaml @@ -65,21 +80,27 @@ Buffers: - Name: Out Format: Int32 - Stride: 4 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + Stride: 4 + FillSize: 224 - Name: UOut Format: Int32 - Stride: 4 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + Stride: 4 + FillSize: 224 - Name: ExpectedOut Format: Int32 - Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] - Name: UExpectedOut Format: Int32 - Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] Results:
- Result: ExpectedOut @@ -134,8 +155,7 @@ DescriptorSets: # Bug: https://github.com/llvm/offload-test-suite/issues/943 # XFAIL: Intel && Vulkan -# Unsupported: InterlockedAdd isn't supported in clang yet -# https://github.com/llvm/llvm-project/issues/99122 +# Bug: https://github.com/llvm/offload-test-suite/issues/981 # XFAIL: Clang # Metal doesn't support 64-bit wave operations diff --git a/test/WaveOps/WaveActiveAllEqual.int16.test b/test/WaveOps/WaveActiveAllEqual.int16.test index dc5146684..e1025678c 100644 --- a/test/WaveOps/WaveActiveAllEqual.int16.test +++ b/test/WaveOps/WaveActiveAllEqual.int16.test @@ -24,55 +24,87 @@ void main(uint3 TID : SV_GroupThreadID) { unsigned int index = 0; bool Result = WaveActiveAllEqual(In[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result); + Out[index + TID.x] = Result; + index += 4; bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result2.x); - InterlockedAdd(Out[index++], (unsigned int)Result2.y); + Out[index + TID.x] = Result2.x; + index += 4; + Out[index + TID.x] = Result2.y; + index += 4; bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result3.x); - InterlockedAdd(Out[index++], (unsigned int)Result3.y); - InterlockedAdd(Out[index++], (unsigned int)Result3.z); + Out[index + TID.x] = Result3.x; + index += 4; + Out[index + TID.x] = Result3.y; + index += 4; + Out[index + TID.x] = Result3.z; + index += 4; bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result4.x); - InterlockedAdd(Out[index++], (unsigned int)Result4.y); - InterlockedAdd(Out[index++], (unsigned int)Result4.z); - InterlockedAdd(Out[index++], (unsigned int)Result4.w); + Out[index + TID.x] = Result4.x; + index += 4; + Out[index + TID.x] = Result4.y; + index += 4; + Out[index + TID.x] = Result4.z; + index += 4; + Out[index + TID.x] = Result4.w; + index += 4; + // constant folding case bool4 ResultCF = 
WaveActiveAllEqual(int16_t4(1,1,1,1)); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); + Out[index + TID.x] = ResultCF.x; + index += 4; + Out[index + TID.x] = ResultCF.y; + index += 4; + Out[index + TID.x] = ResultCF.z; + index += 4; + Out[index + TID.x] = ResultCF.w; + index += 4; + + unsigned int uindex = 0; - bool UResult = WaveActiveAllEqual(UIn[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult); - + bool UResult = WaveActiveAllEqual(UIn[TID.x]); + UOut[uindex + TID.x] = UResult; + uindex += 4; + bool2 UResult2 = WaveActiveAllEqual(UIn2[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.y); + UOut[uindex + TID.x] = UResult2.x; + uindex += 4; + UOut[uindex + TID.x] = UResult2.y; + uindex += 4; bool3 UResult3 = WaveActiveAllEqual(UIn3[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.y); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.z); + UOut[uindex + TID.x] = UResult3.x; + uindex += 4; + UOut[uindex + TID.x] = UResult3.y; + uindex += 4; + UOut[uindex + TID.x] = UResult3.z; + uindex += 4; bool4 UResult4 = WaveActiveAllEqual(UIn4[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.y); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.z); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.w); + UOut[uindex + TID.x] = UResult4.x; + uindex += 4; + UOut[uindex + TID.x] = UResult4.y; + uindex += 4; + UOut[uindex + TID.x] = UResult4.z; + uindex += 4; + UOut[uindex + TID.x] = UResult4.w; + uindex += 4; + // constant folding case bool4 UResultCF = WaveActiveAllEqual(uint16_t4(1,1,1,1)); - InterlockedAdd(UOut[uindex++], (unsigned 
int)UResultCF.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.y); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.z); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.w); + UOut[uindex + TID.x] = UResultCF.x; + uindex += 4; + UOut[uindex + TID.x] = UResultCF.y; + uindex += 4; + UOut[uindex + TID.x] = UResultCF.z; + uindex += 4; + UOut[uindex + TID.x] = UResultCF.w; + uindex += 4; } //--- pipeline.yaml @@ -118,21 +150,27 @@ Buffers: - Name: Out Format: Int32 - Stride: 4 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + Stride: 4 + FillSize: 224 - Name: UOut Format: Int32 - Stride: 4 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + Stride: 4 + FillSize: 224 - Name: ExpectedOut Format: Int32 - Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] - Name: UExpectedOut Format: Int32 - Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Stride: 4 + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] Results: - Result: ExpectedOut @@ -222,9 +260,8 @@ DescriptorSets: # Bug: https://github.com/llvm/offload-test-suite/issues/943 # XFAIL: Intel && Vulkan -# InterlockedAdd isn't supported in clang yet -# https://github.com/llvm/llvm-project/issues/99122 -# UNSUPPORTED: Clang +# Bug: https://github.com/llvm/offload-test-suite/issues/981 +# XFAIL: Clang # REQUIRES Int16 # RUN: split-file %s %t diff --git a/test/WaveOps/WaveActiveAllEqual.int64.test b/test/WaveOps/WaveActiveAllEqual.int64.test index f2a15ac1e..f786d8d70 100644 --- a/test/WaveOps/WaveActiveAllEqual.int64.test +++ b/test/WaveOps/WaveActiveAllEqual.int64.test @@ -19,55 +19,87 @@ void main(uint3 TID : SV_GroupThreadID) { unsigned int
index = 0; bool Result = WaveActiveAllEqual(In[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result); + Out[index + TID.x] = Result; + index += 4; bool2 Result2 = WaveActiveAllEqual(In2[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result2.x); - InterlockedAdd(Out[index++], (unsigned int)Result2.y); + Out[index + TID.x] = Result2.x; + index += 4; + Out[index + TID.x] = Result2.y; + index += 4; bool3 Result3 = WaveActiveAllEqual(In3[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result3.x); - InterlockedAdd(Out[index++], (unsigned int)Result3.y); - InterlockedAdd(Out[index++], (unsigned int)Result3.z); + Out[index + TID.x] = Result3.x; + index += 4; + Out[index + TID.x] = Result3.y; + index += 4; + Out[index + TID.x] = Result3.z; + index += 4; bool4 Result4 = WaveActiveAllEqual(In4[TID.x]); - InterlockedAdd(Out[index++], (unsigned int)Result4.x); - InterlockedAdd(Out[index++], (unsigned int)Result4.y); - InterlockedAdd(Out[index++], (unsigned int)Result4.z); - InterlockedAdd(Out[index++], (unsigned int)Result4.w); + Out[index + TID.x] = Result4.x; + index += 4; + Out[index + TID.x] = Result4.y; + index += 4; + Out[index + TID.x] = Result4.z; + index += 4; + Out[index + TID.x] = Result4.w; + index += 4; + // constant folding case bool4 ResultCF = WaveActiveAllEqual(int64_t4(1,1,1,1)); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.x); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.y); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.z); - InterlockedAdd(Out[index++], (unsigned int)ResultCF.w); + Out[index + TID.x] = ResultCF.x; + index += 4; + Out[index + TID.x] = ResultCF.y; + index += 4; + Out[index + TID.x] = ResultCF.z; + index += 4; + Out[index + TID.x] = ResultCF.w; + index += 4; + + unsigned int uindex = 0; - bool UResult = WaveActiveAllEqual(UIn[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult); - + bool UResult = WaveActiveAllEqual(UIn[TID.x]); + UOut[uindex + TID.x] = UResult; + uindex += 4; + bool2 
UResult2 = WaveActiveAllEqual(UIn2[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult2.y); + UOut[uindex + TID.x] = UResult2.x; + uindex += 4; + UOut[uindex + TID.x] = UResult2.y; + uindex += 4; bool3 UResult3 = WaveActiveAllEqual(UIn3[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.y); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult3.z); + UOut[uindex + TID.x] = UResult3.x; + uindex += 4; + UOut[uindex + TID.x] = UResult3.y; + uindex += 4; + UOut[uindex + TID.x] = UResult3.z; + uindex += 4; bool4 UResult4 = WaveActiveAllEqual(UIn4[TID.x]); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.y); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.z); - InterlockedAdd(UOut[uindex++], (unsigned int)UResult4.w); + UOut[uindex + TID.x] = UResult4.x; + uindex += 4; + UOut[uindex + TID.x] = UResult4.y; + uindex += 4; + UOut[uindex + TID.x] = UResult4.z; + uindex += 4; + UOut[uindex + TID.x] = UResult4.w; + uindex += 4; + // constant folding case bool4 UResultCF = WaveActiveAllEqual(uint64_t4(1,1,1,1)); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.x); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.y); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.z); - InterlockedAdd(UOut[uindex++], (unsigned int)UResultCF.w); + UOut[uindex + TID.x] = UResultCF.x; + uindex += 4; + UOut[uindex + TID.x] = UResultCF.y; + uindex += 4; + UOut[uindex + TID.x] = UResultCF.z; + uindex += 4; + UOut[uindex + TID.x] = UResultCF.w; + uindex += 4; } //--- pipeline.yaml @@ -113,21 +145,27 @@ Buffers: - Name: Out Format: Int32 - Stride: 4 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + Stride: 4 + FillSize: 224 - Name: UOut Format: Int32 - Stride: 4 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] + Stride: 4 + FillSize: 224 - Name:
ExpectedOut Format: Int32 Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] - Name: UExpectedOut Format: Int32 Stride: 4 - Data: [ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ] + Data: [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] Results: - Result: ExpectedOut @@ -220,8 +258,7 @@ DescriptorSets: # Bug: https://github.com/llvm/offload-test-suite/issues/943 # XFAIL: Intel && Vulkan -# Unsupported: InterlockedAdd isn't supported in clang yet -# https://github.com/llvm/llvm-project/issues/99122 +# Bug: https://github.com/llvm/offload-test-suite/issues/981 # XFAIL: Clang # Metal doesn't support 64-bit wave operations From f4543f17dadccf9b60e26f0fe4875cd8b8ed4939 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Mon, 16 Mar 2026 13:22:47 -0700 Subject: [PATCH 11/12] remove comment --- test/WaveOps/WaveActiveAllEqual.int16.test | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/WaveOps/WaveActiveAllEqual.int16.test b/test/WaveOps/WaveActiveAllEqual.int16.test index e1025678c..57846cc0d 100644 --- a/test/WaveOps/WaveActiveAllEqual.int16.test +++ b/test/WaveOps/WaveActiveAllEqual.int16.test @@ -12,13 +12,6 @@ StructuredBuffer UIn4 : register(t7); RWStructuredBuffer Out : register(u8); RWStructuredBuffer UOut : register(u9); -// Expect all trues, all elements will be the same. -// Output buffers start off as true, and each -// lane writes its result value anded with the existing -// result in the output buffer. -// Since we expect all results to be true, output buffers -// should remain all true at the end. 
- [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { From ef347d99f7bcd23be6f7ba8337e3b999101f4be8 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Mon, 23 Mar 2026 17:39:12 -0700 Subject: [PATCH 12/12] address Tex --- test/WaveOps/WaveActiveAllEqual.32.test | 2 -- test/WaveOps/WaveActiveAllEqual.fp64.test | 2 -- test/WaveOps/WaveActiveAllEqual.int64.test | 2 -- 3 files changed, 6 deletions(-) diff --git a/test/WaveOps/WaveActiveAllEqual.32.test b/test/WaveOps/WaveActiveAllEqual.32.test index bc1b327c7..5ffff9423 100644 --- a/test/WaveOps/WaveActiveAllEqual.32.test +++ b/test/WaveOps/WaveActiveAllEqual.32.test @@ -18,8 +18,6 @@ RWStructuredBuffer Out : register(u12); RWStructuredBuffer UOut : register(u13); RWStructuredBuffer FOut : register(u14); -// Expect all 4s, all elements will be the same. - [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { diff --git a/test/WaveOps/WaveActiveAllEqual.fp64.test b/test/WaveOps/WaveActiveAllEqual.fp64.test index 5786810b7..82c627936 100644 --- a/test/WaveOps/WaveActiveAllEqual.fp64.test +++ b/test/WaveOps/WaveActiveAllEqual.fp64.test @@ -6,8 +6,6 @@ StructuredBuffer In4 : register(t3); RWStructuredBuffer Out : register(u4); -// Expect all 4s, all elements will be the same. - [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { diff --git a/test/WaveOps/WaveActiveAllEqual.int64.test b/test/WaveOps/WaveActiveAllEqual.int64.test index f786d8d70..f737e7f0a 100644 --- a/test/WaveOps/WaveActiveAllEqual.int64.test +++ b/test/WaveOps/WaveActiveAllEqual.int64.test @@ -12,8 +12,6 @@ StructuredBuffer UIn4 : register(t7); RWStructuredBuffer Out : register(u8); RWStructuredBuffer UOut : register(u9); -// Expect all 4s, all elements will be the same. - [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) {