From a615fd9d07d3b07b0aaf55c94d59aa9d74bab8b8 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Tue, 12 May 2026 17:30:42 -0600 Subject: [PATCH 1/5] [SM6.10] Implement VectorAccumDescriptor Builtin --- docs/DXIL.rst | 4 +- include/dxc/DXIL/DxilConstants.h | 16 ++++-- include/dxc/DXIL/DxilInstructions.h | 34 ++++++++++++ include/dxc/HlslIntrinsicOp.h | 3 +- lib/DXIL/DxilOperations.cpp | 34 +++++++----- lib/HLSL/HLOperationLower.cpp | 28 ++++++++++ tools/clang/lib/Headers/hlsl/dx/linalg.h | 7 +++ .../CodeGenDXIL/hlsl/linalg/api/vectors.hlsl | 55 +++++++++++-------- .../vectoraccumulatetodescriptor/nominal.hlsl | 19 +++++++ .../LinAlgMatrix/linalgmatrix-as.ll | 44 ++++++++------- .../LinAlgMatrix/linalgmatrix-cs.ll | 44 ++++++++------- .../LinAlgMatrix/linalgmatrix-ds.ll | 42 ++++++++------ .../LinAlgMatrix/linalgmatrix-gs.ll | 42 ++++++++------ .../LinAlgMatrix/linalgmatrix-hs.ll | 40 ++++++++------ .../LinAlgMatrix/linalgmatrix-ms.ll | 42 ++++++++------ .../LinAlgMatrix/linalgmatrix-node.ll | 42 ++++++++------ .../LinAlgMatrix/linalgmatrix-ps.ll | 40 ++++++++------ .../LinAlgMatrix/linalgmatrix-raytracing.ll | 41 +++++++++----- .../LinAlgMatrix/linalgmatrix-vs.ll | 42 ++++++++------ .../linalg/builtins/matrix-builtins-ast.hlsl | 8 +++ .../hlsl/linalg/builtins/stage-errors.hlsl | 1 + .../builtins/unavailable_pre_sm610.hlsl | 3 + utils/hct/gen_intrin_main.txt | 2 + utils/hct/hctdb.py | 19 ++++++- utils/hct/hlsl_intrinsic_opcodes.json | 5 +- 25 files changed, 431 insertions(+), 226 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl diff --git a/docs/DXIL.rst b/docs/DXIL.rst index 8891e7e056..6516d8e1f1 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3096,8 +3096,8 @@ ID Name Description 2147483676 LinAlgMatrixAccumulateToMemory accumulates a matrix to groupshared memory 2147483677 LinAlgMatrixOuterProduct Outer products an M sized vector and a N sized vector producing an MxN matrix 2147483678 
LinAlgConvert Convert vector components from one interpretation to another -2147483679 ReservedE0 reserved -2147483680 ReservedE1 reserved +2147483679 VectorAccumulateToDescriptor Accumulates given vector to the buffer at the given offset +2147483680 ReservedE0 reserved 2147483681 DebugBreak triggers a breakpoint if a debugger is attached 2147483682 IsDebuggerPresent returns true if a debugger is attached ========== ======================================== =================================================================================================================== diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index a535b8d768..4b73a36f1b 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -533,8 +533,7 @@ static const OpCodeTableID TableID = OpCodeTableID::ExperimentalOps; // Enumeration for ExperimentalOps DXIL operations enum class OpCode : unsigned { // - ReservedE0 = 31, // reserved - ReservedE1 = 32, // reserved + ReservedE0 = 32, // reserved // Debugging DebugBreak = 33, // triggers a breakpoint if a debugger is attached @@ -597,6 +596,8 @@ enum class OpCode : unsigned { LinAlgMatrixStoreToDescriptor = 20, // stores a matrix to a RWByteAddressBuffer LinAlgMatrixStoreToMemory = 21, // stores a matrix to groupshared memory + VectorAccumulateToDescriptor = + 31, // Accumulates given vector to the buffer at the given offset // No-op ExperimentalNop = 0, // nop does nothing @@ -1355,10 +1356,12 @@ enum class OpCode : unsigned { // LinAlgConvert = 0x8000001E, 2147483678U, -2147483618 EXP_OPCODE(ExperimentalOps, LinAlgConvert), // Convert vector components from // one interpretation to another - // ReservedE0 = 0x8000001F, 2147483679U, -2147483617 + // VectorAccumulateToDescriptor = 0x8000001F, 2147483679U, -2147483617 + EXP_OPCODE(ExperimentalOps, + VectorAccumulateToDescriptor), // Accumulates given vector to the + // buffer at the given offset + // ReservedE0 = 0x80000020, 2147483680U, 
-2147483616 EXP_OPCODE(ExperimentalOps, ReservedE0), // reserved - // ReservedE1 = 0x80000020, 2147483680U, -2147483616 - EXP_OPCODE(ExperimentalOps, ReservedE1), // reserved // DebugBreak = 0x80000021, 2147483681U, -2147483615 EXP_OPCODE(ExperimentalOps, DebugBreak), // triggers a breakpoint if a debugger is attached @@ -1544,6 +1547,7 @@ enum class OpCodeClass : unsigned { LinAlgMatrixSetElement, LinAlgMatrixStoreToDescriptor, LinAlgMatrixStoreToMemory, + VectorAccumulateToDescriptor, // Mesh shader instructions EmitIndices, @@ -1730,7 +1734,7 @@ enum class OpCodeClass : unsigned { NodeOutputIsValid, OutputComplete, - NumOpClasses = 222, // exclusive last value of enumeration + NumOpClasses = 223, // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 511f5b476f..79b87c7e12 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10960,6 +10960,40 @@ struct DxilInst_LinAlgConvert { void set_outputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); } }; +/// This instruction Accumulates given vector to the buffer at the given offset +struct DxilInst_VectorAccumulateToDescriptor { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_VectorAccumulateToDescriptor(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::VectorAccumulateToDescriptor); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_vector = 1, + arg_handle = 2, + arg_offset = 3, + }; + // Accessors + llvm::Value *get_vector() const { return Instr->getOperand(1); } + void 
set_vector(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_handle() const { return Instr->getOperand(2); } + void set_handle(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_offset() const { return Instr->getOperand(3); } + void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } +}; + /// This instruction triggers a breakpoint if a debugger is attached struct DxilInst_DebugBreak { llvm::Instruction *Instr; diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 03e439b6b0..b7e16f1f46 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -132,6 +132,7 @@ enum class IntrinsicOp { IOP___builtin_LinAlg_MatrixStoreToMemory = 410, IOP___builtin_LinAlg_MatrixVectorMultiply = 418, IOP___builtin_LinAlg_MatrixVectorMultiplyAdd = 419, + IOP___builtin_VectorAccumulateToDescriptor = 423, IOP_abort = 102, IOP_abs = 103, IOP_acos = 104, @@ -429,7 +430,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 423, + Num_Intrinsics = 424, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b786fee9fc..29e0eb6769 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2984,6 +2984,14 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { 2, {{0x400}, {0x400}}, {{0xe7}, {0xe7}}}, // Overloads: getNumParams() <= 1) return nullptr; return FT->getParamType(1); @@ -7009,7 +7014,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::ClusterID: case OpCode::LinAlgMatrixQueryAccumulatorLayout: case OpCode::ReservedE0: - case OpCode::ReservedE1: case OpCode::DebugBreak: case OpCode::IsDebuggerPresent: return Type::getVoidTy(Ctx); diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index abf0ad86be..0a2b0cb8be 100644 --- 
a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7163,6 +7163,31 @@ Value *TranslateLinAlgConvert(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, return nullptr; } +Value *TranslateLinAlgVectorAccumulate(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + // Input vector parameter + Value *InputVector = CI->getArgOperand(1); + + // Destination resource handle and offset (DXIL operands: handle, offset) + Value *MatrixBuffer = CI->getArgOperand(2); + Value *MatrixOffset = CI->getArgOperand(3); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, InputVector->getType()); + + return Builder.CreateCall(DxilFunc, + {OpArg, InputVector, MatrixBuffer, MatrixOffset}); +} + } // namespace // Lower table. @@ -7957,6 +7982,9 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_Convert, TranslateLinAlgConvert, DXIL::OpCode::LinAlgConvert}, + {IntrinsicOp::IOP___builtin_VectorAccumulateToDescriptor, + TranslateLinAlgVectorAccumulate, + DXIL::OpCode::VectorAccumulateToDescriptor}, }; constexpr size_t NumLowerTableEntries = sizeof(gLowerTable) / sizeof(gLowerTable[0]); diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 9b43dcc6cb..4155474a35 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -575,6 +575,13 @@ OuterProduct(vector VecA, vector VecB) { return Result; } +template +typename hlsl::enable_if::value, void>::type +InterlockedAccumulate(vector Vec, RWByteAddressBuffer Res, + uint StartOffset) { + __builtin_VectorAccumulateToDescriptor(Vec, Res, StartOffset); +} + } // namespace linalg } // namespace dx diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl 
b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl index 58f19b887c..b3588f885f 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl @@ -9,35 +9,36 @@ using MatrixAccum_8_8_Ty = Matrix; using Matrix_7_15_ATy = Matrix; +RWByteAddressBuffer RWBAB : register(u0); ByteAddressBuffer BAB : register(t0); [numthreads(4, 4, 4)] void main(uint ID : SV_GroupID) { -// CHECK: %[[MAT1:.*]] = call %dx.types.LinAlgMatrixC8M8N4U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC8M8N4U0S0( -// CHECK-SAME: i32 -2147483634, %dx.types.Handle %{{[0-9]+}}, i32 0, i32 8, i32 1, i32 128) -// CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) + // CHECK: %[[MAT1:.*]] = call %dx.types.LinAlgMatrixC8M8N4U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC8M8N4U0S0( + // CHECK-SAME: i32 -2147483634, %dx.types.Handle %{{[0-9]+}}, i32 0, i32 8, i32 1, i32 128) + // CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) MatrixATy Mat1 = MatrixATy::Load(BAB, 0, 8); vector vec1 = 10.3f; -// CHECK: %[[VEC2:.*]] = call <8 x half> @dx.op.linAlgMatVecMul.v8f16.mC8M8N4U0S0.v4f16(i32 -2147483623, -// CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> , i32 8) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) + // CHECK: %[[VEC2:.*]] = call <8 x half> @dx.op.linAlgMatVecMul.v8f16.mC8M8N4U0S0.v4f16(i32 -2147483623, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> , i32 8) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) vector vec2 = Multiply(Mat1, vec1); -// CHECK: %[[VEC3:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622, -// CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> , i32 8, <8 x half> %[[VEC2]], i32 8) -// CHECK-SAME: ; 
LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + // CHECK: %[[VEC3:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> , i32 8, <8 x half> %[[VEC2]], i32 8) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) vector vec3 = MultiplyAdd(Mat1, vec1, vec2); -// CHECK: %[[VEC20:.*]] = shufflevector + // CHECK: %[[VEC20:.*]] = shufflevector vector vec20 = (vector)vec2; -// CHECK: %[[VEC4:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622, -// CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x half> %[[VEC3]], i32 8) -// CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + // CHECK: %[[VEC4:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x half> %[[VEC3]], i32 8) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) InterpretedVector interpVec2 = MakeInterpretedVector(vec20); vector vec4 = MultiplyAdd(Mat1, interpVec2, vec3); @@ -77,20 +78,19 @@ void main(uint ID : SV_GroupID) { InterpretedVector convertedVec; convertedVec = Convert(vec6); - // CHECK: call <4 x i32> @dx.op.linAlgConvert.v4i32.v16f16(i32 -2147483618, <16 x half> %21, i32 8, i32 21) + // CHECK: call <4 x i32> @dx.op.linAlgConvert.v4i32.v16f16(i32 -2147483618, <16 x half> %{{[0-9]+}}, i32 8, i32 21) // CHECK: ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) typedef vector half16; half16 srcF16 = BAB.Load(128); InterpretedVector convertedPacked = Convert(srcF16); - // 
CHECK: call <1 x i32> @dx.op.linAlgConvert.v1i32.v3f16(i32 -2147483618, <3 x half> %25, i32 8, i32 21) + // CHECK: call <1 x i32> @dx.op.linAlgConvert.v1i32.v3f16(i32 -2147483618, <3 x half> %{{[0-9]+}}, i32 8, i32 21) // CHECK-SAME: ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) half3 ThreeF16 = BAB.Load(256); InterpretedVector convertedPacked2 = Convert(ThreeF16); // Test MultiplyAdd with odd sizes - // vector vecH15 = BAB.Load< vector >(168); vector vecH7 = BAB.Load< vector >(64); @@ -104,7 +104,7 @@ void main(uint ID : SV_GroupID) { // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %{{[0-9]+}}, i32 8) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) vector vec7 = MultiplyAdd(Mat_7_15, vecH15, vecH7); - + // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v15f16.v7f16(i32 -2147483622, %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], // CHECK-SAME; i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %{{[0-9]+}}, i32 8) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) @@ -114,7 +114,7 @@ void main(uint ID : SV_GroupID) { // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment) // CHECK: %[[MEM_BIAS1:.*]] = extractvalue %dx.types.ResRet.v7f16 %[[LOAD1]], 0 // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v15f16.v7f16(i32 -2147483622, - // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %29, i32 8, <7 x half> %37, i32 8) + // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %{{[0-9]+}}, i32 8) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) VectorRef memBias7 = {BAB, 512}; vector vec9 = MultiplyAdd(Mat_7_15, 
vecH15, memBias7); @@ -133,15 +133,24 @@ void main(uint ID : SV_GroupID) { InterpretedVector interpVecH15Packed = Convert(vecH15); // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v4i32.v7f16(i32 -2147483622, - // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <4 x i32> %43, i32 21, <7 x half> %31, i32 8) + // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <4 x i32> %{{[0-9]+}}, i32 21, <7 x half> %{{[0-9]+}}, i32 8) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) vector vec11 = MultiplyAdd(Mat_7_15, interpVecH15Packed, vecH7); - // CHECK: %[[LOAD3:.+]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle %45, i32 512, i32 undef, i32 2) + // CHECK: %[[LOAD3:.+]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle %{{[0-9]+}}, i32 512, i32 undef, i32 2) // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment) - // CHECK-NEXT: %[[MEM_BIAS3:.*]] = extractvalue %dx.types.ResRet.v7f16 %46, 0 + // CHECK-NEXT: %[[MEM_BIAS3:.*]] = extractvalue %dx.types.ResRet.v7f16 %{{[0-9]+}}, 0 // CHECK-NEXT: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v4i32.v7f16(i32 -2147483622, // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <4 x i32> %[[INTERP_VEC_H15_PACKED]], i32 21, <7 x half> %[[MEM_BIAS3]], i32 8) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - vector vec12 = MultiplyAdd(Mat_7_15, interpVecH15Packed, memBias7); + vector vec12 = MultiplyAdd(Mat_7_15, interpVecH15Packed, memBias7); + + // CHECK: call void @dx.op.vectorAccumulateToDescriptor.v4f16(i32 -2147483617, <4 x half> + // CHECK-SAME: , %dx.types.Handle %{{[0-9]+}}, i32 0) + // CHECK-SAME: ; VectorAccumulateToDescriptor(vector,handle,offset) + InterlockedAccumulate(vec1, RWBAB, 0); + 
+ // CHECK: call void @dx.op.vectorAccumulateToDescriptor.v8f16(i32 -2147483617, <8 x half> %{{[0-9]+}}, + // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 8) ; VectorAccumulateToDescriptor(vector,handle,offset) + InterlockedAccumulate(vec2, RWBAB, 8); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl new file mode 100644 index 0000000000..b99ec0fe31 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl @@ -0,0 +1,19 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -HV 202x -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 + +RWByteAddressBuffer outbuf; + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> + // CHECK-SAME: , %dx.types.Handle %{{.*}}, i32 16) + // CHECK-SAME: ; VectorAccumulateToDescriptor(vector,handle,offset) + + // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>, %dx.types.Handle, i32)" + // CHECK2-SAME: (i32 423, <4 x float> %{{.*}}, %dx.types.Handle %{{.*}}, i32 16) + float4 vec = {9.0, 8.0, 7.0, 6.0}; + __builtin_VectorAccumulateToDescriptor(vec, outbuf, 16); +} diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll index 297dd692f5..c0f3834c0f 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll @@ -33,7 +33,7 @@ target triple = "dxil-ms-dx" @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 define void @mainAS() { - + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 
0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer @@ -43,71 +43,74 @@ define void @mainAS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = 
call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 
@dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) %2 = alloca %struct.AmpPayload.0, align 8 call void 
@dx.op.dispatchMesh.struct.AmpPayload.0(i32 173, i32 8, i32 1, i32 1, %struct.AmpPayload.0* nonnull %2) ; DispatchMesh(threadGroupCountX,threadGroupCountY,threadGroupCountZ,payload) - + ret void } @@ -144,6 +147,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll index c5b2ccfec7..f3381418c5 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll @@ -17,7 +17,7 @@ target triple = "dxil-ms-dx" @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 define void @mainCS() { - + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer @@ -27,68 +27,71 @@ define void @mainCS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void 
@dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, 
%dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 
-2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -125,6 +128,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll index b2a7eb0c53..eb65756f4a 100644 --- 
a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll @@ -43,65 +43,68 @@ define void @MainDS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, 
%dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 
-2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -112,7 +115,7 @@ define void @MainDS() { call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call 
void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %4) ; StoreOutput(outputSigId,rowIndex,colIndex,value) - call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %5) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %5) ; StoreOutput(outputSigId,rowIndex,colIndex,value) ret void } @@ -149,6 +152,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll index edbca57b0b..8fa47d8484 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll @@ -43,65 +43,68 @@ define void @MainGS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = 
call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 
-2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 
-2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -111,7 +114,7 @@ define void @MainGS() { call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+01) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.emitStream(i32 97, i8 0) ; EmitStream(streamId) call void @dx.op.cutStream(i32 98, i8 0) ; CutStream(streamId) - + ret void } @@ -148,6 +151,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll index c148cc60f3..0fb7e26854 100644 
--- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll @@ -43,65 +43,68 @@ define void @MainHS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, 
%dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 
-2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -154,6 +157,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> 
@dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll index 3542055c65..e75859ccf0 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll @@ -43,65 +43,68 @@ define void @mainMeS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; 
dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 
%v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; 
dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -110,7 +113,7 @@ define void @mainMeS() { call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 1, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 2, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 3, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) - + ret void } @@ -147,6 +150,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll index f42208a5f6..5e11c4a105 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll @@ -44,68 +44,71 @@ define void @mainNS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 
-2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; 
LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, 
%dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -142,6 +145,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, 
%dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll index d4d39c7f40..9165527b10 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll @@ -42,65 +42,68 @@ define void @mainPS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> 
, i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; 
LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; 
LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -145,6 +148,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll index d6ff1a8c79..b7d5ba141d 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll @@ -127,7 +127,7 @@ target triple = "dxil-ms-dx" @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 define void @"\01?MainRG@@YAXXZ"() #0 { - + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer @@ -143,7 +143,9 @@ define void @"\01?MainRG@@YAXXZ"() #0 { %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; 
LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -158,7 +160,7 @@ define void @"\01?MainRG@@YAXXZ"() #0 { call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -178,7 +180,9 @@ define void @"\01?MainIS@@YAXXZ"() #0 { %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 
-2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -193,7 +197,7 @@ define void @"\01?MainIS@@YAXXZ"() #0 { call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -213,7 +217,9 @@ define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %a %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; 
LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -228,7 +234,7 @@ define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %a call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -248,7 +254,9 @@ define void 
@"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -263,7 +271,7 @@ define void @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] 
addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -283,7 +291,9 @@ define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -298,7 +308,7 @@ define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call 
void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -318,7 +328,9 @@ define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapt %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -333,7 +345,7 @@ define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapt call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* 
getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -370,6 +382,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll index 09c9be8cf5..8daa769754 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll @@ -43,65 +43,68 @@ define void @mainVS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) - + ; 
dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.vectorAccumulateToDescriptor + call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 
@dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - + ; dx.op.linAlgMatrixAccumulateToMemory call void 
@dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -109,7 +112,7 @@ define void @mainVS() { call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) - + ret void } @@ -146,6 +149,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, 
i1) #0 diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl index af2ab8d2a3..e334e90a54 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl @@ -205,4 +205,12 @@ void main() { // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" int4 result2; __builtin_LinAlg_Convert(result2, vec, 0, 1); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_VectorAccumulateToDescriptor 'void (vector, RWByteAddressBuffer, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} vec 'vector':'vector' +// CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 423 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_VectorAccumulateToDescriptor(input, Buf, 10); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl index b174621ea3..e4b6c83d1f 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl @@ -88,6 +88,7 @@ void CallFunction() __builtin_LinAlg_MatrixVectorMultiplyAdd(vecA, mat1, true, vecB, 2, vecC, 3); int4 outVec; __builtin_LinAlg_Convert(outVec, vecA, 1, 2); + __builtin_VectorAccumulateToDescriptor(vecA, buf, 0); // expected-error@+12{{builtin unavailable in shader stage 'pixel' (requires 'compute', 'mesh' or 'amplification')}} // expected-error@+11{{builtin unavailable in shader stage 'vertex' (requires 'compute', 'mesh' or 'amplification')}} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl index c39b89f781..f067757267 
100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl @@ -71,4 +71,7 @@ void main() { // expected-error@+1{{intrinsic __builtin_LinAlg_Convert potentially used by ''main'' requires shader model 6.10 or greater}} __builtin_LinAlg_Convert(result, vec1, 1, 1); + + // expected-error@+1{{intrinsic __builtin_VectorAccumulateToDescriptor potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_VectorAccumulateToDescriptor(vec1, Buf, 1); } diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 2768e1b65d..d8b335d358 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -411,6 +411,8 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix m void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric vecA, in numeric vecB); void [[min_sm=6.10]] __builtin_LinAlg_Convert(out numeric ret, in numeric vec, in uint input_interp, in uint output_interp); +void [[min_sm=6.10]] __builtin_VectorAccumulateToDescriptor(in numeric<> vec, in RWByteAddressBuffer buf, in uint offset); + } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 634b62cc6f..b866aafc24 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1163,7 +1163,8 @@ def populate_categories_and_models_ExperimentalOps(self): for i in insts( "LinAlgMatrixQueryAccumulatorLayout,LinAlgMatrixLoadFromDescriptor," + "LinAlgMatrixAccumulateToDescriptor,LinAlgMatVecMul," - + "LinAlgMatVecMulAdd,LinAlgMatrixOuterProduct,LinAlgConvert" + + "LinAlgMatVecMulAdd,LinAlgMatrixOuterProduct,LinAlgConvert," + + "VectorAccumulateToDescriptor" ): i.category = "Linear Algebra Operations" i.shader_model = experimental_sm @@ -6590,7 +6591,21 @@ def populate_ExperimentalOps(self): ], ) - op_table.reserve_dxil_op_range("ReservedE", 2) + add_dxil_op( + "VectorAccumulateToDescriptor", + 
"VectorAccumulateToDescriptor", + "Accumulates given vector to the buffer at the given offset", + " Date: Wed, 13 May 2026 17:34:35 -0600 Subject: [PATCH 2/5] Update operation name --- docs/DXIL.rst | 2 +- include/dxc/DXIL/DxilConstants.h | 13 +++++----- include/dxc/DXIL/DxilInstructions.h | 6 ++--- include/dxc/HlslIntrinsicOp.h | 2 +- lib/DXIL/DxilOperations.cpp | 14 +++++----- lib/HLSL/HLOperationLower.cpp | 4 +-- tools/clang/lib/Headers/hlsl/dx/linalg.h | 2 +- .../CodeGenDXIL/hlsl/linalg/api/vectors.hlsl | 8 +++--- .../vectoraccumulatetodescriptor/nominal.hlsl | 6 ++--- .../LinAlgMatrix/linalgmatrix-as.ll | 6 ++--- .../LinAlgMatrix/linalgmatrix-cs.ll | 6 ++--- .../LinAlgMatrix/linalgmatrix-ds.ll | 6 ++--- .../LinAlgMatrix/linalgmatrix-gs.ll | 6 ++--- .../LinAlgMatrix/linalgmatrix-hs.ll | 6 ++--- .../LinAlgMatrix/linalgmatrix-ms.ll | 6 ++--- .../LinAlgMatrix/linalgmatrix-node.ll | 6 ++--- .../LinAlgMatrix/linalgmatrix-ps.ll | 6 ++--- .../LinAlgMatrix/linalgmatrix-raytracing.ll | 26 +++++++++---------- .../LinAlgMatrix/linalgmatrix-vs.ll | 6 ++--- .../linalg/builtins/matrix-builtins-ast.hlsl | 4 +-- .../hlsl/linalg/builtins/stage-errors.hlsl | 2 +- .../builtins/unavailable_pre_sm610.hlsl | 4 +-- utils/hct/gen_intrin_main.txt | 3 +-- utils/hct/hctdb.py | 6 ++--- utils/hct/hlsl_intrinsic_opcodes.json | 2 +- 25 files changed, 79 insertions(+), 79 deletions(-) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index 6516d8e1f1..89ad4c5661 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3096,7 +3096,7 @@ ID Name Description 2147483676 LinAlgMatrixAccumulateToMemory accumulates a matrix to groupshared memory 2147483677 LinAlgMatrixOuterProduct Outer products an M sized vector and a N sized vector producing an MxN matrix 2147483678 LinAlgConvert Convert vector components from one interpretation to another -2147483679 VectorAccumulateToDescriptor Accumulates given vector to the buffer at the given offset +2147483679 LinAlgVectorAccumulateToDescriptor Accumulates given vector 
to the buffer at the given offset 2147483680 ReservedE0 reserved 2147483681 DebugBreak triggers a breakpoint if a debugger is attached 2147483682 IsDebuggerPresent returns true if a debugger is attached diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 4b73a36f1b..4705b90c55 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -596,7 +596,7 @@ enum class OpCode : unsigned { LinAlgMatrixStoreToDescriptor = 20, // stores a matrix to a RWByteAddressBuffer LinAlgMatrixStoreToMemory = 21, // stores a matrix to groupshared memory - VectorAccumulateToDescriptor = + LinAlgVectorAccumulateToDescriptor = 31, // Accumulates given vector to the buffer at the given offset // No-op @@ -1356,10 +1356,11 @@ enum class OpCode : unsigned { // LinAlgConvert = 0x8000001E, 2147483678U, -2147483618 EXP_OPCODE(ExperimentalOps, LinAlgConvert), // Convert vector components from // one interpretation to another - // VectorAccumulateToDescriptor = 0x8000001F, 2147483679U, -2147483617 - EXP_OPCODE(ExperimentalOps, - VectorAccumulateToDescriptor), // Accumulates given vector to the - // buffer at the given offset + // LinAlgVectorAccumulateToDescriptor = 0x8000001F, 2147483679U, -2147483617 + EXP_OPCODE( + ExperimentalOps, + LinAlgVectorAccumulateToDescriptor), // Accumulates given vector to the + // buffer at the given offset // ReservedE0 = 0x80000020, 2147483680U, -2147483616 EXP_OPCODE(ExperimentalOps, ReservedE0), // reserved // DebugBreak = 0x80000021, 2147483681U, -2147483615 @@ -1547,7 +1548,7 @@ enum class OpCodeClass : unsigned { LinAlgMatrixSetElement, LinAlgMatrixStoreToDescriptor, LinAlgMatrixStoreToMemory, - VectorAccumulateToDescriptor, + LinAlgVectorAccumulateToDescriptor, // Mesh shader instructions EmitIndices, diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 79b87c7e12..439affd162 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ 
b/include/dxc/DXIL/DxilInstructions.h @@ -10961,14 +10961,14 @@ struct DxilInst_LinAlgConvert { }; /// This instruction Accumulates given vector to the buffer at the given offset -struct DxilInst_VectorAccumulateToDescriptor { +struct DxilInst_LinAlgVectorAccumulateToDescriptor { llvm::Instruction *Instr; // Construction and identification - DxilInst_VectorAccumulateToDescriptor(llvm::Instruction *pInstr) + DxilInst_LinAlgVectorAccumulateToDescriptor(llvm::Instruction *pInstr) : Instr(pInstr) {} operator bool() const { return hlsl::OP::IsDxilOpFuncCallInst( - Instr, hlsl::OP::OpCode::VectorAccumulateToDescriptor); + Instr, hlsl::OP::OpCode::LinAlgVectorAccumulateToDescriptor); } // Validation support bool isAllowed() const { return true; } diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index b7e16f1f46..f70013a006 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -132,7 +132,7 @@ enum class IntrinsicOp { IOP___builtin_LinAlg_MatrixStoreToMemory = 410, IOP___builtin_LinAlg_MatrixVectorMultiply = 418, IOP___builtin_LinAlg_MatrixVectorMultiplyAdd = 419, - IOP___builtin_VectorAccumulateToDescriptor = 423, + IOP___builtin_LinAlg_VectorAccumulateToDescriptor = 423, IOP_abort = 102, IOP_abs = 103, IOP_acos = 104, diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 29e0eb6769..c1d1475fa4 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2984,10 +2984,10 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { 2, {{0x400}, {0x400}}, {{0xe7}, {0xe7}}}, // Overloads: getNumParams() <= 1) return nullptr; return FT->getParamType(1); diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 0a2b0cb8be..bc235b226b 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7982,9 +7982,9 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_Convert, TranslateLinAlgConvert, 
DXIL::OpCode::LinAlgConvert}, - {IntrinsicOp::IOP___builtin_VectorAccumulateToDescriptor, + {IntrinsicOp::IOP___builtin_LinAlg_VectorAccumulateToDescriptor, TranslateLinAlgVectorAccumulate, - DXIL::OpCode::VectorAccumulateToDescriptor}, + DXIL::OpCode::LinAlgVectorAccumulateToDescriptor}, }; constexpr size_t NumLowerTableEntries = sizeof(gLowerTable) / sizeof(gLowerTable[0]); diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index e305291afc..4dd2a65790 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -654,7 +654,7 @@ template typename hlsl::enable_if::value, void>::type InterlockedAccumulate(vector Vec, RWByteAddressBuffer Res, uint StartOffset) { - __builtin_VectorAccumulateToDescriptor(Vec, Res, StartOffset); + __builtin_LinAlg_VectorAccumulateToDescriptor(Vec, Res, StartOffset); } } // namespace linalg diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl index 31ce62a84c..2e99374c23 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl @@ -190,12 +190,12 @@ void main(uint ID : SV_GroupID) { // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) vector vec24 = MultiplyAdd(Mat_7_15_Packed, interpVecH15Packed, memBias7Packed); - // CHECK: call void @dx.op.vectorAccumulateToDescriptor.v4f16(i32 -2147483617, <4 x half> + // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f16(i32 -2147483617, <4 x half> // CHECK-SAME: , %dx.types.Handle %{{[0-9]+}}, i32 0) - // CHECK-SAME: ; VectorAccumulateToDescriptor(vector,handle,offset) + // CHECK-SAME: ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) InterlockedAccumulate(vec1, RWBAB, 0); - // CHECK: call void @dx.op.vectorAccumulateToDescriptor.v8f16(i32 -2147483617, <8 x half> 
%{{[0-9]+}}, - // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 8) ; VectorAccumulateToDescriptor(vector,handle,offset) + // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v8f16(i32 -2147483617, <8 x half> %{{[0-9]+}}, + // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 8) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) InterlockedAccumulate(vec2, RWBAB, 8); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl index b99ec0fe31..9ee9faa9e6 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl @@ -8,12 +8,12 @@ RWByteAddressBuffer outbuf; void main() { // CHECK-LABEL: define void @main() - // CHECK: call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> + // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> // CHECK-SAME: , %dx.types.Handle %{{.*}}, i32 16) - // CHECK-SAME: ; VectorAccumulateToDescriptor(vector,handle,offset) + // CHECK-SAME: ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>, %dx.types.Handle, i32)" // CHECK2-SAME: (i32 423, <4 x float> %{{.*}}, %dx.types.Handle %{{.*}}, i32 16) float4 vec = {9.0, 8.0, 7.0, 6.0}; - __builtin_VectorAccumulateToDescriptor(vec, outbuf, 16); + __builtin_LinAlg_VectorAccumulateToDescriptor(vec, outbuf, 16); } diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll index c0f3834c0f..c5015c69ae 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll @@ -68,8 +68,8 @@ define void @mainAS() { ; 
dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -148,7 +148,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll index f3381418c5..ea5fe31b22 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll @@ -52,8 +52,8 @@ define void @mainCS() { ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void 
@dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -129,7 +129,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll index eb65756f4a..c44dd1fdf5 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll @@ -68,8 +68,8 @@ define void @MainDS() { ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 
x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -153,7 +153,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll index 8fa47d8484..3ce9a04c71 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll @@ -68,8 +68,8 @@ define void @MainGS() { ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -152,7 +152,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 
x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll index 0fb7e26854..d3a8abb65f 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll @@ -68,8 +68,8 @@ define void @MainHS() { ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -158,7 +158,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, 
i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll index e75859ccf0..d29348af2f 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll @@ -68,8 +68,8 @@ define void @mainMeS() { ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -151,7 +151,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll 
b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll index 5e11c4a105..eef0df4078 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll @@ -69,8 +69,8 @@ define void @mainNS() { ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -146,7 +146,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll index 9165527b10..068c1f6730 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll @@ -67,8 +67,8 @@ define void @mainPS() { 
; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -149,7 +149,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll index b7d5ba141d..81ab163d06 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll @@ -143,8 +143,8 @@ define void @"\01?MainRG@@YAXXZ"() #0 { %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> 
@dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -180,8 +180,8 @@ define void @"\01?MainIS@@YAXXZ"() #0 { %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; 
dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -217,8 +217,8 @@ define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %a %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -254,8 +254,8 @@ define void @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> 
@dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -291,8 +291,8 @@ define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) 
+ ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -328,8 +328,8 @@ define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapt %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -383,7 +383,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void 
@dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll index 8daa769754..ca584b1880 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll @@ -68,8 +68,8 @@ define void @mainVS() { ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - ; dx.op.vectorAccumulateToDescriptor - call void @dx.op.vectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; VectorAccumulateToDescriptor(vector,handle,offset) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -150,7 +150,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.vectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git 
a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl index e334e90a54..601c7fba4f 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl @@ -206,11 +206,11 @@ void main() { int4 result2; __builtin_LinAlg_Convert(result2, vec, 0, 1); -// CHECK: FunctionDecl {{.*}} implicit used __builtin_VectorAccumulateToDescriptor 'void (vector, RWByteAddressBuffer, unsigned int)' extern +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_VectorAccumulateToDescriptor 'void (vector, RWByteAddressBuffer, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} vec 'vector':'vector' // CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' // CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 423 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - __builtin_VectorAccumulateToDescriptor(input, Buf, 10); + __builtin_LinAlg_VectorAccumulateToDescriptor(input, Buf, 10); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl index e4b6c83d1f..657eb4a382 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl @@ -88,7 +88,7 @@ void CallFunction() __builtin_LinAlg_MatrixVectorMultiplyAdd(vecA, mat1, true, vecB, 2, vecC, 3); int4 outVec; __builtin_LinAlg_Convert(outVec, vecA, 1, 2); - __builtin_VectorAccumulateToDescriptor(vecA, buf, 0); + __builtin_LinAlg_VectorAccumulateToDescriptor(vecA, buf, 0); // expected-error@+12{{builtin unavailable in shader stage 'pixel' (requires 'compute', 'mesh' or 'amplification')}} // expected-error@+11{{builtin unavailable in shader stage 'vertex' (requires 'compute', 'mesh' or 'amplification')}} 
diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl index f067757267..1db7b329ff 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl @@ -72,6 +72,6 @@ void main() { // expected-error@+1{{intrinsic __builtin_LinAlg_Convert potentially used by ''main'' requires shader model 6.10 or greater}} __builtin_LinAlg_Convert(result, vec1, 1, 1); - // expected-error@+1{{intrinsic __builtin_VectorAccumulateToDescriptor potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_VectorAccumulateToDescriptor(vec1, Buf, 1); + // expected-error@+1{{intrinsic __builtin_LinAlg_VectorAccumulateToDescriptor potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_VectorAccumulateToDescriptor(vec1, Buf, 1); } diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index d8b335d358..260477af62 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -410,8 +410,7 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatr void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric vecA, in numeric vecB); void [[min_sm=6.10]] __builtin_LinAlg_Convert(out numeric ret, in numeric vec, in uint input_interp, in uint output_interp); - -void [[min_sm=6.10]] __builtin_VectorAccumulateToDescriptor(in numeric<> vec, in RWByteAddressBuffer buf, in uint offset); +void [[min_sm=6.10]] __builtin_LinAlg_VectorAccumulateToDescriptor(in numeric<> vec, in RWByteAddressBuffer buf, in uint offset); } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 
b866aafc24..ac0c6c27cf 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1164,7 +1164,7 @@ def populate_categories_and_models_ExperimentalOps(self): "LinAlgMatrixQueryAccumulatorLayout,LinAlgMatrixLoadFromDescriptor," + "LinAlgMatrixAccumulateToDescriptor,LinAlgMatVecMul," + "LinAlgMatVecMulAdd,LinAlgMatrixOuterProduct,LinAlgConvert," - + "VectorAccumulateToDescriptor" + + "LinAlgVectorAccumulateToDescriptor" ): i.category = "Linear Algebra Operations" i.shader_model = experimental_sm @@ -6592,8 +6592,8 @@ def populate_ExperimentalOps(self): ) add_dxil_op( - "VectorAccumulateToDescriptor", - "VectorAccumulateToDescriptor", + "LinAlgVectorAccumulateToDescriptor", + "LinAlgVectorAccumulateToDescriptor", "Accumulates given vector to the buffer at the given offset", " Date: Wed, 13 May 2026 17:41:15 -0600 Subject: [PATCH 3/5] Fix Variable Names --- lib/HLSL/HLOperationLower.cpp | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index bc235b226b..58f38abf4e 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7163,29 +7163,24 @@ Value *TranslateLinAlgConvert(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, return nullptr; } -Value *TranslateLinAlgVectorAccumulate(CallInst *CI, IntrinsicOp IOP, - OP::OpCode OpCode, - HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *ObjHelper, - bool &Translated) { +Value *TranslateLinAlgVectorAccumulateToDescriptor( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { hlsl::OP *HlslOp = &Helper.hlslOP; IRBuilder<> Builder(CI); Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); - // Input vector parameter - Value *InputVector = CI->getArgOperand(1); - - // Matrix parameters - Value *MatrixBuffer = CI->getArgOperand(2); - Value *MatrixOffset = CI->getArgOperand(3); + Value 
*Vector = CI->getArgOperand(1); + Value *ResHandle = CI->getArgOperand(2); + Value *Offset = CI->getArgOperand(3); // Get the DXIL function for the operation - Function *DxilFunc = HlslOp->GetOpFunc(OpCode, InputVector->getType()); + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, Vector->getType()); - return Builder.CreateCall(DxilFunc, - {OpArg, InputVector, MatrixBuffer, MatrixOffset}); + return Builder.CreateCall(DxilFunc, {OpArg, Vector, ResHandle, Offset}); } } // namespace @@ -7983,7 +7978,7 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_Convert, TranslateLinAlgConvert, DXIL::OpCode::LinAlgConvert}, {IntrinsicOp::IOP___builtin_LinAlg_VectorAccumulateToDescriptor, - TranslateLinAlgVectorAccumulate, + TranslateLinAlgVectorAccumulateToDescriptor, DXIL::OpCode::LinAlgVectorAccumulateToDescriptor}, }; constexpr size_t NumLowerTableEntries = From c9b75c00f86cc8a598ec9f4ea01c661ce5234561 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 13 May 2026 18:23:39 -0600 Subject: [PATCH 4/5] Align InterlockAccum function parmeters --- tools/clang/lib/Headers/hlsl/dx/linalg.h | 2 +- tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 4dd2a65790..64c60fab1a 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -652,7 +652,7 @@ OuterProduct(vector VecA, vector VecB) { template typename hlsl::enable_if::value, void>::type -InterlockedAccumulate(vector Vec, RWByteAddressBuffer Res, +InterlockedAccumulate(RWByteAddressBuffer Res, vector Vec, uint StartOffset) { __builtin_LinAlg_VectorAccumulateToDescriptor(Vec, Res, StartOffset); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl index 2e99374c23..b2e97274c6 100644 --- 
a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl @@ -193,9 +193,9 @@ void main(uint ID : SV_GroupID) { // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f16(i32 -2147483617, <4 x half> // CHECK-SAME: , %dx.types.Handle %{{[0-9]+}}, i32 0) // CHECK-SAME: ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) - InterlockedAccumulate(vec1, RWBAB, 0); + InterlockedAccumulate(RWBAB, vec1, 0); // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v8f16(i32 -2147483617, <8 x half> %{{[0-9]+}}, // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 8) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) - InterlockedAccumulate(vec2, RWBAB, 8); + InterlockedAccumulate(RWBAB, vec2, 8); } From 5c5e18d20f026ec5c7dda18ca942f346e8f689dd Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 13 May 2026 19:00:42 -0600 Subject: [PATCH 5/5] Add align param to vecaccum --- include/dxc/DXIL/DxilInstructions.h | 5 ++++- lib/DXIL/DxilOperations.cpp | 1 + lib/HLSL/HLOperationLower.cpp | 4 +++- tools/clang/lib/Headers/hlsl/dx/linalg.h | 4 ++-- .../test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl | 6 +++--- .../vectoraccumulatetodescriptor/nominal.hlsl | 10 +++++----- .../LinAlgMatrix/linalgmatrix-as.ll | 4 ++-- .../LinAlgMatrix/linalgmatrix-cs.ll | 4 ++-- .../LinAlgMatrix/linalgmatrix-ds.ll | 4 ++-- .../LinAlgMatrix/linalgmatrix-gs.ll | 4 ++-- .../LinAlgMatrix/linalgmatrix-hs.ll | 4 ++-- .../LinAlgMatrix/linalgmatrix-ms.ll | 4 ++-- .../LinAlgMatrix/linalgmatrix-node.ll | 4 ++-- .../LinAlgMatrix/linalgmatrix-ps.ll | 4 ++-- .../LinAlgMatrix/linalgmatrix-raytracing.ll | 14 +++++++------- .../LinAlgMatrix/linalgmatrix-vs.ll | 4 ++-- .../hlsl/linalg/builtins/matrix-builtins-ast.hlsl | 5 +++-- .../hlsl/linalg/builtins/stage-errors.hlsl | 2 +- .../linalg/builtins/unavailable_pre_sm610.hlsl | 2 +- utils/hct/gen_intrin_main.txt | 2 +- utils/hct/hctdb.py | 1 + 21 files changed, 50 insertions(+), 42 
deletions(-) diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 439affd162..b1c1bbaf73 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10973,7 +10973,7 @@ struct DxilInst_LinAlgVectorAccumulateToDescriptor { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; return true; } @@ -10984,6 +10984,7 @@ struct DxilInst_LinAlgVectorAccumulateToDescriptor { arg_vector = 1, arg_handle = 2, arg_offset = 3, + arg_align = 4, }; // Accessors llvm::Value *get_vector() const { return Instr->getOperand(1); } @@ -10992,6 +10993,8 @@ struct DxilInst_LinAlgVectorAccumulateToDescriptor { void set_handle(llvm::Value *val) { Instr->setOperand(2, val); } llvm::Value *get_offset() const { return Instr->getOperand(3); } void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_align() const { return Instr->getOperand(4); } + void set_align(llvm::Value *val) { Instr->setOperand(4, val); } }; /// This instruction triggers a breakpoint if a debugger is attached diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index c1d1475fa4..6ec5855e1a 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -6690,6 +6690,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pETy); A(pRes); A(pI32); + A(pI32); break; // diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 58f38abf4e..ff30dcbf20 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7176,11 +7176,13 @@ Value *TranslateLinAlgVectorAccumulateToDescriptor( Value *Vector = CI->getArgOperand(1); Value *ResHandle = CI->getArgOperand(2); Value *Offset = CI->getArgOperand(3); + Value *Align = CI->getArgOperand(4); // Get the DXIL function for the operation 
Function *DxilFunc = HlslOp->GetOpFunc(OpCode, Vector->getType()); - return Builder.CreateCall(DxilFunc, {OpArg, Vector, ResHandle, Offset}); + return Builder.CreateCall(DxilFunc, + {OpArg, Vector, ResHandle, Offset, Align}); } } // namespace diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 64c60fab1a..e223f2286e 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -653,8 +653,8 @@ OuterProduct(vector VecA, vector VecB) { template typename hlsl::enable_if::value, void>::type InterlockedAccumulate(RWByteAddressBuffer Res, vector Vec, - uint StartOffset) { - __builtin_LinAlg_VectorAccumulateToDescriptor(Vec, Res, StartOffset); + uint StartOffset, uint Align = 64) { + __builtin_LinAlg_VectorAccumulateToDescriptor(Vec, Res, StartOffset, Align); } } // namespace linalg diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl index b2e97274c6..3d1735aa9c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl @@ -191,11 +191,11 @@ void main(uint ID : SV_GroupID) { vector vec24 = MultiplyAdd(Mat_7_15_Packed, interpVecH15Packed, memBias7Packed); // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f16(i32 -2147483617, <4 x half> - // CHECK-SAME: , %dx.types.Handle %{{[0-9]+}}, i32 0) - // CHECK-SAME: ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + // CHECK-SAME: , %dx.types.Handle %{{[0-9]+}}, i32 0, i32 64) + // CHECK-SAME: ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) InterlockedAccumulate(RWBAB, vec1, 0); // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v8f16(i32 -2147483617, <8 x half> %{{[0-9]+}}, - // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 8) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 
8, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) InterlockedAccumulate(RWBAB, vec2, 8); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl index 9ee9faa9e6..b10ca59831 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl @@ -9,11 +9,11 @@ void main() { // CHECK-LABEL: define void @main() // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> - // CHECK-SAME: , %dx.types.Handle %{{.*}}, i32 16) - // CHECK-SAME: ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + // CHECK-SAME: , %dx.types.Handle %{{.*}}, i32 16, i32 64) + // CHECK-SAME: ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) - // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>, %dx.types.Handle, i32)" - // CHECK2-SAME: (i32 423, <4 x float> %{{.*}}, %dx.types.Handle %{{.*}}, i32 16) + // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>, %dx.types.Handle, i32, i32)" + // CHECK2-SAME: (i32 423, <4 x float> %{{.*}}, %dx.types.Handle %{{.*}}, i32 16, i32 64) float4 vec = {9.0, 8.0, 7.0, 6.0}; - __builtin_LinAlg_VectorAccumulateToDescriptor(vec, outbuf, 16); + __builtin_LinAlg_VectorAccumulateToDescriptor(vec, outbuf, 16, 64); } diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll index c5015c69ae..47c2f48629 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll @@ -69,7 +69,7 @@ define void @mainAS() { %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; 
LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -148,7 +148,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll index ea5fe31b22..576758b9b1 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll @@ -53,7 +53,7 @@ define void @mainCS() { %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void 
@dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -129,7 +129,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll index c44dd1fdf5..c4292d7d7e 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll @@ -69,7 +69,7 @@ define void @MainDS() { %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -153,7 +153,7 @@ declare <4 x i32> 
@dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll index 3ce9a04c71..d14a983ab4 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll @@ -69,7 +69,7 @@ define void @MainGS() { %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -152,7 +152,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void 
@dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll index d3a8abb65f..c5411b9b2e 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll @@ -69,7 +69,7 @@ define void @MainHS() { %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -158,7 +158,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git 
a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll index d29348af2f..1575754a90 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll @@ -69,7 +69,7 @@ define void @mainMeS() { %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -151,7 +151,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll index eef0df4078..1318eb5435 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll +++ 
b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll @@ -70,7 +70,7 @@ define void @mainNS() { %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -146,7 +146,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll index 068c1f6730..676f573fb5 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll @@ -68,7 +68,7 @@ define void @mainPS() { %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; 
dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -149,7 +149,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll index 81ab163d06..b687b5945e 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll @@ -144,7 +144,7 @@ define void @"\01?MainRG@@YAXXZ"() #0 { %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; 
LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -181,7 +181,7 @@ define void @"\01?MainIS@@YAXXZ"() #0 { %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -218,7 +218,7 @@ define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %a %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; 
LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -255,7 +255,7 @@ define void @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -292,7 +292,7 @@ define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* 
noal %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -329,7 +329,7 @@ define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapt %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; 
LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -383,7 +383,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll index ca584b1880..debf658676 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll @@ -69,7 +69,7 @@ define void @mainVS() { %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) ; dx.op.linAlgVectorAccumulateToDescriptor - call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset) + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) ; ; Built-ins restricted to compute, mesh and amplification shaders @@ -150,7 +150,7 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 ; Function 
Attrs: nounwind -declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32) #0 +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl index 601c7fba4f..cb3a59cf5d 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl @@ -206,11 +206,12 @@ void main() { int4 result2; __builtin_LinAlg_Convert(result2, vec, 0, 1); -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_VectorAccumulateToDescriptor 'void (vector, RWByteAddressBuffer, unsigned int)' extern +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_VectorAccumulateToDescriptor 'void (vector, RWByteAddressBuffer, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} vec 'vector':'vector' // CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} align 'unsigned int' // CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 423 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - __builtin_LinAlg_VectorAccumulateToDescriptor(input, Buf, 10); + __builtin_LinAlg_VectorAccumulateToDescriptor(input, Buf, 10, 64); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl index 657eb4a382..b48cb88fff 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl @@ -88,7 +88,7 @@ void 
CallFunction() __builtin_LinAlg_MatrixVectorMultiplyAdd(vecA, mat1, true, vecB, 2, vecC, 3); int4 outVec; __builtin_LinAlg_Convert(outVec, vecA, 1, 2); - __builtin_LinAlg_VectorAccumulateToDescriptor(vecA, buf, 0); + __builtin_LinAlg_VectorAccumulateToDescriptor(vecA, buf, 0, 64); // expected-error@+12{{builtin unavailable in shader stage 'pixel' (requires 'compute', 'mesh' or 'amplification')}} // expected-error@+11{{builtin unavailable in shader stage 'vertex' (requires 'compute', 'mesh' or 'amplification')}} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl index 1db7b329ff..0005925d1c 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl @@ -73,5 +73,5 @@ void main() { __builtin_LinAlg_Convert(result, vec1, 1, 1); // expected-error@+1{{intrinsic __builtin_LinAlg_VectorAccumulateToDescriptor potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_VectorAccumulateToDescriptor(vec1, Buf, 1); + __builtin_LinAlg_VectorAccumulateToDescriptor(vec1, Buf, 1, 64); } diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 260477af62..c0d1d3dcfc 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -410,7 +410,7 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatr void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric vecA, in numeric vecB); void [[min_sm=6.10]] __builtin_LinAlg_Convert(out numeric ret, in numeric vec, in uint input_interp, in uint output_interp); -void [[min_sm=6.10]] __builtin_LinAlg_VectorAccumulateToDescriptor(in 
numeric<> vec, in RWByteAddressBuffer buf, in uint offset); +void [[min_sm=6.10]] __builtin_LinAlg_VectorAccumulateToDescriptor(in numeric<> vec, in RWByteAddressBuffer buf, in uint offset, in uint align); } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index ac0c6c27cf..066123b540 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -6602,6 +6602,7 @@ def populate_ExperimentalOps(self): db_dxil_param(2, "$o", "vector", "vector to accumulate"), db_dxil_param(3, "res", "handle", "buffer to accumulate into"), db_dxil_param(4, "i32", "offset", "starting offset in the buffer"), + db_dxil_param(5, "i32", "align", "alignment of vector elements"), ], )