diff --git a/docs/DXIL.rst b/docs/DXIL.rst index 8891e7e056..89ad4c5661 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3096,8 +3096,8 @@ ID Name Description 2147483676 LinAlgMatrixAccumulateToMemory accumulates a matrix to groupshared memory 2147483677 LinAlgMatrixOuterProduct Outer products an M sized vector and a N sized vector producing an MxN matrix 2147483678 LinAlgConvert Convert vector components from one interpretation to another -2147483679 ReservedE0 reserved -2147483680 ReservedE1 reserved +2147483679 LinAlgVectorAccumulateToDescriptor Accumulates given vector to the buffer at the given offset +2147483680 ReservedE0 reserved 2147483681 DebugBreak triggers a breakpoint if a debugger is attached 2147483682 IsDebuggerPresent returns true if a debugger is attached ========== ======================================== =================================================================================================================== diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index a535b8d768..4705b90c55 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -533,8 +533,7 @@ static const OpCodeTableID TableID = OpCodeTableID::ExperimentalOps; // Enumeration for ExperimentalOps DXIL operations enum class OpCode : unsigned { // - ReservedE0 = 31, // reserved - ReservedE1 = 32, // reserved + ReservedE0 = 32, // reserved // Debugging DebugBreak = 33, // triggers a breakpoint if a debugger is attached @@ -597,6 +596,8 @@ enum class OpCode : unsigned { LinAlgMatrixStoreToDescriptor = 20, // stores a matrix to a RWByteAddressBuffer LinAlgMatrixStoreToMemory = 21, // stores a matrix to groupshared memory + LinAlgVectorAccumulateToDescriptor = + 31, // Accumulates given vector to the buffer at the given offset // No-op ExperimentalNop = 0, // nop does nothing @@ -1355,10 +1356,13 @@ enum class OpCode : unsigned { // LinAlgConvert = 0x8000001E, 2147483678U, -2147483618 EXP_OPCODE(ExperimentalOps, LinAlgConvert), // Convert vector components from // one interpretation to another - // ReservedE0 = 0x8000001F, 2147483679U, -2147483617 + // LinAlgVectorAccumulateToDescriptor = 0x8000001F, 2147483679U, -2147483617 + EXP_OPCODE( + ExperimentalOps, + LinAlgVectorAccumulateToDescriptor), // Accumulates given vector to the + // buffer at the given offset + // ReservedE0 = 0x80000020, 2147483680U, -2147483616 EXP_OPCODE(ExperimentalOps, ReservedE0), // reserved - // ReservedE1 = 0x80000020, 2147483680U, -2147483616 - EXP_OPCODE(ExperimentalOps, ReservedE1), // reserved // DebugBreak = 0x80000021, 2147483681U, -2147483615 EXP_OPCODE(ExperimentalOps, DebugBreak), // triggers a breakpoint if a debugger is attached @@ -1544,6 +1548,7 @@ enum class OpCodeClass : unsigned { LinAlgMatrixSetElement, LinAlgMatrixStoreToDescriptor, LinAlgMatrixStoreToMemory, + LinAlgVectorAccumulateToDescriptor, // Mesh shader instructions EmitIndices, @@ -1730,7 +1735,7 @@ enum class OpCodeClass : unsigned { NodeOutputIsValid, OutputComplete, - NumOpClasses = 222, // exclusive last value of enumeration + NumOpClasses = 223, // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 511f5b476f..b1c1bbaf73 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10960,6 +10960,43 @@ struct DxilInst_LinAlgConvert { void set_outputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); } }; +/// This instruction Accumulates given vector to the buffer at the given offset +struct DxilInst_LinAlgVectorAccumulateToDescriptor { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_LinAlgVectorAccumulateToDescriptor(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::LinAlgVectorAccumulateToDescriptor); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_vector = 1, + arg_handle = 2, + arg_offset = 3, + arg_align = 4, + }; + // Accessors + llvm::Value *get_vector() const { return Instr->getOperand(1); } + void set_vector(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_handle() const { return Instr->getOperand(2); } + void set_handle(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_offset() const { return Instr->getOperand(3); } + void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_align() const { return Instr->getOperand(4); } + void set_align(llvm::Value *val) { Instr->setOperand(4, val); } +}; + /// This instruction triggers a breakpoint if a debugger is attached struct DxilInst_DebugBreak { llvm::Instruction *Instr; diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 03e439b6b0..f70013a006 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -132,6 +132,7 @@ enum class IntrinsicOp { IOP___builtin_LinAlg_MatrixStoreToMemory = 410, IOP___builtin_LinAlg_MatrixVectorMultiply = 418, IOP___builtin_LinAlg_MatrixVectorMultiplyAdd = 419, + IOP___builtin_LinAlg_VectorAccumulateToDescriptor = 423, IOP_abort = 102, IOP_abs = 103, IOP_acos = 104, @@ -429,7 +430,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 423, + Num_Intrinsics = 424, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b786fee9fc..6ec5855e1a 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2984,6 +2984,14 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { 2, {{0x400}, {0x400}}, {{0xe7}, {0xe7}}}, // Overloads: getNumParams() <= 1) return nullptr; return FT->getParamType(1); @@ -7009,7 +7015,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::ClusterID: case OpCode::LinAlgMatrixQueryAccumulatorLayout: case OpCode::ReservedE0: - case OpCode::ReservedE1: case OpCode::DebugBreak: case OpCode::IsDebuggerPresent: return Type::getVoidTy(Ctx); diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index abf0ad86be..ff30dcbf20 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7163,6 +7163,28 @@ Value *TranslateLinAlgConvert(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, return nullptr; } +Value *TranslateLinAlgVectorAccumulateToDescriptor( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + Value *Vector = CI->getArgOperand(1); + Value *ResHandle = CI->getArgOperand(2); + Value *Offset = CI->getArgOperand(3); + Value *Align = CI->getArgOperand(4); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, Vector->getType()); + + return Builder.CreateCall(DxilFunc, + {OpArg, Vector, ResHandle, Offset, Align}); +} + } // namespace // Lower table. @@ -7957,6 +7979,9 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_Convert, TranslateLinAlgConvert, DXIL::OpCode::LinAlgConvert}, + {IntrinsicOp::IOP___builtin_LinAlg_VectorAccumulateToDescriptor, + TranslateLinAlgVectorAccumulateToDescriptor, + DXIL::OpCode::LinAlgVectorAccumulateToDescriptor}, }; constexpr size_t NumLowerTableEntries = sizeof(gLowerTable) / sizeof(gLowerTable[0]); diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 29e0d55ee6..e223f2286e 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -650,6 +650,13 @@ OuterProduct(vector VecA, vector VecB) { return Result; } +template +typename hlsl::enable_if::value, void>::type +InterlockedAccumulate(RWByteAddressBuffer Res, vector Vec, + uint StartOffset, uint Align = 64) { + __builtin_LinAlg_VectorAccumulateToDescriptor(Vec, Res, StartOffset, Align); +} + } // namespace linalg } // namespace dx diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl index 44cbc449bd..3d1735aa9c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl @@ -10,35 +10,36 @@ using MatrixAccum_8_4_Ty = Matrix; using MatrixPacked_7_15_ATy = Matrix; +RWByteAddressBuffer RWBAB : register(u0); ByteAddressBuffer BAB : register(t0); [numthreads(4, 4, 4)] void main(uint ID : SV_GroupID) { -// CHECK: %[[MAT1:.*]] = call %dx.types.LinAlgMatrixC8M8N4U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC8M8N4U0S0( -// CHECK-SAME: i32 -2147483634, %dx.types.Handle %{{[0-9]+}}, i32 0, i32 8, i32 1, i32 128) -// CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) + // CHECK: %[[MAT1:.*]] = call %dx.types.LinAlgMatrixC8M8N4U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC8M8N4U0S0( + // CHECK-SAME: i32 -2147483634, %dx.types.Handle %{{[0-9]+}}, i32 0, i32 8, i32 1, i32 128) + // CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) MatrixATy Mat1 = MatrixATy::Load(BAB, 0, 8); vector vec1 = 10.3f; -// CHECK: %[[VEC2:.*]] = call <8 x half> @dx.op.linAlgMatVecMul.v8f16.mC8M8N4U0S0.v4f16(i32 -2147483623, -// CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> , i32 8) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) + // CHECK: %[[VEC2:.*]] = call <8 x half> @dx.op.linAlgMatVecMul.v8f16.mC8M8N4U0S0.v4f16(i32 -2147483623, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> , i32 8) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) vector vec2 = Multiply(Mat1, vec1); -// CHECK: %[[VEC3:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622, -// CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> , i32 8, <8 x half> %[[VEC2]], i32 8) -// CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + // CHECK: %[[VEC3:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> , i32 8, <8 x half> %[[VEC2]], i32 8) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) vector vec3 = MultiplyAdd(Mat1, vec1, vec2); -// CHECK: %[[VEC20:.*]] = shufflevector + // CHECK: %[[VEC20:.*]] = shufflevector vector vec20 = (vector)vec2; -// CHECK: %[[VEC4:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622, -// CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x half> %[[VEC3]], i32 8) -// CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + // CHECK: %[[VEC4:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x half> %[[VEC3]], i32 8) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) InterpretedVector interpVec2 = MakeInterpretedVector(vec20); vector vec4 = MultiplyAdd(Mat1, interpVec2, vec3); @@ -95,7 +96,6 @@ void main(uint ID : SV_GroupID) { Convert(ThreeF16); // Test MultiplyAdd with odd sizes - // vector vecH15 = BAB.Load< vector >(168); vector vecH7 = BAB.Load< vector >(64); @@ -109,7 +109,7 @@ void main(uint ID : SV_GroupID) { // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %{{[0-9]+}}, i32 8) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) vector vec7 = MultiplyAdd(Mat_7_15, vecH15, vecH7); - + // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v15f16.v7f16(i32 -2147483622, %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], // CHECK-SAME; i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %{{[0-9]+}}, i32 8) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) @@ -148,6 +148,7 @@ void main(uint ID : SV_GroupID) { // CHECK-NEXT: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v4i32.v7f16(i32 -2147483622, // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <4 x i32> %[[INTERP_VEC_H15_PACKED]], i32 21, <7 x half> %[[MEM_BIAS3]], i32 8) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + vector vec12 = MultiplyAdd(Mat_7_15, interpVecH15Packed, memBias7); // Test Convert and MultiplyAdd with odd sizes and packed types @@ -188,4 +189,13 @@ void main(uint ID : SV_GroupID) { // CHECK-SAME: %dx.types.LinAlgMatrixC21M7N15U0S0 %[[MAT_7_15_PACKED]], i1 true, <4 x i32> %[[INTERP_VEC_H15_PACKED]], i32 21, <7 x half> %[[MEM_BIAS_CONV2]], i32 8) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) vector vec24 = MultiplyAdd(Mat_7_15_Packed, interpVecH15Packed, memBias7Packed); + + // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f16(i32 -2147483617, <4 x half> + // CHECK-SAME: , %dx.types.Handle %{{[0-9]+}}, i32 0, i32 64) + // CHECK-SAME: ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + InterlockedAccumulate(RWBAB, vec1, 0); + + // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v8f16(i32 -2147483617, <8 x half> %{{[0-9]+}}, + // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 8, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + InterlockedAccumulate(RWBAB, vec2, 8); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl new file mode 100644 index 0000000000..b10ca59831 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/vectoraccumulatetodescriptor/nominal.hlsl @@ -0,0 +1,19 @@ +// REQUIRES: dxil-1-10 +// RUN: %dxc -T cs_6_10 -HV 202x -E main %s | FileCheck %s +// RUN: %dxc -T cs_6_10 -HV 202x -E main -fcgl %s | FileCheck %s --check-prefix=CHECK2 + +RWByteAddressBuffer outbuf; + +[numthreads(4,1,1)] +void main() { + // CHECK-LABEL: define void @main() + + // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> + // CHECK-SAME: , %dx.types.Handle %{{.*}}, i32 16, i32 64) + // CHECK-SAME: ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + + // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>, %dx.types.Handle, i32, i32)" + // CHECK2-SAME: (i32 423, <4 x float> %{{.*}}, %dx.types.Handle %{{.*}}, i32 16, i32 64) + float4 vec = {9.0, 8.0, 7.0, 6.0}; + __builtin_LinAlg_VectorAccumulateToDescriptor(vec, outbuf, 16, 64); +} diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll index 297dd692f5..47c2f48629 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll @@ -33,7 +33,7 @@ target triple = "dxil-ms-dx" @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 define void @mainAS() { - + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer @@ -43,71 +43,74 @@ define void @mainAS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) %2 = alloca %struct.AmpPayload.0, align 8 call void @dx.op.dispatchMesh.struct.AmpPayload.0(i32 173, i32 8, i32 1, i32 1, %struct.AmpPayload.0* nonnull %2) ; DispatchMesh(threadGroupCountX,threadGroupCountY,threadGroupCountZ,payload) - + ret void } @@ -144,6 +147,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll index c5b2ccfec7..576758b9b1 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll @@ -17,7 +17,7 @@ target triple = "dxil-ms-dx" @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 define void @mainCS() { - + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer @@ -27,68 +27,71 @@ define void @mainCS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -125,6 +128,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll index b2a7eb0c53..c4292d7d7e 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll @@ -43,65 +43,68 @@ define void @MainDS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -112,7 +115,7 @@ define void @MainDS() { call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %4) ; StoreOutput(outputSigId,rowIndex,colIndex,value) - call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %5) ; StoreOutput(outputSigId,rowIndex,colIndex,value) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %5) ; StoreOutput(outputSigId,rowIndex,colIndex,value) ret void } @@ -149,6 +152,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll index edbca57b0b..d14a983ab4 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll @@ -43,65 +43,68 @@ define void @MainGS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -111,7 +114,7 @@ define void @MainGS() { call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+01) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.emitStream(i32 97, i8 0) ; EmitStream(streamId) call void @dx.op.cutStream(i32 98, i8 0) ; CutStream(streamId) - + ret void } @@ -148,6 +151,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll index c148cc60f3..c5411b9b2e 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll @@ -43,65 +43,68 @@ define void @MainHS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -154,6 +157,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll index 3542055c65..1575754a90 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll @@ -43,65 +43,68 @@ define void @mainMeS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -110,7 +113,7 @@ define void @mainMeS() { call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 1, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 2, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) call void @dx.op.storeVertexOutput.f32(i32 171, i32 0, i32 0, i8 3, float 0.000000e+00, i32 %thread_id_group) ; StoreVertexOutput(outputSigId,rowIndex,colIndex,value,vertexIndex) - + ret void } @@ -147,6 +150,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll index f42208a5f6..1318eb5435 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll @@ -44,68 +44,71 @@ define void @mainNS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -142,6 +145,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll index d4d39c7f40..676f573fb5 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll @@ -42,65 +42,68 @@ define void @mainPS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -145,6 +148,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll index d6ff1a8c79..b687b5945e 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll @@ -127,7 +127,7 @@ target triple = "dxil-ms-dx" @"\01?SharedArr@@3PAMA" = external addrspace(3) global [64 x float], align 4 define void @"\01?MainRG@@YAXXZ"() #0 { - + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) %handle = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer @@ -143,7 +143,9 @@ define void @"\01?MainRG@@YAXXZ"() #0 { %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -158,7 +160,7 @@ define void @"\01?MainRG@@YAXXZ"() #0 { call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -178,7 +180,9 @@ define void @"\01?MainIS@@YAXXZ"() #0 { %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -193,7 +197,7 @@ define void @"\01?MainIS@@YAXXZ"() #0 { call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -213,7 +217,9 @@ define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %a %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -228,7 +234,7 @@ define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %a call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -248,7 +254,9 @@ define void @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -263,7 +271,7 @@ define void @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -283,7 +291,9 @@ define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -298,7 +308,7 @@ define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -318,7 +328,9 @@ define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapt %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) - + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; @@ -333,7 +345,7 @@ define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapt call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) - + ret void } @@ -370,6 +382,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll index 09c9be8cf5..debf658676 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll @@ -43,65 +43,68 @@ define void @mainVS() { ; dx.op.linAlgMatrixAccumulate %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - + ; dx.op.linAlgMatrixAccumulateToDescriptor call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) - + ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - + ; dx.op.linAlgMatrixLoadFromDescriptor %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - + ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) - + ; dx.op.linAlgMatrixQueryAccumulatorLayout %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() - + ; dx.op.linAlgMatVecMul %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) - + ; dx.op.linAlgMatVecMulAdd %v7 = call <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483622, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true, <4 x i32> , i32 2, <4 x i32> , i32 3) ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) - + ; dx.op.linAlgConvert %v16 = call <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32 -2147483618, <4 x i32> zeroinitializer, i32 1, i32 2) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + ; dx.op.linAlgVectorAccumulateToDescriptor + call void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32 -2147483617, <4 x float> zeroinitializer, %dx.types.Handle %handle, i32 0, i32 64) ; LinAlgVectorAccumulateToDescriptor(vector,handle,offset,align) + ; ; Built-ins restricted to compute, mesh and amplification shaders ; ; dx.op.linAlgCopyConvertMatrix %v8 = call %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32 -2147483635, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, i1 true) ; LinAlgCopyConvertMatrix(srcMatrix,transpose) - + ; dx.op.linAlgFillMatrix %v9 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgFillMatrix.mC4M5N4U0S2.i32(i32 -2147483636, i32 15) ; LinAlgFillMatrix(value) - + ; dx.op.linAlgMatrixGetCoordinate %v10 = call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC4M5N4U0S2(i32 -2147483631, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixGetElement %v11 = call float @dx.op.linAlgMatrixGetElement.f32.mC4M5N4U0S2(i32 -2147483630, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 0) ; LinAlgMatrixGetElement(matrix,threadLocalIndex) - + ; dx.op.linAlgMatrixMultiply %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) - + ; dx.op.linAlgMatrixMultiplyAccumulate %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) - + ; dx.op.linAlgMatrixSetElement %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) - + ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) - + ; dx.op.linAlgMatrixLoadFromMemory %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) - + ; dx.op.linAlgMatrixStoreToMemory call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -109,7 +112,7 @@ define void @mainVS() { call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+00) ; StoreOutput(outputSigId,rowIndex,colIndex,value) - + ret void } @@ -146,6 +149,9 @@ declare <4 x i32> @dx.op.linAlgMatVecMulAdd.v4i32.mC4M5N4U0S2.v4i32.v4i32(i32, % ; Function Attrs: nounwind declare <4 x float> @dx.op.linAlgConvert.v4f32.v4i32(i32, <4 x i32>, i32, i32) #0 +; Function Attrs: nounwind +declare void @dx.op.linAlgVectorAccumulateToDescriptor.v4f32(i32, <4 x float>, %dx.types.Handle, i32, i32) #0 + ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M4N5U1S2 @dx.op.linAlgCopyConvertMatrix.mC4M4N5U1S2.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, i1) #0 diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl index af2ab8d2a3..cb3a59cf5d 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl @@ -205,4 +205,13 @@ void main() { // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" int4 result2; __builtin_LinAlg_Convert(result2, vec, 0, 1); + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_VectorAccumulateToDescriptor 'void (vector, RWByteAddressBuffer, unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} vec 'vector':'vector' +// CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' +// CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} align 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 423 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" + __builtin_LinAlg_VectorAccumulateToDescriptor(input, Buf, 10, 64); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl index b174621ea3..b48cb88fff 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl @@ -88,6 +88,7 @@ void CallFunction() __builtin_LinAlg_MatrixVectorMultiplyAdd(vecA, mat1, true, vecB, 2, vecC, 3); int4 outVec; __builtin_LinAlg_Convert(outVec, vecA, 1, 2); + __builtin_LinAlg_VectorAccumulateToDescriptor(vecA, buf, 0, 64); // expected-error@+12{{builtin unavailable in shader stage 'pixel' (requires 'compute', 'mesh' or 'amplification')}} // expected-error@+11{{builtin unavailable in shader stage 'vertex' (requires 'compute', 'mesh' or 'amplification')}} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl index c39b89f781..0005925d1c 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl @@ -71,4 +71,7 @@ void main() { // expected-error@+1{{intrinsic __builtin_LinAlg_Convert potentially used by ''main'' requires shader model 6.10 or greater}} __builtin_LinAlg_Convert(result, vec1, 1, 1); + + // expected-error@+1{{intrinsic __builtin_LinAlg_VectorAccumulateToDescriptor potentially used by ''main'' requires shader model 6.10 or greater}} + __builtin_LinAlg_VectorAccumulateToDescriptor(vec1, Buf, 1, 64); } diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 2768e1b65d..c0d1d3dcfc 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -410,6 +410,7 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatr void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric vecA, in numeric vecB); void [[min_sm=6.10]] __builtin_LinAlg_Convert(out numeric ret, in numeric vec, in uint input_interp, in uint output_interp); +void [[min_sm=6.10]] __builtin_LinAlg_VectorAccumulateToDescriptor(in numeric<> vec, in RWByteAddressBuffer buf, in uint offset, in uint align); } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 634b62cc6f..066123b540 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1163,7 +1163,8 @@ def populate_categories_and_models_ExperimentalOps(self): for i in insts( "LinAlgMatrixQueryAccumulatorLayout,LinAlgMatrixLoadFromDescriptor," + "LinAlgMatrixAccumulateToDescriptor,LinAlgMatVecMul," - + "LinAlgMatVecMulAdd,LinAlgMatrixOuterProduct,LinAlgConvert" + + "LinAlgMatVecMulAdd,LinAlgMatrixOuterProduct,LinAlgConvert," + + "LinAlgVectorAccumulateToDescriptor" ): i.category = "Linear Algebra Operations" i.shader_model = experimental_sm @@ -6590,7 +6591,22 @@ def populate_ExperimentalOps(self): ], ) - op_table.reserve_dxil_op_range("ReservedE", 2) + add_dxil_op( + "LinAlgVectorAccumulateToDescriptor", + "LinAlgVectorAccumulateToDescriptor", + "Accumulates given vector to the buffer at the given offset", + "