diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index faa5ccbaf6a50..f7b08f9dd0c48 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -194,7 +194,7 @@ int32_t GPUReconstructionCPU::InitDevice() ClearAllocatedMemory(); } if (GetProcessingSettings().inKernelParallel) { - mBlockCount = mMaxHostThreads; + mMultiprocessorCount = mMaxHostThreads; } mProcShadow.mProcessorsProc = processors(); return 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index d93d1335d45c5..768c301f24327 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -94,7 +94,7 @@ class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface GPUProcessorProcessors mProcShadow; // Host copy of tracker objects that will be used on the GPU GPUConstantMem*& mProcessorsShadow = mProcShadow.mProcessorsProc; - uint32_t mBlockCount = 1; + uint32_t mMultiprocessorCount = 1; uint32_t mThreadCount = 1; uint32_t mWarpSize = 1; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h b/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h index 837516a93b6ae..7bf819a74e1b6 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h @@ -36,7 +36,7 @@ inline void GPUReconstructionCPU::runKernelInterface(krnlSetup&& setup, Args con const uint32_t stream = setup.x.stream; auto prop = getKernelProperties(); const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads; - const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount)); + const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mMultiprocessorCount)); if (nBlocks == (uint32_t)-1) { nBlocks = (nThreads + autoThreads - 1) / autoThreads; nThreads = autoThreads; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index dc904fa96fa2d..44b53facd9bd8 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -247,8 +247,8 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() throw std::runtime_error("Invalid warp size on GPU"); } mWarpSize = deviceProp.warpSize; - mBlockCount = deviceProp.multiProcessorCount; - mMaxBackendThreads = std::max(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mBlockCount); + mMultiprocessorCount = deviceProp.multiProcessorCount; + mMaxBackendThreads = std::max(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mMultiprocessorCount); mDeviceName = deviceProp.name; mDeviceName += " (CUDA GPU)"; @@ -329,9 +329,9 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } #ifndef __HIPCC__ // CUDA - dummyInitKernel<<>>(mDeviceMemoryBase); + dummyInitKernel<<>>(mDeviceMemoryBase); // TODO: Can't we just use the CUDA version and hipify will take care of the rest? #else // HIP - hipLaunchKernelGGL(HIP_KERNEL_NAME(dummyInitKernel), dim3(mBlockCount), dim3(256), 0, 0, mDeviceMemoryBase); + hipLaunchKernelGGL(HIP_KERNEL_NAME(dummyInitKernel), dim3(mMultiprocessorCount), dim3(256), 0, 0, mDeviceMemoryBase); #endif if (GetProcessingSettings().rtc.enable) { @@ -373,7 +373,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } else { GPUReconstructionCUDA* master = dynamic_cast(mMaster); mDeviceId = master->mDeviceId; - mBlockCount = master->mBlockCount; + mMultiprocessorCount = master->mMultiprocessorCount; mWarpSize = master->mWarpSize; mMaxBackendThreads = master->mMaxBackendThreads; mDeviceName = master->mDeviceName; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 49533216869d2..271fe494860cd 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -266,9 +266,9 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime() mDeviceName = device_name.c_str(); mDeviceName += " (OpenCL)"; - mBlockCount = device_shaders; + mMultiprocessorCount = device_shaders; mWarpSize = 32; - mMaxBackendThreads = std::max(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount); + mMaxBackendThreads = std::max(mMaxBackendThreads, deviceMaxWorkGroup * mMultiprocessorCount); mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error); if (GPUChkErrI(ocl_error)) { @@ -378,7 +378,7 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime() GPUInfo("OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)mDeviceMemorySize, (int64_t)mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize); } else { GPUReconstructionOCL* master = dynamic_cast(mMaster); - mBlockCount = master->mBlockCount; + mMultiprocessorCount = master->mMultiprocessorCount; mWarpSize = master->mWarpSize; mMaxBackendThreads = master->mMaxBackendThreads; mDeviceName = master->mDeviceName; diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index aca1bb2420fb6..e3a20ad81a2cb 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -210,7 +210,7 @@ class GPUChain krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); - inline uint32_t BlockCount() const { return mRec->mBlockCount; } + inline uint32_t BlockCount() const { return mRec->mMultiprocessorCount; } inline uint32_t WarpSize() const { return mRec->mWarpSize; } inline uint32_t ThreadCount() const { return mRec->mThreadCount; }