From 8e8965d77662daf87b270f79ac82a2225e77f932 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 18:12:30 +0100 Subject: [PATCH 1/8] GPU: Provide general GPUFailedMsg functionality also externally --- GPU/Common/CMakeLists.txt | 1 + GPU/Common/GPUCommonChkErr.h | 21 +++++++++++++++++++ GPU/Common/GPUCommonDefAPI.h | 2 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 15 +++++++++++++ GPU/GPUTracking/Base/GPUReconstruction.h | 3 +++ .../Base/GPUReconstructionDeviceBase.cxx | 6 +++++- .../Base/GPUReconstructionDeviceBase.h | 1 + .../Base/cuda/GPUReconstructionCUDA.cu | 16 ++------------ .../Base/cuda/GPUReconstructionCUDA.h | 4 ++-- .../GPUReconstructionCUDAExternalProvider.cu | 2 +- .../cuda/GPUReconstructionCUDAInternals.h | 4 +--- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 4 +++- .../Base/opencl/GPUReconstructionOCL.cxx | 17 +++------------ .../Base/opencl/GPUReconstructionOCL.h | 3 +-- .../opencl/GPUReconstructionOCLIncludesHost.h | 4 +--- 15 files changed, 61 insertions(+), 42 deletions(-) create mode 100644 GPU/Common/GPUCommonChkErr.h diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 8466035d74ef7..8b0a75679479f 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -15,6 +15,7 @@ set(HDRS_INSTALL GPUCommonAlgorithm.h GPUCommonDef.h GPUCommonDefAPI.h + GPUCommonChkErr.h GPUCommonDefSettings.h GPUCommonConstants.h GPUCommonLogger.h diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h new file mode 100644 index 0000000000000..df007b31dab64 --- /dev/null +++ b/GPU/Common/GPUCommonChkErr.h @@ -0,0 +1,21 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUCommonChkErr.h +/// \author David Rohr + +#ifndef GPUCOMMONCHKERR_H +#define GPUCOMMONCHKERR_H + +#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__, true) +#define GPUFailedMsgI(x) GPUFailedMsgA(x, __FILE__, __LINE__, false) + +#endif diff --git a/GPU/Common/GPUCommonDefAPI.h b/GPU/Common/GPUCommonDefAPI.h index 0cd3c4ebddb7f..f7efbf7e976d4 100644 --- a/GPU/Common/GPUCommonDefAPI.h +++ b/GPU/Common/GPUCommonDefAPI.h @@ -36,7 +36,7 @@ #define GPUdni() // Device function, not-to-be-inlined #define GPUdnii() inline // Device function, not-to-be-inlined on device, inlined on host #define GPUh() // Host-only function - // NOTE: All GPUd*() functions are also compiled on the host during GCC compilation. + // NOTE: All GPUd*() functions are also compiled on the host during host compilation. // The GPUh*() macros are for the rare cases of functions that you want to compile for the host during GPU compilation. // Usually, you do not need the GPUh*() versions. If in doubt, use GPUd*()! 
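// Editor's illustration of the NOTE above (not part of the patch; both functions are invented):
// a routine that kernels need is declared with GPUd() and is also compiled for the CPU during host
// compilation, whereas GPUh() is reserved for the rare host-only case described in the note.
GPUd() int32_t clampRow(int32_t row, int32_t nRows) { return row < 0 ? 0 : (row >= nRows ? nRows - 1 : row); }
GPUh() void printDebugSummary(); // host-only helper, never referenced from device code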
#define GPUhi() inline // to-be-inlined host-only function diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index df9a7380834ce..28241cb7aeec5 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -1078,6 +1078,21 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s return retVal; } +int32_t GPUReconstruction::GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError) +{ + if (error == 0 || !GPUFailedMsgInternal(error, file, line)) { + return 0; + } + if (failOnError) { + if (mInitialized && mInErrorHandling == false) { + mInErrorHandling = true; + CheckErrorCodes(false, true); + } + throw std::runtime_error("GPU Backend Failure"); + } + return 1; +} + void GPUReconstruction::DumpSettings(const char* dir) { std::string f; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 93310284d7564..1fe08d08a8058 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -143,6 +143,7 @@ class GPUReconstruction virtual void* getGPUPointer(void* ptr) { return ptr; } virtual void startGPUProfiling() {} virtual void endGPUProfiling() {} + int32_t GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError); int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr); void RunPipelineWorker(); void TerminatePipelineWorker(); @@ -246,6 +247,7 @@ class GPUReconstruction void UpdateMaxMemoryUsed(); int32_t EnqueuePipeline(bool terminate = false); GPUChain* GetNextChainInQueue(); + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { return 0; } virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0; virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; @@ -327,6 +329,7 @@ class GPUReconstruction // Others bool mInitialized = false; + bool mInErrorHandling = false; uint32_t mStatNEvents = 0; uint32_t mNEventsProcessed = 0; double mStatKernelTime = 0.; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index d1091f59b784a..b389e99a0b2bb 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -175,7 +175,11 @@ void GPUReconstructionDeviceBase::runConstantRegistrators() { auto& list = getDeviceConstantMemRegistratorsVector(); for (uint32_t i = 0; i < list.size(); i++) { - mDeviceConstantMemList.emplace_back(list[i]()); + auto* ptr = list[i](); + if (ptr == nullptr) { + GPUFatal("Error registering constant memory"); + } + mDeviceConstantMemList.emplace_back(ptr); } } diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 6cd3813ff1431..c4595bed4c3fb 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -46,6 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU virtual int32_t InitDevice_Runtime() = 0; int32_t ExitDevice() override; virtual int32_t ExitDevice_Runtime() = 0; + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override = 0; int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override; int32_t unregisterMemoryForGPU_internal(const 
void* ptr) override; void unregisterRemainingRegisteredMemory(); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d2adc3cc1fd19..40e3fa9b90eae 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -61,9 +61,9 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } } -int32_t GPUReconstructionCUDABackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) +static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); +int32_t GPUReconstructionCUDABackend::GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line) { - // Check for CUDA Error and in the case of an error display the corresponding error string if (error == cudaSuccess) { return (0); } @@ -71,18 +71,6 @@ int32_t GPUReconstructionCUDABackend::GPUFailedMsgAI(const int64_t error, const return 1; } -void GPUReconstructionCUDABackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) -{ - if (GPUFailedMsgAI(error, file, line)) { - static bool runningCallbacks = false; - if (IsInitialized() && runningCallbacks == false) { - runningCallbacks = true; - CheckErrorCodes(false, true); - } - throw std::runtime_error("CUDA Failure"); - } -} - GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) { mDeviceBackendSettings.deviceType = DeviceType::CUDA; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index dde70b9076e08..e04e14bd383d3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,13 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUFailedMsgAI(const int64_t error, const char* file, int32_t line); - void GPUFailedMsgA(const int64_t error, const char* file, int32_t line); + static int32_t GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override { return GPUFailedMsgStatic(error, file, line); } template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index 6bcafe565e930..521ca2182c9bb 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -36,7 +36,7 @@ using namespace o2::gpu; #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer) != cudaSuccess) { + if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 49c466103c593..a6d55c2d729fd 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h 
+++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -22,13 +22,11 @@ #include #include #include +#include "GPUCommonChkErr.h" namespace o2::gpu { -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - struct GPUReconstructionCUDAInternals { std::vector> kernelModules; // module for RTC compilation std::vector> kernelFunctions; // vector of ptrs to RTC kernels diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 0f8d9bf219ba6..a5ab353f3d43f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -138,7 +138,9 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - GPUReconstructionCUDA::GPUFailedMsgI(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer)); + if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + throw std::runtime_error("Could not obtain GPU constant memory symbol"); + } return retVal; }); #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index c7a8be62a12ea..6639c78b113e5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -48,28 +48,17 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } } -int32_t GPUReconstructionOCLBackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) +static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); +int32_t GPUReconstructionOCLBackend::GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error == CL_SUCCESS) { return (0); } - GPUError("OCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); + GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); return 1; } -void GPUReconstructionOCLBackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) -{ - if (GPUFailedMsgAI(error, file, line)) { - static bool runningCallbacks = false; - if (IsInitialized() && runningCallbacks == false) { - runningCallbacks = true; - CheckErrorCodes(false, true); - } - throw std::runtime_error("OpenCL Failure"); - } -} - void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() { GPUCA_GPUReconstructionUpdateDefaults(); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 5132baa444cd9..79f54274cd32c 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -39,8 +39,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t ExitDevice_Runtime() override; void UpdateAutomaticProcessingSettings() override; - int32_t GPUFailedMsgAI(const int64_t error, const char* file, int32_t line); - void GPUFailedMsgA(const int64_t error, const char* file, int32_t line); + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override; void SynchronizeGPU() override; int32_t 
DoStuckProtection(int32_t stream, deviceEvent event) override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index aec5708a80f3c..9c8cdbe87c7c1 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -28,6 +28,7 @@ #include "GPUReconstructionOCL.h" #include "GPUReconstructionIncludes.h" +#include "GPUCommonChkErr.h" using namespace o2::gpu; @@ -36,9 +37,6 @@ using namespace o2::gpu; #include #include -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - namespace o2::gpu { struct GPUReconstructionOCLInternals { From e5e4e1c76624be0056179108d514e36256e2c683 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 18:13:23 +0100 Subject: [PATCH 2/8] GPU: Rename GPUFailedMsg to GPUChkErr --- GPU/Common/GPUCommonChkErr.h | 13 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 4 +- GPU/GPUTracking/Base/GPUReconstruction.h | 4 +- .../Base/GPUReconstructionDeviceBase.h | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 154 +++++++++--------- .../Base/cuda/GPUReconstructionCUDA.h | 4 +- .../GPUReconstructionCUDAExternalProvider.cu | 2 +- .../cuda/GPUReconstructionCUDAInternals.h | 10 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 12 +- .../Base/opencl/GPUReconstructionOCL.cxx | 72 ++++---- .../Base/opencl/GPUReconstructionOCL.h | 2 +- .../opencl/GPUReconstructionOCLKernels.cxx | 16 +- 12 files changed, 152 insertions(+), 143 deletions(-) diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h index df007b31dab64..00cb9e50d302f 100644 --- a/GPU/Common/GPUCommonChkErr.h +++ b/GPU/Common/GPUCommonChkErr.h @@ -12,10 +12,19 @@ /// \file GPUCommonChkErr.h /// \author David Rohr +// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). +// In case of an error, they will print out the corresponding CUDA / HIP / OpenCL error code. +// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occurred. +// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. +// The macros must be called on a GPUReconstruction instance, e.g.: +// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } +// gpuRecObj.GPUChkErr(cudaMalloc(...)); + #ifndef GPUCOMMONCHKERR_H #define GPUCOMMONCHKERR_H -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__, true) -#define GPUFailedMsgI(x) GPUFailedMsgA(x, __FILE__, __LINE__, false) +// Please #include "GPUReconstruction.h" in your code, if you use these 2!
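// Editor's note, illustration only (not part of the patch): the macros defined below expand to a call
// of the GPUChkErrA member function, so the object they are invoked on selects the backend that decodes
// the error value. Assuming a GPUReconstruction* mRec and a hypothetical device allocation,
//   mRec->GPUChkErrI(cudaMalloc(&ptr, size))
// expands to
//   mRec->GPUChkErrA(cudaMalloc(&ptr, size), __FILE__, __LINE__, false)
// which prints the backend error string and returns 1 on failure, while the GPUChkErr form passes
// failOnError = true, runs CheckErrorCodes() once, and throws std::runtime_error("GPU Backend Failure").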
+#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) +#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) #endif diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 28241cb7aeec5..2bd4c0e937c20 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -1078,9 +1078,9 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s return retVal; } -int32_t GPUReconstruction::GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError) +int32_t GPUReconstruction::GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError) { - if (error == 0 || !GPUFailedMsgInternal(error, file, line)) { + if (error == 0 || !GPUChkErrInternal(error, file, line)) { return 0; } if (failOnError) { diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 1fe08d08a8058..f363f3f58aa6f 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -143,7 +143,7 @@ class GPUReconstruction virtual void* getGPUPointer(void* ptr) { return ptr; } virtual void startGPUProfiling() {} virtual void endGPUProfiling() {} - int32_t GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError); + int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError); int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr); void RunPipelineWorker(); void TerminatePipelineWorker(); @@ -247,7 +247,7 @@ class GPUReconstruction void UpdateMaxMemoryUsed(); int32_t EnqueuePipeline(bool terminate = false); GPUChain* GetNextChainInQueue(); - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { return 0; } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { return 0; } virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0; virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index c4595bed4c3fb..f0e19f588e0f1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -46,7 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU virtual int32_t InitDevice_Runtime() = 0; int32_t ExitDevice() override; virtual int32_t ExitDevice_Runtime() = 0; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override = 0; + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override = 0; int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override; int32_t unregisterMemoryForGPU_internal(const void* ptr) override; void unregisterRemainingRegisteredMemory(); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 40e3fa9b90eae..d30eb51bd4938 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -62,7 +62,7 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUFailedMsgStatic(const int64_t error, 
const char* file, int32_t line) +int32_t GPUReconstructionCUDABackend::GPUChkErrStatic(const int64_t error, const char* file, int32_t line) { if (error == cudaSuccess) { return (0); @@ -123,7 +123,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() cudaDeviceProp deviceProp; int32_t count, bestDevice = -1; double bestDeviceSpeed = -1, deviceSpeed; - if (GPUFailedMsgI(cudaGetDeviceCount(&count))) { + if (GPUChkErrI(cudaGetDeviceCount(&count))) { GPUError("Error getting CUDA Device Count"); return (1); } @@ -139,9 +139,9 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } size_t free, total; #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaInitDevice(i, 0, 0))) { + if (GPUChkErrI(cudaInitDevice(i, 0, 0))) { #else // HIP - if (GPUFailedMsgI(hipSetDevice(i))) { + if (GPUChkErrI(hipSetDevice(i))) { #endif if (mProcessingSettings.debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); @@ -149,21 +149,21 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() continue; } contextCreated = true; - if (GPUFailedMsgI(cudaMemGetInfo(&free, &total))) { + if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { if (mProcessingSettings.debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! Skipping it.", i); } - GPUFailedMsg(cudaDeviceReset()); + GPUChkErr(cudaDeviceReset()); continue; } if (count > 1) { - GPUFailedMsg(cudaDeviceReset()); + GPUChkErr(cudaDeviceReset()); contextCreated = false; } if (mProcessingSettings.debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } - if (GPUFailedMsgI(cudaGetDeviceProperties(&deviceProp, i))) { + if (GPUChkErrI(cudaGetDeviceProperties(&deviceProp, i))) { continue; } if (mProcessingSettings.debugLevel >= 4) { @@ -221,13 +221,13 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } if (noDevice) { if (contextCreated) { - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); } return (1); } mDeviceId = bestDevice; - GPUFailedMsgI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); + GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); if (mProcessingSettings.debugLevel >= 2) { GPUInfo("Using CUDA Device %s with Properties:", deviceProp.name); @@ -280,27 +280,27 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() #endif #ifndef __HIPCC__ // CUDA - if (contextCreated == 0 && GPUFailedMsgI(cudaInitDevice(mDeviceId, 0, 0))) { + if (contextCreated == 0 && GPUChkErrI(cudaInitDevice(mDeviceId, 0, 0))) { #else // HIP - if (contextCreated == 0 && GPUFailedMsgI(hipSetDevice(mDeviceId))) { + if (contextCreated == 0 && GPUChkErrI(hipSetDevice(mDeviceId))) { #endif GPUError("Could not set CUDA Device!"); return (1); } #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { GPUError("Error setting CUDA stack size"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } - if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? 
std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { GPUError("Error setting CUDA stack size"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } #else // HIP - if (GPUFailedMsgI(hipSetDeviceFlags(hipDeviceScheduleBlockingSync))) { + if (GPUChkErrI(hipSetDeviceFlags(hipDeviceScheduleBlockingSync))) { GPUError("Could not set HIP Device flags!"); return (1); } @@ -319,35 +319,35 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Allocating memory on GPU"); } - if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUFailedMsgI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { + if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUChkErrI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { size_t free, total; - GPUFailedMsg(cudaMemGetInfo(&free, &total)); + GPUChkErr(cudaMemGetInfo(&free, &total)); GPUError("CUDA Memory Allocation Error (trying %ld bytes, %ld available on GPU, %ld free)", (int64_t)mDeviceMemorySize, (int64_t)deviceProp.totalGlobalMem, (int64_t)free); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Allocating memory on Host"); } - if (GPUFailedMsgI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { + if (GPUChkErrI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { GPUError("Error allocating Page Locked Host Memory (trying %ld bytes)", (int64_t)mHostMemorySize); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } if (mProcessingSettings.debugLevel >= 1) { GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase); memset(mHostMemoryBase, 0xDD, mHostMemorySize); - if (GPUFailedMsgI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { + if (GPUChkErrI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { GPUError("Error during CUDA memset"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } } for (int32_t i = 0; i < mNStreams; i++) { - if (GPUFailedMsgI(cudaStreamCreateWithFlags(&mInternals->Streams[i], cudaStreamNonBlocking))) { + if (GPUChkErrI(cudaStreamCreateWithFlags(&mInternals->Streams[i], cudaStreamNonBlocking))) { GPUError("Error creating CUDA Stream"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } } @@ -365,7 +365,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() else { #define GPUCA_KRNL(x_class, ...) 
\ mInternals->kernelModules.emplace_back(std::make_unique()); \ - GPUFailedMsg(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); + GPUChkErr(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL loadKernelModules(true); @@ -382,11 +382,11 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() CUdeviceptr tmp = nullptr; // HIP just uses void* #endif size_t tmpSize = 0; - GPUFailedMsg(cuModuleGetGlobal(&tmp, &tmpSize, *mInternals->kernelModules[i], "gGPUConstantMemBuffer")); + GPUChkErr(cuModuleGetGlobal(&tmp, &tmpSize, *mInternals->kernelModules[i], "gGPUConstantMemBuffer")); mDeviceConstantMemList.emplace_back((void*)tmp); } #else - GPUFailedMsg(cudaMalloc(&devPtrConstantMem, gGPUConstantMemBufferSize)); + GPUChkErr(cudaMalloc(&devPtrConstantMem, gGPUConstantMemBufferSize)); #endif mDeviceConstantMem = (GPUConstantMem*)devPtrConstantMem; @@ -402,7 +402,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() mDeviceConstantMemList.resize(master->mDeviceConstantMemList.size()); std::copy(master->mDeviceConstantMemList.begin(), master->mDeviceConstantMemList.end(), mDeviceConstantMemList.begin()); mInternals = master->mInternals; - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); GPUInfo("CUDA Initialisation successfull (from master)"); } @@ -411,12 +411,12 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() cudaEvent_t* events = (cudaEvent_t*)mEvents[i].data(); for (uint32_t j = 0; j < mEvents[i].size(); j++) { #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaEventCreate(&events[j]))) { + if (GPUChkErrI(cudaEventCreate(&events[j]))) { #else - if (GPUFailedMsgI(hipEventCreateWithFlags(&events[j], hipEventBlockingSync))) { + if (GPUChkErrI(hipEventCreateWithFlags(&events[j], hipEventBlockingSync))) { #endif GPUError("Error creating event"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return 1; } } @@ -435,7 +435,7 @@ void GPUReconstructionCUDA::genAndLoadRTC() for (uint32_t i = 0; i < nCompile; i++) { if (mProcessingSettings.rtc.runTest != 2) { mInternals->kernelModules.emplace_back(std::make_unique()); - GPUFailedMsg(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); + GPUChkErr(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); } remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); @@ -449,33 +449,33 @@ void GPUReconstructionCUDA::genAndLoadRTC() int32_t GPUReconstructionCUDA::ExitDevice_Runtime() { // Uninitialize CUDA - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); SynchronizeGPU(); unregisterRemainingRegisteredMemory(); for (uint32_t i = 0; i < mEvents.size(); i++) { cudaEvent_t* events = (cudaEvent_t*)mEvents[i].data(); for (uint32_t j = 0; j < mEvents[i].size(); j++) { - GPUFailedMsgI(cudaEventDestroy(events[j])); + GPUChkErrI(cudaEventDestroy(events[j])); } } if (mMaster == nullptr) { - GPUFailedMsgI(cudaFree(mDeviceMemoryBase)); + GPUChkErrI(cudaFree(mDeviceMemoryBase)); #ifdef GPUCA_NO_CONSTANT_MEMORY - 
GPUFailedMsgI(cudaFree(mDeviceConstantMem)); + GPUChkErrI(cudaFree(mDeviceConstantMem)); #endif for (int32_t i = 0; i < mNStreams; i++) { - GPUFailedMsgI(cudaStreamDestroy(mInternals->Streams[i])); + GPUChkErrI(cudaStreamDestroy(mInternals->Streams[i])); } - GPUFailedMsgI(cudaFreeHost(mHostMemoryBase)); + GPUChkErrI(cudaFreeHost(mHostMemoryBase)); for (uint32_t i = 0; i < mInternals->kernelModules.size(); i++) { - GPUFailedMsg(cuModuleUnload(*mInternals->kernelModules[i])); + GPUChkErr(cuModuleUnload(*mInternals->kernelModules[i])); } - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); GPUInfo("CUDA Uninitialized"); } mDeviceMemoryBase = nullptr; @@ -491,18 +491,18 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, } if (stream == -1) { SynchronizeGPU(); - GPUFailedMsg(cudaMemcpy(dst, src, size, toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost)); + GPUChkErr(cudaMemcpy(dst, src, size, toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost)); } else { if (evList == nullptr) { nEvents = 0; } for (int32_t k = 0; k < nEvents; k++) { - GPUFailedMsg(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); } - GPUFailedMsg(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); } if (ev) { - GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 0 : stream])); + GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 
0 : stream])); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); @@ -518,13 +518,13 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s continue; } if (stream == -1) { - GPUFailedMsg(cudaMemcpy(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice)); + GPUChkErr(cudaMemcpy(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice)); } else { - GPUFailedMsg(cudaMemcpyAsync(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice, mInternals->Streams[stream])); } } if (ev && stream != -1) { - GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); + GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); @@ -533,28 +533,28 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s } void GPUReconstructionCUDA::ReleaseEvent(deviceEvent ev) {} -void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } +void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } std::unique_ptr GPUReconstructionCUDA::GetThreadContext() { - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); return GPUReconstructionProcessing::GetThreadContext(); } -void GPUReconstructionCUDA::SynchronizeGPU() { GPUFailedMsg(cudaDeviceSynchronize()); } -void GPUReconstructionCUDA::SynchronizeStream(int32_t stream) { GPUFailedMsg(cudaStreamSynchronize(mInternals->Streams[stream])); } +void GPUReconstructionCUDA::SynchronizeGPU() { GPUChkErr(cudaDeviceSynchronize()); } +void GPUReconstructionCUDA::SynchronizeStream(int32_t stream) { GPUChkErr(cudaStreamSynchronize(mInternals->Streams[stream])); } void GPUReconstructionCUDA::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(cudaEventSynchronize(evList[i].get())); + GPUChkErr(cudaEventSynchronize(evList[i].get())); } } void GPUReconstructionCUDA::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(cudaStreamWaitEvent(mInternals->Streams[stream], evList[i].get(), 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[i].get(), 0)); } } @@ -565,7 +565,7 @@ bool GPUReconstructionCUDA::IsEventDone(deviceEvent* evList, int32_t nEvents) if (retVal == cudaErrorNotReady) { return false; } - GPUFailedMsg(retVal); + GPUChkErr(retVal); } return (true); } @@ -582,7 +582,7 @@ int32_t GPUReconstructionCUDA::GPUDebug(const char* state, int32_t stream, bool if (!force && mProcessingSettings.debugLevel <= 0) { return (0); } - if (GPUFailedMsgI(stream == -1 ? cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { + if (GPUChkErrI(stream == -1 ? 
cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { GPUError("CUDA Error while synchronizing (%s) (Stream %d)", state, stream); return (1); } @@ -597,23 +597,23 @@ int32_t GPUReconstructionCUDA::registerMemoryForGPU_internal(const void* ptr, si if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Registering %zu bytes of memory for GPU", size); } - return GPUFailedMsgI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); + return GPUChkErrI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); } int32_t GPUReconstructionCUDA::unregisterMemoryForGPU_internal(const void* ptr) { - return GPUFailedMsgI(cudaHostUnregister((void*)ptr)); + return GPUChkErrI(cudaHostUnregister((void*)ptr)); } void GPUReconstructionCUDABackend::PrintKernelOccupancies() { int32_t maxBlocks = 0, threads = 0, suggestedBlocks = 0, nRegs = 0, sMem = 0; - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); for (uint32_t i = 0; i < mInternals->kernelFunctions.size(); i++) { - GPUFailedMsg(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy - GPUFailedMsg(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); - GPUFailedMsg(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); - GPUFailedMsg(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); + GPUChkErr(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy + GPUChkErr(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); + GPUChkErr(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); + GPUChkErr(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", mInternals->kernelNames[i].c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem); } } @@ -621,14 +621,14 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; -#define GPUCA_KRNL(x_class, ...) \ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ - mInternals->kernelFunctions.emplace_back(new CUfunction); \ - mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ - if (mProcessingSettings.debugLevel >= 3) { \ - GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ - } \ - GPUFailedMsg(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ +#define GPUCA_KRNL(x_class, ...) \ + getRTCkernelNum(mInternals->kernelFunctions.size()); \ + mInternals->kernelFunctions.emplace_back(new CUfunction); \ + mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ + if (mProcessingSettings.debugLevel >= 3) { \ + GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ + } \ + GPUChkErr(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? 
j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ j++; #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL @@ -644,27 +644,27 @@ int32_t GPUReconstructionCUDA::PrepareTextures() #ifdef GPUCA_USE_TEXTURES cudaChannelFormatDesc channelDescu2 = cudaCreateChannelDesc(); size_t offset; - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); + GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); cudaChannelFormatDesc channelDescu = cudaCreateChannelDesc(); - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); + GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); #endif return (0); } void GPUReconstructionCUDA::startGPUProfiling() { - GPUFailedMsg(cudaProfilerStart()); + GPUChkErr(cudaProfilerStart()); } void GPUReconstructionCUDA::endGPUProfiling() { - GPUFailedMsg(cudaProfilerStop()); + GPUChkErr(cudaProfilerStop()); } #else // HIP void* GPUReconstructionHIP::getGPUPointer(void* ptr) { void* retVal = nullptr; - GPUFailedMsg(hipHostGetDevicePointer(&retVal, ptr, 0)); + GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); return retVal; } #endif // __HIPCC__ diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index e04e14bd383d3..02e8f92bb2328 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,13 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line); + static int32_t GPUChkErrStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override { return GPUFailedMsgStatic(error, file, line); } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override { return GPUChkErrStatic(error, file, line); } template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index 521ca2182c9bb..bc1d573385598 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -36,7 +36,7 @@ using namespace o2::gpu; #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h 
b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index a6d55c2d729fd..027a9d5445b2c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -49,7 +49,7 @@ class GPUDebugTiming { if (mDo) { if (mDeviceTimers) { - mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[0].get(), mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[0].get(), mStreams[mXYZ.x.stream])); } else { mTimer.ResetStart(); } @@ -59,13 +59,13 @@ class GPUDebugTiming { if (mDo && mXYZ.t == 0.) { if (mDeviceTimers) { - mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[1].get(), mStreams[mXYZ.x.stream])); - mRec->GPUFailedMsg(cudaEventSynchronize(mDeviceTimers[1].get())); + mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[1].get(), mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaEventSynchronize(mDeviceTimers[1].get())); float v; - mRec->GPUFailedMsg(cudaEventElapsedTime(&v, mDeviceTimers[0].get(), mDeviceTimers[1].get())); + mRec->GPUChkErr(cudaEventElapsedTime(&v, mDeviceTimers[0].get(), mDeviceTimers[1].get())); mXYZ.t = v * 1.e-3f; } else { - mRec->GPUFailedMsg(cudaStreamSynchronize(mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaStreamSynchronize(mStreams[mXYZ.x.stream])); mXYZ.t = mTimer.GetCurrentElapsedTime(); } } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index a5ab353f3d43f..f60f00c13710d 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -34,7 +34,7 @@ __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUC template <> inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - GPUFailedMsg(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); + GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); } template @@ -56,7 +56,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet #endif pArgs[arg_offset] = &y.index; GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); - GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); + GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -67,16 +67,16 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgsStreams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); } } { GPUDebugTiming timer(mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); std::apply([this, &args](auto&... 
vals) { this->runKernelBackendInternal(args.s, vals...); }, args.v); } - GPUFailedMsg(cudaGetLastError()); + GPUChkErr(cudaGetLastError()); if (z.ev) { - GPUFailedMsg(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream])); + GPUChkErr(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream])); } } @@ -138,7 +138,7 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 6639c78b113e5..e52494937f8bf 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -49,7 +49,7 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); -int32_t GPUReconstructionOCLBackend::GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const +int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error == CL_SUCCESS) { @@ -69,7 +69,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() if (mMaster == nullptr) { cl_int ocl_error; cl_uint num_platforms; - if (GPUFailedMsgI(clGetPlatformIDs(0, nullptr, &num_platforms))) { + if (GPUChkErrI(clGetPlatformIDs(0, nullptr, &num_platforms))) { GPUErrorReturn("Error getting OpenCL Platform Count"); } if (num_platforms == 0) { @@ -82,7 +82,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() // Query platforms and devices std::unique_ptr platforms; platforms.reset(new cl_platform_id[num_platforms]); - if (GPUFailedMsgI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) { + if (GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) { GPUErrorReturn("Error getting OpenCL Platforms"); } @@ -227,7 +227,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUErrorReturn("Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation"); } mInternals->platform = platforms[bestPlatform]; - GPUFailedMsg(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr)); + GPUChkErr(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr)); mInternals->device = devices[bestDevice]; queryDevice(mInternals->device); @@ -267,7 +267,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mMaxBackendThreads = std::max(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount); mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Could not create OPENCL Device Context!"); } @@ -280,13 +280,13 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } mInternals->mem_gpu = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE, mDeviceMemorySize, nullptr, 
&ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { clReleaseContext(mInternals->context); GPUErrorReturn("OPENCL Memory Allocation Error"); } mInternals->mem_constant = clCreateBuffer(mInternals->context, CL_MEM_READ_ONLY, gGPUConstantMemBufferSize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { clReleaseMemObject(mInternals->mem_gpu); clReleaseContext(mInternals->context); GPUErrorReturn("OPENCL Constant Memory Allocation Error"); @@ -314,44 +314,44 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() #else mInternals->command_queue[i] = clCreateCommandQueue(mInternals->context, mInternals->device, 0, &ocl_error); #endif - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating OpenCL command queue"); } } - if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) { + if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) { GPUErrorReturn("Error migrating buffer"); } - if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) { + if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) { GPUErrorReturn("Error migrating buffer"); } mInternals->mem_host = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mHostMemorySize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error allocating pinned host memory"); } const char* krnlGetPtr = "__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}"; cl_program program = clCreateProgramWithSource(mInternals->context, 1, (const char**)&krnlGetPtr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating program object"); } ocl_error = clBuildProgram(program, 1, &mInternals->device, "", nullptr, nullptr); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { char build_log[16384]; clGetProgramBuildInfo(program, mInternals->device, CL_PROGRAM_BUILD_LOG, 16384, build_log, nullptr); GPUImportant("Build Log:\n\n%s\n\n", build_log); GPUErrorReturn("Error compiling program"); } cl_kernel kernel = clCreateKernel(program, "krnlGetPtr", &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating kernel"); } - if (GPUFailedMsgI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) || - GPUFailedMsgI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) || - GPUFailedMsgI(clFinish(mInternals->command_queue[0])) || - GPUFailedMsgI(clReleaseKernel(kernel)) || - GPUFailedMsgI(clReleaseProgram(program))) { + if (GPUChkErrI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) || + GPUChkErrI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) || + GPUChkErrI(clFinish(mInternals->command_queue[0])) || + GPUChkErrI(clReleaseKernel(kernel)) || + GPUChkErrI(clReleaseProgram(program))) { GPUErrorReturn("Error obtaining device memory ptr"); } @@ -359,7 +359,7 @@ int32_t 
GPUReconstructionOCLBackend::InitDevice_Runtime() GPUInfo("Mapping hostmemory"); } mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error allocating Page Locked Host Memory"); } @@ -435,14 +435,14 @@ size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t } if (size == 0) { if (ev || nEvents) { // Workaround for OCL runtimes, which can throw an error in case size = 0 - GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList(), ev->getEventList())); } } else if (toGPU == -2) { - GPUFailedMsg(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList(), ev->getEventList())); } else if (toGPU) { - GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList(), ev->getEventList())); } else { - GPUFailedMsg(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); @@ -455,16 +455,16 @@ size_t GPUReconstructionOCLBackend::WriteToConstantMemory(size_t offset, const v if (stream == -1) { SynchronizeGPU(); } - GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); + GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 
0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); if (mProcessingSettings.serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); } return size; } -void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUFailedMsg(clReleaseEvent(ev.get())); } +void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get())); } -void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } +void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEvent event) { @@ -490,18 +490,18 @@ int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEve void GPUReconstructionOCLBackend::SynchronizeGPU() { for (int32_t i = 0; i < mNStreams; i++) { - GPUFailedMsg(clFinish(mInternals->command_queue[i])); + GPUChkErr(clFinish(mInternals->command_queue[i])); } } -void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { GPUFailedMsg(clFinish(mInternals->command_queue[stream])); } +void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { GPUChkErr(clFinish(mInternals->command_queue[stream])); } -void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUFailedMsg(clWaitForEvents(nEvents, evList->getEventList())); } +void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList())); } void GPUReconstructionOCLBackend::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { if (nEvents) { - GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); + GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); } } @@ -509,7 +509,7 @@ bool GPUReconstructionOCLBackend::IsEventDone(deviceEvent* evList, int32_t nEven { cl_int eventdone; for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(clGetEventInfo(evList[i].get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr)); + GPUChkErr(clGetEventInfo(evList[i].get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr)); if (eventdone != CL_COMPLETE) { return false; } @@ -524,7 +524,7 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, return (0); } for (int32_t i = 0; i < mNStreams; i++) { - if (GPUFailedMsgI(clFinish(mInternals->command_queue[i]))) { + if (GPUChkErrI(clFinish(mInternals->command_queue[i]))) { GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i); } } @@ -554,14 +554,14 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error); } - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUError("Error creating OpenCL program from binary"); return 1; } - if (GPUFailedMsgI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) { + if (GPUChkErrI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, 
nullptr, nullptr))) { cl_build_status status; - if (GPUFailedMsgI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { + if (GPUChkErrI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { size_t log_size; clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size); std::unique_ptr build_log(new char[log_size + 1]); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 79f54274cd32c..2abae229c74bb 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -39,7 +39,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t ExitDevice_Runtime() override; void UpdateAutomaticProcessingSettings() override; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override; + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; void SynchronizeGPU() override; int32_t DoStuckProtection(int32_t stream, deviceEvent event) override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 8a6c889773cb0..4f6a8725b4be5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -18,7 +18,7 @@ template <> inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { cl_int4 val0 = {0, 0, 0, 0}; - GPUFailedMsg(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); + GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 
0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); } template @@ -28,7 +28,7 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu auto& x = _xyz.x; auto& y = _xyz.y; auto& z = _xyz.z; - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); + GPUChkErr(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); cl_event ev; cl_event* evr; @@ -39,15 +39,15 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu } else { evr = (cl_event*)z.ev; } - GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); + GPUChkErr(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { cl_ulong time_start, time_end; - GPUFailedMsg(clWaitForEvents(1, evr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); + GPUChkErr(clWaitForEvents(1, evr)); + GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); + GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); _xyz.t = (time_end - time_start) * 1.e-9f; if (tmpEvent) { - GPUFailedMsg(clReleaseEvent(ev)); + GPUChkErr(clReleaseEvent(ev)); } } } @@ -80,7 +80,7 @@ int32_t GPUReconstructionOCLBackend::AddKernel() cl_int ocl_error; cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUError("Error creating OPENCL Kernel: %s", name.c_str()); return 1; } From d7d0ee9b194930b59ab49b4410c4d8afa6bb84fd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 14:52:53 +0100 Subject: [PATCH 3/8] Fix compiler warning --- Generators/src/GeneratorFromFile.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Generators/src/GeneratorFromFile.cxx b/Generators/src/GeneratorFromFile.cxx index 6d4e85afa6721..e37a3886c24e1 100644 --- a/Generators/src/GeneratorFromFile.cxx +++ b/Generators/src/GeneratorFromFile.cxx @@ -361,7 +361,7 @@ namespace std::vector executeCommand(const std::string& command) { std::vector result; - std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); + std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); if (!pipe) { throw std::runtime_error("Failed to open pipe"); } From 36d0ea0f3fc4b7905c37866a278c20b93ee059d7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 15:06:29 +0100 Subject: [PATCH 4/8] GPU: Clean up more of C++ < 11 compatibility code --- GPU/GPUTracking/Base/GPUConstantMem.h | 2 +- GPU/GPUTracking/Base/GPUGeneralKernels.h | 6 +- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 4 +- .../Base/GPUReconstructionKernels.h | 8 +-- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 60 ++++++++----------- GPU/GPUTracking/Global/GPUChain.cxx | 12 ++-- GPU/GPUTracking/Global/GPUChain.h | 12 ++-- .../SectorTracker/GPUTPCCreateTrackingData.h | 2 +- .../GPUTPCExtrapolationTracking.h | 4 +- .../SectorTracker/GPUTPCNeighboursCleaner.h | 2 +- .../SectorTracker/GPUTPCNeighboursFinder.h | 2 +- .../SectorTracker/GPUTPCStartHitsFinder.h | 2 +- 
.../SectorTracker/GPUTPCStartHitsSorter.h | 2 +- .../SectorTracker/GPUTPCTrackletConstructor.h | 2 +- .../SectorTracker/GPUTPCTrackletSelector.h | 2 +- .../TRDTracking/GPUTRDTrackerKernels.h | 2 +- 16 files changed, 58 insertions(+), 66 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 8f1cc90f5ae93..e0b06f0a3ea55 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -96,7 +96,7 @@ static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + si #endif } // namespace o2::gpu #if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) -GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; +GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; // TODO: This should go into o2::gpu namespace, but then CUDA or HIP would not find the symbol #endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index 71980d38fdc9e..ce93e2e5eead8 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -79,7 +79,7 @@ class GPUKernelTemplate }; typedef GPUconstantref() GPUConstantMem processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return &processors; @@ -94,7 +94,7 @@ class GPUKernelTemplate class GPUMemClean16 : public GPUKernelTemplate { public: - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size); }; @@ -103,7 +103,7 @@ class GPUMemClean16 : public GPUKernelTemplate class GPUitoa : public GPUKernelTemplate { public: - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); }; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index f90820281c74d..fd999ec2304e1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -134,8 +134,8 @@ template inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args) { HighResTimer* t = nullptr; - GPUCA_RECO_STEP myStep = S::GetRecoStep() == GPUCA_RECO_STEP::NoRecoStep ? setup.x.step : S::GetRecoStep(); - if (myStep == GPUCA_RECO_STEP::NoRecoStep) { + GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep(); + if (myStep == GPUDataTypes::RecoStep::NoRecoStep) { throw std::runtime_error("Failure running general kernel without defining RecoStep"); } int32_t cpuFallback = IsGPU() ? 
(setup.x.device == krnlDeviceType::CPU ? 2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index ba30f38e902ad..b8f3e3746c743 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -30,14 +30,14 @@ struct classArgument { }; struct krnlExec { - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUCA_RECO_STEP::NoRecoStep) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUDataTypes::RecoStep::NoRecoStep) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} uint32_t nBlocks; uint32_t nThreads; int32_t stream; GPUReconstruction::krnlDeviceType device; - GPUCA_RECO_STEP step; + GPUDataTypes::RecoStep step; }; struct krnlRunRange { constexpr krnlRunRange() = default; diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index f7bfe38be988d..6cc1e7266e722 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -96,9 +96,6 @@ struct TPCPadGainCalib; struct TPCZSLinkMapping; #include "utils/bitfield.h" -#define ENUM_CLASS class -#define ENUM_UINT : uint32_t -#define GPUCA_RECO_STEP GPUDataTypes::RecoStep class GPUTPCTrack; class GPUTPCHitId; @@ -117,36 +114,33 @@ struct GPUSettingsTF; class GPUDataTypes { public: - enum ENUM_CLASS GeometryType ENUM_UINT{RESERVED_GEOMETRY = 0, ALIROOT = 1, O2 = 2}; - enum DeviceType ENUM_UINT { INVALID_DEVICE = 0, - CPU = 1, - CUDA = 2, - HIP = 3, - OCL = 4 }; - enum ENUM_CLASS GeneralStep { Prepare = 1, - QA = 2 }; + // clang-format off + enum class GeometryType : uint32_t { RESERVED_GEOMETRY = 0, ALIROOT = 1, O2 = 2 }; + enum DeviceType : uint32_t { INVALID_DEVICE = 0, CPU = 1, CUDA = 2, HIP = 3, OCL = 4 }; + enum class GeneralStep { Prepare = 1, QA = 2 }; + // clang-format on - enum ENUM_CLASS RecoStep { TPCConversion = 1, - TPCSectorTracking = 2, - TPCMerging = 4, - TPCCompression = 8, - TRDTracking = 16, - ITSTracking = 32, - TPCdEdx = 64, - TPCClusterFinding = 128, - TPCDecompression = 256, - Refit = 512, - AllRecoSteps = 0x7FFFFFFF, - NoRecoStep = 0 }; - enum ENUM_CLASS InOutType { TPCClusters = 1, - OBSOLETE = 2, - TPCMergedTracks = 4, - TPCCompressedClusters = 8, - TRDTracklets = 16, - TRDTracks = 32, - TPCRaw = 64, - ITSClusters = 128, - ITSTracks = 256 }; + enum class RecoStep { TPCConversion = 1, + TPCSectorTracking = 2, + TPCMerging = 4, + TPCCompression = 8, + TRDTracking = 16, + ITSTracking = 32, + TPCdEdx = 64, + TPCClusterFinding = 128, + TPCDecompression = 
256, + Refit = 512, + AllRecoSteps = 0x7FFFFFFF, + NoRecoStep = 0 }; + enum class InOutType { TPCClusters = 1, + OBSOLETE = 2, + TPCMergedTracks = 4, + TPCCompressedClusters = 8, + TRDTracklets = 16, + TRDTracks = 32, + TPCRaw = 64, + ITSClusters = 128, + ITSTracks = 256 }; #ifndef __OPENCL__ static constexpr const char* const DEVICE_TYPE_NAMES[] = {"INVALID", "CPU", "CUDA", "HIP", "OCL"}; static constexpr const char* const RECO_STEP_NAMES[] = {"TPC Transformation", "TPC Sector Tracking", "TPC Track Merging and Fit", "TPC Compression", "TRD Tracking", "ITS Tracking", "TPC dEdx Computation", "TPC Cluster Finding", "TPC Decompression", "Global Refit"}; @@ -312,8 +306,6 @@ struct GPUTrackingInOutPointers { const GPUSettingsTF* settingsTF = nullptr; }; -#undef ENUM_CLASS -#undef ENUM_UINT } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChain.cxx b/GPU/GPUTracking/Global/GPUChain.cxx index 6990d5e08b638..300de31a509ba 100644 --- a/GPU/GPUTracking/Global/GPUChain.cxx +++ b/GPU/GPUTracking/Global/GPUChain.cxx @@ -18,33 +18,33 @@ using namespace o2::gpu; constexpr GPUChain::krnlRunRange GPUChain::krnlRunRangeNone; constexpr GPUChain::krnlEvent GPUChain::krnlEventNone; -GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { const uint32_t nBlocks = (totalItems + nThreads - 1) / nThreads; return {nBlocks, nThreads, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-1, totalItems, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-2, nBlocks, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st) { return {(uint32_t)-2, nBlocks, stream, GPUReconstruction::krnlDeviceType::Auto, st}; } -GPUChain::krnlExec GPUChain::GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-3, 0, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st) { return {(uint32_t)-3, 0, stream, GPUReconstruction::krnlDeviceType::Auto, st}; } diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index b9da1c9a330d3..fff5d2efe0270 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -192,15 +192,15 @@ class GPUChain return mRec->getTimer(name, num); } // Get GRID with NBLOCKS minimal such that nThreads * NBLOCS >= totalItems - krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, 
GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with NBLOCKS minimal such that ideal number of threads * NBLOCKS >= totalItems - krnlExec GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with specified number of blocks, each block with ideal number of threads - krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); - krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); + krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with ideal number of threads / blocks for GPU - krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); - krnlExec GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); + krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); inline uint32_t BlockCount() const { return mRec->mBlockCount; } inline uint32_t WarpSize() const { return mRec->mWarpSize; } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h index 9327699c9404b..dc1beacf79d02 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h @@ -32,7 +32,7 @@ class GPUTPCCreateTrackingData : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h index 2d2b275d06399..91a33d132f136 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h @@ -30,7 +30,7 @@ class GPUTPCExtrapolationTracking : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return 
GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -50,7 +50,7 @@ class GPUTPCExtrapolationTrackingCopyNumbers : public GPUKernelTemplate { public: typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h index 7af6e8eb1a582..de79b268aea78 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h @@ -38,7 +38,7 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 54dc0876f8a55..41b5eb8a4ffb8 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -49,7 +49,7 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h index 5e620180570c8..c834b17369f0f 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h @@ -38,7 +38,7 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h index b0349d660dbc1..0e2fd96dd2690 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h @@ -38,7 +38,7 @@ class GPUTPCStartHitsSorter : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr 
static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index 8757ed87072da..0f8314ee0fad4 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -97,7 +97,7 @@ class GPUTPCTrackletConstructor GPUd() static int32_t GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index bb969d866ef29..5009c672b030e 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -42,7 +42,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h index 70b525420f294..21135ddc48dfa 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h @@ -26,7 +26,7 @@ class GPUTRDTrackerKernels : public GPUKernelTemplate enum K { defaultKernel = 0, gpuVersion = 0, o2Version = 1 }; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TRDTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TRDTracking; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, T* externalInstance = nullptr); }; From cb499fb3faa77a036f83f813af9c6b09965ca037 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 15:06:57 +0100 Subject: [PATCH 5/8] GPU: Provide static versions of GPUChkErr() macros test2 GPU: Provide static versions of GPUChkErr() macros --- GPU/Common/CMakeLists.txt | 2 +- GPU/Common/GPUCommonChkErr.h | 30 --------- GPU/Common/GPUCommonDef.h | 24 +++++-- GPU/Common/GPUCommonHelpers.h | 62 +++++++++++++++++++ GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 9 +-- .../Base/cuda/GPUReconstructionCUDA.h | 3 +- .../GPUReconstructionCUDAExternalProvider.cu | 3 +- .../cuda/GPUReconstructionCUDAHelpers.inc | 31 ++++++++++ .../cuda/GPUReconstructionCUDAInternals.h | 2 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 2 +- 
GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +- .../Base/opencl/GPUReconstructionOCL.cxx | 7 +-- .../opencl/GPUReconstructionOCLIncludesHost.h | 2 +- 14 files changed, 127 insertions(+), 56 deletions(-) delete mode 100644 GPU/Common/GPUCommonChkErr.h create mode 100644 GPU/Common/GPUCommonHelpers.h create mode 100644 GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 8b0a75679479f..bacf4454c39fd 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -15,7 +15,7 @@ set(HDRS_INSTALL GPUCommonAlgorithm.h GPUCommonDef.h GPUCommonDefAPI.h - GPUCommonChkErr.h + GPUCommonHelpers.h GPUCommonDefSettings.h GPUCommonConstants.h GPUCommonLogger.h diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h deleted file mode 100644 index 00cb9e50d302f..0000000000000 --- a/GPU/Common/GPUCommonChkErr.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUCommonChkErr.h -/// \author David Rohr - -// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). -// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code -// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured. -// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. -// The Macros must be called ona GPUReconstruction instance, e.g.: -// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } -// gpuRecObj.GPUChkErr(cudaMalloc(...)); - -#ifndef GPUCOMMONCHKERR_H -#define GPUCOMMONCHKERR_H - -// Please #include "GPUReconstruction.h" in your code, if you use these 2! 
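// (Illustration, not part of the patch.) A minimal usage sketch of the two instance-bound check
// macros documented in the comment above, assuming a CUDA build with "GPUReconstruction.h" and
// <cuda_runtime.h> included and an initialized GPUReconstruction; the function name, mRec and the
// buffer size are invented for the example.
static int32_t demoAllocScratch(o2::gpu::GPUReconstruction* mRec, void** devPtr)
{
  if (mRec->GPUChkErrI(cudaMalloc(devPtr, 1024))) { // soft check: logs the CUDA error and returns 1
    return 1;                                       // caller decides how to recover
  }
  mRec->GPUChkErr(cudaMemset(*devPtr, 0, 1024));    // hard check: throws if the call failed
  return 0;
}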
-#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) -#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) - -#endif diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index ae8c48b500b69..b4a788e66a81c 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -35,13 +35,25 @@ #define GPUCA_GPUCODE // Compiled by GPU compiler #endif - #if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__) - #define GPUCA_GPUCODE_DEVICE // Executed on device + #if defined(GPUCA_GPUCODE) + #if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__) + #define GPUCA_GPUCODE_DEVICE // Executed on device + #endif + #if defined(__CUDACC__) + #define GPUCA_GPUTYPE CUDA + #elif defined(__HIPCC__) + #define GPUCA_GPUTYPE HIP + #elif defined(__OPENCL__) || defined(__OPENCL_HOST__) + #define GPUCA_GPUTYPE OCL + #endif #endif #endif +#ifndef GPUCA_GPUTYPE + #define GPUCA_GPUTYPE CPU +#endif #if defined(GPUCA_STANDALONE) || (defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE)) || defined (GPUCA_GPUCODE) - #define GPUCA_ALIGPUCODE + #define GPUCA_ALIGPUCODE // Part of GPUTracking library but not of interface #endif #if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) @@ -51,13 +63,13 @@ #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && defined(DEBUG_STREAMER) -#define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__ + #define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__ #else -#define GPUCA_DEBUG_STREAMER_CHECK(...) + #define GPUCA_DEBUG_STREAMER_CHECK(...) #endif #ifndef GPUCA_RTC_SPECIAL_CODE -#define GPUCA_RTC_SPECIAL_CODE(...) + #define GPUCA_RTC_SPECIAL_CODE(...) #endif // API Definitions for GPU Compilation diff --git a/GPU/Common/GPUCommonHelpers.h b/GPU/Common/GPUCommonHelpers.h new file mode 100644 index 0000000000000..ad876db0d6c3a --- /dev/null +++ b/GPU/Common/GPUCommonHelpers.h @@ -0,0 +1,62 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUCommonHelpers.h +/// \author David Rohr + +// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). +// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code +// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured. +// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. +// These Macros must be called ona GPUReconstruction instance. +// The GPUChkErrS and GPUChkErrSI are similar but static, without required GPUReconstruction instance. 
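// (Illustration, not part of the patch.) Self-contained sketch of how the static variants just
// described resolve the backend checker at compile time: GPUCA_GPUTYPE (CPU / CUDA / HIP / OCL, see
// the GPUCommonDef.h hunk above) is token-pasted into the function name, in the same way
// GPUCommonHelpers.h below does with GPUCOMMON_INTERNAL_CAT. All DEMO_* / Demo* names are invented.
#include <cstdint>
#include <cstdio>
#include <stdexcept>

#define DEMO_GPUTYPE CPU // in the real code this is derived from the compiler (__CUDACC__ etc.)
#define DEMO_CAT_A(a, b, c) a##b##c
#define DEMO_CAT(...) DEMO_CAT_A(__VA_ARGS__) // extra level so DEMO_GPUTYPE is expanded before pasting

// Backend-specific checker; the CUDA build provides GPUReconstructionCUDAChkErr in this role.
static int32_t DemoCPUChkErr(const int64_t error, const char* file, int32_t line)
{
  if (error != 0) {
    std::printf("demo error %lld (%s:%d)\n", (long long)error, file, line);
  }
  return error != 0;
}

// Generic wrapper: picks the backend checker purely via token pasting, throws on hard failures.
static int32_t DemoChkErr(const int64_t error, const char* file, int32_t line, bool failOnError)
{
  int32_t bad = error && DEMO_CAT(Demo, DEMO_GPUTYPE, ChkErr)(error, file, line); // -> DemoCPUChkErr(...)
  if (bad && failOnError) {
    throw std::runtime_error("demo API call failure");
  }
  return bad;
}

#define DEMO_CHK_ERR_S(x) DemoChkErr(x, __FILE__, __LINE__, true)   // analogous to GPUChkErrS
#define DEMO_CHK_ERR_SI(x) DemoChkErr(x, __FILE__, __LINE__, false) // analogous to GPUChkErrSI

int main()
{
  return DEMO_CHK_ERR_SI(0); // success path: nothing printed, returns 0
}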
+// Examples: +// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } +// gpuRecObj.GPUChkErr(cudaMalloc(...)); +// if (GPUChkErrSI(cudaMalloc(..))) { exit(1); } + +#ifndef GPUCOMMONHELPERS_H +#define GPUCOMMONHELPERS_H + +// Please #include "GPUReconstruction.h" in your code, if you use these 2! +#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) +#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) +#define GPUChkErrS(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, true) +#define GPUChkErrSI(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, false) + +#include "GPUCommonDef.h" +#include + +namespace o2::gpu::internal +{ +#define GPUCOMMON_INTERNAL_CAT_A(a, b, c) a##b##c +#define GPUCOMMON_INTERNAL_CAT(...) GPUCOMMON_INTERNAL_CAT_A(__VA_ARGS__) +extern int32_t GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(const int64_t error, const char* file, int32_t line); +inline int32_t GPUReconstructionCPUChkErr(const int64_t error, const char* file, int32_t line) +{ + if (error) { + GPUError("GPUCommon Error Code %d (%s:%d)", error, file, line); + } + return error != 0; +} +static inline int32_t GPUReconstructionChkErr(const int64_t error, const char* file, int32_t line, bool failOnError) +{ + int32_t retVal = error && GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(error, file, line); + if (retVal && failOnError) { + throw std::runtime_error("GPU API Call Failure"); + } + return error; +} +#undef GPUCOMMON_INTERNAL_CAT_A +#undef GPUCOMMON_INTERNAL_CAT +} // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index a33234db49a27..5b2e53179e50c 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -18,7 +18,7 @@ endif() message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) -set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) +set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d30eb51bd4938..d0d5ef4680fac 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -22,6 +22,7 @@ #include "CUDAThrustHelpers.h" #include "GPUReconstructionIncludes.h" #include "GPUParamRTC.h" +#include "GPUReconstructionCUDAHelpers.inc" #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 #include "utils/qGetLdBinarySymbols.h" @@ -62,13 +63,9 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUChkErrStatic(const int64_t error, const char* file, int32_t line) +int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { - if (error == cudaSuccess) { - return (0); - } - GPUError("CUDA Error: %ld / %s (%s:%d)", error, 
cudaGetErrorString((cudaError_t)error), file, line); - return 1; + return internal::GPUReconstructionCUDAChkErr(error, file, line); } GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 02e8f92bb2328..f78270d40146c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,12 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUChkErrStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; - virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override { return GPUChkErrStatic(error, file, line); } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index bc1d573385598..f341a778076b8 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -32,11 +32,12 @@ using namespace o2::gpu; #include "TrackParametrizationWithError.cxx" #include "Propagator.cxx" #include "TrackLTIntegral.cxx" +#include "GPUReconstructionCUDAHelpers.inc" #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUChkErrS(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer))) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc new file mode 100644 index 0000000000000..a34f940a1337a --- /dev/null +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc @@ -0,0 +1,31 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
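// (Illustration, not part of the patch.) On the weak attribute used for the checker defined in this
// .inc file: the file is included from more than one translation unit in this patch
// (GPUReconstructionCUDA.cu and GPUReconstructionCUDAExternalProvider.cu), so a plain out-of-line
// definition would likely clash at link time; marking it weak lets the linker keep a single copy and
// allows another definition to take precedence. A stripped-down sketch of the same pattern, with
// invented names (demo.inc, a.cxx, b.cxx, demoChkErr):
//
//   // demo.inc -- included by several .cxx files of the same link unit
//   int __attribute__((weak)) demoChkErr(long error) { return error != 0; }
//
//   // a.cxx and b.cxx both do: #include "demo.inc"
//   // The linker keeps one copy of the weak demoChkErr instead of reporting a duplicate symbol.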
+ +/// \file GPUReconstructionCUDAHelpers.inc +/// \author David Rohr + +#ifndef GPURECONSTRUCTIONCUDAHELPERS_INC_H +#define GPURECONSTRUCTIONCUDAHELPERS_INC_H + +#include "GPUCommonHelpers.h" + +namespace o2::gpu::internal +{ +int32_t __attribute__((weak)) GPUReconstructionCUDAChkErr(const int64_t error, const char* file, int32_t line) +{ + if (error != cudaSuccess) { + GPUError("CUDA Error: %ld / %s (%s:%d)", error, cudaGetErrorString((cudaError_t)error), file, line); + } + return error != cudaSuccess; +} +} // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 027a9d5445b2c..c85d98d85420e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -22,7 +22,7 @@ #include #include #include -#include "GPUCommonChkErr.h" +#include "GPUCommonHelpers.h" namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f60f00c13710d..0c83223ba238a 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -138,7 +138,7 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUChkErrS(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer))) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 30f6683ff93c5..21a641c0cc7c0 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesHost.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) @@ -63,7 +63,7 @@ endif() set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip) set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) -set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h 
${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) +set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e52494937f8bf..e92205b9864e6 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -52,11 +52,10 @@ static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string - if (error == CL_SUCCESS) { - return (0); + if (error != CL_SUCCESS) { + GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); } - GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); - return 1; + return error != CL_SUCCESS; } void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index 9c8cdbe87c7c1..97316cf9aa32e 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -28,7 +28,7 @@ #include "GPUReconstructionOCL.h" #include "GPUReconstructionIncludes.h" -#include "GPUCommonChkErr.h" +#include "GPUCommonHelpers.h" using namespace o2::gpu; From 0a56fcdbe66b48e90a02c85ddd44b6ce1e32d604 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 16:15:23 +0100 Subject: [PATCH 6/8] GPU: Automatically derive GPUReconstruction backend class from preprocessor constant --- GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h | 4 ++-- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h | 1 - .../Base/opencl/GPUReconstructionOCLKernels.cxx | 1 - GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 ++++---- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index cd1180cbc9991..0b1a501ebc094 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -68,7 +68,7 @@ // GPU Host wrappers for kernel #define GPUCA_KRNL_HOST(x_class, ...) \ GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ - template <> class GPUCA_KRNL_BACKEND_CLASS::backendInternal { \ + template <> class GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::backendInternal { \ public: \ template \ static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... 
args) \ @@ -80,7 +80,7 @@ }; #define GPUCA_KRNL_PROP(x_class, x_attributes) \ - template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend() { \ + template <> gpu_reconstruction_kernels::krnlProperties GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::getKernelPropertiesBackend() { \ gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \ return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h index 7f77925ca3aaa..4ed352279fb90 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h @@ -34,6 +34,5 @@ #define GPUCA_CONSMEM_CALL me->mDeviceConstantMem, #define GPUCA_CONSMEM ((GPUConstantMem&)(*gGPUConstantMemBuffer)) #endif -#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionCUDABackend #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 4f6a8725b4be5..ce6b6553ae1f7 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -109,6 +109,5 @@ int32_t GPUReconstructionOCLBackend::AddKernels() #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ GPUCA_KRNL_PROP(x_class, x_attributes) \ template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); -#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCLBackend #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index fa85d796baeba..e6312d767a496 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -769,7 +769,7 @@ struct MergeBorderTracks_compMin { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { thrust::device_ptr p(range); ThrustVolatileAsyncAllocator alloc(this); @@ -1873,7 +1873,7 @@ struct GPUTPCGMMergerSortTracksQPt_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); ThrustVolatileAsyncAllocator alloc(this); @@ -1881,7 +1881,7 @@ inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); ThrustVolatileAsyncAllocator alloc(this); @@ -2106,7 +2106,7 @@ struct GPUTPCGMMergerMergeLoopers_comp { } // namespace o2::gpu::internal template 
<> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); ThrustVolatileAsyncAllocator alloc(this); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 45293bae9820b..13f204d0f940a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -102,7 +102,7 @@ struct GPUTPCGMO2OutputSort_comp { }; template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); ThrustVolatileAsyncAllocator alloc(this); From 1a4806d96c5ec4a053deda3f2d89611834b65234 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 16:33:57 +0100 Subject: [PATCH 7/8] GPU: Plenty of clang-format fixes --- GPU/GPUTracking/Base/GPUParam.h | 8 +-- GPU/GPUTracking/Base/GPUReconstruction.h | 4 +- .../Base/GPUReconstructionConvert.cxx | 2 +- .../Base/GPUReconstructionProcessing.h | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/DataTypes/GPUOutputControl.h | 2 +- GPU/GPUTracking/DataTypes/GPUSettings.h | 6 +- GPU/GPUTracking/DataTypes/GPUTRDTrack.h | 6 +- GPU/GPUTracking/Global/GPUChain.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.h | 2 +- .../Global/GPUChainTrackingClusterizer.cxx | 3 +- .../Global/GPUChainTrackingRefit.cxx | 4 +- GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 +- .../GPUO2InterfaceConfigurableParam.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 16 ++--- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 6 +- .../Merger/GPUTPCGMPolynomialFieldManager.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- .../SectorTracker/GPUTPCTracklet.h | 2 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 12 ++-- .../GPUTPCCFNoiseSuppression.cxx | 4 +- .../TRDTracking/GPUTRDInterfaces.h | 4 +- GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h | 18 +++--- GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 24 ++++--- GPU/GPUTracking/TRDTracking/GPUTRDTracker.h | 62 +++++++++---------- .../TRDTracking/GPUTRDTrackletWord.h | 2 +- .../display/frontend/GPUDisplayFrontend.h | 8 +-- .../frontend/GPUDisplayFrontendWindows.cxx | 12 ++-- GPU/GPUTracking/qa/GPUQAHelper.h | 6 +- GPU/GPUTracking/qa/genEvents.h | 2 +- GPU/GPUTracking/utils/qconfig.cxx | 8 +-- GPU/GPUTracking/utils/threadserver.h | 4 +- GPU/GPUbenchmark/cuda/Kernels.cu | 6 +- GPU/TPCFastTransformation/BandMatrixSolver.h | 2 +- GPU/TPCFastTransformation/ChebyshevFit1D.cxx | 4 +- .../CorrectionMapsHelper.h | 28 ++++----- .../NDPiecewisePolynomials.h | 14 ++--- GPU/TPCFastTransformation/Spline1DHelper.cxx | 8 +-- GPU/TPCFastTransformation/Spline1DHelperOld.h | 20 +++--- GPU/TPCFastTransformation/Spline1DSpec.cxx | 2 +- GPU/TPCFastTransformation/Spline1DSpec.h | 12 ++-- GPU/TPCFastTransformation/Spline2DSpec.cxx | 2 +- GPU/TPCFastTransformation/SplineHelper.cxx | 44 ++++++------- GPU/TPCFastTransformation/SplineHelper.h | 6 +- GPU/TPCFastTransformation/SplineSpec.h | 14 ++--- .../TPCFastSpaceChargeCorrection.h | 2 +- .../IrregularSpline2D3DCalibrator.cxx | 2 +- .../devtools/RegularSpline1D.h | 12 ++-- 
.../devtools/SemiregularSpline2D3D.cxx | 8 +-- .../devtools/SemiregularSpline2D3D.h | 32 +++++----- .../test/testMultivarPolynomials.cxx | 4 +- GPU/Utils/FlatObject.h | 12 ++-- GPU/Workflow/helper/src/GPUWorkflowHelper.cxx | 26 ++++---- 54 files changed, 250 insertions(+), 255 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 9bdf705dfeb59..fbce6246de112 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -59,10 +59,10 @@ struct GPUParam_t { int32_t continuousMaxTimeBin; int32_t tpcCutTimeBin; - GPUTPCGeometry tpcGeometry; // TPC Geometry - GPUTPCGMPolynomialField polynomialField; // Polynomial approx. of magnetic field for TPC GM - const uint32_t* occupancyMap; // Ptr to TPC occupancy map - uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) + GPUTPCGeometry tpcGeometry; // TPC Geometry + GPUTPCGMPolynomialField polynomialField; // Polynomial approx. of magnetic field for TPC GM + const uint32_t* occupancyMap; // Ptr to TPC occupancy map + uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) GPUParamSector SectorParam[GPUCA_NSECTORS]; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index f363f3f58aa6f..5e03c77f08230 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -52,7 +52,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent; class threadContext; -} +} // namespace gpu_reconstruction_kernels class GPUReconstruction { @@ -193,7 +193,7 @@ class GPUReconstruction bool IsInitialized() const { return mInitialized; } void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr); void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr); - void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() + void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() void SetDebugLevelTmp(int32_t level) { mProcessingSettings.debugLevel = level; } // Temporarily, before calling SetSettings() void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr); void UpdateDynamicSettings(const GPUSettingsRecDynamic* d); diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index 8f5cab6807050..bc760f6188caa 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -923,7 +923,7 @@ void zsEncoderDenseLinkBased::decodePage(std::vector& outputBuff if (decLinkX & 0b00100000) { bitmaskL2.set(); } else { - bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t) * ((const uint8_t*)decPagePtr)); + bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t)*((const uint8_t*)decPagePtr)); decPagePtr += sizeof(uint8_t); } diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 4ccfb9ff10311..43560616782db 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -28,7 +28,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent { constexpr deviceEvent() = default; - constexpr 
deviceEvent(std::nullptr_t p) : v(nullptr){}; + constexpr deviceEvent(std::nullptr_t p) : v(nullptr) {}; template void set(T val) { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d0d5ef4680fac..202edd49bc44c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -496,7 +496,7 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, for (int32_t k = 0; k < nEvents; k++) { GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); } - GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : (toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost), mInternals->Streams[stream])); } if (ev) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 0 : stream])); diff --git a/GPU/GPUTracking/DataTypes/GPUOutputControl.h b/GPU/GPUTracking/DataTypes/GPUOutputControl.h index 799fd25330ab4..0495f7ed1d0ff 100644 --- a/GPU/GPUTracking/DataTypes/GPUOutputControl.h +++ b/GPU/GPUTracking/DataTypes/GPUOutputControl.h @@ -78,7 +78,7 @@ struct GPUTrackingOutputs { static constexpr size_t count() { return sizeof(GPUTrackingOutputs) / sizeof(GPUOutputControl); } GPUOutputControl* asArray() { return (GPUOutputControl*)this; } size_t getIndex(const GPUOutputControl& v) { return &v - (const GPUOutputControl*)this; } - static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::*v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } + static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::* v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index 05888770ef9e5..c81a8e20e9926 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -73,9 +73,9 @@ struct GPUSettingsTF { // Settings defining the setup of the GPUReconstruction processing (basically selecting the device / class instance) struct GPUSettingsDeviceBackend { - uint32_t deviceType = GPUDataTypes::DeviceType::CPU; // Device type, shall use GPUDataTypes::DEVICE_TYPE constants, e.g. CPU / CUDA - uint8_t forceDeviceType = 1; // Fail if device initialization fails, otherwise falls back to CPU - GPUReconstruction* master = nullptr; // GPUReconstruction master object + uint32_t deviceType = GPUDataTypes::DeviceType::CPU; // Device type, shall use GPUDataTypes::DEVICE_TYPE constants, e.g. CPU / CUDA + uint8_t forceDeviceType = 1; // Fail if device initialization fails, otherwise falls back to CPU + GPUReconstruction* master = nullptr; // GPUReconstruction master object }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h index 18f7c61e01fc3..b358e8b82d480 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h @@ -107,9 +107,9 @@ class GPUTRDTrack_t : public T GPUd() void setHasPadrowCrossing() { mIsCrossingNeighbor |= (1U << 7); } protected: - float mChi2; // total chi2. 
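// (Illustration, not part of the patch.) Standalone sketch of the pointer-to-member arithmetic used
// in GPUTrackingOutputs::getIndex() a few hunks above: the member's offset is measured against an
// arbitrary fake base address and turned into an index, relying on the members being laid out like a
// flat array. Outputs and indexOf are invented names; this mirrors, not replaces, the patch's code.
#include <cassert>
#include <cstdint>

struct Outputs {
  int32_t clustersNative, tpcTracks, compressedClusters; // stand-ins for the GPUOutputControl members
};

static int32_t indexOf(int32_t Outputs::* m)
{
  // Only the difference of the two addresses matters; 0x10000 is an arbitrary non-null base,
  // mirroring the (GPUTrackingOutputs*)(0x10000) trick in the patch (not strictly portable C++).
  return &(((Outputs*)0x10000)->*m) - (int32_t*)0x10000;
}

int main()
{
  assert(indexOf(&Outputs::clustersNative) == 0);
  assert(indexOf(&Outputs::tpcTracks) == 1);
  assert(indexOf(&Outputs::compressedClusters) == 2);
  return 0;
}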
- float mSignal{-1.f}; // electron Likelihood for track - uint32_t mRefGlobalTrackId; // raw GlobalTrackID of the seeding track (either ITS-TPC or TPC) + float mChi2; // total chi2. + float mSignal{-1.f}; // electron Likelihood for track + uint32_t mRefGlobalTrackId; // raw GlobalTrackID of the seeding track (either ITS-TPC or TPC) int32_t mAttachedTracklets[kNLayers]; // indices of the tracklets attached to this track; -1 means no tracklet in that layer int16_t mCollisionId; // the collision ID of the tracklets attached to this track; is used to retrieve the BC information for this track after the tracking is done uint8_t mFlags; // bits 0 to 5 indicate whether track is findable in layer 0 to 5, bit 6 indicates an ambiguous track and bit 7 flags if the track is stopped in the TRD diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index fff5d2efe0270..290ae32cafca8 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -46,7 +46,7 @@ class GPUChain virtual int32_t Finalize() = 0; virtual int32_t RunChain() = 0; virtual void MemorySize(size_t& gpuMem, size_t& pageLockedHostMem) = 0; - virtual void PrintMemoryStatistics(){}; + virtual void PrintMemoryStatistics() {}; virtual int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr) { return 0; } virtual bool SupportsDoublePipeline() { return false; } virtual int32_t FinalizePipelinedProcessing() { return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 492ee65d1c9c1..5779cec31130c 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -43,7 +43,7 @@ class MatLayerCylSet; namespace o2::gpu { -//class GPUTRDTrackerGPU; +// class GPUTRDTrackerGPU; class GPUTPCGPUTracker; class GPUDisplayInterface; class GPUQA; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 63d56da37595b..a48050a6cacbc 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -154,8 +154,7 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t endpointAdcSamples[GPUTrackingInOutZS::NENDPOINTS]; memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples)); bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) - : 0; + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? 
o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0); for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { #ifndef GPUCA_NO_VC diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 9e7085b31849e..8d1efd7011227 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -31,13 +31,13 @@ int32_t GPUChainTracking::RunRefit() RefitShadow.SetPropagator(doGPU ? processorsShadow()->calibObjects.o2Propagator : GetO2Propagator()); RefitShadow.mPTracks = (doGPU ? processorsShadow() : processors())->tpcMerger.OutputTracks(); WriteToConstantMemory(RecoStep::Refit, (char*)&processors()->trackingRefit - (char*)processors(), &RefitShadow, sizeof(RefitShadow), 0); - //TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); + // TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); if (param().rec.trackingRefitGPUModel) { runKernel(GetGrid(mIOPtrs.nMergedTracks, 0)); } else { runKernel(GetGrid(mIOPtrs.nMergedTracks, 0)); } - //TransferMemoryResourcesToHost(RecoStep::Refit, &Refit, 0); + // TransferMemoryResourcesToHost(RecoStep::Refit, &Refit, 0); SynchronizeStream(0); return 0; } diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 34cd5b7280dc3..4dac56afed671 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -46,7 +46,7 @@ struct GPUO2Interface_Internals { }; } // namespace o2::gpu -GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals){}; +GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals) {}; GPUO2Interface::~GPUO2Interface() { Deinitialize(); } diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h index 425c8b880b4e3..ebb426b7a8cfe 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h @@ -50,7 +50,7 @@ #define AddSubConfig(name, instance) #define BeginSubConfig(name, instance, parent, preoptname, preoptnameshort, descr, o2prefix) \ struct GPUCA_M_CAT(GPUConfigurableParam, name) : public o2::conf::ConfigurableParamHelper { \ - O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: + O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: #define BeginHiddenConfig(name, instance) struct GPUCA_M_CAT(GPUConfigurableParam, name) { #define EndConfig() \ } \ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 64d8549312736..578fe1eeb4ca7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -106,17 +106,17 @@ class GPUTPCGMMergedTrack GPUd() gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() { return mOuterParam; } private: - GPUTPCGMTrackParam mParam; //* fitted track parameters + GPUTPCGMTrackParam mParam; //* fitted track parameters gputpcgmmergertypes::GPUTPCOuterParam mOuterParam; //* outer param - float mAlpha; //* alpha angle - float mLastX; //* outer X - float mLastY; //* outer Y - float mLastZ; //* outer Z - uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays + float mAlpha; //* alpha angle + float mLastX; //* outer X 
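Aside on the firstHBF change in the GPUChainTrackingClusterizer.cxx hunk above: it only adds parentheses around the trailing conditional, and since ?: is right-associative the grouping (and result) is unchanged. A small standalone check of that equivalence with placeholder names and values, not the real TF settings:

#include <cassert>

// hasStart/startOrbit/hasZS/zsOrbit are illustrative parameters only.
int pickFirstOrbit(bool hasStart, int startOrbit, bool hasZS, int zsOrbit)
{
  // Un-parenthesized form: ?: associates to the right, so the second
  // conditional is the "else" branch of the first one.
  int a = hasStart ? startOrbit : hasZS ? zsOrbit : 0;
  // Fully parenthesized form, as written after the reformat.
  int b = hasStart ? startOrbit : (hasZS ? zsOrbit : 0);
  assert(a == b); // same grouping, same result
  return b;
}

int main()
{
  return pickFirstOrbit(false, 42, true, 7) == 7 ? 0 : 1;
}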
+ float mLastY; //* outer Y + float mLastZ; //* outer Z + uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays // TODO: Change to 8 bit - uint32_t mNClusters; //* number of track clusters - uint32_t mNClustersFitted; //* number of clusters used in fit + uint32_t mNClusters; //* number of track clusters + uint32_t mNClustersFitted; //* number of clusters used in fit uint8_t mFlags; uint8_t mLegs; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index e6312d767a496..1c2a8e2b29a9c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -2140,7 +2140,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, } const float d2xy = CAMath::Sum2(params[i].x - params[j].x, params[i].y - params[j].y); if (d2xy > 15.f) { - //bs |= 1; + // bs |= 1; continue; } const auto& trk1 = mOutputTracks[params[i].id]; @@ -2148,7 +2148,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const auto& param1 = trk1.GetParam(); const auto& param2 = trk2.GetParam(); if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) { - //bs |= 2; + // bs |= 2; continue; } @@ -2170,7 +2170,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, } } if (!dzcorrok) { - //bs |= 4; + // bs |= 4; continue; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h index 88f0882a79f03..4a608fcc97068 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h @@ -39,11 +39,11 @@ class GPUTPCGMPolynomialFieldManager GPUTPCGMPolynomialFieldManager() = default; /* Get appropriate pre-calculated polynomial field for the given field value nominalFieldkG - */ + */ static int32_t GetPolynomialField(float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); /* Get pre-calculated polynomial field of type "type", scaled with respect to nominalFieldkG - */ + */ static int32_t GetPolynomialField(StoredField_t type, float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index e15d6fe8b17bd..0c171a74d4e42 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -716,7 +716,7 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict const float ImP1 = mP[1] + Ik11 * Iz1; const float ImC0 = mC[0] - Ik00 * mC[0]; const float ImC2 = mC[2] - Ik11 * mC[2]; - //printf("\t%21sInterpo ----- abde artaf%16s Y %8.3f, Z %8.3f (Errors %f <-- (%f, %f) %f <-- (%f, %f))\n", "", "", ImP0, ImP1, sqrtf(ImC0), sqrtf(mC[0]), sqrtf(inter->errorY), sqrtf(ImC2), sqrtf(mC[2]), sqrtf(inter->errorZ)); + // printf("\t%21sInterpo ----- abde artaf%16s Y %8.3f, Z %8.3f (Errors %f <-- (%f, %f) %f <-- (%f, %f))\n", "", "", ImP0, ImP1, sqrtf(ImC0), sqrtf(mC[0]), sqrtf(inter->errorY), sqrtf(ImC2), sqrtf(mC[2]), sqrtf(inter->errorZ)); const float Jz0 = posY - ImP0; const float Jz1 = posZ - ImP1; const float Jw0 = 1.f / (ImC0 + err2Y); diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 9d10d40107b8f..502a70cb57762 100644 --- 
a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -256,7 +256,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov int32_t nAvgCharge = 0; for (int32_t i = start; i != stop; i += cl ? 0 : direction) { - float x = 0, y = 0, z = 0, charge = 0; // FIXME: initialization unneeded, but GCC incorrectly produces uninitialized warnings otherwise + float x = 0, y = 0, z = 0, charge = 0; // FIXME: initialization unneeded, but GCC incorrectly produces uninitialized warnings otherwise float time = 0.f, invCharge = 0.f, invSqrtCharge = 0.f; // Same here... int32_t clusters = 0; while (true) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h index 10ff0a32aeaf3..5bb63d6a10254 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h @@ -30,7 +30,7 @@ class GPUTPCTracklet { public: #if !defined(GPUCA_GPUCODE) - GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0){}; + GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0) {}; #endif //! GPUCA_GPUCODE GPUhd() int32_t FirstRow() const { return mFirstRow; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index e7634fa397bae..f1fd95d696f5d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -101,7 +101,7 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2); for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) { // TODO: Parallelize over time bins - pagePtr += (pagePtr - page) & 1; // Ensure 16 bit alignment + pagePtr += (pagePtr - page) & 1; // Ensure 16 bit alignment const TPCZSTBHDR* tbHdr = reinterpret_cast(pagePtr); if ((tbHdr->rowMask & 0x7FFF) == 0) { pagePtr += 2; @@ -324,8 +324,8 @@ GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread( bits -= DECODE_BITS; nSamplesWritten++; rawFECChannel++; // Ensure we don't decode same channel twice - } // while (bits >= DECODE_BITS) - } // while (nSamplesWritten < nAdc) + } // while (bits >= DECODE_BITS) + } // while (nSamplesWritten < nAdc) } else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3 uint32_t rawFECChannel = 0; @@ -705,7 +705,7 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( #define PEEK_OVERFLOW(pagePtr, offset) \ (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \ - ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset)-payloadEnd) \ + ? 
nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset) - payloadEnd) \ : (pagePtr) + (offset))) #define TEST_BIT(x, bit) static_cast((x) & (1 << (bit))) @@ -931,8 +931,8 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( bits -= DECODE_BITS; nSamplesWritten++; rawFECChannel++; // Ensure we don't decode same channel twice - } // while (bits >= DECODE_BITS) - } // while (nSamplesWritten < nAdc) + } // while (bits >= DECODE_BITS) + } // while (nSamplesWritten < nAdc) assert(PayloadExtendsToNextPage || adcData <= page); assert(PayloadExtendsToNextPage || page <= payloadEnd); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx index 05fddda5bec68..f3a914cbfcaee 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx @@ -316,8 +316,8 @@ GPUd() void GPUTPCCFNoiseSuppression::findMinimaAndPeaks( uint8_t* bufp = (uint8_t*)buf; /************************************** - * Look for peaks - **************************************/ + * Look for peaks + **************************************/ CfUtils::blockLoad( peakMap, diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index 24624e60ceba7..f6b8bea29822a 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -45,7 +45,7 @@ class propagatorInterface { public: typedef o2::base::Propagator propagatorParam; - GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop){}; + GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop) {}; GPUd() propagatorInterface(const propagatorInterface&) = delete; GPUd() propagatorInterface& operator=(const propagatorInterface&) = delete; @@ -200,7 +200,7 @@ class propagatorInterface : public GPUTPCGMPropagator } GPUd() bool propagateToX(float x, float maxSnp, float maxStep) { - //bool ok = PropagateToXAlpha(x, GetAlpha(), true) == 0 ? true : false; + // bool ok = PropagateToXAlpha(x, GetAlpha(), true) == 0 ? true : false; int32_t retVal = PropagateToXAlpha(x, GetAlpha(), true); bool ok = (retVal == 0) ? 
true : false; ok = mTrack->CheckNumericalQuality(); diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h index 6a6e13fe84e36..ee7d7a30b1c55 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h @@ -18,15 +18,15 @@ #define GPUTRDTRACKDATA_H struct GPUTRDTrackDataRecord { - float mAlpha; // azimuthal angle of reference frame - float fX; // x: radial distance - float fY; // local Y-coordinate of a track (cm) - float fZ; // local Z-coordinate of a track (cm) - float mSinPhi; // local sine of the track momentum azimuthal angle - float fTgl; // tangent of the track momentum dip angle - float fq1Pt; // 1/pt (1/(GeV/c)) - float fC[15]; // covariance matrix - int32_t fTPCTrackID; // id of corresponding TPC track + float mAlpha; // azimuthal angle of reference frame + float fX; // x: radial distance + float fY; // local Y-coordinate of a track (cm) + float fZ; // local Z-coordinate of a track (cm) + float mSinPhi; // local sine of the track momentum azimuthal angle + float fTgl; // tangent of the track momentum dip angle + float fq1Pt; // 1/pt (1/(GeV/c)) + float fC[15]; // covariance matrix + int32_t fTPCTrackID; // id of corresponding TPC track int32_t fAttachedTracklets[6]; // IDs for attached tracklets sorted by layer uint8_t mIsPadrowCrossing; // bits 0 to 5 indicate whether a padrow was crossed diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index fa0711887f60f..c633f10adae38 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -12,7 +12,7 @@ /// \file GPUTRDTracker.cxx /// \author Ole Schmidt -//#define ENABLE_GPUTRDDEBUG +// #define ENABLE_GPUTRDDEBUG #define ENABLE_WARNING 0 #define ENABLE_INFO 0 @@ -326,7 +326,6 @@ GPUd() int32_t GPUTRDTracker_t::LoadTrack(const TRDTRK& trk, uint3 return (0); } - template GPUd() void GPUTRDTracker_t::DumpTracks() { @@ -439,19 +438,19 @@ GPUd() bool GPUTRDTracker_t::CalculateSpacePoints(int32_t iCollisi int32_t trkltIdxStart = trkltIdxOffset + iFirstTrackletInDet; for (int32_t trkltIdx = trkltIdxStart; trkltIdx < trkltIdxStart + nTrackletsInDet; ++trkltIdx) { int32_t trkltZbin = tracklets[trkltIdx].GetZbin(); - float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates - float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates + float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates + float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates xTrkltDet[0] = mGeo->AnodePos() + sRadialOffset; xTrkltDet[1] = tracklets[trkltIdx].GetY(); xTrkltDet[2] = pp->GetRowPos(trkltZbin) - pp->GetRowSize(trkltZbin) / 2.f - pp->GetRowPos(pp->GetNrows() / 2); - //GPUInfo("Space point local %i: x=%f, y=%f, z=%f", trkltIdx, xTrkltDet[0], xTrkltDet[1], xTrkltDet[2]); + // GPUInfo("Space point local %i: x=%f, y=%f, z=%f", trkltIdx, xTrkltDet[0], xTrkltDet[1], xTrkltDet[2]); matrix->LocalToMaster(xTrkltDet, xTrkltSec); mSpacePoints[trkltIdx].setX(xTrkltSec[0]); mSpacePoints[trkltIdx].setY(xTrkltSec[1]); mSpacePoints[trkltIdx].setZ(xTrkltSec[2]); mSpacePoints[trkltIdx].setDy(tracklets[trkltIdx].GetdY()); - //GPUInfo("Space point global %i: x=%f, y=%f, z=%f", trkltIdx, mSpacePoints[trkltIdx].getX(), mSpacePoints[trkltIdx].getY(), mSpacePoints[trkltIdx].getZ()); + // GPUInfo("Space point global %i: x=%f, y=%f, z=%f", trkltIdx, mSpacePoints[trkltIdx].getX(), mSpacePoints[trkltIdx].getY(), 
mSpacePoints[trkltIdx].getZ()); } } return result; @@ -475,10 +474,10 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK float zShiftTrk = 0.f; if (mProcessPerTimeFrame) { zShiftTrk = (mTrackAttribs[iTrk].mTime - GetConstantMem()->ioPtrs.trdTriggerTimes[collisionId]) * mTPCVdrift * mTrackAttribs[iTrk].mSide; - //float addZerr = (mTrackAttribs[iTrk].mTimeAddMax + mTrackAttribs[iTrk].mTimeSubMax) * .5f * mTPCVdrift; - // increase Z error based on time window - // -> this is here since it was done before, but the efficiency seems to be better if the covariance is not updated (more tracklets are attached) - //t->updateCovZ2(addZerr * addZerr); // TODO check again once detailed performance study tools are available, maybe this can be tuned + // float addZerr = (mTrackAttribs[iTrk].mTimeAddMax + mTrackAttribs[iTrk].mTimeSubMax) * .5f * mTPCVdrift; + // increase Z error based on time window + // -> this is here since it was done before, but the efficiency seems to be better if the covariance is not updated (more tracklets are attached) + // t->updateCovZ2(addZerr * addZerr); // TODO check again once detailed performance study tools are available, maybe this can be tuned } const GPUTRDpadPlane* pad = nullptr; const GPUTRDTrackletWord* tracklets = GetConstantMem()->ioPtrs.trdTracklets; @@ -637,7 +636,7 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK } Hypothesis hypo(trkWork->getNlayersFindable(), iCandidate, trkltIdx, trkWork->getChi2() + chi2); InsertHypothesis(hypo, nCurrHypothesis, hypothesisIdxOffset); - } // end tracklet in window + } // end tracklet in window } // tracklet loop } // chamber loop @@ -723,7 +722,7 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK #ifdef ENABLE_GPUTRDDEBUG prop->setTrack(&trackNoUp); prop->rotate(GetAlphaOfSector(trkltSec)); - //prop->propagateToX(spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getX(), .8f, 2.f); + // prop->propagateToX(spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getX(), .8f, 2.f); prop->propagateToX(mR[tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetDetector()], .8f, 2.f); prop->setTrack(trkWork); #endif @@ -1103,7 +1102,6 @@ GPUd() bool GPUTRDTracker_t::IsGeoFindable(const TRDTRK* t, const return true; } - #ifndef GPUCA_GPUCODE namespace o2::gpu { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 29a9b529b0558..431fa357e8b89 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -85,7 +85,7 @@ class GPUTRDTracker_t : public GPUProcessor int32_t mLayers; // number of layers with TRD space point int32_t mCandidateId; // to which track candidate the hypothesis belongs int32_t mTrackletId; // tracklet index to be used for update (global index within tracklet array) - float mChi2; // predicted chi2 for given space point + float mChi2; // predicted chi2 for given space point GPUd() float GetReducedChi2() { return mLayers > 0 ? 
mChi2 / mLayers : mChi2; } GPUd() Hypothesis() : mLayers(0), mCandidateId(-1), mTrackletId(-1), mChi2(9999.f) {} @@ -148,32 +148,32 @@ class GPUTRDTracker_t : public GPUProcessor GPUd() const typename PROP::propagatorParam* getPropagatorParam(); protected: - float* mR; // radial position of each TRD chamber, alignment taken into account, radial spread within chambers < 7mm - bool mIsInitialized; // flag is set upon initialization - bool mGenerateSpacePoints; // if true, only tracklets are provided as input and they will be converted into space points by the tracker - bool mProcessPerTimeFrame; // if true, tracking is done per time frame instead of on a single events basis - int16_t mNAngleHistogramBins; // number of bins per chamber for the angular difference histograms - float mAngleHistogramRange; // range of impact angles covered by each histogram - int16_t mMemoryPermanent; // memory id of permanent memory for the tracker - int16_t mMemoryTracklets; // memory id of memory for TRD tracklets - int16_t mMemoryTracks; // memory id of memory for tracks (used for i/o) - int32_t mNMaxCollisions; // max number of collisions to process (per time frame) - int32_t mNMaxTracks; // max number of tracks the tracker can handle (per event) - int32_t mNMaxSpacePoints; // max number of space points hold by the tracker (per event) - TRDTRK* mTracks; // array of trd-updated tracks - HelperTrackAttributes* mTrackAttribs; // array with additional (transient) track attributes - int32_t mNCandidates; // max. track hypothesis per layer - int32_t mNTracks; // number of TPC tracks to be matched - int32_t mNEvents; // number of processed events - int32_t mMaxBackendThreads; // maximum number of supported threads + float* mR; // radial position of each TRD chamber, alignment taken into account, radial spread within chambers < 7mm + bool mIsInitialized; // flag is set upon initialization + bool mGenerateSpacePoints; // if true, only tracklets are provided as input and they will be converted into space points by the tracker + bool mProcessPerTimeFrame; // if true, tracking is done per time frame instead of on a single events basis + int16_t mNAngleHistogramBins; // number of bins per chamber for the angular difference histograms + float mAngleHistogramRange; // range of impact angles covered by each histogram + int16_t mMemoryPermanent; // memory id of permanent memory for the tracker + int16_t mMemoryTracklets; // memory id of memory for TRD tracklets + int16_t mMemoryTracks; // memory id of memory for tracks (used for i/o) + int32_t mNMaxCollisions; // max number of collisions to process (per time frame) + int32_t mNMaxTracks; // max number of tracks the tracker can handle (per event) + int32_t mNMaxSpacePoints; // max number of space points hold by the tracker (per event) + TRDTRK* mTracks; // array of trd-updated tracks + HelperTrackAttributes* mTrackAttribs; // array with additional (transient) track attributes + int32_t mNCandidates; // max. 
track hypothesis per layer + int32_t mNTracks; // number of TPC tracks to be matched + int32_t mNEvents; // number of processed events + int32_t mMaxBackendThreads; // maximum number of supported threads // index of first tracklet for each chamber within tracklets array, last entry is total number of tracklets for given collision // the array has (kNChambers + 1) * numberOfCollisions entries // note, that for collision iColl one has to add an offset corresponding to the index of the first tracklet of iColl to the index stored in mTrackletIndexArray int32_t* mTrackletIndexArray; - Hypothesis* mHypothesis; // array with multiple track hypothesis - TRDTRK* mCandidates; // array of tracks for multiple hypothesis tracking - GPUTRDSpacePoint* mSpacePoints; // array with tracklet coordinates in global tracking frame - const GPUTRDGeometry* mGeo; // TRD geometry + Hypothesis* mHypothesis; // array with multiple track hypothesis + TRDTRK* mCandidates; // array of tracks for multiple hypothesis tracking + GPUTRDSpacePoint* mSpacePoints; // array with tracklet coordinates in global tracking frame + const GPUTRDGeometry* mGeo; // TRD geometry /// ---- error parametrization depending on magnetic field ---- float mRPhiA2; // parameterization for tracklet position resolution float mRPhiB; // parameterization for tracklet position resolution @@ -185,14 +185,14 @@ class GPUTRDTracker_t : public GPUProcessor float mAngleToDyB; // parameterization for conversion track angle -> tracklet deflection float mAngleToDyC; // parameterization for conversion track angle -> tracklet deflection /// ---- end error parametrization ---- - bool mDebugOutput; // store debug output - static constexpr const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker - float mMaxEta; // TPC tracks with higher eta are ignored - float mRoadZ; // in z, a constant search road is used - float mZCorrCoefNRC; // tracklet z-position depends linearly on track dip angle - float mTPCVdrift; // TPC drift velocity used for shifting TPC tracks along Z - float mTPCTDriftOffset; // TPC drift time additive offset - GPUTRDTrackerDebug* mDebug; // debug output + bool mDebugOutput; // store debug output + static constexpr const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker + float mMaxEta; // TPC tracks with higher eta are ignored + float mRoadZ; // in z, a constant search road is used + float mZCorrCoefNRC; // tracklet z-position depends linearly on track dip angle + float mTPCVdrift; // TPC drift velocity used for shifting TPC tracks along Z + float mTPCTDriftOffset; // TPC drift time additive offset + GPUTRDTrackerDebug* mDebug; // debug output }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index fc874070ec9b8..cd7dfb9432b93 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -82,7 +82,7 @@ namespace o2::gpu class GPUTRDTrackletWord : private o2::trd::Tracklet64 { public: - GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord){}; + GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord) {}; GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() GPUTRDTrackletWord& operator=(const 
GPUTRDTrackletWord& rhs) = default; GPUdDefault() ~GPUTRDTrackletWord() = default; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h index ceb63e788564a..9087ec9a431f6 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h @@ -138,12 +138,12 @@ class GPUDisplayFrontend : public GPUDisplayFrontendInterface std::unique_ptr mGUI; - void HandleKey(uint8_t key); // Callback for handling key presses - int32_t DrawGLScene(); // Callback to draw the GL scene - void HandleSendKey(); // Optional callback to handle key press from external source (e.g. stdin by default) + void HandleKey(uint8_t key); // Callback for handling key presses + int32_t DrawGLScene(); // Callback to draw the GL scene + void HandleSendKey(); // Optional callback to handle key press from external source (e.g. stdin by default) void ResizeScene(int32_t width, int32_t height); // Callback when GL window is resized int32_t InitDisplay(bool initFailure = false); // Callback to initialize the GL Display (to be called in StartDisplay) - void ExitDisplay(); // Callback to clean up the GL Display + void ExitDisplay(); // Callback to clean up the GL Display int32_t& drawTextFontSize(); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx index 8d48536e0a351..e511718e258f7 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx @@ -80,12 +80,12 @@ void KillGLWindow() // Properly Kill The Window BOOL CreateGLWindow(char* title, int32_t width, int32_t height, int32_t bits, bool fullscreenflag) { - GLuint PixelFormat; // Holds The Results After Searching For A Match - WNDCLASS wc; // Windows Class Structure - DWORD dwExStyle; // Window Extended Style - DWORD dwStyle; // Window Style - RECT WindowRect; // Grabs Rectangle Upper Left / Lower Right Values - WindowRect.left = (int64_t)0; // Set Left Value To 0 + GLuint PixelFormat; // Holds The Results After Searching For A Match + WNDCLASS wc; // Windows Class Structure + DWORD dwExStyle; // Window Extended Style + DWORD dwStyle; // Window Style + RECT WindowRect; // Grabs Rectangle Upper Left / Lower Right Values + WindowRect.left = (int64_t)0; // Set Left Value To 0 WindowRect.right = (int64_t)width; // Set Right Value To Requested Width WindowRect.top = (int64_t)0; // Set Top Value To 0 WindowRect.bottom = (int64_t)height; // Set Bottom Value To Requested Height diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index 92da6bbac94e8..a7811c6fd55ed 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -91,7 +91,7 @@ class GPUTPCTrkLbl inline U computeLabel(float* labelWeight = nullptr, float* totalWeight = nullptr, int32_t* maxCount = nullptr) { if (mLabels.size() == 0) { - return U(); //default constructor creates NotSet label + return U(); // default constructor creates NotSet label } else { uint32_t bestLabelNum = 0, bestLabelCount = 0; for (uint32_t j = 0; j < mLabels.size(); j++) { @@ -133,10 +133,10 @@ struct GPUTPCTrkLbl_ret { template GPUTPCTrkLbl_ret(T){}; #ifdef GPUCA_TPC_GEOMETRY_O2 - GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()){}; + GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()) {}; #endif #ifdef GPUCA_STANDALONE - 
GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID){}; + GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID) {}; #endif void setFakeFlag() { diff --git a/GPU/GPUTracking/qa/genEvents.h b/GPU/GPUTracking/qa/genEvents.h index fb3c5f22d61ef..43b946e6238b2 100644 --- a/GPU/GPUTracking/qa/genEvents.h +++ b/GPU/GPUTracking/qa/genEvents.h @@ -31,7 +31,7 @@ class genEvents int32_t GenerateEvent(const GPUParam& sectorParam, char* filename) { return 1; } void FinishEventGenerator() {} - static void RunEventGenerator(GPUChainTracking* rec){}; + static void RunEventGenerator(GPUChainTracking* rec) {}; }; #else diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cd6267179c844..cdb41ec5813f2 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -32,8 +32,7 @@ namespace qConfig { #define QCONFIG_SETTING(name, type) \ - struct qon_mxcat3(q, name, _t) \ - { \ + struct qon_mxcat3(q, name, _t) { \ type v; \ constexpr qon_mxcat3(q, name, _t)(type s) : v(s) {} \ }; \ @@ -41,8 +40,7 @@ namespace qConfig #define QCONFIG_SETTING_TEMPLATE(name) \ template \ - struct qon_mxcat3(q, name, _t) \ - { \ + struct qon_mxcat3(q, name, _t) { \ T v; \ constexpr qon_mxcat3(q, name, _t)(const T& s) : v(s) {} \ }; \ @@ -68,7 +66,7 @@ static inline const char* getOptName(const char** argv, int32_t i) template struct qConfigSettings { - qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false){}; + qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false) {}; template qConfigSettings(const qConfigSettings v) : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(v.message), allowEmpty(v.allowEmpty){}; bool checkMin, checkMax; diff --git a/GPU/GPUTracking/utils/threadserver.h b/GPU/GPUTracking/utils/threadserver.h index 606531f46f201..c8dfe831fd578 100644 --- a/GPU/GPUTracking/utils/threadserver.h +++ b/GPU/GPUTracking/utils/threadserver.h @@ -98,7 +98,7 @@ class qThreadCls qThreadParamCls& XthreadParam = *((qThreadParamCls*)&this->threadParam); XthreadParam.pCls = pCls; - XthreadParam.pFunc = (void (S::*)(void*))pFunc; + XthreadParam.pFunc = (void(S::*)(void*))pFunc; XthreadParam.threadNum = threadNum; XthreadParam.pinCPU = pinCPU; pthread_t thr; @@ -150,7 +150,7 @@ void* qThreadCls::qThreadWrapperCls(T* arg) sched_setaffinity(0, sizeof(tmp_mask), &tmp_mask); } - void (S::*pFunc)(T*) = (void (S::*)(T*))arg_A->pFunc; + void (S::*pFunc)(T*) = (void(S::*)(T*))arg_A->pFunc; (arg_A->pCls->*pFunc)(arg); arg_A->threadMutex[1].Unlock(); diff --git a/GPU/GPUbenchmark/cuda/Kernels.cu b/GPU/GPUbenchmark/cuda/Kernels.cu index 75799e4aa8c96..c309e7b2dbc5d 100644 --- a/GPU/GPUbenchmark/cuda/Kernels.cu +++ b/GPU/GPUbenchmark/cuda/Kernels.cu @@ -666,9 +666,9 @@ void GPUbenchmark::runTest(Test test, Mode mode, KernelConfig config) } nThreads *= mOptions.threadPoolFraction; - void (*kernel)(chunk_t*, size_t) = &gpu::read_k; // Initialising to a default value - void (*kernel_distributed)(chunk_t**, size_t*) = &gpu::read_dist_k; // Initialising to a default value - void (*kernel_rand)(chunk_t*, size_t, int32_t) = &gpu::rand_read_k; // Initialising to a default value + void (*kernel)(chunk_t*, size_t) = &gpu::read_k; // Initialising to a default value + void (*kernel_distributed)(chunk_t**, size_t*) = &gpu::read_dist_k; // Initialising to a 
default value + void (*kernel_rand)(chunk_t*, size_t, int32_t) = &gpu::rand_read_k; // Initialising to a default value void (*kernel_rand_distributed)(chunk_t**, size_t*, int32_t) = &gpu::rand_read_dist_k; // Initialising to a default value bool is_random{false}; diff --git a/GPU/TPCFastTransformation/BandMatrixSolver.h b/GPU/TPCFastTransformation/BandMatrixSolver.h index f11f538e49275..7de44fe4b85e2 100644 --- a/GPU/TPCFastTransformation/BandMatrixSolver.h +++ b/GPU/TPCFastTransformation/BandMatrixSolver.h @@ -131,7 +131,7 @@ inline void BandMatrixSolver::triangulateBlock(double AA[], double b A[0] = c; // store 1/a[0][0] double* rowi = A + BandWidthT - 1; for (int32_t i = 1; i < m; i++) { // row 0+i - double ai = c * A[i]; // A[0][i] + double ai = c * A[i]; // A[0][i] for (int32_t j = i; j < m; j++) { rowi[j] -= ai * A[j]; // A[i][j] -= A[0][j]/A[0][0]*A[i][0] } diff --git a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx index d709e5b9af92d..3edd8f8f22e55 100644 --- a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx +++ b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx @@ -71,7 +71,7 @@ void ChebyshevFit1D::fit() mA[i * mN + j] = mA[j * mN + i]; } } - //print(); + // print(); { double* Ai = mA.data(); for (int32_t i = 0; i < mN; i++, Ai += mN) { @@ -88,7 +88,7 @@ void ChebyshevFit1D::fit() } mB[j] -= c * mB[i]; } - //print(); + // print(); } } { diff --git a/GPU/TPCFastTransformation/CorrectionMapsHelper.h b/GPU/TPCFastTransformation/CorrectionMapsHelper.h index 32ff6e1f06b10..46070b36e63b2 100644 --- a/GPU/TPCFastTransformation/CorrectionMapsHelper.h +++ b/GPU/TPCFastTransformation/CorrectionMapsHelper.h @@ -165,25 +165,25 @@ class CorrectionMapsHelper MapRefBit = 0x2, LumiBit = 0x4, MapMShapeBit = 0x10 }; - bool mOwner = false; // is content of pointers owned by the helper + bool mOwner = false; // is content of pointers owned by the helper bool mLumiCTPAvailable = false; // is CTP Lumi available // these 2 are global options, must be set by the workflow global options int32_t mLumiScaleType = -1; // use CTP Lumi (1) or TPCScaler (2) for the correction scaling, 0 - no scaling int32_t mLumiScaleMode = -1; // scaling-mode of the correciton maps int32_t mUpdatedFlags = 0; - float mInstLumiCTP = 0.; // instanteneous luminosity from CTP (a.u) - float mInstLumi = 0.; // instanteneous luminosity (a.u) used for TPC corrections scaling - float mMeanLumi = 0.; // mean luminosity of the map (a.u) used for TPC corrections scaling - float mMeanLumiRef = 0.; // mean luminosity of the ref map (a.u) used for TPC corrections scaling reference - float mLumiScale = 0.; // precalculated mInstLumi/mMeanLumi - float mMeanLumiOverride = -1.f; // optional value to override mean lumi - float mMeanLumiRefOverride = -1.f; // optional value to override ref mean lumi - float mInstCTPLumiOverride = -1.f; // optional value to override inst lumi from CTP - bool mEnableMShape = false; ///< use v shape correction - bool mScaleInverse{false}; // if set to false the inverse correction is already scaled and will not scaled again - o2::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform - o2::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform - o2::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for v-shape distortions on A-side + float mInstLumiCTP = 0.; // instanteneous luminosity from CTP (a.u) + float mInstLumi = 0.; // instanteneous luminosity (a.u) used for TPC corrections scaling + float mMeanLumi = 0.; // mean luminosity of the map 
(a.u) used for TPC corrections scaling + float mMeanLumiRef = 0.; // mean luminosity of the ref map (a.u) used for TPC corrections scaling reference + float mLumiScale = 0.; // precalculated mInstLumi/mMeanLumi + float mMeanLumiOverride = -1.f; // optional value to override mean lumi + float mMeanLumiRefOverride = -1.f; // optional value to override ref mean lumi + float mInstCTPLumiOverride = -1.f; // optional value to override inst lumi from CTP + bool mEnableMShape = false; ///< use v shape correction + bool mScaleInverse{false}; // if set to false the inverse correction is already scaled and will not scaled again + o2::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform + o2::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform + o2::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for v-shape distortions on A-side ClassDefNV(CorrectionMapsHelper, 6); }; diff --git a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h index 506cd39b519af..e750bffd28f4b 100644 --- a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h +++ b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h @@ -48,13 +48,13 @@ struct NDPiecewisePolynomialContainer { /// for ROOT I/O NDPiecewisePolynomialContainer() = default; - const uint32_t mDim{}; ///< number of dimensions of the polynomial - const uint32_t mDegree{}; ///< degree of the polynomials - const std::vector mParams{}; ///< parameters of the polynomial - const bool mInteractionOnly{}; ///< consider only interaction terms - const std::vector mMin{}; ///< min vertices positions of the grid - const std::vector mMax{}; ///< max vertices positions of the grid - const std::vector mN{}; ///< number of vertices for each dimension + const uint32_t mDim{}; ///< number of dimensions of the polynomial + const uint32_t mDegree{}; ///< degree of the polynomials + const std::vector mParams{}; ///< parameters of the polynomial + const bool mInteractionOnly{}; ///< consider only interaction terms + const std::vector mMin{}; ///< min vertices positions of the grid + const std::vector mMax{}; ///< max vertices positions of the grid + const std::vector mN{}; ///< number of vertices for each dimension }; #endif diff --git a/GPU/TPCFastTransformation/Spline1DHelper.cxx b/GPU/TPCFastTransformation/Spline1DHelper.cxx index 938604bb9172d..9177c67d8b87b 100644 --- a/GPU/TPCFastTransformation/Spline1DHelper.cxx +++ b/GPU/TPCFastTransformation/Spline1DHelper.cxx @@ -606,7 +606,7 @@ int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) Spline1D spline2(spline1); spline1.approximateFunction(0., TMath::Pi(), F, nAuxiliaryPoints); - //if (itry == 0) + // if (itry == 0) { TFile outf("testSpline1D.root", "recreate"); if (outf.IsZombie()) { @@ -731,9 +731,9 @@ int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) } } // draw } - //delete canv; - //delete nt; - //delete knots; + // delete canv; + // delete nt; + // delete knots; statDf1 = sqrt(statDf1 / statN); statDf2 = sqrt(statDf2 / statN); diff --git a/GPU/TPCFastTransformation/Spline1DHelperOld.h b/GPU/TPCFastTransformation/Spline1DHelperOld.h index fc8d33ad64f87..971541e03258e 100644 --- a/GPU/TPCFastTransformation/Spline1DHelperOld.h +++ b/GPU/TPCFastTransformation/Spline1DHelperOld.h @@ -41,13 +41,13 @@ class Spline1DHelperOld /// \brief Helper structure for 1D spline construction /// struct DataPoint { - double u; ///< u coordinate - double cS0; ///< a coefficient for s0 - double cZ0; ///< a coefficient 
for s'0 - double cS1; ///< a coefficient for s1 - double cZ1; ///< a coefficient for s'1 + double u; ///< u coordinate + double cS0; ///< a coefficient for s0 + double cZ0; ///< a coefficient for s'0 + double cS1; ///< a coefficient for s1 + double cZ1; ///< a coefficient for s'1 int32_t iKnot; ///< index of the left knot of the segment - bool isKnot; ///< is the point placed at a knot + bool isKnot; ///< is the point placed at a knot }; /// _____________ Constructors / destructors __________________________ @@ -160,11 +160,11 @@ class Spline1DHelperOld /// helpers for the construction of 1D spline - Spline1D mSpline; ///< copy of the spline - int32_t mFdimensions; ///< n of F dimensions - std::vector mDataPoints; ///< measurement points + Spline1D mSpline; ///< copy of the spline + int32_t mFdimensions; ///< n of F dimensions + std::vector mDataPoints; ///< measurement points std::vector mKnotDataPoints; ///< which measurement points are at knots - std::vector mLSMmatrixFull; ///< a matrix to convert the measurements into the spline parameters with the LSM method + std::vector mLSMmatrixFull; ///< a matrix to convert the measurements into the spline parameters with the LSM method std::vector mLSMmatrixSderivatives; std::vector mLSMmatrixSvalues; diff --git a/GPU/TPCFastTransformation/Spline1DSpec.cxx b/GPU/TPCFastTransformation/Spline1DSpec.cxx index 603013d5e0808..0d33cdc88010a 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline1DSpec.cxx @@ -144,7 +144,7 @@ void Spline1DContainer::recreate(int32_t nYdim, int32_t numberOfKnots, co } } -#endif //GPUCA_GPUCODE +#endif // GPUCA_GPUCODE template void Spline1DContainer::print() const diff --git a/GPU/TPCFastTransformation/Spline1DSpec.h b/GPU/TPCFastTransformation/Spline1DSpec.h index 1ed1cc322ede3..6462f291d1136 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.h +++ b/GPU/TPCFastTransformation/Spline1DSpec.h @@ -211,13 +211,13 @@ class Spline1DContainer : public FlatObject /// _____________ Data members ____________ - int32_t mYdim = 0; ///< dimentionality of F - int32_t mNumberOfKnots = 0; ///< n knots on the grid - int32_t mUmax = 0; ///< U of the last knot - DataT mXmin = 0; ///< X of the first knot - DataT mXtoUscale = 0; ///< a scaling factor to convert X to U + int32_t mYdim = 0; ///< dimentionality of F + int32_t mNumberOfKnots = 0; ///< n knots on the grid + int32_t mUmax = 0; ///< U of the last knot + DataT mXmin = 0; ///< X of the first knot + DataT mXtoUscale = 0; ///< a scaling factor to convert X to U int32_t* mUtoKnotMap = nullptr; //! (transient!!) pointer to (integer U -> knot index) map inside the mFlatBufferPtr array - DataT* mParameters = nullptr; //! (transient!!) pointer to F-dependent parameters inside the mFlatBufferPtr array + DataT* mParameters = nullptr; //! (transient!!) 
pointer to F-dependent parameters inside the mFlatBufferPtr array ClassDefNV(Spline1DContainer, 1); }; diff --git a/GPU/TPCFastTransformation/Spline2DSpec.cxx b/GPU/TPCFastTransformation/Spline2DSpec.cxx index 4571110bdedaa..055530b9314c2 100644 --- a/GPU/TPCFastTransformation/Spline2DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline2DSpec.cxx @@ -66,7 +66,7 @@ void Spline2DContainer::setActualBufferAddress(char* actualFlatBufferPtr) mParameters = nullptr; parametersOffset = alignSize(u2Offset + mGridX2.getFlatBufferSize(), getParameterAlignmentBytes()); - //bufferSize = parametersOffset + getSizeOfParameters(); + // bufferSize = parametersOffset + getSizeOfParameters(); mParameters = reinterpret_cast(mFlatBufferPtr + parametersOffset); mGridX1.setActualBufferAddress(mFlatBufferPtr); diff --git a/GPU/TPCFastTransformation/SplineHelper.cxx b/GPU/TPCFastTransformation/SplineHelper.cxx index b0d1f4348ca60..6e1b53510e0d0 100644 --- a/GPU/TPCFastTransformation/SplineHelper.cxx +++ b/GPU/TPCFastTransformation/SplineHelper.cxx @@ -64,8 +64,8 @@ int32_t SplineHelper::pointstoarray(const int32_t indices[], const int32_ } //////////////// -//arraytopoints -// HILFSFUNKTION +// arraytopoints +// HILFSFUNKTION template int32_t SplineHelper::arraytopoints(int32_t point, int32_t result[], const int32_t numbers[], int32_t dim) { @@ -133,8 +133,8 @@ void SplineHelper::approximateFunction( } // end for all DataPoints d // END MY VERSION - //std::vector dataPointF(getNumberOfDataPoints() * mFdimensions); - //DUMYY VERSION Commented out + // std::vector dataPointF(getNumberOfDataPoints() * mFdimensions); + // DUMYY VERSION Commented out /* for (int32_t i = 0; i < getNumberOfDataPoints() * mFdimensions; i++) { dataPointF[i] = 1.; } */ @@ -250,11 +250,11 @@ void SplineHelper::approximateFunction( // TO BE REMOVED TEST: // LOG(info) << "number of paramtertypes per knot : " << numberOfParameterTypes << ", "; - std::unique_ptr allParameters[numberOfParameterTypes]; //Array for the different parametertypes s, s'u, s'v, s''uv,... + std::unique_ptr allParameters[numberOfParameterTypes]; // Array for the different parametertypes s, s'u, s'v, s''uv,... for (int32_t i = 0; i < numberOfParameterTypes; i++) { - allParameters[i] = std::unique_ptr(new double[numberOfAllDataPoints * mFdimensions]); //To-Do:Fdim!! + allParameters[i] = std::unique_ptr(new double[numberOfAllDataPoints * mFdimensions]); // To-Do:Fdim!! } - //filling allParameters[0] and FParameters with s: + // filling allParameters[0] and FParameters with s: for (int32_t i = 0; i < numberOfAllDataPoints; i++) { for (int32_t f = 0; f < mFdimensions; f++) { // for all f-dimensions allParameters[0][i * mFdimensions + f] = DataPointF[i * mFdimensions + f]; // TO DO - Just get the pointer adress there PLEASE! 
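Aside on the pointstoarray / arraytopoints helpers used throughout this SplineHelper.cxx hunk: they flatten an N-dimensional grid index into a single offset and back. A self-contained sketch of such a row-major mapping, assuming dimension 0 varies fastest (my own illustrative implementation; the real helpers may order dimensions differently):

#include <cassert>
#include <cstddef>
#include <vector>

// Flatten grid indices[d] on a grid with sizes numbers[d] into one offset,
// dimension 0 varying fastest.
int pointsToArray(const std::vector<int>& indices, const std::vector<int>& numbers)
{
  int offset = 0, stride = 1;
  for (std::size_t d = 0; d < numbers.size(); d++) {
    offset += indices[d] * stride;
    stride *= numbers[d];
  }
  return offset;
}

// Inverse mapping: recover the per-dimension indices from the flat offset.
std::vector<int> arrayToPoints(int offset, const std::vector<int>& numbers)
{
  std::vector<int> indices(numbers.size());
  for (std::size_t d = 0; d < numbers.size(); d++) {
    indices[d] = offset % numbers[d];
    offset /= numbers[d];
  }
  return indices;
}

int main()
{
  std::vector<int> sizes{4, 3, 5}; // e.g. knots per dimension
  std::vector<int> point{2, 1, 3};
  int flat = pointsToArray(point, sizes); // 2 + 1*4 + 3*12 = 42
  assert(arrayToPoints(flat, sizes) == point);
  return 0;
}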
@@ -273,24 +273,24 @@ void SplineHelper::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate KNotindices for all dimensions // WORKAROUND Getting Knotindices: knotindices[j] = p0indices[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(p0indices[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(p0indices[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } // get the knotindexvalue for FParameters: int32_t knotind = pointstoarray(knotindices, numberOfKnots, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { // for all f-dimensions get function values into Fparameters - Fparameters[knotind * numberOfParameterTypes * mFdimensions + f] = DataPointF[i * mFdimensions + f]; ///write derivatives in FParameters + Fparameters[knotind * numberOfParameterTypes * mFdimensions + f] = DataPointF[i * mFdimensions + f]; /// write derivatives in FParameters } } // end if isKnot } // end i (filling DataPointF Values into allParameters[0] and FParameters) // now: allParameters[0] = dataPointF; - //Array for input DataPointF-values for Spline1D::approximateFunctionGradually(...); + // Array for input DataPointF-values for Spline1D::approximateFunctionGradually(...); std::unique_ptr dataPointF1D[mXdimensions]; for (int32_t i = 0; i < mXdimensions; i++) { dataPointF1D[i] = std::unique_ptr(new double[numberOfDataPoints[i] * mFdimensions]); // To-Do:Fdim!! For s and derivetives at all knots. } - //Array to be filled by Spline1D::approximateFunctionGradually(...); + // Array to be filled by Spline1D::approximateFunctionGradually(...); std::unique_ptr par[mXdimensions]; std::unique_ptr parD[mXdimensions]; @@ -301,7 +301,7 @@ void SplineHelper::approximateFunction( // LOG(info) << "NumberOfParameters: " << mNumberOfParameters ; - //STARTING MAIN-LOOP, for all Parametertypes: + // STARTING MAIN-LOOP, for all Parametertypes: for (int32_t p = 1; p < numberOfParameterTypes; p++) { // p = 1!! Wir kriegen s (p0) durch approximateFunction()oben int32_t dimension = 0; // find the dimension for approximation for (int32_t i = (int32_t)(log2f((float)p)); i >= 0; i--) { @@ -366,9 +366,9 @@ void SplineHelper::approximateFunction( for (int32_t i = 0; i < mXdimensions; i++) { redistributionindex[i] = startpoint[i]; } - //redistributing the derivatives at dimension-Knots into array p + // redistributing the derivatives at dimension-Knots into array p for (int32_t i = 0; i < numberOfKnots[dimension]; i++) { // for all dimension-Knots - redistributionindex[dimension] = mHelpers[dimension].getKnotDataPoint(i); //find the indices + redistributionindex[dimension] = mHelpers[dimension].getKnotDataPoint(i); // find the indices int32_t finalposition = pointstoarray(redistributionindex, numberOfDataPoints, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { @@ -380,7 +380,7 @@ void SplineHelper::approximateFunction( if (!mHelpers[j].getDataPoint(redistributionindex[j]).isKnot) { isKnot = 0; break; - } //noch mal checken!! Das muss noch anders!! + } // noch mal checken!! Das muss noch anders!! 
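Aside on the "WORKAROUND Getting Knotindices" lines in this hunk: they recover a knot index from a data-point index by integer division, which works when the data points are spread uniformly with a fixed number per knot interval. A small arithmetic sketch of that relation (the counts below are made-up examples):

#include <cstdio>

// With data points spread uniformly between knots, (nDataPoints - 1) / (nKnots - 1)
// data points fall into each knot interval, so integer division of the data-point
// index by that count recovers the knot index.
int knotIndexOfDataPoint(int iDataPoint, int nDataPoints, int nKnots)
{
  const int pointsPerInterval = (nDataPoints - 1) / (nKnots - 1);
  return iDataPoint / pointsPerInterval;
}

int main()
{
  // 13 data points over 4 knots -> 4 data points per knot interval.
  std::printf("%d %d %d\n",
              knotIndexOfDataPoint(0, 13, 4),   // 0
              knotIndexOfDataPoint(9, 13, 4),   // 2
              knotIndexOfDataPoint(12, 13, 4)); // 3
  return 0;
}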
} if (isKnot) { // for all knots @@ -388,20 +388,20 @@ void SplineHelper::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate Knotindices for all dimensions knotindices[j] = redistributionindex[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } // get the knotindexvalue for FParameters: int32_t knotind = pointstoarray(knotindices, numberOfKnots, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { - Fparameters[knotind * numberOfParameterTypes * mFdimensions + p * mFdimensions + f] = par[dimension][2 * i * mFdimensions + mFdimensions + f]; ///write derivatives in FParameters + Fparameters[knotind * numberOfParameterTypes * mFdimensions + p * mFdimensions + f] = par[dimension][2 * i * mFdimensions + mFdimensions + f]; /// write derivatives in FParameters } } } // end for all fknots (for redistribution) // recalculation: for (int32_t i = 0; i < numberOfDataPoints[dimension]; i++) { // this is somehow still redundant// TO DO: ONLY PART OF approximateFunction WHERE NDIM is considerd!! - redistributionindex[dimension] = i; // getting current datapointindices - bool isKnot = 1; // check is current datapoint a knot? + redistributionindex[dimension] = i; // getting current datapointindices + bool isKnot = 1; // check is current datapoint a knot? for (int32_t j = 0; j < mXdimensions; j++) { if (!mHelpers[j].getDataPoint(redistributionindex[j]).isKnot) { isKnot = 0; @@ -410,7 +410,7 @@ void SplineHelper::approximateFunction( } double splineF[mFdimensions]; double u = mHelpers[dimension].getDataPoint(i).u; - mHelpers[dimension].getSpline().interpolateU(mFdimensions, parD[dimension].get(), u, splineF); //recalculate at all datapoints of dimension + mHelpers[dimension].getSpline().interpolateU(mFdimensions, parD[dimension].get(), u, splineF); // recalculate at all datapoints of dimension for (int32_t dim = 0; dim < mFdimensions; dim++) { // writing it in allParameters // LOG(info)<::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate KNotindices for all dimensions knotindices[j] = redistributionindex[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } int32_t currentknotarrayindex = pointstoarray(knotindices, numberOfKnots, mXdimensions); // getting the recalculated value into FParameters: @@ -433,7 +433,7 @@ void SplineHelper::approximateFunction( } // end recalculation } // end of all1DSplines } // end of for parametertypes -} //end of approxymateFunction MYVERSION! +} // end of approxymateFunction MYVERSION! 
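Aside on the main parameter-type loop above (p = 1 .. numberOfParameterTypes - 1, with the dimension taken from the highest set bit of p via log2f): this matches an encoding where bit d of p means the knot value carries a derivative along dimension d (s, s'u, s'v, s''uv, ...). A short sketch of that enumeration, assuming this bit convention:

#include <cstdio>

// Enumerate the parameter types of an xDim-dimensional spline knot:
// bit d of p set means a derivative along dimension d is applied to s.
void listParameterTypes(int xDim)
{
  const int nTypes = 1 << xDim; // s, s'_x0, s'_x1, s''_x0x1, ...
  for (int p = 0; p < nTypes; p++) {
    std::printf("type %d: s", p);
    for (int d = 0; d < xDim; d++) {
      if (p & (1 << d)) {
        std::printf(" d/dx%d", d);
      }
    }
    std::printf("\n");
  }
}

int main()
{
  listParameterTypes(2); // 4 parameter types per knot for a 2D spline
  return 0;
}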
template int32_t SplineHelper::test(const bool draw, const bool drawDataPoints) diff --git a/GPU/TPCFastTransformation/SplineHelper.h b/GPU/TPCFastTransformation/SplineHelper.h index 986297e368aab..8c99e8113864f 100644 --- a/GPU/TPCFastTransformation/SplineHelper.h +++ b/GPU/TPCFastTransformation/SplineHelper.h @@ -106,9 +106,9 @@ class SplineHelper /// Stores an error message int32_t storeError(Int_t code, const char* msg); - TString mError = ""; ///< error string - int32_t mXdimensions; ///< number of X dimensions - int32_t mFdimensions; ///< number of F dimensions + TString mError = ""; ///< error string + int32_t mXdimensions; ///< number of X dimensions + int32_t mFdimensions; ///< number of F dimensions int32_t mNumberOfParameters; ///< number of parameters int32_t mNumberOfDataPoints; ///< number of data points std::vector> mHelpers; diff --git a/GPU/TPCFastTransformation/SplineSpec.h b/GPU/TPCFastTransformation/SplineSpec.h index dae17b22f42ea..1af427dee503b 100644 --- a/GPU/TPCFastTransformation/SplineSpec.h +++ b/GPU/TPCFastTransformation/SplineSpec.h @@ -299,7 +299,7 @@ class SplineSpec : public SplineContainer DataT iParameters[(1 << (2 * maxXdim)) * maxYdim]; // Array for all parameters - //get the indices of the "most left" Knot: + // get the indices of the "most left" Knot: int32_t indices[maxXdim]; // indices of the 'most left' knot for (int32_t i = 0; i < nXdim; i++) { @@ -309,7 +309,7 @@ class SplineSpec : public SplineContainer int32_t indicestmp[maxXdim]; for (int32_t i = 0; i < nKnotParametersPerY; i++) { // for every necessary Knot for (int32_t k = 0; k < nXdim; k++) { - indicestmp[k] = indices[k] + (i / (1 << k)) % 2; //get the knot-indices in every dimension (mirrored order binary counting) + indicestmp[k] = indices[k] + (i / (1 << k)) % 2; // get the knot-indices in every dimension (mirrored order binary counting) } int32_t index = TBase::getKnotIndex(indicestmp); // get index of the current Knot @@ -317,7 +317,7 @@ class SplineSpec : public SplineContainer iParameters[i * nKnotParameters + j] = Parameters[index * nKnotParameters + j]; } } - //now start with the interpolation loop: + // now start with the interpolation loop: constexpr auto maxInterpolations = (1 << (2 * maxXdim - 2)) * maxYdim; @@ -329,10 +329,10 @@ class SplineSpec : public SplineContainer int32_t nInterpolations = (1 << (2 * nXdim - 2)) * nYdim; int32_t nKnots = 1 << (nXdim); - for (int32_t d = 0; d < nXdim; d++) { // for every dimension - DataT* pointer[4] = {S0, D0, S1, D1}; // pointers for interpolation arrays S0, D0, S1, D1 point to Arraystart - for (int32_t i = 0; i < nKnots; i++) { // for every knot - for (int32_t j = 0; j < nKnots; j++) { // for every parametertype + for (int32_t d = 0; d < nXdim; d++) { // for every dimension + DataT* pointer[4] = {S0, D0, S1, D1}; // pointers for interpolation arrays S0, D0, S1, D1 point to Arraystart + for (int32_t i = 0; i < nKnots; i++) { // for every knot + for (int32_t j = 0; j < nKnots; j++) { // for every parametertype int32_t pointernr = 2 * (i % 2) + (j % 2); // to which array should it be delivered for (int32_t k = 0; k < nYdim; k++) { pointer[pointernr][0] = iParameters[(i * nKnots + j) * nYdim + k]; diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h index c353f3f3329e7..9589ecbfc1fc4 100644 --- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h +++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h @@ -43,7 +43,7 @@ class TPCFastSpaceChargeCorrection 
: public FlatObject /// \brief The struct contains necessary info for TPC padrow /// struct RowInfo { - int32_t splineScenarioID{0}; ///< scenario index (which of Spline2D splines to use) + int32_t splineScenarioID{0}; ///< scenario index (which of Spline2D splines to use) size_t dataOffsetBytes[3]{0}; ///< offset for the spline data withing a TPC slice ClassDefNV(RowInfo, 1); }; diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx index 27500d12d9d5d..7eea34c19ec25 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx @@ -428,7 +428,7 @@ double IrregularSpline2D3DCalibrator::getIntegralDeviationLine(const IrregularSp double d2 = dx * dx + dy * dy + dz * dz; sum += sqrt(d2 / 3.); } - //sum = sqrt(sum/3.); + // sum = sqrt(sum/3.); return sum; } diff --git a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h index 2398ff4cd1cbc..885b2e12eea0b 100644 --- a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h +++ b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h @@ -97,11 +97,11 @@ inline T RegularSpline1D::getSpline(const int32_t iknot1, T f0, T f1, T f2, T f3 /// The polynom is constructed with function values f0,f1,f2,f3 at knots {iknot0,iknot1,iknot2,iknot3} /// The u value supposed to be inside the [knot1,knot2] region, but also may be any. - ///f0 = f value at iknot1-1 - ///f1 = f value at iknot1 - ///f2 = f value at iknot1+1 - ///f3 = f value at iknot1+2 - ///u = u value where f(u) is searched for. + /// f0 = f value at iknot1-1 + /// f1 = f value at iknot1 + /// f2 = f value at iknot1+1 + /// f3 = f value at iknot1+2 + /// u = u value where f(u) is searched for. f0 -= f1; f2 -= f1; @@ -153,7 +153,7 @@ inline double RegularSpline1D::knotIndexToU(int32_t iknot) const inline int32_t RegularSpline1D::getKnotIndex(float u) const { - //index is just u elem [0, 1] * numberOfKnots and then floored. (so the "left" coordinate beside u gets chosen) + // index is just u elem [0, 1] * numberOfKnots and then floored. (so the "left" coordinate beside u gets chosen) int32_t index = (int32_t)(u * (mNumberOfKnots - 1)); if (index <= 1) { index = 1; diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx index 076e4ee0ed780..c030bae650414 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx @@ -110,7 +110,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int FlatObject::startConstruction(); - //construct regular grid for v + // construct regular grid for v mGridV.construct(numberOfRows); // For each x element numbersOfKnots may be a single RegularSpline1D with x knots. 
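Editor's note (not part of the patch): the RegularSpline1D hunks above describe a 1D cubic built from four equidistant values f0..f3 around the interval [knot1, knot2], with the knot index obtained as floor(u * (nKnots - 1)) and clamped so that the stencil stays inside the grid. The standalone C++ sketch below only illustrates that scheme; the Catmull-Rom basis, the upper clamp, and all names in it are assumptions for illustration, not the actual RegularSpline1D implementation (only the f0..f3 layout and the lower clamp are visible in the hunks).

#include <algorithm>
#include <cstdio>
#include <vector>

// Map u in [0,1] to the left knot index of the interpolation interval,
// clamped to [1, nKnots - 3] so that the stencil i-1 .. i+2 stays inside the grid (upper clamp assumed).
static int knotIndex(float u, int nKnots)
{
  int i = static_cast<int>(u * (nKnots - 1));
  return std::clamp(i, 1, nKnots - 3);
}

// Cubic through four equidistant samples f0..f3 (Catmull-Rom form, assumed basis),
// evaluated at local coordinate t in [0,1] between f1 and f2.
static float cubic4(float f0, float f1, float f2, float f3, float t)
{
  return 0.5f * (2.f * f1 + (f2 - f0) * t + (2.f * f0 - 5.f * f1 + 4.f * f2 - f3) * t * t +
                 (3.f * (f1 - f2) + f3 - f0) * t * t * t);
}

int main()
{
  std::vector<float> f = {0.f, 1.f, 4.f, 9.f, 16.f, 25.f}; // values at 6 equidistant knots on [0,1]
  const int n = static_cast<int>(f.size());
  const float u = 0.37f;
  const int i = knotIndex(u, n);
  const float t = u * (n - 1) - i; // local coordinate inside the interval [knot i, knot i+1]
  std::printf("u=%.2f -> left knot %d, f(u)=%.4f\n", u, i, cubic4(f[i - 1], f[i], f[i + 1], f[i + 2], t));
  return 0;
}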
@@ -128,7 +128,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int // this is the space which is taken just by the RegularSpline1D's mDataIndexMapOffset = numberOfRows * sizeof(RegularSpline1D); - //The buffer size is the size of the array + // The buffer size is the size of the array FlatObject::finishConstruction(mDataIndexMapOffset + numberOfRows * sizeof(int32_t)); // Array for the 1D-Splines inside the buffer @@ -146,7 +146,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int numberOfKnots += knotsU; } - //save the numberOfRows and numberOfKnots + // save the numberOfRows and numberOfKnots mNumberOfRows = numberOfRows; mNumberOfKnots = numberOfKnots; @@ -156,7 +156,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int // this will count the amount of u-knots "under" a v-coordinate int32_t uSum = 0; - //count the amount of knots which are in gridU's lower than i + // count the number of knots in all gridUs with index lower than i for (int32_t dv = 0; dv < mNumberOfRows; dv++) { dataIndexMap[dv] = uSum; uSum += numbersOfKnots[dv]; diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h index 4da954c8096ac..954738fa74f1b 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h @@ -122,7 +122,7 @@ class SemiregularSpline2D3D : public FlatObject const RegularSpline1D& getGridV() const { return mGridV; } /// Get 1-D grid for V coordinate - //const RegularSpline1D& getGridV() const { return mGridV; } + // const RegularSpline1D& getGridV() const { return mGridV; } const RegularSpline1D& getGridU(const int32_t i) const { return getSplineArray()[i]; } /// Get u,v of i-th knot @@ -131,7 +131,7 @@ class SemiregularSpline2D3D : public FlatObject /// Get size of the mFlatBuffer data size_t getFlatBufferSize() const { return mFlatBufferSize; } - ///Gets the knot index which is the i-th knot in v-space and the j-th knot in u-space + /// Gets the knot index which is the i-th knot in v-space and the j-th knot in u-space int32_t getDataIndex(int32_t i, int32_t j) const; int32_t getDataIndex0(int32_t i, int32_t j) const; @@ -212,16 +212,16 @@ inline void SemiregularSpline2D3D::getKnotUV(int32_t iKnot, float& u, float& v) // the searched u-v-coordinates have to be in this spline. if (iKnot <= nk - 1) { - //in that case v is the current index + // in that case v is the current index v = mGridV.knotIndexToU(i); - //and u the coordinate of the given index + // and u the coordinate of the given index u = gridU.knotIndexToU(iKnot); break; } - //if iKnot is greater than number of knots the searched u-v cannot be in the current gridU - //so we search for nk less indizes and continue with the next v-coordinate + // if iKnot is greater than the number of knots, the searched u-v cannot be in the current gridU + // so we subtract nk indices and continue with the next v-coordinate iKnot -= nk; } } @@ -229,16 +229,16 @@ inline void SemiregularSpline2D3D::getKnotUV(int32_t iKnot, float& u, float& v) template inline void SemiregularSpline2D3D::correctEdges(T* data) const { - //Regular v-Grid (vertical) + // Regular v-Grid (vertical) const RegularSpline1D& gridV = getGridV(); int32_t nv = mNumberOfRows; - //EIGENTLICH V VOR U!!! - //Wegen Splines aber U vor V + // Actually V before U!!!
+ // But U before V because of the splines { // ==== left edge of U ==== - //loop through all gridUs + // loop through all gridUs for (int32_t iv = 1; iv < mNumberOfRows - 1; iv++) { T* f0 = data + getDataIndex(0, iv); T* f1 = f0 + 3; @@ -251,7 +251,7 @@ inline void SemiregularSpline2D3D::correctEdges(T* data) const } { // ==== right edge of U ==== - //loop through all gridUs + // loop through all gridUs for (int32_t iv = 1; iv < mNumberOfRows - 1; iv++) { const RegularSpline1D& gridU = getGridU(iv); int32_t nu = gridU.getNumberOfKnots(); @@ -270,8 +270,8 @@ inline void SemiregularSpline2D3D::correctEdges(T* data) const int32_t nu = gridU.getNumberOfKnots(); for (int32_t iu = 0; iu < nu; iu++) { - //f0 to f3 are the x,y,z values of 4 points in the grid along the v axis. - //Since there are no knots because of the irregularity you can get this by using the getSplineMethod. + // f0 to f3 are the x,y,z values of 4 points in the grid along the v axis. + // Since there are no knots here because of the irregularity, you can get these via the getSpline method. T* f0 = data + getDataIndex(iu, 0); float u = gridU.knotIndexToU(iu); @@ -387,7 +387,7 @@ inline void SemiregularSpline2D3D::getSpline(const T* correctedData, float u, fl dataVx[vxIndex + 2] = gridU.getSpline(ui, correctedData[dataOffset + 2], correctedData[dataOffset + 5], correctedData[dataOffset + 8], correctedData[dataOffset + 11], u); } - //return results + // return results x = mGridV.getSpline(iknotv, dataVx[0], dataVx[3], dataVx[6], dataVx[9], v); y = mGridV.getSpline(iknotv, dataVx[1], dataVx[4], dataVx[7], dataVx[10], v); z = mGridV.getSpline(iknotv, dataVx[2], dataVx[5], dataVx[8], dataVx[11], v); @@ -426,7 +426,7 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, floa */ - //workaround 1: + // workaround 1: int32_t vGridi = mGridV.getKnotIndex(v); float dataU[12]; @@ -464,7 +464,7 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, floa y = res[1]; z = res[2]; -//getSpline( correctedData, u, v, x, y, z ); +// getSpline( correctedData, u, v, x, y, z ); #else getSpline(correctedData, u, v, x, y, z); #endif diff --git a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx index a9c39e8528354..f77a55ffcc894 100644 --- a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx +++ b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx @@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(Polynomials5D_InteractionOnly) const int32_t nPar5D5DegInteraction = 32; // number of parameters const int32_t nDim = 5; // dimensions const int32_t nDegree = 5; // degree - const float abstolerance = 0.0001f; // abosulte difference between refernce to polynomial class + const float abstolerance = 0.0001f; // absolute difference between reference and polynomial class const bool interactionOnly = true; MultivariatePolynomial polCT; // compile time polynomial @@ -142,7 +142,7 @@ BOOST_AUTO_TEST_CASE(Piecewise_polynomials) const int32_t nPar5D5DegInteraction = 32; // number of parameters const int32_t nDim = 5; // dimensions const int32_t nDegree = 5; // degree - const bool interactionOnly = true; // consider only interaction terms + const bool interactionOnly = true; // consider only interaction terms // reference polynomial which will be approximated by the NDPiecewisePolynomials MultivariatePolynomial polCT; diff --git a/GPU/Utils/FlatObject.h b/GPU/Utils/FlatObject.h index eba81a2ba06a2..8e13a8dedb868 100644 --- a/GPU/Utils/FlatObject.h +++ 
b/GPU/Utils/FlatObject.h @@ -28,7 +28,7 @@ #include "GPUCommonRtypes.h" #include "GPUCommonLogger.h" -//#define GPUCA_GPUCODE // uncomment to test "GPU" mode +// #define GPUCA_GPUCODE // uncomment to test "GPU" mode namespace o2 { @@ -319,10 +319,10 @@ class FlatObject InProgress = 0x2 ///< construction started: temporary memory is reserved }; - int32_t mFlatBufferSize = 0; ///< size of the flat buffer - uint32_t mConstructionMask = ConstructionState::NotConstructed; ///< mask for constructed object members, first two bytes are used by this class - char* mFlatBufferContainer = nullptr; //[mFlatBufferSize] Optional container for the flat buffer - char* mFlatBufferPtr = nullptr; //! Pointer to the flat buffer + int32_t mFlatBufferSize = 0; ///< size of the flat buffer + uint32_t mConstructionMask = ConstructionState::NotConstructed; ///< mask for constructed object members, first two bytes are used by this class + char* mFlatBufferContainer = nullptr; //[mFlatBufferSize] Optional container for the flat buffer + char* mFlatBufferPtr = nullptr; //! Pointer to the flat buffer ClassDefNV(FlatObject, 1); }; @@ -569,7 +569,7 @@ inline void FlatObject::setFutureBufferAddress(char* futureFlatBufferPtr) mFlatBufferContainer = nullptr; } -#endif //GPUCA_GPUCODE_DEVICE +#endif // GPUCA_GPUCODE_DEVICE } // namespace gpu } // namespace o2 diff --git a/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx b/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx index 52c3421fa8eb5..a9c9b78e9847e 100644 --- a/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx +++ b/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx @@ -51,7 +51,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.itsClusterMC = ITSClsLabels; } } - //LOG(info) << "Got " << ioPtr.nItsClusters << " ITS Clusters"; + // LOG(info) << "Got " << ioPtr.nItsClusters << " ITS Clusters"; } if (maskTrk[GID::ITS] && ioPtr.nItsTracks == 0) { const auto& ITSTracksArray = recoCont.getITSTracks(); @@ -68,7 +68,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.itsTrackMC = ITSTrkLabels.data(); } } - //LOG(info) << "Got " << ioPtr.nItsTracks << " ITS Tracks"; + // LOG(info) << "Got " << ioPtr.nItsTracks << " ITS Tracks"; } if (maskTrk[GID::ITSTPC] && ioPtr.nTracksTPCITSO2 == 0) { @@ -77,7 +77,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTracksTPCITSO2 = trkITSTPC.size(); ioPtr.tracksTPCITSO2 = trkITSTPC.data(); } - //LOG(info) << "Got " << ioPtr.nTracksTPCITSO2 << " ITS-TPC Tracks"; + // LOG(info) << "Got " << ioPtr.nTracksTPCITSO2 << " ITS-TPC Tracks"; } if (maskCl[GID::TOF] && ioPtr.nTOFClusters == 0) { @@ -86,7 +86,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTOFClusters = tofClusters.size(); ioPtr.tofClusters = tofClusters.data(); } - //LOG(info) << "Got " << ioPtr.nTOFClusters << " TOF Clusters"; + // LOG(info) << "Got " << ioPtr.nTOFClusters << " TOF Clusters"; } if ((maskMatch[GID::TOF] || maskMatch[GID::ITSTPCTOF]) && ioPtr.nITSTPCTOFMatches == 0) { @@ -95,7 +95,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nITSTPCTOFMatches = itstpctofMatches.size(); ioPtr.itstpctofMatches = itstpctofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nITSTPCTOFMatches << " ITS-TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nITSTPCTOFMatches << " ITS-TPC-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::ITSTPCTRDTOF]) && ioPtr.nITSTPCTRDTOFMatches == 0) { @@ -104,7 +104,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nITSTPCTRDTOFMatches = itstpctrdtofMatches.size(); ioPtr.itstpctrdtofMatches = itstpctrdtofMatches.data(); } - //LOG(info) << "Got " << 
ioPtr.nITSTPCTRDTOFMatches << " ITS-TPC-TRD-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nITSTPCTRDTOFMatches << " ITS-TPC-TRD-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::TPCTOF]) && ioPtr.nTPCTOFMatches == 0) { @@ -113,7 +113,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTPCTOFMatches = tpctofMatches.size(); ioPtr.tpctofMatches = tpctofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::TPCTRDTOF]) && ioPtr.nTPCTRDTOFMatches == 0) { @@ -122,12 +122,12 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTPCTRDTOFMatches = tpctrdtofMatches.size(); ioPtr.tpctrdtofMatches = tpctrdtofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; } if (maskCl[GID::TRD]) { recoCont.inputsTRD->fillGPUIOPtr(&ioPtr); - //LOG(info) << "Got " << ioPtr.nTRDTracklets << " TRD Tracklets"; + // LOG(info) << "Got " << ioPtr.nTRDTracklets << " TRD Tracklets"; } if (maskTrk[GID::ITSTPCTRD] && ioPtr.nTRDTracksITSTPCTRD == 0) { @@ -136,7 +136,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTRDTracksITSTPCTRD = trdTracks.size(); ioPtr.trdTracksITSTPCTRD = trdTracks.data(); } - //LOG(info) << "Got " << ioPtr.nTRDTracksITSTPCTRD << " ITS-TPC-TRD Tracks"; + // LOG(info) << "Got " << ioPtr.nTRDTracksITSTPCTRD << " ITS-TPC-TRD Tracks"; } if (maskTrk[GID::TPCTRD] && ioPtr.nTRDTracksTPCTRD == 0) { @@ -145,12 +145,12 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTRDTracksTPCTRD = trdTracks.size(); ioPtr.trdTracksTPCTRD = trdTracks.data(); } - //LOG(info) << "Got " << ioPtr.nTRDTracksTPCTRD << " TPC-TRD Tracks"; + // LOG(info) << "Got " << ioPtr.nTRDTracksTPCTRD << " TPC-TRD Tracks"; } if (maskCl[GID::TPC] && ioPtr.clustersNative == nullptr) { ioPtr.clustersNative = &recoCont.getTPCClusters(); - //LOG(info) << "Got " << ioPtr.clustersNative->nClustersTotal << " TPC Clusters"; + // LOG(info) << "Got " << ioPtr.clustersNative->nClustersTotal << " TPC Clusters"; } if (maskTrk[GID::TPC] && ioPtr.nOutputTracksTPCO2 == 0) { @@ -176,7 +176,7 @@ std::shared_ptr GPUWorkflowHelper::fi retVal->tpcLinkTRD.resize(ioPtr.nOutputTracksTPCO2, -1); ioPtr.tpcLinkTRD = retVal->tpcLinkTRD.data(); } - //LOG(info) << "Got " << ioPtr.nOutputTracksTPCO2 << " TPC Tracks"; + // LOG(info) << "Got " << ioPtr.nOutputTracksTPCO2 << " TPC Tracks"; } auto creator = [maskTrk, &ioPtr, &recoCont, &retVal](auto& trk, GID gid, float time, float) { From 8c0bc0442ec796d002914a57cd2d53d906fb5a02 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Fri, 14 Mar 2025 19:16:01 +0000 Subject: [PATCH 8/8] Please consider the following formatting changes --- GPU/GPUTracking/Base/GPUReconstructionConvert.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstructionProcessing.h | 2 +- GPU/GPUTracking/DataTypes/GPUOutputControl.h | 2 +- GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 +- .../Interface/GPUO2InterfaceConfigurableParam.h | 2 +- GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h | 2 +- GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h | 2 +- GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h | 2 +- GPU/GPUTracking/qa/GPUQAHelper.h | 4 ++-- GPU/GPUTracking/utils/qconfig.cxx | 8 +++++--- 10 files changed, 15 insertions(+), 13 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index bc760f6188caa..8f5cab6807050 100644 --- 
a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -923,7 +923,7 @@ void zsEncoderDenseLinkBased::decodePage(std::vector& outputBuff if (decLinkX & 0b00100000) { bitmaskL2.set(); } else { - bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t)*((const uint8_t*)decPagePtr)); + bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t) * ((const uint8_t*)decPagePtr)); decPagePtr += sizeof(uint8_t); } diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 43560616782db..4ccfb9ff10311 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -28,7 +28,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent { constexpr deviceEvent() = default; - constexpr deviceEvent(std::nullptr_t p) : v(nullptr) {}; + constexpr deviceEvent(std::nullptr_t p) : v(nullptr){}; template void set(T val) { diff --git a/GPU/GPUTracking/DataTypes/GPUOutputControl.h b/GPU/GPUTracking/DataTypes/GPUOutputControl.h index 0495f7ed1d0ff..799fd25330ab4 100644 --- a/GPU/GPUTracking/DataTypes/GPUOutputControl.h +++ b/GPU/GPUTracking/DataTypes/GPUOutputControl.h @@ -78,7 +78,7 @@ struct GPUTrackingOutputs { static constexpr size_t count() { return sizeof(GPUTrackingOutputs) / sizeof(GPUOutputControl); } GPUOutputControl* asArray() { return (GPUOutputControl*)this; } size_t getIndex(const GPUOutputControl& v) { return &v - (const GPUOutputControl*)this; } - static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::* v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } + static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::*v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 4dac56afed671..34cd5b7280dc3 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -46,7 +46,7 @@ struct GPUO2Interface_Internals { }; } // namespace o2::gpu -GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals) {}; +GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals){}; GPUO2Interface::~GPUO2Interface() { Deinitialize(); } diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h index ebb426b7a8cfe..425c8b880b4e3 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h @@ -50,7 +50,7 @@ #define AddSubConfig(name, instance) #define BeginSubConfig(name, instance, parent, preoptname, preoptnameshort, descr, o2prefix) \ struct GPUCA_M_CAT(GPUConfigurableParam, name) : public o2::conf::ConfigurableParamHelper { \ - O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: + O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: #define BeginHiddenConfig(name, instance) struct GPUCA_M_CAT(GPUConfigurableParam, name) { #define EndConfig() \ } \ diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h index 5bb63d6a10254..10ff0a32aeaf3 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h +++ 
b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h @@ -30,7 +30,7 @@ class GPUTPCTracklet { public: #if !defined(GPUCA_GPUCODE) - GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0) {}; + GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0){}; #endif //! GPUCA_GPUCODE GPUhd() int32_t FirstRow() const { return mFirstRow; } diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index f6b8bea29822a..c8b5b4ba93c92 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -45,7 +45,7 @@ class propagatorInterface { public: typedef o2::base::Propagator propagatorParam; - GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop) {}; + GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop){}; GPUd() propagatorInterface(const propagatorInterface&) = delete; GPUd() propagatorInterface& operator=(const propagatorInterface&) = delete; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index cd7dfb9432b93..fc874070ec9b8 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -82,7 +82,7 @@ namespace o2::gpu class GPUTRDTrackletWord : private o2::trd::Tracklet64 { public: - GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord) {}; + GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord){}; GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() GPUTRDTrackletWord& operator=(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() ~GPUTRDTrackletWord() = default; diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index a7811c6fd55ed..c6ea27e835909 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -133,10 +133,10 @@ struct GPUTPCTrkLbl_ret { template GPUTPCTrkLbl_ret(T){}; #ifdef GPUCA_TPC_GEOMETRY_O2 - GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()) {}; + GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()){}; #endif #ifdef GPUCA_STANDALONE - GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID) {}; + GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID){}; #endif void setFakeFlag() { diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cdb41ec5813f2..cd6267179c844 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -32,7 +32,8 @@ namespace qConfig { #define QCONFIG_SETTING(name, type) \ - struct qon_mxcat3(q, name, _t) { \ + struct qon_mxcat3(q, name, _t) \ + { \ type v; \ constexpr qon_mxcat3(q, name, _t)(type s) : v(s) {} \ }; \ @@ -40,7 +41,8 @@ namespace qConfig #define QCONFIG_SETTING_TEMPLATE(name) \ template \ - struct qon_mxcat3(q, name, _t) { \ + struct qon_mxcat3(q, name, _t) \ + { \ T v; \ constexpr qon_mxcat3(q, name, _t)(const T& s) : v(s) {} \ }; \ @@ -66,7 +68,7 @@ static inline const char* getOptName(const char** argv, int32_t i) template struct qConfigSettings { - qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false) {}; + qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false){}; template 
qConfigSettings(const qConfigSettings v) : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(v.message), allowEmpty(v.allowEmpty){}; bool checkMin, checkMax;