From 8e8965d77662daf87b270f79ac82a2225e77f932 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 18:12:30 +0100 Subject: [PATCH 1/8] GPU: Provide general GPUFailedMsg functionality also externally --- GPU/Common/CMakeLists.txt | 1 + GPU/Common/GPUCommonChkErr.h | 21 +++++++++++++++++++ GPU/Common/GPUCommonDefAPI.h | 2 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 15 +++++++++++++ GPU/GPUTracking/Base/GPUReconstruction.h | 3 +++ .../Base/GPUReconstructionDeviceBase.cxx | 6 +++++- .../Base/GPUReconstructionDeviceBase.h | 1 + .../Base/cuda/GPUReconstructionCUDA.cu | 16 ++------------ .../Base/cuda/GPUReconstructionCUDA.h | 4 ++-- .../GPUReconstructionCUDAExternalProvider.cu | 2 +- .../cuda/GPUReconstructionCUDAInternals.h | 4 +--- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 4 +++- .../Base/opencl/GPUReconstructionOCL.cxx | 17 +++------------ .../Base/opencl/GPUReconstructionOCL.h | 3 +-- .../opencl/GPUReconstructionOCLIncludesHost.h | 4 +--- 15 files changed, 61 insertions(+), 42 deletions(-) create mode 100644 GPU/Common/GPUCommonChkErr.h diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 8466035d74ef7..8b0a75679479f 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -15,6 +15,7 @@ set(HDRS_INSTALL GPUCommonAlgorithm.h GPUCommonDef.h GPUCommonDefAPI.h + GPUCommonChkErr.h GPUCommonDefSettings.h GPUCommonConstants.h GPUCommonLogger.h diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h new file mode 100644 index 0000000000000..df007b31dab64 --- /dev/null +++ b/GPU/Common/GPUCommonChkErr.h @@ -0,0 +1,21 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUCommonChkErr.h +/// \author David Rohr + +#ifndef GPUCOMMONCHKERR_H +#define GPUCOMMONCHKERR_H + +#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__, true) +#define GPUFailedMsgI(x) GPUFailedMsgA(x, __FILE__, __LINE__, false) + +#endif diff --git a/GPU/Common/GPUCommonDefAPI.h b/GPU/Common/GPUCommonDefAPI.h index 0cd3c4ebddb7f..f7efbf7e976d4 100644 --- a/GPU/Common/GPUCommonDefAPI.h +++ b/GPU/Common/GPUCommonDefAPI.h @@ -36,7 +36,7 @@ #define GPUdni() // Device function, not-to-be-inlined #define GPUdnii() inline // Device function, not-to-be-inlined on device, inlined on host #define GPUh() // Host-only function - // NOTE: All GPUd*() functions are also compiled on the host during GCC compilation. + // NOTE: All GPUd*() functions are also compiled on the host during host compilation. // The GPUh*() macros are for the rare cases of functions that you want to compile for the host during GPU compilation. // Usually, you do not need the GPUh*() versions. If in doubt, use GPUd*()! 
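// Editor's illustration of the NOTE above (not part of the patch; both functions are invented):
// a routine that kernels need is declared with GPUd() and is also compiled for the CPU during host
// compilation, whereas GPUh() is reserved for the rare host-only case described in the note.
GPUd() int32_t clampRow(int32_t row, int32_t nRows) { return row < 0 ? 0 : (row >= nRows ? nRows - 1 : row); }
GPUh() void printDebugSummary(); // host-only helper, never referenced from device code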
#define GPUhi() inline // to-be-inlined host-only function diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index df9a7380834ce..28241cb7aeec5 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -1078,6 +1078,21 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s return retVal; } +int32_t GPUReconstruction::GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError) +{ + if (error == 0 || !GPUFailedMsgInternal(error, file, line)) { + return 0; + } + if (failOnError) { + if (mInitialized && mInErrorHandling == false) { + mInErrorHandling = true; + CheckErrorCodes(false, true); + } + throw std::runtime_error("GPU Backend Failure"); + } + return 1; +} + void GPUReconstruction::DumpSettings(const char* dir) { std::string f; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 93310284d7564..1fe08d08a8058 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -143,6 +143,7 @@ class GPUReconstruction virtual void* getGPUPointer(void* ptr) { return ptr; } virtual void startGPUProfiling() {} virtual void endGPUProfiling() {} + int32_t GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError); int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr); void RunPipelineWorker(); void TerminatePipelineWorker(); @@ -246,6 +247,7 @@ class GPUReconstruction void UpdateMaxMemoryUsed(); int32_t EnqueuePipeline(bool terminate = false); GPUChain* GetNextChainInQueue(); + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { return 0; } virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0; virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; @@ -327,6 +329,7 @@ class GPUReconstruction // Others bool mInitialized = false; + bool mInErrorHandling = false; uint32_t mStatNEvents = 0; uint32_t mNEventsProcessed = 0; double mStatKernelTime = 0.; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index d1091f59b784a..b389e99a0b2bb 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -175,7 +175,11 @@ void GPUReconstructionDeviceBase::runConstantRegistrators() { auto& list = getDeviceConstantMemRegistratorsVector(); for (uint32_t i = 0; i < list.size(); i++) { - mDeviceConstantMemList.emplace_back(list[i]()); + auto* ptr = list[i](); + if (ptr == nullptr) { + GPUFatal("Error registering constant memory"); + } + mDeviceConstantMemList.emplace_back(ptr); } } diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 6cd3813ff1431..c4595bed4c3fb 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -46,6 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU virtual int32_t InitDevice_Runtime() = 0; int32_t ExitDevice() override; virtual int32_t ExitDevice_Runtime() = 0; + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override = 0; int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override; int32_t unregisterMemoryForGPU_internal(const 
void* ptr) override; void unregisterRemainingRegisteredMemory(); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d2adc3cc1fd19..40e3fa9b90eae 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -61,9 +61,9 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } } -int32_t GPUReconstructionCUDABackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) +static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); +int32_t GPUReconstructionCUDABackend::GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line) { - // Check for CUDA Error and in the case of an error display the corresponding error string if (error == cudaSuccess) { return (0); } @@ -71,18 +71,6 @@ int32_t GPUReconstructionCUDABackend::GPUFailedMsgAI(const int64_t error, const return 1; } -void GPUReconstructionCUDABackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) -{ - if (GPUFailedMsgAI(error, file, line)) { - static bool runningCallbacks = false; - if (IsInitialized() && runningCallbacks == false) { - runningCallbacks = true; - CheckErrorCodes(false, true); - } - throw std::runtime_error("CUDA Failure"); - } -} - GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) { mDeviceBackendSettings.deviceType = DeviceType::CUDA; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index dde70b9076e08..e04e14bd383d3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,13 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUFailedMsgAI(const int64_t error, const char* file, int32_t line); - void GPUFailedMsgA(const int64_t error, const char* file, int32_t line); + static int32_t GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override { return GPUFailedMsgStatic(error, file, line); } template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index 6bcafe565e930..521ca2182c9bb 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -36,7 +36,7 @@ using namespace o2::gpu; #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer) != cudaSuccess) { + if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 49c466103c593..a6d55c2d729fd 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h 
+++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -22,13 +22,11 @@ #include #include #include +#include "GPUCommonChkErr.h" namespace o2::gpu { -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - struct GPUReconstructionCUDAInternals { std::vector> kernelModules; // module for RTC compilation std::vector> kernelFunctions; // vector of ptrs to RTC kernels diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 0f8d9bf219ba6..a5ab353f3d43f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -138,7 +138,9 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - GPUReconstructionCUDA::GPUFailedMsgI(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer)); + if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + throw std::runtime_error("Could not obtain GPU constant memory symbol"); + } return retVal; }); #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index c7a8be62a12ea..6639c78b113e5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -48,28 +48,17 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } } -int32_t GPUReconstructionOCLBackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) +static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); +int32_t GPUReconstructionOCLBackend::GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error == CL_SUCCESS) { return (0); } - GPUError("OCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); + GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); return 1; } -void GPUReconstructionOCLBackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) -{ - if (GPUFailedMsgAI(error, file, line)) { - static bool runningCallbacks = false; - if (IsInitialized() && runningCallbacks == false) { - runningCallbacks = true; - CheckErrorCodes(false, true); - } - throw std::runtime_error("OpenCL Failure"); - } -} - void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() { GPUCA_GPUReconstructionUpdateDefaults(); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 5132baa444cd9..79f54274cd32c 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -39,8 +39,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t ExitDevice_Runtime() override; void UpdateAutomaticProcessingSettings() override; - int32_t GPUFailedMsgAI(const int64_t error, const char* file, int32_t line); - void GPUFailedMsgA(const int64_t error, const char* file, int32_t line); + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override; void SynchronizeGPU() override; int32_t 
DoStuckProtection(int32_t stream, deviceEvent event) override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index aec5708a80f3c..9c8cdbe87c7c1 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -28,6 +28,7 @@ #include "GPUReconstructionOCL.h" #include "GPUReconstructionIncludes.h" +#include "GPUCommonChkErr.h" using namespace o2::gpu; @@ -36,9 +37,6 @@ using namespace o2::gpu; #include #include -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - namespace o2::gpu { struct GPUReconstructionOCLInternals { From e5e4e1c76624be0056179108d514e36256e2c683 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 18:13:23 +0100 Subject: [PATCH 2/8] GPU: Rename GPUFailedMsg to GPUChkErr --- GPU/Common/GPUCommonChkErr.h | 13 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 4 +- GPU/GPUTracking/Base/GPUReconstruction.h | 4 +- .../Base/GPUReconstructionDeviceBase.h | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 154 +++++++++--------- .../Base/cuda/GPUReconstructionCUDA.h | 4 +- .../GPUReconstructionCUDAExternalProvider.cu | 2 +- .../cuda/GPUReconstructionCUDAInternals.h | 10 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 12 +- .../Base/opencl/GPUReconstructionOCL.cxx | 72 ++++---- .../Base/opencl/GPUReconstructionOCL.h | 2 +- .../opencl/GPUReconstructionOCLKernels.cxx | 16 +- 12 files changed, 152 insertions(+), 143 deletions(-) diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h index df007b31dab64..00cb9e50d302f 100644 --- a/GPU/Common/GPUCommonChkErr.h +++ b/GPU/Common/GPUCommonChkErr.h @@ -12,10 +12,19 @@ /// \file GPUCommonChkErr.h /// \author David Rohr +// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). +// In case of an error, they will print out the corresponding CUDA / HIP / OpenCL error code. +// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occurred. +// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. +// The macros must be called on a GPUReconstruction instance, e.g.: +// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } +// gpuRecObj.GPUChkErr(cudaMalloc(...)); + #ifndef GPUCOMMONCHKERR_H #define GPUCOMMONCHKERR_H -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__, true) -#define GPUFailedMsgI(x) GPUFailedMsgA(x, __FILE__, __LINE__, false) +// Please #include "GPUReconstruction.h" in your code, if you use these 2!
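// Editor's note, illustration only (not part of the patch): the macros defined below expand to a call
// of the GPUChkErrA member function, so the object they are invoked on selects the backend that decodes
// the error value. Assuming a GPUReconstruction* mRec and a hypothetical device allocation,
//   mRec->GPUChkErrI(cudaMalloc(&ptr, size))
// expands to
//   mRec->GPUChkErrA(cudaMalloc(&ptr, size), __FILE__, __LINE__, false)
// which prints the backend error string and returns 1 on failure, while the GPUChkErr form passes
// failOnError = true, runs CheckErrorCodes() once, and throws std::runtime_error("GPU Backend Failure").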
+#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) +#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) #endif diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 28241cb7aeec5..2bd4c0e937c20 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -1078,9 +1078,9 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s return retVal; } -int32_t GPUReconstruction::GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError) +int32_t GPUReconstruction::GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError) { - if (error == 0 || !GPUFailedMsgInternal(error, file, line)) { + if (error == 0 || !GPUChkErrInternal(error, file, line)) { return 0; } if (failOnError) { diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 1fe08d08a8058..f363f3f58aa6f 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -143,7 +143,7 @@ class GPUReconstruction virtual void* getGPUPointer(void* ptr) { return ptr; } virtual void startGPUProfiling() {} virtual void endGPUProfiling() {} - int32_t GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError); + int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError); int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr); void RunPipelineWorker(); void TerminatePipelineWorker(); @@ -247,7 +247,7 @@ class GPUReconstruction void UpdateMaxMemoryUsed(); int32_t EnqueuePipeline(bool terminate = false); GPUChain* GetNextChainInQueue(); - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { return 0; } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { return 0; } virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0; virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index c4595bed4c3fb..f0e19f588e0f1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -46,7 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU virtual int32_t InitDevice_Runtime() = 0; int32_t ExitDevice() override; virtual int32_t ExitDevice_Runtime() = 0; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override = 0; + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override = 0; int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override; int32_t unregisterMemoryForGPU_internal(const void* ptr) override; void unregisterRemainingRegisteredMemory(); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 40e3fa9b90eae..d30eb51bd4938 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -62,7 +62,7 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUFailedMsgStatic(const int64_t error, 
const char* file, int32_t line) +int32_t GPUReconstructionCUDABackend::GPUChkErrStatic(const int64_t error, const char* file, int32_t line) { if (error == cudaSuccess) { return (0); @@ -123,7 +123,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() cudaDeviceProp deviceProp; int32_t count, bestDevice = -1; double bestDeviceSpeed = -1, deviceSpeed; - if (GPUFailedMsgI(cudaGetDeviceCount(&count))) { + if (GPUChkErrI(cudaGetDeviceCount(&count))) { GPUError("Error getting CUDA Device Count"); return (1); } @@ -139,9 +139,9 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } size_t free, total; #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaInitDevice(i, 0, 0))) { + if (GPUChkErrI(cudaInitDevice(i, 0, 0))) { #else // HIP - if (GPUFailedMsgI(hipSetDevice(i))) { + if (GPUChkErrI(hipSetDevice(i))) { #endif if (mProcessingSettings.debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); @@ -149,21 +149,21 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() continue; } contextCreated = true; - if (GPUFailedMsgI(cudaMemGetInfo(&free, &total))) { + if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { if (mProcessingSettings.debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! Skipping it.", i); } - GPUFailedMsg(cudaDeviceReset()); + GPUChkErr(cudaDeviceReset()); continue; } if (count > 1) { - GPUFailedMsg(cudaDeviceReset()); + GPUChkErr(cudaDeviceReset()); contextCreated = false; } if (mProcessingSettings.debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } - if (GPUFailedMsgI(cudaGetDeviceProperties(&deviceProp, i))) { + if (GPUChkErrI(cudaGetDeviceProperties(&deviceProp, i))) { continue; } if (mProcessingSettings.debugLevel >= 4) { @@ -221,13 +221,13 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } if (noDevice) { if (contextCreated) { - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); } return (1); } mDeviceId = bestDevice; - GPUFailedMsgI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); + GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); if (mProcessingSettings.debugLevel >= 2) { GPUInfo("Using CUDA Device %s with Properties:", deviceProp.name); @@ -280,27 +280,27 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() #endif #ifndef __HIPCC__ // CUDA - if (contextCreated == 0 && GPUFailedMsgI(cudaInitDevice(mDeviceId, 0, 0))) { + if (contextCreated == 0 && GPUChkErrI(cudaInitDevice(mDeviceId, 0, 0))) { #else // HIP - if (contextCreated == 0 && GPUFailedMsgI(hipSetDevice(mDeviceId))) { + if (contextCreated == 0 && GPUChkErrI(hipSetDevice(mDeviceId))) { #endif GPUError("Could not set CUDA Device!"); return (1); } #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { GPUError("Error setting CUDA stack size"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } - if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? 
std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { GPUError("Error setting CUDA stack size"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } #else // HIP - if (GPUFailedMsgI(hipSetDeviceFlags(hipDeviceScheduleBlockingSync))) { + if (GPUChkErrI(hipSetDeviceFlags(hipDeviceScheduleBlockingSync))) { GPUError("Could not set HIP Device flags!"); return (1); } @@ -319,35 +319,35 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Allocating memory on GPU"); } - if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUFailedMsgI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { + if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUChkErrI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { size_t free, total; - GPUFailedMsg(cudaMemGetInfo(&free, &total)); + GPUChkErr(cudaMemGetInfo(&free, &total)); GPUError("CUDA Memory Allocation Error (trying %ld bytes, %ld available on GPU, %ld free)", (int64_t)mDeviceMemorySize, (int64_t)deviceProp.totalGlobalMem, (int64_t)free); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Allocating memory on Host"); } - if (GPUFailedMsgI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { + if (GPUChkErrI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { GPUError("Error allocating Page Locked Host Memory (trying %ld bytes)", (int64_t)mHostMemorySize); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } if (mProcessingSettings.debugLevel >= 1) { GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase); memset(mHostMemoryBase, 0xDD, mHostMemorySize); - if (GPUFailedMsgI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { + if (GPUChkErrI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { GPUError("Error during CUDA memset"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } } for (int32_t i = 0; i < mNStreams; i++) { - if (GPUFailedMsgI(cudaStreamCreateWithFlags(&mInternals->Streams[i], cudaStreamNonBlocking))) { + if (GPUChkErrI(cudaStreamCreateWithFlags(&mInternals->Streams[i], cudaStreamNonBlocking))) { GPUError("Error creating CUDA Stream"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } } @@ -365,7 +365,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() else { #define GPUCA_KRNL(x_class, ...) 
\ mInternals->kernelModules.emplace_back(std::make_unique()); \ - GPUFailedMsg(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); + GPUChkErr(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL loadKernelModules(true); @@ -382,11 +382,11 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() CUdeviceptr tmp = nullptr; // HIP just uses void* #endif size_t tmpSize = 0; - GPUFailedMsg(cuModuleGetGlobal(&tmp, &tmpSize, *mInternals->kernelModules[i], "gGPUConstantMemBuffer")); + GPUChkErr(cuModuleGetGlobal(&tmp, &tmpSize, *mInternals->kernelModules[i], "gGPUConstantMemBuffer")); mDeviceConstantMemList.emplace_back((void*)tmp); } #else - GPUFailedMsg(cudaMalloc(&devPtrConstantMem, gGPUConstantMemBufferSize)); + GPUChkErr(cudaMalloc(&devPtrConstantMem, gGPUConstantMemBufferSize)); #endif mDeviceConstantMem = (GPUConstantMem*)devPtrConstantMem; @@ -402,7 +402,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() mDeviceConstantMemList.resize(master->mDeviceConstantMemList.size()); std::copy(master->mDeviceConstantMemList.begin(), master->mDeviceConstantMemList.end(), mDeviceConstantMemList.begin()); mInternals = master->mInternals; - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); GPUInfo("CUDA Initialisation successfull (from master)"); } @@ -411,12 +411,12 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() cudaEvent_t* events = (cudaEvent_t*)mEvents[i].data(); for (uint32_t j = 0; j < mEvents[i].size(); j++) { #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaEventCreate(&events[j]))) { + if (GPUChkErrI(cudaEventCreate(&events[j]))) { #else - if (GPUFailedMsgI(hipEventCreateWithFlags(&events[j], hipEventBlockingSync))) { + if (GPUChkErrI(hipEventCreateWithFlags(&events[j], hipEventBlockingSync))) { #endif GPUError("Error creating event"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return 1; } } @@ -435,7 +435,7 @@ void GPUReconstructionCUDA::genAndLoadRTC() for (uint32_t i = 0; i < nCompile; i++) { if (mProcessingSettings.rtc.runTest != 2) { mInternals->kernelModules.emplace_back(std::make_unique()); - GPUFailedMsg(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); + GPUChkErr(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); } remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); @@ -449,33 +449,33 @@ void GPUReconstructionCUDA::genAndLoadRTC() int32_t GPUReconstructionCUDA::ExitDevice_Runtime() { // Uninitialize CUDA - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); SynchronizeGPU(); unregisterRemainingRegisteredMemory(); for (uint32_t i = 0; i < mEvents.size(); i++) { cudaEvent_t* events = (cudaEvent_t*)mEvents[i].data(); for (uint32_t j = 0; j < mEvents[i].size(); j++) { - GPUFailedMsgI(cudaEventDestroy(events[j])); + GPUChkErrI(cudaEventDestroy(events[j])); } } if (mMaster == nullptr) { - GPUFailedMsgI(cudaFree(mDeviceMemoryBase)); + GPUChkErrI(cudaFree(mDeviceMemoryBase)); #ifdef GPUCA_NO_CONSTANT_MEMORY - 
GPUFailedMsgI(cudaFree(mDeviceConstantMem)); + GPUChkErrI(cudaFree(mDeviceConstantMem)); #endif for (int32_t i = 0; i < mNStreams; i++) { - GPUFailedMsgI(cudaStreamDestroy(mInternals->Streams[i])); + GPUChkErrI(cudaStreamDestroy(mInternals->Streams[i])); } - GPUFailedMsgI(cudaFreeHost(mHostMemoryBase)); + GPUChkErrI(cudaFreeHost(mHostMemoryBase)); for (uint32_t i = 0; i < mInternals->kernelModules.size(); i++) { - GPUFailedMsg(cuModuleUnload(*mInternals->kernelModules[i])); + GPUChkErr(cuModuleUnload(*mInternals->kernelModules[i])); } - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); GPUInfo("CUDA Uninitialized"); } mDeviceMemoryBase = nullptr; @@ -491,18 +491,18 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, } if (stream == -1) { SynchronizeGPU(); - GPUFailedMsg(cudaMemcpy(dst, src, size, toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost)); + GPUChkErr(cudaMemcpy(dst, src, size, toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost)); } else { if (evList == nullptr) { nEvents = 0; } for (int32_t k = 0; k < nEvents; k++) { - GPUFailedMsg(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); } - GPUFailedMsg(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); } if (ev) { - GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 0 : stream])); + GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 
0 : stream])); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); @@ -518,13 +518,13 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s continue; } if (stream == -1) { - GPUFailedMsg(cudaMemcpy(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice)); + GPUChkErr(cudaMemcpy(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice)); } else { - GPUFailedMsg(cudaMemcpyAsync(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice, mInternals->Streams[stream])); } } if (ev && stream != -1) { - GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); + GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); @@ -533,28 +533,28 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s } void GPUReconstructionCUDA::ReleaseEvent(deviceEvent ev) {} -void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } +void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } std::unique_ptr GPUReconstructionCUDA::GetThreadContext() { - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); return GPUReconstructionProcessing::GetThreadContext(); } -void GPUReconstructionCUDA::SynchronizeGPU() { GPUFailedMsg(cudaDeviceSynchronize()); } -void GPUReconstructionCUDA::SynchronizeStream(int32_t stream) { GPUFailedMsg(cudaStreamSynchronize(mInternals->Streams[stream])); } +void GPUReconstructionCUDA::SynchronizeGPU() { GPUChkErr(cudaDeviceSynchronize()); } +void GPUReconstructionCUDA::SynchronizeStream(int32_t stream) { GPUChkErr(cudaStreamSynchronize(mInternals->Streams[stream])); } void GPUReconstructionCUDA::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(cudaEventSynchronize(evList[i].get())); + GPUChkErr(cudaEventSynchronize(evList[i].get())); } } void GPUReconstructionCUDA::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(cudaStreamWaitEvent(mInternals->Streams[stream], evList[i].get(), 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[i].get(), 0)); } } @@ -565,7 +565,7 @@ bool GPUReconstructionCUDA::IsEventDone(deviceEvent* evList, int32_t nEvents) if (retVal == cudaErrorNotReady) { return false; } - GPUFailedMsg(retVal); + GPUChkErr(retVal); } return (true); } @@ -582,7 +582,7 @@ int32_t GPUReconstructionCUDA::GPUDebug(const char* state, int32_t stream, bool if (!force && mProcessingSettings.debugLevel <= 0) { return (0); } - if (GPUFailedMsgI(stream == -1 ? cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { + if (GPUChkErrI(stream == -1 ? 
cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { GPUError("CUDA Error while synchronizing (%s) (Stream %d)", state, stream); return (1); } @@ -597,23 +597,23 @@ int32_t GPUReconstructionCUDA::registerMemoryForGPU_internal(const void* ptr, si if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Registering %zu bytes of memory for GPU", size); } - return GPUFailedMsgI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); + return GPUChkErrI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); } int32_t GPUReconstructionCUDA::unregisterMemoryForGPU_internal(const void* ptr) { - return GPUFailedMsgI(cudaHostUnregister((void*)ptr)); + return GPUChkErrI(cudaHostUnregister((void*)ptr)); } void GPUReconstructionCUDABackend::PrintKernelOccupancies() { int32_t maxBlocks = 0, threads = 0, suggestedBlocks = 0, nRegs = 0, sMem = 0; - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); for (uint32_t i = 0; i < mInternals->kernelFunctions.size(); i++) { - GPUFailedMsg(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy - GPUFailedMsg(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); - GPUFailedMsg(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); - GPUFailedMsg(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); + GPUChkErr(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy + GPUChkErr(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); + GPUChkErr(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); + GPUChkErr(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", mInternals->kernelNames[i].c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem); } } @@ -621,14 +621,14 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; -#define GPUCA_KRNL(x_class, ...) \ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ - mInternals->kernelFunctions.emplace_back(new CUfunction); \ - mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ - if (mProcessingSettings.debugLevel >= 3) { \ - GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ - } \ - GPUFailedMsg(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ +#define GPUCA_KRNL(x_class, ...) \ + getRTCkernelNum(mInternals->kernelFunctions.size()); \ + mInternals->kernelFunctions.emplace_back(new CUfunction); \ + mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ + if (mProcessingSettings.debugLevel >= 3) { \ + GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ + } \ + GPUChkErr(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? 
j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ j++; #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL @@ -644,27 +644,27 @@ int32_t GPUReconstructionCUDA::PrepareTextures() #ifdef GPUCA_USE_TEXTURES cudaChannelFormatDesc channelDescu2 = cudaCreateChannelDesc(); size_t offset; - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); + GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); cudaChannelFormatDesc channelDescu = cudaCreateChannelDesc(); - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); + GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); #endif return (0); } void GPUReconstructionCUDA::startGPUProfiling() { - GPUFailedMsg(cudaProfilerStart()); + GPUChkErr(cudaProfilerStart()); } void GPUReconstructionCUDA::endGPUProfiling() { - GPUFailedMsg(cudaProfilerStop()); + GPUChkErr(cudaProfilerStop()); } #else // HIP void* GPUReconstructionHIP::getGPUPointer(void* ptr) { void* retVal = nullptr; - GPUFailedMsg(hipHostGetDevicePointer(&retVal, ptr, 0)); + GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); return retVal; } #endif // __HIPCC__ diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index e04e14bd383d3..02e8f92bb2328 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,13 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line); + static int32_t GPUChkErrStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override { return GPUFailedMsgStatic(error, file, line); } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override { return GPUChkErrStatic(error, file, line); } template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index 521ca2182c9bb..bc1d573385598 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -36,7 +36,7 @@ using namespace o2::gpu; #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h 
b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index a6d55c2d729fd..027a9d5445b2c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -49,7 +49,7 @@ class GPUDebugTiming { if (mDo) { if (mDeviceTimers) { - mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[0].get(), mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[0].get(), mStreams[mXYZ.x.stream])); } else { mTimer.ResetStart(); } @@ -59,13 +59,13 @@ class GPUDebugTiming { if (mDo && mXYZ.t == 0.) { if (mDeviceTimers) { - mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[1].get(), mStreams[mXYZ.x.stream])); - mRec->GPUFailedMsg(cudaEventSynchronize(mDeviceTimers[1].get())); + mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[1].get(), mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaEventSynchronize(mDeviceTimers[1].get())); float v; - mRec->GPUFailedMsg(cudaEventElapsedTime(&v, mDeviceTimers[0].get(), mDeviceTimers[1].get())); + mRec->GPUChkErr(cudaEventElapsedTime(&v, mDeviceTimers[0].get(), mDeviceTimers[1].get())); mXYZ.t = v * 1.e-3f; } else { - mRec->GPUFailedMsg(cudaStreamSynchronize(mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaStreamSynchronize(mStreams[mXYZ.x.stream])); mXYZ.t = mTimer.GetCurrentElapsedTime(); } } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index a5ab353f3d43f..f60f00c13710d 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -34,7 +34,7 @@ __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUC template <> inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - GPUFailedMsg(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); + GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); } template @@ -56,7 +56,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet #endif pArgs[arg_offset] = &y.index; GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); - GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); + GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -67,16 +67,16 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgsStreams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); } } { GPUDebugTiming timer(mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); std::apply([this, &args](auto&... 
vals) { this->runKernelBackendInternal(args.s, vals...); }, args.v); } - GPUFailedMsg(cudaGetLastError()); + GPUChkErr(cudaGetLastError()); if (z.ev) { - GPUFailedMsg(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream])); + GPUChkErr(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream])); } } @@ -138,7 +138,7 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 6639c78b113e5..e52494937f8bf 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -49,7 +49,7 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); -int32_t GPUReconstructionOCLBackend::GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const +int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error == CL_SUCCESS) { @@ -69,7 +69,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() if (mMaster == nullptr) { cl_int ocl_error; cl_uint num_platforms; - if (GPUFailedMsgI(clGetPlatformIDs(0, nullptr, &num_platforms))) { + if (GPUChkErrI(clGetPlatformIDs(0, nullptr, &num_platforms))) { GPUErrorReturn("Error getting OpenCL Platform Count"); } if (num_platforms == 0) { @@ -82,7 +82,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() // Query platforms and devices std::unique_ptr platforms; platforms.reset(new cl_platform_id[num_platforms]); - if (GPUFailedMsgI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) { + if (GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) { GPUErrorReturn("Error getting OpenCL Platforms"); } @@ -227,7 +227,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUErrorReturn("Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation"); } mInternals->platform = platforms[bestPlatform]; - GPUFailedMsg(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr)); + GPUChkErr(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr)); mInternals->device = devices[bestDevice]; queryDevice(mInternals->device); @@ -267,7 +267,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mMaxBackendThreads = std::max(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount); mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Could not create OPENCL Device Context!"); } @@ -280,13 +280,13 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } mInternals->mem_gpu = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE, mDeviceMemorySize, nullptr, 
&ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { clReleaseContext(mInternals->context); GPUErrorReturn("OPENCL Memory Allocation Error"); } mInternals->mem_constant = clCreateBuffer(mInternals->context, CL_MEM_READ_ONLY, gGPUConstantMemBufferSize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { clReleaseMemObject(mInternals->mem_gpu); clReleaseContext(mInternals->context); GPUErrorReturn("OPENCL Constant Memory Allocation Error"); @@ -314,44 +314,44 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() #else mInternals->command_queue[i] = clCreateCommandQueue(mInternals->context, mInternals->device, 0, &ocl_error); #endif - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating OpenCL command queue"); } } - if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) { + if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) { GPUErrorReturn("Error migrating buffer"); } - if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) { + if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) { GPUErrorReturn("Error migrating buffer"); } mInternals->mem_host = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mHostMemorySize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error allocating pinned host memory"); } const char* krnlGetPtr = "__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}"; cl_program program = clCreateProgramWithSource(mInternals->context, 1, (const char**)&krnlGetPtr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating program object"); } ocl_error = clBuildProgram(program, 1, &mInternals->device, "", nullptr, nullptr); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { char build_log[16384]; clGetProgramBuildInfo(program, mInternals->device, CL_PROGRAM_BUILD_LOG, 16384, build_log, nullptr); GPUImportant("Build Log:\n\n%s\n\n", build_log); GPUErrorReturn("Error compiling program"); } cl_kernel kernel = clCreateKernel(program, "krnlGetPtr", &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating kernel"); } - if (GPUFailedMsgI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) || - GPUFailedMsgI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) || - GPUFailedMsgI(clFinish(mInternals->command_queue[0])) || - GPUFailedMsgI(clReleaseKernel(kernel)) || - GPUFailedMsgI(clReleaseProgram(program))) { + if (GPUChkErrI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) || + GPUChkErrI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) || + GPUChkErrI(clFinish(mInternals->command_queue[0])) || + GPUChkErrI(clReleaseKernel(kernel)) || + GPUChkErrI(clReleaseProgram(program))) { GPUErrorReturn("Error obtaining device memory ptr"); } @@ -359,7 +359,7 @@ int32_t 
GPUReconstructionOCLBackend::InitDevice_Runtime() GPUInfo("Mapping hostmemory"); } mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error allocating Page Locked Host Memory"); } @@ -435,14 +435,14 @@ size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t } if (size == 0) { if (ev || nEvents) { // Workaround for OCL runtimes, which can throw an error in case size = 0 - GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList(), ev->getEventList())); } } else if (toGPU == -2) { - GPUFailedMsg(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList(), ev->getEventList())); } else if (toGPU) { - GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList(), ev->getEventList())); } else { - GPUFailedMsg(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); @@ -455,16 +455,16 @@ size_t GPUReconstructionOCLBackend::WriteToConstantMemory(size_t offset, const v if (stream == -1) { SynchronizeGPU(); } - GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); + GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 
0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); if (mProcessingSettings.serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); } return size; } -void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUFailedMsg(clReleaseEvent(ev.get())); } +void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get())); } -void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } +void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEvent event) { @@ -490,18 +490,18 @@ int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEve void GPUReconstructionOCLBackend::SynchronizeGPU() { for (int32_t i = 0; i < mNStreams; i++) { - GPUFailedMsg(clFinish(mInternals->command_queue[i])); + GPUChkErr(clFinish(mInternals->command_queue[i])); } } -void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { GPUFailedMsg(clFinish(mInternals->command_queue[stream])); } +void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { GPUChkErr(clFinish(mInternals->command_queue[stream])); } -void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUFailedMsg(clWaitForEvents(nEvents, evList->getEventList())); } +void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList())); } void GPUReconstructionOCLBackend::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { if (nEvents) { - GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); + GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); } } @@ -509,7 +509,7 @@ bool GPUReconstructionOCLBackend::IsEventDone(deviceEvent* evList, int32_t nEven { cl_int eventdone; for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(clGetEventInfo(evList[i].get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr)); + GPUChkErr(clGetEventInfo(evList[i].get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr)); if (eventdone != CL_COMPLETE) { return false; } @@ -524,7 +524,7 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, return (0); } for (int32_t i = 0; i < mNStreams; i++) { - if (GPUFailedMsgI(clFinish(mInternals->command_queue[i]))) { + if (GPUChkErrI(clFinish(mInternals->command_queue[i]))) { GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i); } } @@ -554,14 +554,14 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error); } - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUError("Error creating OpenCL program from binary"); return 1; } - if (GPUFailedMsgI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) { + if (GPUChkErrI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, 
nullptr, nullptr))) { cl_build_status status; - if (GPUFailedMsgI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { + if (GPUChkErrI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { size_t log_size; clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size); std::unique_ptr build_log(new char[log_size + 1]); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 79f54274cd32c..2abae229c74bb 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -39,7 +39,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t ExitDevice_Runtime() override; void UpdateAutomaticProcessingSettings() override; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override; + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; void SynchronizeGPU() override; int32_t DoStuckProtection(int32_t stream, deviceEvent event) override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 8a6c889773cb0..4f6a8725b4be5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -18,7 +18,7 @@ template <> inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { cl_int4 val0 = {0, 0, 0, 0}; - GPUFailedMsg(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); + GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 
0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); } template @@ -28,7 +28,7 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu auto& x = _xyz.x; auto& y = _xyz.y; auto& z = _xyz.z; - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); + GPUChkErr(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); cl_event ev; cl_event* evr; @@ -39,15 +39,15 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu } else { evr = (cl_event*)z.ev; } - GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); + GPUChkErr(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { cl_ulong time_start, time_end; - GPUFailedMsg(clWaitForEvents(1, evr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); + GPUChkErr(clWaitForEvents(1, evr)); + GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); + GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); _xyz.t = (time_end - time_start) * 1.e-9f; if (tmpEvent) { - GPUFailedMsg(clReleaseEvent(ev)); + GPUChkErr(clReleaseEvent(ev)); } } } @@ -80,7 +80,7 @@ int32_t GPUReconstructionOCLBackend::AddKernel() cl_int ocl_error; cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUError("Error creating OPENCL Kernel: %s", name.c_str()); return 1; } From d7d0ee9b194930b59ab49b4410c4d8afa6bb84fd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 14:52:53 +0100 Subject: [PATCH 3/8] Fix compiler warning --- Generators/src/GeneratorFromFile.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Generators/src/GeneratorFromFile.cxx b/Generators/src/GeneratorFromFile.cxx index 6d4e85afa6721..e37a3886c24e1 100644 --- a/Generators/src/GeneratorFromFile.cxx +++ b/Generators/src/GeneratorFromFile.cxx @@ -361,7 +361,7 @@ namespace std::vector executeCommand(const std::string& command) { std::vector result; - std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); + std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); if (!pipe) { throw std::runtime_error("Failed to open pipe"); } From 36d0ea0f3fc4b7905c37866a278c20b93ee059d7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 15:06:29 +0100 Subject: [PATCH 4/8] GPU: Clean up more of C++ < 11 compatibility code --- GPU/GPUTracking/Base/GPUConstantMem.h | 2 +- GPU/GPUTracking/Base/GPUGeneralKernels.h | 6 +- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 4 +- .../Base/GPUReconstructionKernels.h | 8 +-- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 60 ++++++++----------- GPU/GPUTracking/Global/GPUChain.cxx | 12 ++-- GPU/GPUTracking/Global/GPUChain.h | 12 ++-- .../SectorTracker/GPUTPCCreateTrackingData.h | 2 +- .../GPUTPCExtrapolationTracking.h | 4 +- .../SectorTracker/GPUTPCNeighboursCleaner.h | 2 +- .../SectorTracker/GPUTPCNeighboursFinder.h | 2 +- .../SectorTracker/GPUTPCStartHitsFinder.h | 2 +- 
.../SectorTracker/GPUTPCStartHitsSorter.h | 2 +- .../SectorTracker/GPUTPCTrackletConstructor.h | 2 +- .../SectorTracker/GPUTPCTrackletSelector.h | 2 +- .../TRDTracking/GPUTRDTrackerKernels.h | 2 +- 16 files changed, 58 insertions(+), 66 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 8f1cc90f5ae93..e0b06f0a3ea55 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -96,7 +96,7 @@ static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + si #endif } // namespace o2::gpu #if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) -GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; +GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; // TODO: This should go into o2::gpu namespace, but then CUDA or HIP would not find the symbol #endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index 71980d38fdc9e..ce93e2e5eead8 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -79,7 +79,7 @@ class GPUKernelTemplate }; typedef GPUconstantref() GPUConstantMem processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return &processors; @@ -94,7 +94,7 @@ class GPUKernelTemplate class GPUMemClean16 : public GPUKernelTemplate { public: - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size); }; @@ -103,7 +103,7 @@ class GPUMemClean16 : public GPUKernelTemplate class GPUitoa : public GPUKernelTemplate { public: - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); }; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index f90820281c74d..fd999ec2304e1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -134,8 +134,8 @@ template inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args) { HighResTimer* t = nullptr; - GPUCA_RECO_STEP myStep = S::GetRecoStep() == GPUCA_RECO_STEP::NoRecoStep ? setup.x.step : S::GetRecoStep(); - if (myStep == GPUCA_RECO_STEP::NoRecoStep) { + GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep(); + if (myStep == GPUDataTypes::RecoStep::NoRecoStep) { throw std::runtime_error("Failure running general kernel without defining RecoStep"); } int32_t cpuFallback = IsGPU() ? 
(setup.x.device == krnlDeviceType::CPU ? 2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index ba30f38e902ad..b8f3e3746c743 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -30,14 +30,14 @@ struct classArgument { }; struct krnlExec { - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUCA_RECO_STEP::NoRecoStep) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUDataTypes::RecoStep::NoRecoStep) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} uint32_t nBlocks; uint32_t nThreads; int32_t stream; GPUReconstruction::krnlDeviceType device; - GPUCA_RECO_STEP step; + GPUDataTypes::RecoStep step; }; struct krnlRunRange { constexpr krnlRunRange() = default; diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index f7bfe38be988d..6cc1e7266e722 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -96,9 +96,6 @@ struct TPCPadGainCalib; struct TPCZSLinkMapping; #include "utils/bitfield.h" -#define ENUM_CLASS class -#define ENUM_UINT : uint32_t -#define GPUCA_RECO_STEP GPUDataTypes::RecoStep class GPUTPCTrack; class GPUTPCHitId; @@ -117,36 +114,33 @@ struct GPUSettingsTF; class GPUDataTypes { public: - enum ENUM_CLASS GeometryType ENUM_UINT{RESERVED_GEOMETRY = 0, ALIROOT = 1, O2 = 2}; - enum DeviceType ENUM_UINT { INVALID_DEVICE = 0, - CPU = 1, - CUDA = 2, - HIP = 3, - OCL = 4 }; - enum ENUM_CLASS GeneralStep { Prepare = 1, - QA = 2 }; + // clang-format off + enum class GeometryType : uint32_t { RESERVED_GEOMETRY = 0, ALIROOT = 1, O2 = 2 }; + enum DeviceType : uint32_t { INVALID_DEVICE = 0, CPU = 1, CUDA = 2, HIP = 3, OCL = 4 }; + enum class GeneralStep { Prepare = 1, QA = 2 }; + // clang-format on - enum ENUM_CLASS RecoStep { TPCConversion = 1, - TPCSectorTracking = 2, - TPCMerging = 4, - TPCCompression = 8, - TRDTracking = 16, - ITSTracking = 32, - TPCdEdx = 64, - TPCClusterFinding = 128, - TPCDecompression = 256, - Refit = 512, - AllRecoSteps = 0x7FFFFFFF, - NoRecoStep = 0 }; - enum ENUM_CLASS InOutType { TPCClusters = 1, - OBSOLETE = 2, - TPCMergedTracks = 4, - TPCCompressedClusters = 8, - TRDTracklets = 16, - TRDTracks = 32, - TPCRaw = 64, - ITSClusters = 128, - ITSTracks = 256 }; + enum class RecoStep { TPCConversion = 1, + TPCSectorTracking = 2, + TPCMerging = 4, + TPCCompression = 8, + TRDTracking = 16, + ITSTracking = 32, + TPCdEdx = 64, + TPCClusterFinding = 128, + TPCDecompression = 
256, + Refit = 512, + AllRecoSteps = 0x7FFFFFFF, + NoRecoStep = 0 }; + enum class InOutType { TPCClusters = 1, + OBSOLETE = 2, + TPCMergedTracks = 4, + TPCCompressedClusters = 8, + TRDTracklets = 16, + TRDTracks = 32, + TPCRaw = 64, + ITSClusters = 128, + ITSTracks = 256 }; #ifndef __OPENCL__ static constexpr const char* const DEVICE_TYPE_NAMES[] = {"INVALID", "CPU", "CUDA", "HIP", "OCL"}; static constexpr const char* const RECO_STEP_NAMES[] = {"TPC Transformation", "TPC Sector Tracking", "TPC Track Merging and Fit", "TPC Compression", "TRD Tracking", "ITS Tracking", "TPC dEdx Computation", "TPC Cluster Finding", "TPC Decompression", "Global Refit"}; @@ -312,8 +306,6 @@ struct GPUTrackingInOutPointers { const GPUSettingsTF* settingsTF = nullptr; }; -#undef ENUM_CLASS -#undef ENUM_UINT } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChain.cxx b/GPU/GPUTracking/Global/GPUChain.cxx index 6990d5e08b638..300de31a509ba 100644 --- a/GPU/GPUTracking/Global/GPUChain.cxx +++ b/GPU/GPUTracking/Global/GPUChain.cxx @@ -18,33 +18,33 @@ using namespace o2::gpu; constexpr GPUChain::krnlRunRange GPUChain::krnlRunRangeNone; constexpr GPUChain::krnlEvent GPUChain::krnlEventNone; -GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { const uint32_t nBlocks = (totalItems + nThreads - 1) / nThreads; return {nBlocks, nThreads, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-1, totalItems, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-2, nBlocks, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st) { return {(uint32_t)-2, nBlocks, stream, GPUReconstruction::krnlDeviceType::Auto, st}; } -GPUChain::krnlExec GPUChain::GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-3, 0, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st) { return {(uint32_t)-3, 0, stream, GPUReconstruction::krnlDeviceType::Auto, st}; } diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index b9da1c9a330d3..fff5d2efe0270 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -192,15 +192,15 @@ class GPUChain return mRec->getTimer(name, num); } // Get GRID with NBLOCKS minimal such that nThreads * NBLOCS >= totalItems - krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, 
GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with NBLOCKS minimal such that ideal number of threads * NBLOCKS >= totalItems - krnlExec GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with specified number of blocks, each block with ideal number of threads - krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); - krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); + krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with ideal number of threads / blocks for GPU - krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); - krnlExec GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); + krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); inline uint32_t BlockCount() const { return mRec->mBlockCount; } inline uint32_t WarpSize() const { return mRec->mWarpSize; } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h index 9327699c9404b..dc1beacf79d02 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h @@ -32,7 +32,7 @@ class GPUTPCCreateTrackingData : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h index 2d2b275d06399..91a33d132f136 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h @@ -30,7 +30,7 @@ class GPUTPCExtrapolationTracking : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return 
GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -50,7 +50,7 @@ class GPUTPCExtrapolationTrackingCopyNumbers : public GPUKernelTemplate { public: typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h index 7af6e8eb1a582..de79b268aea78 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h @@ -38,7 +38,7 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 54dc0876f8a55..41b5eb8a4ffb8 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -49,7 +49,7 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h index 5e620180570c8..c834b17369f0f 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h @@ -38,7 +38,7 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h index b0349d660dbc1..0e2fd96dd2690 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h @@ -38,7 +38,7 @@ class GPUTPCStartHitsSorter : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr 
static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index 8757ed87072da..0f8314ee0fad4 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -97,7 +97,7 @@ class GPUTPCTrackletConstructor GPUd() static int32_t GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index bb969d866ef29..5009c672b030e 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -42,7 +42,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h index 70b525420f294..21135ddc48dfa 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h @@ -26,7 +26,7 @@ class GPUTRDTrackerKernels : public GPUKernelTemplate enum K { defaultKernel = 0, gpuVersion = 0, o2Version = 1 }; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TRDTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TRDTracking; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, T* externalInstance = nullptr); }; From cb499fb3faa77a036f83f813af9c6b09965ca037 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 15:06:57 +0100 Subject: [PATCH 5/8] GPU: Provide static versions of GPUChkErr() macros test2 GPU: Provide static versions of GPUChkErr() macros --- GPU/Common/CMakeLists.txt | 2 +- GPU/Common/GPUCommonChkErr.h | 30 --------- GPU/Common/GPUCommonDef.h | 24 +++++-- GPU/Common/GPUCommonHelpers.h | 62 +++++++++++++++++++ GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 9 +-- .../Base/cuda/GPUReconstructionCUDA.h | 3 +- .../GPUReconstructionCUDAExternalProvider.cu | 3 +- .../cuda/GPUReconstructionCUDAHelpers.inc | 31 ++++++++++ .../cuda/GPUReconstructionCUDAInternals.h | 2 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 2 +- 
GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +- .../Base/opencl/GPUReconstructionOCL.cxx | 7 +-- .../opencl/GPUReconstructionOCLIncludesHost.h | 2 +- 14 files changed, 127 insertions(+), 56 deletions(-) delete mode 100644 GPU/Common/GPUCommonChkErr.h create mode 100644 GPU/Common/GPUCommonHelpers.h create mode 100644 GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 8b0a75679479f..bacf4454c39fd 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -15,7 +15,7 @@ set(HDRS_INSTALL GPUCommonAlgorithm.h GPUCommonDef.h GPUCommonDefAPI.h - GPUCommonChkErr.h + GPUCommonHelpers.h GPUCommonDefSettings.h GPUCommonConstants.h GPUCommonLogger.h diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h deleted file mode 100644 index 00cb9e50d302f..0000000000000 --- a/GPU/Common/GPUCommonChkErr.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUCommonChkErr.h -/// \author David Rohr - -// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). -// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code -// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured. -// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. -// The Macros must be called ona GPUReconstruction instance, e.g.: -// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } -// gpuRecObj.GPUChkErr(cudaMalloc(...)); - -#ifndef GPUCOMMONCHKERR_H -#define GPUCOMMONCHKERR_H - -// Please #include "GPUReconstruction.h" in your code, if you use these 2! 
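// (Illustration, not part of the patch.) A minimal usage sketch of the two instance-bound check
// macros documented in the comment above, assuming a CUDA build with "GPUReconstruction.h" and
// <cuda_runtime.h> included and an initialized GPUReconstruction; the function name, mRec and the
// buffer size are invented for the example.
static int32_t demoAllocScratch(o2::gpu::GPUReconstruction* mRec, void** devPtr)
{
  if (mRec->GPUChkErrI(cudaMalloc(devPtr, 1024))) { // soft check: logs the CUDA error and returns 1
    return 1;                                       // caller decides how to recover
  }
  mRec->GPUChkErr(cudaMemset(*devPtr, 0, 1024));    // hard check: throws if the call failed
  return 0;
}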
-#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) -#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) - -#endif diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index ae8c48b500b69..b4a788e66a81c 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -35,13 +35,25 @@ #define GPUCA_GPUCODE // Compiled by GPU compiler #endif - #if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__) - #define GPUCA_GPUCODE_DEVICE // Executed on device + #if defined(GPUCA_GPUCODE) + #if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__) + #define GPUCA_GPUCODE_DEVICE // Executed on device + #endif + #if defined(__CUDACC__) + #define GPUCA_GPUTYPE CUDA + #elif defined(__HIPCC__) + #define GPUCA_GPUTYPE HIP + #elif defined(__OPENCL__) || defined(__OPENCL_HOST__) + #define GPUCA_GPUTYPE OCL + #endif #endif #endif +#ifndef GPUCA_GPUTYPE + #define GPUCA_GPUTYPE CPU +#endif #if defined(GPUCA_STANDALONE) || (defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE)) || defined (GPUCA_GPUCODE) - #define GPUCA_ALIGPUCODE + #define GPUCA_ALIGPUCODE // Part of GPUTracking library but not of interface #endif #if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) @@ -51,13 +63,13 @@ #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && defined(DEBUG_STREAMER) -#define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__ + #define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__ #else -#define GPUCA_DEBUG_STREAMER_CHECK(...) + #define GPUCA_DEBUG_STREAMER_CHECK(...) #endif #ifndef GPUCA_RTC_SPECIAL_CODE -#define GPUCA_RTC_SPECIAL_CODE(...) + #define GPUCA_RTC_SPECIAL_CODE(...) #endif // API Definitions for GPU Compilation diff --git a/GPU/Common/GPUCommonHelpers.h b/GPU/Common/GPUCommonHelpers.h new file mode 100644 index 0000000000000..ad876db0d6c3a --- /dev/null +++ b/GPU/Common/GPUCommonHelpers.h @@ -0,0 +1,62 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUCommonHelpers.h +/// \author David Rohr + +// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). +// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code +// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured. +// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. +// These Macros must be called ona GPUReconstruction instance. +// The GPUChkErrS and GPUChkErrSI are similar but static, without required GPUReconstruction instance. 
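// (Illustration, not part of the patch.) Self-contained sketch of how the static variants just
// described resolve the backend checker at compile time: GPUCA_GPUTYPE (CPU / CUDA / HIP / OCL, see
// the GPUCommonDef.h hunk above) is token-pasted into the function name, in the same way
// GPUCommonHelpers.h below does with GPUCOMMON_INTERNAL_CAT. All DEMO_* / Demo* names are invented.
#include <cstdint>
#include <cstdio>
#include <stdexcept>

#define DEMO_GPUTYPE CPU // in the real code this is derived from the compiler (__CUDACC__ etc.)
#define DEMO_CAT_A(a, b, c) a##b##c
#define DEMO_CAT(...) DEMO_CAT_A(__VA_ARGS__) // extra level so DEMO_GPUTYPE is expanded before pasting

// Backend-specific checker; the CUDA build provides GPUReconstructionCUDAChkErr in this role.
static int32_t DemoCPUChkErr(const int64_t error, const char* file, int32_t line)
{
  if (error != 0) {
    std::printf("demo error %lld (%s:%d)\n", (long long)error, file, line);
  }
  return error != 0;
}

// Generic wrapper: picks the backend checker purely via token pasting, throws on hard failures.
static int32_t DemoChkErr(const int64_t error, const char* file, int32_t line, bool failOnError)
{
  int32_t bad = error && DEMO_CAT(Demo, DEMO_GPUTYPE, ChkErr)(error, file, line); // -> DemoCPUChkErr(...)
  if (bad && failOnError) {
    throw std::runtime_error("demo API call failure");
  }
  return bad;
}

#define DEMO_CHK_ERR_S(x) DemoChkErr(x, __FILE__, __LINE__, true)   // analogous to GPUChkErrS
#define DEMO_CHK_ERR_SI(x) DemoChkErr(x, __FILE__, __LINE__, false) // analogous to GPUChkErrSI

int main()
{
  return DEMO_CHK_ERR_SI(0); // success path: nothing printed, returns 0
}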
+// Examples: +// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } +// gpuRecObj.GPUChkErr(cudaMalloc(...)); +// if (GPUChkErrSI(cudaMalloc(..))) { exit(1); } + +#ifndef GPUCOMMONHELPERS_H +#define GPUCOMMONHELPERS_H + +// Please #include "GPUReconstruction.h" in your code, if you use these 2! +#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) +#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) +#define GPUChkErrS(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, true) +#define GPUChkErrSI(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, false) + +#include "GPUCommonDef.h" +#include + +namespace o2::gpu::internal +{ +#define GPUCOMMON_INTERNAL_CAT_A(a, b, c) a##b##c +#define GPUCOMMON_INTERNAL_CAT(...) GPUCOMMON_INTERNAL_CAT_A(__VA_ARGS__) +extern int32_t GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(const int64_t error, const char* file, int32_t line); +inline int32_t GPUReconstructionCPUChkErr(const int64_t error, const char* file, int32_t line) +{ + if (error) { + GPUError("GPUCommon Error Code %d (%s:%d)", error, file, line); + } + return error != 0; +} +static inline int32_t GPUReconstructionChkErr(const int64_t error, const char* file, int32_t line, bool failOnError) +{ + int32_t retVal = error && GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(error, file, line); + if (retVal && failOnError) { + throw std::runtime_error("GPU API Call Failure"); + } + return error; +} +#undef GPUCOMMON_INTERNAL_CAT_A +#undef GPUCOMMON_INTERNAL_CAT +} // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index a33234db49a27..5b2e53179e50c 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -18,7 +18,7 @@ endif() message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) -set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) +set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d30eb51bd4938..d0d5ef4680fac 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -22,6 +22,7 @@ #include "CUDAThrustHelpers.h" #include "GPUReconstructionIncludes.h" #include "GPUParamRTC.h" +#include "GPUReconstructionCUDAHelpers.inc" #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 #include "utils/qGetLdBinarySymbols.h" @@ -62,13 +63,9 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUChkErrStatic(const int64_t error, const char* file, int32_t line) +int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { - if (error == cudaSuccess) { - return (0); - } - GPUError("CUDA Error: %ld / %s (%s:%d)", error, 
cudaGetErrorString((cudaError_t)error), file, line); - return 1; + return internal::GPUReconstructionCUDAChkErr(error, file, line); } GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 02e8f92bb2328..f78270d40146c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,12 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUChkErrStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; - virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override { return GPUChkErrStatic(error, file, line); } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index bc1d573385598..f341a778076b8 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -32,11 +32,12 @@ using namespace o2::gpu; #include "TrackParametrizationWithError.cxx" #include "Propagator.cxx" #include "TrackLTIntegral.cxx" +#include "GPUReconstructionCUDAHelpers.inc" #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUChkErrS(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer))) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc new file mode 100644 index 0000000000000..a34f940a1337a --- /dev/null +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc @@ -0,0 +1,31 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
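// (Illustration, not part of the patch.) On the weak attribute used for the checker defined in this
// .inc file: the file is included from more than one translation unit in this patch
// (GPUReconstructionCUDA.cu and GPUReconstructionCUDAExternalProvider.cu), so a plain out-of-line
// definition would likely clash at link time; marking it weak lets the linker keep a single copy and
// allows another definition to take precedence. A stripped-down sketch of the same pattern, with
// invented names (demo.inc, a.cxx, b.cxx, demoChkErr):
//
//   // demo.inc -- included by several .cxx files of the same link unit
//   int __attribute__((weak)) demoChkErr(long error) { return error != 0; }
//
//   // a.cxx and b.cxx both do: #include "demo.inc"
//   // The linker keeps one copy of the weak demoChkErr instead of reporting a duplicate symbol.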
+ +/// \file GPUReconstructionCUDAHelpers.inc +/// \author David Rohr + +#ifndef GPURECONSTRUCTIONCUDAHELPERS_INC_H +#define GPURECONSTRUCTIONCUDAHELPERS_INC_H + +#include "GPUCommonHelpers.h" + +namespace o2::gpu::internal +{ +int32_t __attribute__((weak)) GPUReconstructionCUDAChkErr(const int64_t error, const char* file, int32_t line) +{ + if (error != cudaSuccess) { + GPUError("CUDA Error: %ld / %s (%s:%d)", error, cudaGetErrorString((cudaError_t)error), file, line); + } + return error != cudaSuccess; +} +} // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 027a9d5445b2c..c85d98d85420e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -22,7 +22,7 @@ #include #include #include -#include "GPUCommonChkErr.h" +#include "GPUCommonHelpers.h" namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f60f00c13710d..0c83223ba238a 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -138,7 +138,7 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUChkErrS(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer))) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 30f6683ff93c5..21a641c0cc7c0 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesHost.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) @@ -63,7 +63,7 @@ endif() set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip) set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) -set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h 
${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) +set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e52494937f8bf..e92205b9864e6 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -52,11 +52,10 @@ static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string - if (error == CL_SUCCESS) { - return (0); + if (error != CL_SUCCESS) { + GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); } - GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); - return 1; + return error != CL_SUCCESS; } void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index 9c8cdbe87c7c1..97316cf9aa32e 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -28,7 +28,7 @@ #include "GPUReconstructionOCL.h" #include "GPUReconstructionIncludes.h" -#include "GPUCommonChkErr.h" +#include "GPUCommonHelpers.h" using namespace o2::gpu; From 0a56fcdbe66b48e90a02c85ddd44b6ce1e32d604 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 16:15:23 +0100 Subject: [PATCH 6/8] GPU: Automatically derive GPUReconstruction backend class from preprocessor constant --- GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h | 4 ++-- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h | 1 - .../Base/opencl/GPUReconstructionOCLKernels.cxx | 1 - GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 ++++---- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index cd1180cbc9991..0b1a501ebc094 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -68,7 +68,7 @@ // GPU Host wrappers for kernel #define GPUCA_KRNL_HOST(x_class, ...) \ GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ - template <> class GPUCA_KRNL_BACKEND_CLASS::backendInternal { \ + template <> class GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::backendInternal { \ public: \ template \ static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... 
args) \ @@ -80,7 +80,7 @@ }; #define GPUCA_KRNL_PROP(x_class, x_attributes) \ - template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend() { \ + template <> gpu_reconstruction_kernels::krnlProperties GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::getKernelPropertiesBackend() { \ gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \ return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h index 7f77925ca3aaa..4ed352279fb90 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h @@ -34,6 +34,5 @@ #define GPUCA_CONSMEM_CALL me->mDeviceConstantMem, #define GPUCA_CONSMEM ((GPUConstantMem&)(*gGPUConstantMemBuffer)) #endif -#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionCUDABackend #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 4f6a8725b4be5..ce6b6553ae1f7 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -109,6 +109,5 @@ int32_t GPUReconstructionOCLBackend::AddKernels() #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ GPUCA_KRNL_PROP(x_class, x_attributes) \ template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); -#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCLBackend #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index fa85d796baeba..e6312d767a496 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -769,7 +769,7 @@ struct MergeBorderTracks_compMin { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { thrust::device_ptr p(range); ThrustVolatileAsyncAllocator alloc(this); @@ -1873,7 +1873,7 @@ struct GPUTPCGMMergerSortTracksQPt_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); ThrustVolatileAsyncAllocator alloc(this); @@ -1881,7 +1881,7 @@ inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); ThrustVolatileAsyncAllocator alloc(this); @@ -2106,7 +2106,7 @@ struct GPUTPCGMMergerMergeLoopers_comp { } // namespace o2::gpu::internal template 
<> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); ThrustVolatileAsyncAllocator alloc(this); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 45293bae9820b..13f204d0f940a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -102,7 +102,7 @@ struct GPUTPCGMO2OutputSort_comp { }; template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); ThrustVolatileAsyncAllocator alloc(this); From 1a4806d96c5ec4a053deda3f2d89611834b65234 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 16:33:57 +0100 Subject: [PATCH 7/8] GPU: Plenty of clang-format fixes --- GPU/GPUTracking/Base/GPUParam.h | 8 +-- GPU/GPUTracking/Base/GPUReconstruction.h | 4 +- .../Base/GPUReconstructionConvert.cxx | 2 +- .../Base/GPUReconstructionProcessing.h | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/DataTypes/GPUOutputControl.h | 2 +- GPU/GPUTracking/DataTypes/GPUSettings.h | 6 +- GPU/GPUTracking/DataTypes/GPUTRDTrack.h | 6 +- GPU/GPUTracking/Global/GPUChain.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.h | 2 +- .../Global/GPUChainTrackingClusterizer.cxx | 3 +- .../Global/GPUChainTrackingRefit.cxx | 4 +- GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 +- .../GPUO2InterfaceConfigurableParam.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 16 ++--- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 6 +- .../Merger/GPUTPCGMPolynomialFieldManager.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- .../SectorTracker/GPUTPCTracklet.h | 2 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 12 ++-- .../GPUTPCCFNoiseSuppression.cxx | 4 +- .../TRDTracking/GPUTRDInterfaces.h | 4 +- GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h | 18 +++--- GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 24 ++++--- GPU/GPUTracking/TRDTracking/GPUTRDTracker.h | 62 +++++++++---------- .../TRDTracking/GPUTRDTrackletWord.h | 2 +- .../display/frontend/GPUDisplayFrontend.h | 8 +-- .../frontend/GPUDisplayFrontendWindows.cxx | 12 ++-- GPU/GPUTracking/qa/GPUQAHelper.h | 6 +- GPU/GPUTracking/qa/genEvents.h | 2 +- GPU/GPUTracking/utils/qconfig.cxx | 8 +-- GPU/GPUTracking/utils/threadserver.h | 4 +- GPU/GPUbenchmark/cuda/Kernels.cu | 6 +- GPU/TPCFastTransformation/BandMatrixSolver.h | 2 +- GPU/TPCFastTransformation/ChebyshevFit1D.cxx | 4 +- .../CorrectionMapsHelper.h | 28 ++++----- .../NDPiecewisePolynomials.h | 14 ++--- GPU/TPCFastTransformation/Spline1DHelper.cxx | 8 +-- GPU/TPCFastTransformation/Spline1DHelperOld.h | 20 +++--- GPU/TPCFastTransformation/Spline1DSpec.cxx | 2 +- GPU/TPCFastTransformation/Spline1DSpec.h | 12 ++-- GPU/TPCFastTransformation/Spline2DSpec.cxx | 2 +- GPU/TPCFastTransformation/SplineHelper.cxx | 44 ++++++------- GPU/TPCFastTransformation/SplineHelper.h | 6 +- GPU/TPCFastTransformation/SplineSpec.h | 14 ++--- .../TPCFastSpaceChargeCorrection.h | 2 +- .../IrregularSpline2D3DCalibrator.cxx | 2 +- .../devtools/RegularSpline1D.h | 12 ++-- 
.../devtools/SemiregularSpline2D3D.cxx | 8 +-- .../devtools/SemiregularSpline2D3D.h | 32 +++++----- .../test/testMultivarPolynomials.cxx | 4 +- GPU/Utils/FlatObject.h | 12 ++-- GPU/Workflow/helper/src/GPUWorkflowHelper.cxx | 26 ++++---- 54 files changed, 250 insertions(+), 255 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 9bdf705dfeb59..fbce6246de112 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -59,10 +59,10 @@ struct GPUParam_t { int32_t continuousMaxTimeBin; int32_t tpcCutTimeBin; - GPUTPCGeometry tpcGeometry; // TPC Geometry - GPUTPCGMPolynomialField polynomialField; // Polynomial approx. of magnetic field for TPC GM - const uint32_t* occupancyMap; // Ptr to TPC occupancy map - uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) + GPUTPCGeometry tpcGeometry; // TPC Geometry + GPUTPCGMPolynomialField polynomialField; // Polynomial approx. of magnetic field for TPC GM + const uint32_t* occupancyMap; // Ptr to TPC occupancy map + uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) GPUParamSector SectorParam[GPUCA_NSECTORS]; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index f363f3f58aa6f..5e03c77f08230 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -52,7 +52,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent; class threadContext; -} +} // namespace gpu_reconstruction_kernels class GPUReconstruction { @@ -193,7 +193,7 @@ class GPUReconstruction bool IsInitialized() const { return mInitialized; } void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr); void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr); - void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() + void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() void SetDebugLevelTmp(int32_t level) { mProcessingSettings.debugLevel = level; } // Temporarily, before calling SetSettings() void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr); void UpdateDynamicSettings(const GPUSettingsRecDynamic* d); diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index 8f5cab6807050..bc760f6188caa 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -923,7 +923,7 @@ void zsEncoderDenseLinkBased::decodePage(std::vector& outputBuff if (decLinkX & 0b00100000) { bitmaskL2.set(); } else { - bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t) * ((const uint8_t*)decPagePtr)); + bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t)*((const uint8_t*)decPagePtr)); decPagePtr += sizeof(uint8_t); } diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 4ccfb9ff10311..43560616782db 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -28,7 +28,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent { constexpr deviceEvent() = default; - constexpr 
deviceEvent(std::nullptr_t p) : v(nullptr){}; + constexpr deviceEvent(std::nullptr_t p) : v(nullptr) {}; template void set(T val) { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d0d5ef4680fac..202edd49bc44c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -496,7 +496,7 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, for (int32_t k = 0; k < nEvents; k++) { GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); } - GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : (toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost), mInternals->Streams[stream])); } if (ev) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 0 : stream])); diff --git a/GPU/GPUTracking/DataTypes/GPUOutputControl.h b/GPU/GPUTracking/DataTypes/GPUOutputControl.h index 799fd25330ab4..0495f7ed1d0ff 100644 --- a/GPU/GPUTracking/DataTypes/GPUOutputControl.h +++ b/GPU/GPUTracking/DataTypes/GPUOutputControl.h @@ -78,7 +78,7 @@ struct GPUTrackingOutputs { static constexpr size_t count() { return sizeof(GPUTrackingOutputs) / sizeof(GPUOutputControl); } GPUOutputControl* asArray() { return (GPUOutputControl*)this; } size_t getIndex(const GPUOutputControl& v) { return &v - (const GPUOutputControl*)this; } - static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::*v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } + static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::* v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index 05888770ef9e5..c81a8e20e9926 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -73,9 +73,9 @@ struct GPUSettingsTF { // Settings defining the setup of the GPUReconstruction processing (basically selecting the device / class instance) struct GPUSettingsDeviceBackend { - uint32_t deviceType = GPUDataTypes::DeviceType::CPU; // Device type, shall use GPUDataTypes::DEVICE_TYPE constants, e.g. CPU / CUDA - uint8_t forceDeviceType = 1; // Fail if device initialization fails, otherwise falls back to CPU - GPUReconstruction* master = nullptr; // GPUReconstruction master object + uint32_t deviceType = GPUDataTypes::DeviceType::CPU; // Device type, shall use GPUDataTypes::DEVICE_TYPE constants, e.g. CPU / CUDA + uint8_t forceDeviceType = 1; // Fail if device initialization fails, otherwise falls back to CPU + GPUReconstruction* master = nullptr; // GPUReconstruction master object }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h index 18f7c61e01fc3..b358e8b82d480 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h @@ -107,9 +107,9 @@ class GPUTRDTrack_t : public T GPUd() void setHasPadrowCrossing() { mIsCrossingNeighbor |= (1U << 7); } protected: - float mChi2; // total chi2. 
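// (Illustration, not part of the patch.) Standalone sketch of the pointer-to-member arithmetic used
// in GPUTrackingOutputs::getIndex() a few hunks above: the member's offset is measured against an
// arbitrary fake base address and turned into an index, relying on the members being laid out like a
// flat array. Outputs and indexOf are invented names; this mirrors, not replaces, the patch's code.
#include <cassert>
#include <cstdint>

struct Outputs {
  int32_t clustersNative, tpcTracks, compressedClusters; // stand-ins for the GPUOutputControl members
};

static int32_t indexOf(int32_t Outputs::* m)
{
  // Only the difference of the two addresses matters; 0x10000 is an arbitrary non-null base,
  // mirroring the (GPUTrackingOutputs*)(0x10000) trick in the patch (not strictly portable C++).
  return &(((Outputs*)0x10000)->*m) - (int32_t*)0x10000;
}

int main()
{
  assert(indexOf(&Outputs::clustersNative) == 0);
  assert(indexOf(&Outputs::tpcTracks) == 1);
  assert(indexOf(&Outputs::compressedClusters) == 2);
  return 0;
}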
- float mSignal{-1.f}; // electron Likelihood for track - uint32_t mRefGlobalTrackId; // raw GlobalTrackID of the seeding track (either ITS-TPC or TPC) + float mChi2; // total chi2. + float mSignal{-1.f}; // electron Likelihood for track + uint32_t mRefGlobalTrackId; // raw GlobalTrackID of the seeding track (either ITS-TPC or TPC) int32_t mAttachedTracklets[kNLayers]; // indices of the tracklets attached to this track; -1 means no tracklet in that layer int16_t mCollisionId; // the collision ID of the tracklets attached to this track; is used to retrieve the BC information for this track after the tracking is done uint8_t mFlags; // bits 0 to 5 indicate whether track is findable in layer 0 to 5, bit 6 indicates an ambiguous track and bit 7 flags if the track is stopped in the TRD diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index fff5d2efe0270..290ae32cafca8 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -46,7 +46,7 @@ class GPUChain virtual int32_t Finalize() = 0; virtual int32_t RunChain() = 0; virtual void MemorySize(size_t& gpuMem, size_t& pageLockedHostMem) = 0; - virtual void PrintMemoryStatistics(){}; + virtual void PrintMemoryStatistics() {}; virtual int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr) { return 0; } virtual bool SupportsDoublePipeline() { return false; } virtual int32_t FinalizePipelinedProcessing() { return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 492ee65d1c9c1..5779cec31130c 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -43,7 +43,7 @@ class MatLayerCylSet; namespace o2::gpu { -//class GPUTRDTrackerGPU; +// class GPUTRDTrackerGPU; class GPUTPCGPUTracker; class GPUDisplayInterface; class GPUQA; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 63d56da37595b..a48050a6cacbc 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -154,8 +154,7 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t endpointAdcSamples[GPUTrackingInOutZS::NENDPOINTS]; memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples)); bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) - : 0; + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? 
o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0); for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { #ifndef GPUCA_NO_VC diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 9e7085b31849e..8d1efd7011227 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -31,13 +31,13 @@ int32_t GPUChainTracking::RunRefit() RefitShadow.SetPropagator(doGPU ? processorsShadow()->calibObjects.o2Propagator : GetO2Propagator()); RefitShadow.mPTracks = (doGPU ? processorsShadow() : processors())->tpcMerger.OutputTracks(); WriteToConstantMemory(RecoStep::Refit, (char*)&processors()->trackingRefit - (char*)processors(), &RefitShadow, sizeof(RefitShadow), 0); - //TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); + // TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); if (param().rec.trackingRefitGPUModel) { runKernel(GetGrid(mIOPtrs.nMergedTracks, 0)); } else { runKernel(GetGrid(mIOPtrs.nMergedTracks, 0)); } - //TransferMemoryResourcesToHost(RecoStep::Refit, &Refit, 0); + // TransferMemoryResourcesToHost(RecoStep::Refit, &Refit, 0); SynchronizeStream(0); return 0; } diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 34cd5b7280dc3..4dac56afed671 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -46,7 +46,7 @@ struct GPUO2Interface_Internals { }; } // namespace o2::gpu -GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals){}; +GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals) {}; GPUO2Interface::~GPUO2Interface() { Deinitialize(); } diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h index 425c8b880b4e3..ebb426b7a8cfe 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h @@ -50,7 +50,7 @@ #define AddSubConfig(name, instance) #define BeginSubConfig(name, instance, parent, preoptname, preoptnameshort, descr, o2prefix) \ struct GPUCA_M_CAT(GPUConfigurableParam, name) : public o2::conf::ConfigurableParamHelper { \ - O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: + O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: #define BeginHiddenConfig(name, instance) struct GPUCA_M_CAT(GPUConfigurableParam, name) { #define EndConfig() \ } \ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 64d8549312736..578fe1eeb4ca7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -106,17 +106,17 @@ class GPUTPCGMMergedTrack GPUd() gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() { return mOuterParam; } private: - GPUTPCGMTrackParam mParam; //* fitted track parameters + GPUTPCGMTrackParam mParam; //* fitted track parameters gputpcgmmergertypes::GPUTPCOuterParam mOuterParam; //* outer param - float mAlpha; //* alpha angle - float mLastX; //* outer X - float mLastY; //* outer Y - float mLastZ; //* outer Z - uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays + float mAlpha; //* alpha angle + float mLastX; //* outer X 
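Aside on the firstHBF change in the GPUChainTrackingClusterizer.cxx hunk above: it only adds parentheses around the trailing conditional, and since ?: is right-associative the grouping (and result) is unchanged. A small standalone check of that equivalence with placeholder names and values, not the real TF settings:

#include <cassert>

// hasStart/startOrbit/hasZS/zsOrbit are illustrative parameters only.
int pickFirstOrbit(bool hasStart, int startOrbit, bool hasZS, int zsOrbit)
{
  // Un-parenthesized form: ?: associates to the right, so the second
  // conditional is the "else" branch of the first one.
  int a = hasStart ? startOrbit : hasZS ? zsOrbit : 0;
  // Fully parenthesized form, as written after the reformat.
  int b = hasStart ? startOrbit : (hasZS ? zsOrbit : 0);
  assert(a == b); // same grouping, same result
  return b;
}

int main()
{
  return pickFirstOrbit(false, 42, true, 7) == 7 ? 0 : 1;
}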
+ float mLastY; //* outer Y + float mLastZ; //* outer Z + uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays // TODO: Change to 8 bit - uint32_t mNClusters; //* number of track clusters - uint32_t mNClustersFitted; //* number of clusters used in fit + uint32_t mNClusters; //* number of track clusters + uint32_t mNClustersFitted; //* number of clusters used in fit uint8_t mFlags; uint8_t mLegs; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index e6312d767a496..1c2a8e2b29a9c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -2140,7 +2140,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, } const float d2xy = CAMath::Sum2(params[i].x - params[j].x, params[i].y - params[j].y); if (d2xy > 15.f) { - //bs |= 1; + // bs |= 1; continue; } const auto& trk1 = mOutputTracks[params[i].id]; @@ -2148,7 +2148,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const auto& param1 = trk1.GetParam(); const auto& param2 = trk2.GetParam(); if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) { - //bs |= 2; + // bs |= 2; continue; } @@ -2170,7 +2170,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, } } if (!dzcorrok) { - //bs |= 4; + // bs |= 4; continue; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h index 88f0882a79f03..4a608fcc97068 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h @@ -39,11 +39,11 @@ class GPUTPCGMPolynomialFieldManager GPUTPCGMPolynomialFieldManager() = default; /* Get appropriate pre-calculated polynomial field for the given field value nominalFieldkG - */ + */ static int32_t GetPolynomialField(float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); /* Get pre-calculated polynomial field of type "type", scaled with respect to nominalFieldkG - */ + */ static int32_t GetPolynomialField(StoredField_t type, float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index e15d6fe8b17bd..0c171a74d4e42 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -716,7 +716,7 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict const float ImP1 = mP[1] + Ik11 * Iz1; const float ImC0 = mC[0] - Ik00 * mC[0]; const float ImC2 = mC[2] - Ik11 * mC[2]; - //printf("\t%21sInterpo ----- abde artaf%16s Y %8.3f, Z %8.3f (Errors %f <-- (%f, %f) %f <-- (%f, %f))\n", "", "", ImP0, ImP1, sqrtf(ImC0), sqrtf(mC[0]), sqrtf(inter->errorY), sqrtf(ImC2), sqrtf(mC[2]), sqrtf(inter->errorZ)); + // printf("\t%21sInterpo ----- abde artaf%16s Y %8.3f, Z %8.3f (Errors %f <-- (%f, %f) %f <-- (%f, %f))\n", "", "", ImP0, ImP1, sqrtf(ImC0), sqrtf(mC[0]), sqrtf(inter->errorY), sqrtf(ImC2), sqrtf(mC[2]), sqrtf(inter->errorZ)); const float Jz0 = posY - ImP0; const float Jz1 = posZ - ImP1; const float Jw0 = 1.f / (ImC0 + err2Y); diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 9d10d40107b8f..502a70cb57762 100644 --- 
a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -256,7 +256,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov int32_t nAvgCharge = 0; for (int32_t i = start; i != stop; i += cl ? 0 : direction) { - float x = 0, y = 0, z = 0, charge = 0; // FIXME: initialization unneeded, but GCC incorrectly produces uninitialized warnings otherwise + float x = 0, y = 0, z = 0, charge = 0; // FIXME: initialization unneeded, but GCC incorrectly produces uninitialized warnings otherwise float time = 0.f, invCharge = 0.f, invSqrtCharge = 0.f; // Same here... int32_t clusters = 0; while (true) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h index 10ff0a32aeaf3..5bb63d6a10254 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h @@ -30,7 +30,7 @@ class GPUTPCTracklet { public: #if !defined(GPUCA_GPUCODE) - GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0){}; + GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0) {}; #endif //! GPUCA_GPUCODE GPUhd() int32_t FirstRow() const { return mFirstRow; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index e7634fa397bae..f1fd95d696f5d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -101,7 +101,7 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2); for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) { // TODO: Parallelize over time bins - pagePtr += (pagePtr - page) & 1; // Ensure 16 bit alignment + pagePtr += (pagePtr - page) & 1; // Ensure 16 bit alignment const TPCZSTBHDR* tbHdr = reinterpret_cast(pagePtr); if ((tbHdr->rowMask & 0x7FFF) == 0) { pagePtr += 2; @@ -324,8 +324,8 @@ GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread( bits -= DECODE_BITS; nSamplesWritten++; rawFECChannel++; // Ensure we don't decode same channel twice - } // while (bits >= DECODE_BITS) - } // while (nSamplesWritten < nAdc) + } // while (bits >= DECODE_BITS) + } // while (nSamplesWritten < nAdc) } else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3 uint32_t rawFECChannel = 0; @@ -705,7 +705,7 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( #define PEEK_OVERFLOW(pagePtr, offset) \ (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \ - ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset)-payloadEnd) \ + ? 
nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset) - payloadEnd) \ : (pagePtr) + (offset))) #define TEST_BIT(x, bit) static_cast((x) & (1 << (bit))) @@ -931,8 +931,8 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( bits -= DECODE_BITS; nSamplesWritten++; rawFECChannel++; // Ensure we don't decode same channel twice - } // while (bits >= DECODE_BITS) - } // while (nSamplesWritten < nAdc) + } // while (bits >= DECODE_BITS) + } // while (nSamplesWritten < nAdc) assert(PayloadExtendsToNextPage || adcData <= page); assert(PayloadExtendsToNextPage || page <= payloadEnd); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx index 05fddda5bec68..f3a914cbfcaee 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx @@ -316,8 +316,8 @@ GPUd() void GPUTPCCFNoiseSuppression::findMinimaAndPeaks( uint8_t* bufp = (uint8_t*)buf; /************************************** - * Look for peaks - **************************************/ + * Look for peaks + **************************************/ CfUtils::blockLoad( peakMap, diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index 24624e60ceba7..f6b8bea29822a 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -45,7 +45,7 @@ class propagatorInterface { public: typedef o2::base::Propagator propagatorParam; - GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop){}; + GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop) {}; GPUd() propagatorInterface(const propagatorInterface&) = delete; GPUd() propagatorInterface& operator=(const propagatorInterface&) = delete; @@ -200,7 +200,7 @@ class propagatorInterface : public GPUTPCGMPropagator } GPUd() bool propagateToX(float x, float maxSnp, float maxStep) { - //bool ok = PropagateToXAlpha(x, GetAlpha(), true) == 0 ? true : false; + // bool ok = PropagateToXAlpha(x, GetAlpha(), true) == 0 ? true : false; int32_t retVal = PropagateToXAlpha(x, GetAlpha(), true); bool ok = (retVal == 0) ? 
true : false; ok = mTrack->CheckNumericalQuality(); diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h index 6a6e13fe84e36..ee7d7a30b1c55 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h @@ -18,15 +18,15 @@ #define GPUTRDTRACKDATA_H struct GPUTRDTrackDataRecord { - float mAlpha; // azimuthal angle of reference frame - float fX; // x: radial distance - float fY; // local Y-coordinate of a track (cm) - float fZ; // local Z-coordinate of a track (cm) - float mSinPhi; // local sine of the track momentum azimuthal angle - float fTgl; // tangent of the track momentum dip angle - float fq1Pt; // 1/pt (1/(GeV/c)) - float fC[15]; // covariance matrix - int32_t fTPCTrackID; // id of corresponding TPC track + float mAlpha; // azimuthal angle of reference frame + float fX; // x: radial distance + float fY; // local Y-coordinate of a track (cm) + float fZ; // local Z-coordinate of a track (cm) + float mSinPhi; // local sine of the track momentum azimuthal angle + float fTgl; // tangent of the track momentum dip angle + float fq1Pt; // 1/pt (1/(GeV/c)) + float fC[15]; // covariance matrix + int32_t fTPCTrackID; // id of corresponding TPC track int32_t fAttachedTracklets[6]; // IDs for attached tracklets sorted by layer uint8_t mIsPadrowCrossing; // bits 0 to 5 indicate whether a padrow was crossed diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index fa0711887f60f..c633f10adae38 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -12,7 +12,7 @@ /// \file GPUTRDTracker.cxx /// \author Ole Schmidt -//#define ENABLE_GPUTRDDEBUG +// #define ENABLE_GPUTRDDEBUG #define ENABLE_WARNING 0 #define ENABLE_INFO 0 @@ -326,7 +326,6 @@ GPUd() int32_t GPUTRDTracker_t::LoadTrack(const TRDTRK& trk, uint3 return (0); } - template GPUd() void GPUTRDTracker_t::DumpTracks() { @@ -439,19 +438,19 @@ GPUd() bool GPUTRDTracker_t::CalculateSpacePoints(int32_t iCollisi int32_t trkltIdxStart = trkltIdxOffset + iFirstTrackletInDet; for (int32_t trkltIdx = trkltIdxStart; trkltIdx < trkltIdxStart + nTrackletsInDet; ++trkltIdx) { int32_t trkltZbin = tracklets[trkltIdx].GetZbin(); - float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates - float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates + float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates + float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates xTrkltDet[0] = mGeo->AnodePos() + sRadialOffset; xTrkltDet[1] = tracklets[trkltIdx].GetY(); xTrkltDet[2] = pp->GetRowPos(trkltZbin) - pp->GetRowSize(trkltZbin) / 2.f - pp->GetRowPos(pp->GetNrows() / 2); - //GPUInfo("Space point local %i: x=%f, y=%f, z=%f", trkltIdx, xTrkltDet[0], xTrkltDet[1], xTrkltDet[2]); + // GPUInfo("Space point local %i: x=%f, y=%f, z=%f", trkltIdx, xTrkltDet[0], xTrkltDet[1], xTrkltDet[2]); matrix->LocalToMaster(xTrkltDet, xTrkltSec); mSpacePoints[trkltIdx].setX(xTrkltSec[0]); mSpacePoints[trkltIdx].setY(xTrkltSec[1]); mSpacePoints[trkltIdx].setZ(xTrkltSec[2]); mSpacePoints[trkltIdx].setDy(tracklets[trkltIdx].GetdY()); - //GPUInfo("Space point global %i: x=%f, y=%f, z=%f", trkltIdx, mSpacePoints[trkltIdx].getX(), mSpacePoints[trkltIdx].getY(), mSpacePoints[trkltIdx].getZ()); + // GPUInfo("Space point global %i: x=%f, y=%f, z=%f", trkltIdx, mSpacePoints[trkltIdx].getX(), mSpacePoints[trkltIdx].getY(), 
mSpacePoints[trkltIdx].getZ()); } } return result; @@ -475,10 +474,10 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK float zShiftTrk = 0.f; if (mProcessPerTimeFrame) { zShiftTrk = (mTrackAttribs[iTrk].mTime - GetConstantMem()->ioPtrs.trdTriggerTimes[collisionId]) * mTPCVdrift * mTrackAttribs[iTrk].mSide; - //float addZerr = (mTrackAttribs[iTrk].mTimeAddMax + mTrackAttribs[iTrk].mTimeSubMax) * .5f * mTPCVdrift; - // increase Z error based on time window - // -> this is here since it was done before, but the efficiency seems to be better if the covariance is not updated (more tracklets are attached) - //t->updateCovZ2(addZerr * addZerr); // TODO check again once detailed performance study tools are available, maybe this can be tuned + // float addZerr = (mTrackAttribs[iTrk].mTimeAddMax + mTrackAttribs[iTrk].mTimeSubMax) * .5f * mTPCVdrift; + // increase Z error based on time window + // -> this is here since it was done before, but the efficiency seems to be better if the covariance is not updated (more tracklets are attached) + // t->updateCovZ2(addZerr * addZerr); // TODO check again once detailed performance study tools are available, maybe this can be tuned } const GPUTRDpadPlane* pad = nullptr; const GPUTRDTrackletWord* tracklets = GetConstantMem()->ioPtrs.trdTracklets; @@ -637,7 +636,7 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK } Hypothesis hypo(trkWork->getNlayersFindable(), iCandidate, trkltIdx, trkWork->getChi2() + chi2); InsertHypothesis(hypo, nCurrHypothesis, hypothesisIdxOffset); - } // end tracklet in window + } // end tracklet in window } // tracklet loop } // chamber loop @@ -723,7 +722,7 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK #ifdef ENABLE_GPUTRDDEBUG prop->setTrack(&trackNoUp); prop->rotate(GetAlphaOfSector(trkltSec)); - //prop->propagateToX(spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getX(), .8f, 2.f); + // prop->propagateToX(spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getX(), .8f, 2.f); prop->propagateToX(mR[tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetDetector()], .8f, 2.f); prop->setTrack(trkWork); #endif @@ -1103,7 +1102,6 @@ GPUd() bool GPUTRDTracker_t::IsGeoFindable(const TRDTRK* t, const return true; } - #ifndef GPUCA_GPUCODE namespace o2::gpu { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 29a9b529b0558..431fa357e8b89 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -85,7 +85,7 @@ class GPUTRDTracker_t : public GPUProcessor int32_t mLayers; // number of layers with TRD space point int32_t mCandidateId; // to which track candidate the hypothesis belongs int32_t mTrackletId; // tracklet index to be used for update (global index within tracklet array) - float mChi2; // predicted chi2 for given space point + float mChi2; // predicted chi2 for given space point GPUd() float GetReducedChi2() { return mLayers > 0 ? 
mChi2 / mLayers : mChi2; } GPUd() Hypothesis() : mLayers(0), mCandidateId(-1), mTrackletId(-1), mChi2(9999.f) {} @@ -148,32 +148,32 @@ class GPUTRDTracker_t : public GPUProcessor GPUd() const typename PROP::propagatorParam* getPropagatorParam(); protected: - float* mR; // radial position of each TRD chamber, alignment taken into account, radial spread within chambers < 7mm - bool mIsInitialized; // flag is set upon initialization - bool mGenerateSpacePoints; // if true, only tracklets are provided as input and they will be converted into space points by the tracker - bool mProcessPerTimeFrame; // if true, tracking is done per time frame instead of on a single events basis - int16_t mNAngleHistogramBins; // number of bins per chamber for the angular difference histograms - float mAngleHistogramRange; // range of impact angles covered by each histogram - int16_t mMemoryPermanent; // memory id of permanent memory for the tracker - int16_t mMemoryTracklets; // memory id of memory for TRD tracklets - int16_t mMemoryTracks; // memory id of memory for tracks (used for i/o) - int32_t mNMaxCollisions; // max number of collisions to process (per time frame) - int32_t mNMaxTracks; // max number of tracks the tracker can handle (per event) - int32_t mNMaxSpacePoints; // max number of space points hold by the tracker (per event) - TRDTRK* mTracks; // array of trd-updated tracks - HelperTrackAttributes* mTrackAttribs; // array with additional (transient) track attributes - int32_t mNCandidates; // max. track hypothesis per layer - int32_t mNTracks; // number of TPC tracks to be matched - int32_t mNEvents; // number of processed events - int32_t mMaxBackendThreads; // maximum number of supported threads + float* mR; // radial position of each TRD chamber, alignment taken into account, radial spread within chambers < 7mm + bool mIsInitialized; // flag is set upon initialization + bool mGenerateSpacePoints; // if true, only tracklets are provided as input and they will be converted into space points by the tracker + bool mProcessPerTimeFrame; // if true, tracking is done per time frame instead of on a single events basis + int16_t mNAngleHistogramBins; // number of bins per chamber for the angular difference histograms + float mAngleHistogramRange; // range of impact angles covered by each histogram + int16_t mMemoryPermanent; // memory id of permanent memory for the tracker + int16_t mMemoryTracklets; // memory id of memory for TRD tracklets + int16_t mMemoryTracks; // memory id of memory for tracks (used for i/o) + int32_t mNMaxCollisions; // max number of collisions to process (per time frame) + int32_t mNMaxTracks; // max number of tracks the tracker can handle (per event) + int32_t mNMaxSpacePoints; // max number of space points hold by the tracker (per event) + TRDTRK* mTracks; // array of trd-updated tracks + HelperTrackAttributes* mTrackAttribs; // array with additional (transient) track attributes + int32_t mNCandidates; // max. 
track hypothesis per layer + int32_t mNTracks; // number of TPC tracks to be matched + int32_t mNEvents; // number of processed events + int32_t mMaxBackendThreads; // maximum number of supported threads // index of first tracklet for each chamber within tracklets array, last entry is total number of tracklets for given collision // the array has (kNChambers + 1) * numberOfCollisions entries // note, that for collision iColl one has to add an offset corresponding to the index of the first tracklet of iColl to the index stored in mTrackletIndexArray int32_t* mTrackletIndexArray; - Hypothesis* mHypothesis; // array with multiple track hypothesis - TRDTRK* mCandidates; // array of tracks for multiple hypothesis tracking - GPUTRDSpacePoint* mSpacePoints; // array with tracklet coordinates in global tracking frame - const GPUTRDGeometry* mGeo; // TRD geometry + Hypothesis* mHypothesis; // array with multiple track hypothesis + TRDTRK* mCandidates; // array of tracks for multiple hypothesis tracking + GPUTRDSpacePoint* mSpacePoints; // array with tracklet coordinates in global tracking frame + const GPUTRDGeometry* mGeo; // TRD geometry /// ---- error parametrization depending on magnetic field ---- float mRPhiA2; // parameterization for tracklet position resolution float mRPhiB; // parameterization for tracklet position resolution @@ -185,14 +185,14 @@ class GPUTRDTracker_t : public GPUProcessor float mAngleToDyB; // parameterization for conversion track angle -> tracklet deflection float mAngleToDyC; // parameterization for conversion track angle -> tracklet deflection /// ---- end error parametrization ---- - bool mDebugOutput; // store debug output - static constexpr const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker - float mMaxEta; // TPC tracks with higher eta are ignored - float mRoadZ; // in z, a constant search road is used - float mZCorrCoefNRC; // tracklet z-position depends linearly on track dip angle - float mTPCVdrift; // TPC drift velocity used for shifting TPC tracks along Z - float mTPCTDriftOffset; // TPC drift time additive offset - GPUTRDTrackerDebug* mDebug; // debug output + bool mDebugOutput; // store debug output + static constexpr const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker + float mMaxEta; // TPC tracks with higher eta are ignored + float mRoadZ; // in z, a constant search road is used + float mZCorrCoefNRC; // tracklet z-position depends linearly on track dip angle + float mTPCVdrift; // TPC drift velocity used for shifting TPC tracks along Z + float mTPCTDriftOffset; // TPC drift time additive offset + GPUTRDTrackerDebug* mDebug; // debug output }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index fc874070ec9b8..cd7dfb9432b93 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -82,7 +82,7 @@ namespace o2::gpu class GPUTRDTrackletWord : private o2::trd::Tracklet64 { public: - GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord){}; + GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord) {}; GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() GPUTRDTrackletWord& operator=(const 
GPUTRDTrackletWord& rhs) = default; GPUdDefault() ~GPUTRDTrackletWord() = default; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h index ceb63e788564a..9087ec9a431f6 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h @@ -138,12 +138,12 @@ class GPUDisplayFrontend : public GPUDisplayFrontendInterface std::unique_ptr mGUI; - void HandleKey(uint8_t key); // Callback for handling key presses - int32_t DrawGLScene(); // Callback to draw the GL scene - void HandleSendKey(); // Optional callback to handle key press from external source (e.g. stdin by default) + void HandleKey(uint8_t key); // Callback for handling key presses + int32_t DrawGLScene(); // Callback to draw the GL scene + void HandleSendKey(); // Optional callback to handle key press from external source (e.g. stdin by default) void ResizeScene(int32_t width, int32_t height); // Callback when GL window is resized int32_t InitDisplay(bool initFailure = false); // Callback to initialize the GL Display (to be called in StartDisplay) - void ExitDisplay(); // Callback to clean up the GL Display + void ExitDisplay(); // Callback to clean up the GL Display int32_t& drawTextFontSize(); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx index 8d48536e0a351..e511718e258f7 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx @@ -80,12 +80,12 @@ void KillGLWindow() // Properly Kill The Window BOOL CreateGLWindow(char* title, int32_t width, int32_t height, int32_t bits, bool fullscreenflag) { - GLuint PixelFormat; // Holds The Results After Searching For A Match - WNDCLASS wc; // Windows Class Structure - DWORD dwExStyle; // Window Extended Style - DWORD dwStyle; // Window Style - RECT WindowRect; // Grabs Rectangle Upper Left / Lower Right Values - WindowRect.left = (int64_t)0; // Set Left Value To 0 + GLuint PixelFormat; // Holds The Results After Searching For A Match + WNDCLASS wc; // Windows Class Structure + DWORD dwExStyle; // Window Extended Style + DWORD dwStyle; // Window Style + RECT WindowRect; // Grabs Rectangle Upper Left / Lower Right Values + WindowRect.left = (int64_t)0; // Set Left Value To 0 WindowRect.right = (int64_t)width; // Set Right Value To Requested Width WindowRect.top = (int64_t)0; // Set Top Value To 0 WindowRect.bottom = (int64_t)height; // Set Bottom Value To Requested Height diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index 92da6bbac94e8..a7811c6fd55ed 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -91,7 +91,7 @@ class GPUTPCTrkLbl inline U computeLabel(float* labelWeight = nullptr, float* totalWeight = nullptr, int32_t* maxCount = nullptr) { if (mLabels.size() == 0) { - return U(); //default constructor creates NotSet label + return U(); // default constructor creates NotSet label } else { uint32_t bestLabelNum = 0, bestLabelCount = 0; for (uint32_t j = 0; j < mLabels.size(); j++) { @@ -133,10 +133,10 @@ struct GPUTPCTrkLbl_ret { template GPUTPCTrkLbl_ret(T){}; #ifdef GPUCA_TPC_GEOMETRY_O2 - GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()){}; + GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()) {}; #endif #ifdef GPUCA_STANDALONE - 
GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID){}; + GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID) {}; #endif void setFakeFlag() { diff --git a/GPU/GPUTracking/qa/genEvents.h b/GPU/GPUTracking/qa/genEvents.h index fb3c5f22d61ef..43b946e6238b2 100644 --- a/GPU/GPUTracking/qa/genEvents.h +++ b/GPU/GPUTracking/qa/genEvents.h @@ -31,7 +31,7 @@ class genEvents int32_t GenerateEvent(const GPUParam& sectorParam, char* filename) { return 1; } void FinishEventGenerator() {} - static void RunEventGenerator(GPUChainTracking* rec){}; + static void RunEventGenerator(GPUChainTracking* rec) {}; }; #else diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cd6267179c844..cdb41ec5813f2 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -32,8 +32,7 @@ namespace qConfig { #define QCONFIG_SETTING(name, type) \ - struct qon_mxcat3(q, name, _t) \ - { \ + struct qon_mxcat3(q, name, _t) { \ type v; \ constexpr qon_mxcat3(q, name, _t)(type s) : v(s) {} \ }; \ @@ -41,8 +40,7 @@ namespace qConfig #define QCONFIG_SETTING_TEMPLATE(name) \ template \ - struct qon_mxcat3(q, name, _t) \ - { \ + struct qon_mxcat3(q, name, _t) { \ T v; \ constexpr qon_mxcat3(q, name, _t)(const T& s) : v(s) {} \ }; \ @@ -68,7 +66,7 @@ static inline const char* getOptName(const char** argv, int32_t i) template struct qConfigSettings { - qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false){}; + qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false) {}; template qConfigSettings(const qConfigSettings v) : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(v.message), allowEmpty(v.allowEmpty){}; bool checkMin, checkMax; diff --git a/GPU/GPUTracking/utils/threadserver.h b/GPU/GPUTracking/utils/threadserver.h index 606531f46f201..c8dfe831fd578 100644 --- a/GPU/GPUTracking/utils/threadserver.h +++ b/GPU/GPUTracking/utils/threadserver.h @@ -98,7 +98,7 @@ class qThreadCls qThreadParamCls& XthreadParam = *((qThreadParamCls*)&this->threadParam); XthreadParam.pCls = pCls; - XthreadParam.pFunc = (void (S::*)(void*))pFunc; + XthreadParam.pFunc = (void(S::*)(void*))pFunc; XthreadParam.threadNum = threadNum; XthreadParam.pinCPU = pinCPU; pthread_t thr; @@ -150,7 +150,7 @@ void* qThreadCls::qThreadWrapperCls(T* arg) sched_setaffinity(0, sizeof(tmp_mask), &tmp_mask); } - void (S::*pFunc)(T*) = (void (S::*)(T*))arg_A->pFunc; + void (S::*pFunc)(T*) = (void(S::*)(T*))arg_A->pFunc; (arg_A->pCls->*pFunc)(arg); arg_A->threadMutex[1].Unlock(); diff --git a/GPU/GPUbenchmark/cuda/Kernels.cu b/GPU/GPUbenchmark/cuda/Kernels.cu index 75799e4aa8c96..c309e7b2dbc5d 100644 --- a/GPU/GPUbenchmark/cuda/Kernels.cu +++ b/GPU/GPUbenchmark/cuda/Kernels.cu @@ -666,9 +666,9 @@ void GPUbenchmark::runTest(Test test, Mode mode, KernelConfig config) } nThreads *= mOptions.threadPoolFraction; - void (*kernel)(chunk_t*, size_t) = &gpu::read_k; // Initialising to a default value - void (*kernel_distributed)(chunk_t**, size_t*) = &gpu::read_dist_k; // Initialising to a default value - void (*kernel_rand)(chunk_t*, size_t, int32_t) = &gpu::rand_read_k; // Initialising to a default value + void (*kernel)(chunk_t*, size_t) = &gpu::read_k; // Initialising to a default value + void (*kernel_distributed)(chunk_t**, size_t*) = &gpu::read_dist_k; // Initialising to a 
default value + void (*kernel_rand)(chunk_t*, size_t, int32_t) = &gpu::rand_read_k; // Initialising to a default value void (*kernel_rand_distributed)(chunk_t**, size_t*, int32_t) = &gpu::rand_read_dist_k; // Initialising to a default value bool is_random{false}; diff --git a/GPU/TPCFastTransformation/BandMatrixSolver.h b/GPU/TPCFastTransformation/BandMatrixSolver.h index f11f538e49275..7de44fe4b85e2 100644 --- a/GPU/TPCFastTransformation/BandMatrixSolver.h +++ b/GPU/TPCFastTransformation/BandMatrixSolver.h @@ -131,7 +131,7 @@ inline void BandMatrixSolver::triangulateBlock(double AA[], double b A[0] = c; // store 1/a[0][0] double* rowi = A + BandWidthT - 1; for (int32_t i = 1; i < m; i++) { // row 0+i - double ai = c * A[i]; // A[0][i] + double ai = c * A[i]; // A[0][i] for (int32_t j = i; j < m; j++) { rowi[j] -= ai * A[j]; // A[i][j] -= A[0][j]/A[0][0]*A[i][0] } diff --git a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx index d709e5b9af92d..3edd8f8f22e55 100644 --- a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx +++ b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx @@ -71,7 +71,7 @@ void ChebyshevFit1D::fit() mA[i * mN + j] = mA[j * mN + i]; } } - //print(); + // print(); { double* Ai = mA.data(); for (int32_t i = 0; i < mN; i++, Ai += mN) { @@ -88,7 +88,7 @@ void ChebyshevFit1D::fit() } mB[j] -= c * mB[i]; } - //print(); + // print(); } } { diff --git a/GPU/TPCFastTransformation/CorrectionMapsHelper.h b/GPU/TPCFastTransformation/CorrectionMapsHelper.h index 32ff6e1f06b10..46070b36e63b2 100644 --- a/GPU/TPCFastTransformation/CorrectionMapsHelper.h +++ b/GPU/TPCFastTransformation/CorrectionMapsHelper.h @@ -165,25 +165,25 @@ class CorrectionMapsHelper MapRefBit = 0x2, LumiBit = 0x4, MapMShapeBit = 0x10 }; - bool mOwner = false; // is content of pointers owned by the helper + bool mOwner = false; // is content of pointers owned by the helper bool mLumiCTPAvailable = false; // is CTP Lumi available // these 2 are global options, must be set by the workflow global options int32_t mLumiScaleType = -1; // use CTP Lumi (1) or TPCScaler (2) for the correction scaling, 0 - no scaling int32_t mLumiScaleMode = -1; // scaling-mode of the correciton maps int32_t mUpdatedFlags = 0; - float mInstLumiCTP = 0.; // instanteneous luminosity from CTP (a.u) - float mInstLumi = 0.; // instanteneous luminosity (a.u) used for TPC corrections scaling - float mMeanLumi = 0.; // mean luminosity of the map (a.u) used for TPC corrections scaling - float mMeanLumiRef = 0.; // mean luminosity of the ref map (a.u) used for TPC corrections scaling reference - float mLumiScale = 0.; // precalculated mInstLumi/mMeanLumi - float mMeanLumiOverride = -1.f; // optional value to override mean lumi - float mMeanLumiRefOverride = -1.f; // optional value to override ref mean lumi - float mInstCTPLumiOverride = -1.f; // optional value to override inst lumi from CTP - bool mEnableMShape = false; ///< use v shape correction - bool mScaleInverse{false}; // if set to false the inverse correction is already scaled and will not scaled again - o2::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform - o2::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform - o2::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for v-shape distortions on A-side + float mInstLumiCTP = 0.; // instanteneous luminosity from CTP (a.u) + float mInstLumi = 0.; // instanteneous luminosity (a.u) used for TPC corrections scaling + float mMeanLumi = 0.; // mean luminosity of the map 
(a.u) used for TPC corrections scaling + float mMeanLumiRef = 0.; // mean luminosity of the ref map (a.u) used for TPC corrections scaling reference + float mLumiScale = 0.; // precalculated mInstLumi/mMeanLumi + float mMeanLumiOverride = -1.f; // optional value to override mean lumi + float mMeanLumiRefOverride = -1.f; // optional value to override ref mean lumi + float mInstCTPLumiOverride = -1.f; // optional value to override inst lumi from CTP + bool mEnableMShape = false; ///< use v shape correction + bool mScaleInverse{false}; // if set to false the inverse correction is already scaled and will not scaled again + o2::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform + o2::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform + o2::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for v-shape distortions on A-side ClassDefNV(CorrectionMapsHelper, 6); }; diff --git a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h index 506cd39b519af..e750bffd28f4b 100644 --- a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h +++ b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h @@ -48,13 +48,13 @@ struct NDPiecewisePolynomialContainer { /// for ROOT I/O NDPiecewisePolynomialContainer() = default; - const uint32_t mDim{}; ///< number of dimensions of the polynomial - const uint32_t mDegree{}; ///< degree of the polynomials - const std::vector mParams{}; ///< parameters of the polynomial - const bool mInteractionOnly{}; ///< consider only interaction terms - const std::vector mMin{}; ///< min vertices positions of the grid - const std::vector mMax{}; ///< max vertices positions of the grid - const std::vector mN{}; ///< number of vertices for each dimension + const uint32_t mDim{}; ///< number of dimensions of the polynomial + const uint32_t mDegree{}; ///< degree of the polynomials + const std::vector mParams{}; ///< parameters of the polynomial + const bool mInteractionOnly{}; ///< consider only interaction terms + const std::vector mMin{}; ///< min vertices positions of the grid + const std::vector mMax{}; ///< max vertices positions of the grid + const std::vector mN{}; ///< number of vertices for each dimension }; #endif diff --git a/GPU/TPCFastTransformation/Spline1DHelper.cxx b/GPU/TPCFastTransformation/Spline1DHelper.cxx index 938604bb9172d..9177c67d8b87b 100644 --- a/GPU/TPCFastTransformation/Spline1DHelper.cxx +++ b/GPU/TPCFastTransformation/Spline1DHelper.cxx @@ -606,7 +606,7 @@ int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) Spline1D spline2(spline1); spline1.approximateFunction(0., TMath::Pi(), F, nAuxiliaryPoints); - //if (itry == 0) + // if (itry == 0) { TFile outf("testSpline1D.root", "recreate"); if (outf.IsZombie()) { @@ -731,9 +731,9 @@ int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) } } // draw } - //delete canv; - //delete nt; - //delete knots; + // delete canv; + // delete nt; + // delete knots; statDf1 = sqrt(statDf1 / statN); statDf2 = sqrt(statDf2 / statN); diff --git a/GPU/TPCFastTransformation/Spline1DHelperOld.h b/GPU/TPCFastTransformation/Spline1DHelperOld.h index fc8d33ad64f87..971541e03258e 100644 --- a/GPU/TPCFastTransformation/Spline1DHelperOld.h +++ b/GPU/TPCFastTransformation/Spline1DHelperOld.h @@ -41,13 +41,13 @@ class Spline1DHelperOld /// \brief Helper structure for 1D spline construction /// struct DataPoint { - double u; ///< u coordinate - double cS0; ///< a coefficient for s0 - double cZ0; ///< a coefficient 
for s'0 - double cS1; ///< a coefficient for s1 - double cZ1; ///< a coefficient for s'1 + double u; ///< u coordinate + double cS0; ///< a coefficient for s0 + double cZ0; ///< a coefficient for s'0 + double cS1; ///< a coefficient for s1 + double cZ1; ///< a coefficient for s'1 int32_t iKnot; ///< index of the left knot of the segment - bool isKnot; ///< is the point placed at a knot + bool isKnot; ///< is the point placed at a knot }; /// _____________ Constructors / destructors __________________________ @@ -160,11 +160,11 @@ class Spline1DHelperOld /// helpers for the construction of 1D spline - Spline1D mSpline; ///< copy of the spline - int32_t mFdimensions; ///< n of F dimensions - std::vector mDataPoints; ///< measurement points + Spline1D mSpline; ///< copy of the spline + int32_t mFdimensions; ///< n of F dimensions + std::vector mDataPoints; ///< measurement points std::vector mKnotDataPoints; ///< which measurement points are at knots - std::vector mLSMmatrixFull; ///< a matrix to convert the measurements into the spline parameters with the LSM method + std::vector mLSMmatrixFull; ///< a matrix to convert the measurements into the spline parameters with the LSM method std::vector mLSMmatrixSderivatives; std::vector mLSMmatrixSvalues; diff --git a/GPU/TPCFastTransformation/Spline1DSpec.cxx b/GPU/TPCFastTransformation/Spline1DSpec.cxx index 603013d5e0808..0d33cdc88010a 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline1DSpec.cxx @@ -144,7 +144,7 @@ void Spline1DContainer::recreate(int32_t nYdim, int32_t numberOfKnots, co } } -#endif //GPUCA_GPUCODE +#endif // GPUCA_GPUCODE template void Spline1DContainer::print() const diff --git a/GPU/TPCFastTransformation/Spline1DSpec.h b/GPU/TPCFastTransformation/Spline1DSpec.h index 1ed1cc322ede3..6462f291d1136 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.h +++ b/GPU/TPCFastTransformation/Spline1DSpec.h @@ -211,13 +211,13 @@ class Spline1DContainer : public FlatObject /// _____________ Data members ____________ - int32_t mYdim = 0; ///< dimentionality of F - int32_t mNumberOfKnots = 0; ///< n knots on the grid - int32_t mUmax = 0; ///< U of the last knot - DataT mXmin = 0; ///< X of the first knot - DataT mXtoUscale = 0; ///< a scaling factor to convert X to U + int32_t mYdim = 0; ///< dimentionality of F + int32_t mNumberOfKnots = 0; ///< n knots on the grid + int32_t mUmax = 0; ///< U of the last knot + DataT mXmin = 0; ///< X of the first knot + DataT mXtoUscale = 0; ///< a scaling factor to convert X to U int32_t* mUtoKnotMap = nullptr; //! (transient!!) pointer to (integer U -> knot index) map inside the mFlatBufferPtr array - DataT* mParameters = nullptr; //! (transient!!) pointer to F-dependent parameters inside the mFlatBufferPtr array + DataT* mParameters = nullptr; //! (transient!!) 
pointer to F-dependent parameters inside the mFlatBufferPtr array ClassDefNV(Spline1DContainer, 1); }; diff --git a/GPU/TPCFastTransformation/Spline2DSpec.cxx b/GPU/TPCFastTransformation/Spline2DSpec.cxx index 4571110bdedaa..055530b9314c2 100644 --- a/GPU/TPCFastTransformation/Spline2DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline2DSpec.cxx @@ -66,7 +66,7 @@ void Spline2DContainer::setActualBufferAddress(char* actualFlatBufferPtr) mParameters = nullptr; parametersOffset = alignSize(u2Offset + mGridX2.getFlatBufferSize(), getParameterAlignmentBytes()); - //bufferSize = parametersOffset + getSizeOfParameters(); + // bufferSize = parametersOffset + getSizeOfParameters(); mParameters = reinterpret_cast(mFlatBufferPtr + parametersOffset); mGridX1.setActualBufferAddress(mFlatBufferPtr); diff --git a/GPU/TPCFastTransformation/SplineHelper.cxx b/GPU/TPCFastTransformation/SplineHelper.cxx index b0d1f4348ca60..6e1b53510e0d0 100644 --- a/GPU/TPCFastTransformation/SplineHelper.cxx +++ b/GPU/TPCFastTransformation/SplineHelper.cxx @@ -64,8 +64,8 @@ int32_t SplineHelper::pointstoarray(const int32_t indices[], const int32_ } //////////////// -//arraytopoints -// HILFSFUNKTION +// arraytopoints +// HILFSFUNKTION template int32_t SplineHelper::arraytopoints(int32_t point, int32_t result[], const int32_t numbers[], int32_t dim) { @@ -133,8 +133,8 @@ void SplineHelper::approximateFunction( } // end for all DataPoints d // END MY VERSION - //std::vector dataPointF(getNumberOfDataPoints() * mFdimensions); - //DUMYY VERSION Commented out + // std::vector dataPointF(getNumberOfDataPoints() * mFdimensions); + // DUMYY VERSION Commented out /* for (int32_t i = 0; i < getNumberOfDataPoints() * mFdimensions; i++) { dataPointF[i] = 1.; } */ @@ -250,11 +250,11 @@ void SplineHelper::approximateFunction( // TO BE REMOVED TEST: // LOG(info) << "number of paramtertypes per knot : " << numberOfParameterTypes << ", "; - std::unique_ptr allParameters[numberOfParameterTypes]; //Array for the different parametertypes s, s'u, s'v, s''uv,... + std::unique_ptr allParameters[numberOfParameterTypes]; // Array for the different parametertypes s, s'u, s'v, s''uv,... for (int32_t i = 0; i < numberOfParameterTypes; i++) { - allParameters[i] = std::unique_ptr(new double[numberOfAllDataPoints * mFdimensions]); //To-Do:Fdim!! + allParameters[i] = std::unique_ptr(new double[numberOfAllDataPoints * mFdimensions]); // To-Do:Fdim!! } - //filling allParameters[0] and FParameters with s: + // filling allParameters[0] and FParameters with s: for (int32_t i = 0; i < numberOfAllDataPoints; i++) { for (int32_t f = 0; f < mFdimensions; f++) { // for all f-dimensions allParameters[0][i * mFdimensions + f] = DataPointF[i * mFdimensions + f]; // TO DO - Just get the pointer adress there PLEASE! 
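Aside on the pointstoarray / arraytopoints helpers used throughout this SplineHelper.cxx hunk: they flatten an N-dimensional grid index into a single offset and back. A self-contained sketch of such a row-major mapping, assuming dimension 0 varies fastest (my own illustrative implementation; the real helpers may order dimensions differently):

#include <cassert>
#include <cstddef>
#include <vector>

// Flatten grid indices[d] on a grid with sizes numbers[d] into one offset,
// dimension 0 varying fastest.
int pointsToArray(const std::vector<int>& indices, const std::vector<int>& numbers)
{
  int offset = 0, stride = 1;
  for (std::size_t d = 0; d < numbers.size(); d++) {
    offset += indices[d] * stride;
    stride *= numbers[d];
  }
  return offset;
}

// Inverse mapping: recover the per-dimension indices from the flat offset.
std::vector<int> arrayToPoints(int offset, const std::vector<int>& numbers)
{
  std::vector<int> indices(numbers.size());
  for (std::size_t d = 0; d < numbers.size(); d++) {
    indices[d] = offset % numbers[d];
    offset /= numbers[d];
  }
  return indices;
}

int main()
{
  std::vector<int> sizes{4, 3, 5}; // e.g. knots per dimension
  std::vector<int> point{2, 1, 3};
  int flat = pointsToArray(point, sizes); // 2 + 1*4 + 3*12 = 42
  assert(arrayToPoints(flat, sizes) == point);
  return 0;
}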
@@ -273,24 +273,24 @@ void SplineHelper::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate KNotindices for all dimensions // WORKAROUND Getting Knotindices: knotindices[j] = p0indices[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(p0indices[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(p0indices[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } // get the knotindexvalue for FParameters: int32_t knotind = pointstoarray(knotindices, numberOfKnots, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { // for all f-dimensions get function values into Fparameters - Fparameters[knotind * numberOfParameterTypes * mFdimensions + f] = DataPointF[i * mFdimensions + f]; ///write derivatives in FParameters + Fparameters[knotind * numberOfParameterTypes * mFdimensions + f] = DataPointF[i * mFdimensions + f]; /// write derivatives in FParameters } } // end if isKnot } // end i (filling DataPointF Values into allParameters[0] and FParameters) // now: allParameters[0] = dataPointF; - //Array for input DataPointF-values for Spline1D::approximateFunctionGradually(...); + // Array for input DataPointF-values for Spline1D::approximateFunctionGradually(...); std::unique_ptr dataPointF1D[mXdimensions]; for (int32_t i = 0; i < mXdimensions; i++) { dataPointF1D[i] = std::unique_ptr(new double[numberOfDataPoints[i] * mFdimensions]); // To-Do:Fdim!! For s and derivetives at all knots. } - //Array to be filled by Spline1D::approximateFunctionGradually(...); + // Array to be filled by Spline1D::approximateFunctionGradually(...); std::unique_ptr par[mXdimensions]; std::unique_ptr parD[mXdimensions]; @@ -301,7 +301,7 @@ void SplineHelper::approximateFunction( // LOG(info) << "NumberOfParameters: " << mNumberOfParameters ; - //STARTING MAIN-LOOP, for all Parametertypes: + // STARTING MAIN-LOOP, for all Parametertypes: for (int32_t p = 1; p < numberOfParameterTypes; p++) { // p = 1!! Wir kriegen s (p0) durch approximateFunction()oben int32_t dimension = 0; // find the dimension for approximation for (int32_t i = (int32_t)(log2f((float)p)); i >= 0; i--) { @@ -366,9 +366,9 @@ void SplineHelper::approximateFunction( for (int32_t i = 0; i < mXdimensions; i++) { redistributionindex[i] = startpoint[i]; } - //redistributing the derivatives at dimension-Knots into array p + // redistributing the derivatives at dimension-Knots into array p for (int32_t i = 0; i < numberOfKnots[dimension]; i++) { // for all dimension-Knots - redistributionindex[dimension] = mHelpers[dimension].getKnotDataPoint(i); //find the indices + redistributionindex[dimension] = mHelpers[dimension].getKnotDataPoint(i); // find the indices int32_t finalposition = pointstoarray(redistributionindex, numberOfDataPoints, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { @@ -380,7 +380,7 @@ void SplineHelper::approximateFunction( if (!mHelpers[j].getDataPoint(redistributionindex[j]).isKnot) { isKnot = 0; break; - } //noch mal checken!! Das muss noch anders!! + } // noch mal checken!! Das muss noch anders!! 
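Aside on the "WORKAROUND Getting Knotindices" lines in this hunk: they recover a knot index from a data-point index by integer division, which works when the data points are spread uniformly with a fixed number per knot interval. A small arithmetic sketch of that relation (the counts below are made-up examples):

#include <cstdio>

// With data points spread uniformly between knots, (nDataPoints - 1) / (nKnots - 1)
// data points fall into each knot interval, so integer division of the data-point
// index by that count recovers the knot index.
int knotIndexOfDataPoint(int iDataPoint, int nDataPoints, int nKnots)
{
  const int pointsPerInterval = (nDataPoints - 1) / (nKnots - 1);
  return iDataPoint / pointsPerInterval;
}

int main()
{
  // 13 data points over 4 knots -> 4 data points per knot interval.
  std::printf("%d %d %d\n",
              knotIndexOfDataPoint(0, 13, 4),   // 0
              knotIndexOfDataPoint(9, 13, 4),   // 2
              knotIndexOfDataPoint(12, 13, 4)); // 3
  return 0;
}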
} if (isKnot) { // for all knots @@ -388,20 +388,20 @@ void SplineHelper::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate Knotindices for all dimensions knotindices[j] = redistributionindex[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } // get the knotindexvalue for FParameters: int32_t knotind = pointstoarray(knotindices, numberOfKnots, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { - Fparameters[knotind * numberOfParameterTypes * mFdimensions + p * mFdimensions + f] = par[dimension][2 * i * mFdimensions + mFdimensions + f]; ///write derivatives in FParameters + Fparameters[knotind * numberOfParameterTypes * mFdimensions + p * mFdimensions + f] = par[dimension][2 * i * mFdimensions + mFdimensions + f]; /// write derivatives in FParameters } } } // end for all fknots (for redistribution) // recalculation: for (int32_t i = 0; i < numberOfDataPoints[dimension]; i++) { // this is somehow still redundant// TO DO: ONLY PART OF approximateFunction WHERE NDIM is considerd!! - redistributionindex[dimension] = i; // getting current datapointindices - bool isKnot = 1; // check is current datapoint a knot? + redistributionindex[dimension] = i; // getting current datapointindices + bool isKnot = 1; // check is current datapoint a knot? for (int32_t j = 0; j < mXdimensions; j++) { if (!mHelpers[j].getDataPoint(redistributionindex[j]).isKnot) { isKnot = 0; @@ -410,7 +410,7 @@ void SplineHelper::approximateFunction( } double splineF[mFdimensions]; double u = mHelpers[dimension].getDataPoint(i).u; - mHelpers[dimension].getSpline().interpolateU(mFdimensions, parD[dimension].get(), u, splineF); //recalculate at all datapoints of dimension + mHelpers[dimension].getSpline().interpolateU(mFdimensions, parD[dimension].get(), u, splineF); // recalculate at all datapoints of dimension for (int32_t dim = 0; dim < mFdimensions; dim++) { // writing it in allParameters // LOG(info)<::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate KNotindices for all dimensions knotindices[j] = redistributionindex[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } int32_t currentknotarrayindex = pointstoarray(knotindices, numberOfKnots, mXdimensions); // getting the recalculated value into FParameters: @@ -433,7 +433,7 @@ void SplineHelper::approximateFunction( } // end recalculation } // end of all1DSplines } // end of for parametertypes -} //end of approxymateFunction MYVERSION! +} // end of approxymateFunction MYVERSION! 
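Aside on the main parameter-type loop above (p = 1 .. numberOfParameterTypes - 1, with the dimension taken from the highest set bit of p via log2f): this matches an encoding where bit d of p means the knot value carries a derivative along dimension d (s, s'u, s'v, s''uv, ...). A short sketch of that enumeration, assuming this bit convention:

#include <cstdio>

// Enumerate the parameter types of an xDim-dimensional spline knot:
// bit d of p set means a derivative along dimension d is applied to s.
void listParameterTypes(int xDim)
{
  const int nTypes = 1 << xDim; // s, s'_x0, s'_x1, s''_x0x1, ...
  for (int p = 0; p < nTypes; p++) {
    std::printf("type %d: s", p);
    for (int d = 0; d < xDim; d++) {
      if (p & (1 << d)) {
        std::printf(" d/dx%d", d);
      }
    }
    std::printf("\n");
  }
}

int main()
{
  listParameterTypes(2); // 4 parameter types per knot for a 2D spline
  return 0;
}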
template int32_t SplineHelper::test(const bool draw, const bool drawDataPoints) diff --git a/GPU/TPCFastTransformation/SplineHelper.h b/GPU/TPCFastTransformation/SplineHelper.h index 986297e368aab..8c99e8113864f 100644 --- a/GPU/TPCFastTransformation/SplineHelper.h +++ b/GPU/TPCFastTransformation/SplineHelper.h @@ -106,9 +106,9 @@ class SplineHelper /// Stores an error message int32_t storeError(Int_t code, const char* msg); - TString mError = ""; ///< error string - int32_t mXdimensions; ///< number of X dimensions - int32_t mFdimensions; ///< number of F dimensions + TString mError = ""; ///< error string + int32_t mXdimensions; ///< number of X dimensions + int32_t mFdimensions; ///< number of F dimensions int32_t mNumberOfParameters; ///< number of parameters int32_t mNumberOfDataPoints; ///< number of data points std::vector> mHelpers; diff --git a/GPU/TPCFastTransformation/SplineSpec.h b/GPU/TPCFastTransformation/SplineSpec.h index dae17b22f42ea..1af427dee503b 100644 --- a/GPU/TPCFastTransformation/SplineSpec.h +++ b/GPU/TPCFastTransformation/SplineSpec.h @@ -299,7 +299,7 @@ class SplineSpec : public SplineContainer DataT iParameters[(1 << (2 * maxXdim)) * maxYdim]; // Array for all parameters - //get the indices of the "most left" Knot: + // get the indices of the "most left" Knot: int32_t indices[maxXdim]; // indices of the 'most left' knot for (int32_t i = 0; i < nXdim; i++) { @@ -309,7 +309,7 @@ class SplineSpec : public SplineContainer int32_t indicestmp[maxXdim]; for (int32_t i = 0; i < nKnotParametersPerY; i++) { // for every necessary Knot for (int32_t k = 0; k < nXdim; k++) { - indicestmp[k] = indices[k] + (i / (1 << k)) % 2; //get the knot-indices in every dimension (mirrored order binary counting) + indicestmp[k] = indices[k] + (i / (1 << k)) % 2; // get the knot-indices in every dimension (mirrored order binary counting) } int32_t index = TBase::getKnotIndex(indicestmp); // get index of the current Knot @@ -317,7 +317,7 @@ class SplineSpec : public SplineContainer iParameters[i * nKnotParameters + j] = Parameters[index * nKnotParameters + j]; } } - //now start with the interpolation loop: + // now start with the interpolation loop: constexpr auto maxInterpolations = (1 << (2 * maxXdim - 2)) * maxYdim; @@ -329,10 +329,10 @@ class SplineSpec : public SplineContainer int32_t nInterpolations = (1 << (2 * nXdim - 2)) * nYdim; int32_t nKnots = 1 << (nXdim); - for (int32_t d = 0; d < nXdim; d++) { // for every dimension - DataT* pointer[4] = {S0, D0, S1, D1}; // pointers for interpolation arrays S0, D0, S1, D1 point to Arraystart - for (int32_t i = 0; i < nKnots; i++) { // for every knot - for (int32_t j = 0; j < nKnots; j++) { // for every parametertype + for (int32_t d = 0; d < nXdim; d++) { // for every dimension + DataT* pointer[4] = {S0, D0, S1, D1}; // pointers for interpolation arrays S0, D0, S1, D1 point to Arraystart + for (int32_t i = 0; i < nKnots; i++) { // for every knot + for (int32_t j = 0; j < nKnots; j++) { // for every parametertype int32_t pointernr = 2 * (i % 2) + (j % 2); // to which array should it be delivered for (int32_t k = 0; k < nYdim; k++) { pointer[pointernr][0] = iParameters[(i * nKnots + j) * nYdim + k]; diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h index c353f3f3329e7..9589ecbfc1fc4 100644 --- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h +++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h @@ -43,7 +43,7 @@ class TPCFastSpaceChargeCorrection 
: public FlatObject /// \brief The struct contains necessary info for TPC padrow /// struct RowInfo { - int32_t splineScenarioID{0}; ///< scenario index (which of Spline2D splines to use) + int32_t splineScenarioID{0}; ///< scenario index (which of Spline2D splines to use) size_t dataOffsetBytes[3]{0}; ///< offset for the spline data withing a TPC slice ClassDefNV(RowInfo, 1); }; diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx index 27500d12d9d5d..7eea34c19ec25 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx @@ -428,7 +428,7 @@ double IrregularSpline2D3DCalibrator::getIntegralDeviationLine(const IrregularSp double d2 = dx * dx + dy * dy + dz * dz; sum += sqrt(d2 / 3.); } - //sum = sqrt(sum/3.); + // sum = sqrt(sum/3.); return sum; } diff --git a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h index 2398ff4cd1cbc..885b2e12eea0b 100644 --- a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h +++ b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h @@ -97,11 +97,11 @@ inline T RegularSpline1D::getSpline(const int32_t iknot1, T f0, T f1, T f2, T f3 /// The polynom is constructed with function values f0,f1,f2,f3 at knots {iknot0,iknot1,iknot2,iknot3} /// The u value supposed to be inside the [knot1,knot2] region, but also may be any. - ///f0 = f value at iknot1-1 - ///f1 = f value at iknot1 - ///f2 = f value at iknot1+1 - ///f3 = f value at iknot1+2 - ///u = u value where f(u) is searched for. + /// f0 = f value at iknot1-1 + /// f1 = f value at iknot1 + /// f2 = f value at iknot1+1 + /// f3 = f value at iknot1+2 + /// u = u value where f(u) is searched for. f0 -= f1; f2 -= f1; @@ -153,7 +153,7 @@ inline double RegularSpline1D::knotIndexToU(int32_t iknot) const inline int32_t RegularSpline1D::getKnotIndex(float u) const { - //index is just u elem [0, 1] * numberOfKnots and then floored. (so the "left" coordinate beside u gets chosen) + // index is just u elem [0, 1] * numberOfKnots and then floored. (so the "left" coordinate beside u gets chosen) int32_t index = (int32_t)(u * (mNumberOfKnots - 1)); if (index <= 1) { index = 1; diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx index 076e4ee0ed780..c030bae650414 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx @@ -110,7 +110,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int FlatObject::startConstruction(); - //construct regular grid for v + // construct regular grid for v mGridV.construct(numberOfRows); // For each x element numbersOfKnots may be a single RegularSpline1D with x knots. 
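Editor's note (not part of the patch): the RegularSpline1D hunks above describe a 1D cubic built from four equidistant values f0..f3 around the interval [knot1, knot2], with the knot index obtained as floor(u * (nKnots - 1)) and clamped so that the stencil stays inside the grid. The standalone C++ sketch below only illustrates that scheme; the Catmull-Rom basis, the upper clamp, and all names in it are assumptions for illustration, not the actual RegularSpline1D implementation (only the f0..f3 layout and the lower clamp are visible in the hunks).

#include <algorithm>
#include <cstdio>
#include <vector>

// Map u in [0,1] to the left knot index of the interpolation interval,
// clamped to [1, nKnots - 3] so that the stencil i-1 .. i+2 stays inside the grid (upper clamp assumed).
static int knotIndex(float u, int nKnots)
{
  int i = static_cast<int>(u * (nKnots - 1));
  return std::clamp(i, 1, nKnots - 3);
}

// Cubic through four equidistant samples f0..f3 (Catmull-Rom form, assumed basis),
// evaluated at local coordinate t in [0,1] between f1 and f2.
static float cubic4(float f0, float f1, float f2, float f3, float t)
{
  return 0.5f * (2.f * f1 + (f2 - f0) * t + (2.f * f0 - 5.f * f1 + 4.f * f2 - f3) * t * t +
                 (3.f * (f1 - f2) + f3 - f0) * t * t * t);
}

int main()
{
  std::vector<float> f = {0.f, 1.f, 4.f, 9.f, 16.f, 25.f}; // values at 6 equidistant knots on [0,1]
  const int n = static_cast<int>(f.size());
  const float u = 0.37f;
  const int i = knotIndex(u, n);
  const float t = u * (n - 1) - i; // local coordinate inside the interval [knot i, knot i+1]
  std::printf("u=%.2f -> left knot %d, f(u)=%.4f\n", u, i, cubic4(f[i - 1], f[i], f[i + 1], f[i + 2], t));
  return 0;
}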
@@ -128,7 +128,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int // this is the space which is taken just by the RegularSpline1D's mDataIndexMapOffset = numberOfRows * sizeof(RegularSpline1D); - //The buffer size is the size of the array + // The buffer size is the size of the array FlatObject::finishConstruction(mDataIndexMapOffset + numberOfRows * sizeof(int32_t)); // Array for the 1D-Splines inside the buffer @@ -146,7 +146,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int numberOfKnots += knotsU; } - //save the numberOfRows and numberOfKnots + // save the numberOfRows and numberOfKnots mNumberOfRows = numberOfRows; mNumberOfKnots = numberOfKnots; @@ -156,7 +156,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int // this will count the amount of u-knots "under" a v-coordinate int32_t uSum = 0; - //count the amount of knots which are in gridU's lower than i + // count the number of knots in all gridUs with index lower than i for (int32_t dv = 0; dv < mNumberOfRows; dv++) { dataIndexMap[dv] = uSum; uSum += numbersOfKnots[dv]; diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h index 4da954c8096ac..954738fa74f1b 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h @@ -122,7 +122,7 @@ class SemiregularSpline2D3D : public FlatObject const RegularSpline1D& getGridV() const { return mGridV; } /// Get 1-D grid for V coordinate - //const RegularSpline1D& getGridV() const { return mGridV; } + // const RegularSpline1D& getGridV() const { return mGridV; } const RegularSpline1D& getGridU(const int32_t i) const { return getSplineArray()[i]; } /// Get u,v of i-th knot @@ -131,7 +131,7 @@ class SemiregularSpline2D3D : public FlatObject /// Get size of the mFlatBuffer data size_t getFlatBufferSize() const { return mFlatBufferSize; } - ///Gets the knot index which is the i-th knot in v-space and the j-th knot in u-space + /// Gets the knot index which is the i-th knot in v-space and the j-th knot in u-space int32_t getDataIndex(int32_t i, int32_t j) const; int32_t getDataIndex0(int32_t i, int32_t j) const; @@ -212,16 +212,16 @@ inline void SemiregularSpline2D3D::getKnotUV(int32_t iKnot, float& u, float& v) // the searched u-v-coordinates have to be in this spline. if (iKnot <= nk - 1) { - //in that case v is the current index + // in that case v is the current index v = mGridV.knotIndexToU(i); - //and u the coordinate of the given index + // and u the coordinate of the given index u = gridU.knotIndexToU(iKnot); break; } - //if iKnot is greater than number of knots the searched u-v cannot be in the current gridU - //so we search for nk less indizes and continue with the next v-coordinate + // if iKnot is greater than the number of knots, the searched u-v cannot be in the current gridU + // so we subtract nk indices and continue with the next v-coordinate iKnot -= nk; } } @@ -229,16 +229,16 @@ inline void SemiregularSpline2D3D::getKnotUV(int32_t iKnot, float& u, float& v) template inline void SemiregularSpline2D3D::correctEdges(T* data) const { - //Regular v-Grid (vertical) + // Regular v-Grid (vertical) const RegularSpline1D& gridV = getGridV(); int32_t nv = mNumberOfRows; - //EIGENTLICH V VOR U!!! - //Wegen Splines aber U vor V + // Actually V before U!!!
+ // But U before V because of the splines { // ==== left edge of U ==== - //loop through all gridUs + // loop through all gridUs for (int32_t iv = 1; iv < mNumberOfRows - 1; iv++) { T* f0 = data + getDataIndex(0, iv); T* f1 = f0 + 3; @@ -251,7 +251,7 @@ inline void SemiregularSpline2D3D::correctEdges(T* data) const } { // ==== right edge of U ==== - //loop through all gridUs + // loop through all gridUs for (int32_t iv = 1; iv < mNumberOfRows - 1; iv++) { const RegularSpline1D& gridU = getGridU(iv); int32_t nu = gridU.getNumberOfKnots(); @@ -270,8 +270,8 @@ inline void SemiregularSpline2D3D::correctEdges(T* data) const int32_t nu = gridU.getNumberOfKnots(); for (int32_t iu = 0; iu < nu; iu++) { - //f0 to f3 are the x,y,z values of 4 points in the grid along the v axis. - //Since there are no knots because of the irregularity you can get this by using the getSplineMethod. + // f0 to f3 are the x,y,z values of 4 points in the grid along the v axis. + // Since there are no knots here because of the irregularity, you can get these via the getSpline method. T* f0 = data + getDataIndex(iu, 0); float u = gridU.knotIndexToU(iu); @@ -387,7 +387,7 @@ inline void SemiregularSpline2D3D::getSpline(const T* correctedData, float u, fl dataVx[vxIndex + 2] = gridU.getSpline(ui, correctedData[dataOffset + 2], correctedData[dataOffset + 5], correctedData[dataOffset + 8], correctedData[dataOffset + 11], u); } - //return results + // return results x = mGridV.getSpline(iknotv, dataVx[0], dataVx[3], dataVx[6], dataVx[9], v); y = mGridV.getSpline(iknotv, dataVx[1], dataVx[4], dataVx[7], dataVx[10], v); z = mGridV.getSpline(iknotv, dataVx[2], dataVx[5], dataVx[8], dataVx[11], v); @@ -426,7 +426,7 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, floa */ - //workaround 1: + // workaround 1: int32_t vGridi = mGridV.getKnotIndex(v); float dataU[12]; @@ -464,7 +464,7 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, floa y = res[1]; z = res[2]; -//getSpline( correctedData, u, v, x, y, z ); +// getSpline( correctedData, u, v, x, y, z ); #else getSpline(correctedData, u, v, x, y, z); #endif diff --git a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx index a9c39e8528354..f77a55ffcc894 100644 --- a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx +++ b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx @@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(Polynomials5D_InteractionOnly) const int32_t nPar5D5DegInteraction = 32; // number of parameters const int32_t nDim = 5; // dimensions const int32_t nDegree = 5; // degree - const float abstolerance = 0.0001f; // abosulte difference between refernce to polynomial class + const float abstolerance = 0.0001f; // absolute difference between reference and polynomial class const bool interactionOnly = true; MultivariatePolynomial polCT; // compile time polynomial @@ -142,7 +142,7 @@ BOOST_AUTO_TEST_CASE(Piecewise_polynomials) const int32_t nPar5D5DegInteraction = 32; // number of parameters const int32_t nDim = 5; // dimensions const int32_t nDegree = 5; // degree - const bool interactionOnly = true; // consider only interaction terms + const bool interactionOnly = true; // consider only interaction terms // reference polynomial which will be approximated by the NDPiecewisePolynomials MultivariatePolynomial polCT; diff --git a/GPU/Utils/FlatObject.h b/GPU/Utils/FlatObject.h index eba81a2ba06a2..8e13a8dedb868 100644 --- a/GPU/Utils/FlatObject.h +++ 
b/GPU/Utils/FlatObject.h @@ -28,7 +28,7 @@ #include "GPUCommonRtypes.h" #include "GPUCommonLogger.h" -//#define GPUCA_GPUCODE // uncomment to test "GPU" mode +// #define GPUCA_GPUCODE // uncomment to test "GPU" mode namespace o2 { @@ -319,10 +319,10 @@ class FlatObject InProgress = 0x2 ///< construction started: temporary memory is reserved }; - int32_t mFlatBufferSize = 0; ///< size of the flat buffer - uint32_t mConstructionMask = ConstructionState::NotConstructed; ///< mask for constructed object members, first two bytes are used by this class - char* mFlatBufferContainer = nullptr; //[mFlatBufferSize] Optional container for the flat buffer - char* mFlatBufferPtr = nullptr; //! Pointer to the flat buffer + int32_t mFlatBufferSize = 0; ///< size of the flat buffer + uint32_t mConstructionMask = ConstructionState::NotConstructed; ///< mask for constructed object members, first two bytes are used by this class + char* mFlatBufferContainer = nullptr; //[mFlatBufferSize] Optional container for the flat buffer + char* mFlatBufferPtr = nullptr; //! Pointer to the flat buffer ClassDefNV(FlatObject, 1); }; @@ -569,7 +569,7 @@ inline void FlatObject::setFutureBufferAddress(char* futureFlatBufferPtr) mFlatBufferContainer = nullptr; } -#endif //GPUCA_GPUCODE_DEVICE +#endif // GPUCA_GPUCODE_DEVICE } // namespace gpu } // namespace o2 diff --git a/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx b/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx index 52c3421fa8eb5..a9c9b78e9847e 100644 --- a/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx +++ b/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx @@ -51,7 +51,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.itsClusterMC = ITSClsLabels; } } - //LOG(info) << "Got " << ioPtr.nItsClusters << " ITS Clusters"; + // LOG(info) << "Got " << ioPtr.nItsClusters << " ITS Clusters"; } if (maskTrk[GID::ITS] && ioPtr.nItsTracks == 0) { const auto& ITSTracksArray = recoCont.getITSTracks(); @@ -68,7 +68,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.itsTrackMC = ITSTrkLabels.data(); } } - //LOG(info) << "Got " << ioPtr.nItsTracks << " ITS Tracks"; + // LOG(info) << "Got " << ioPtr.nItsTracks << " ITS Tracks"; } if (maskTrk[GID::ITSTPC] && ioPtr.nTracksTPCITSO2 == 0) { @@ -77,7 +77,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTracksTPCITSO2 = trkITSTPC.size(); ioPtr.tracksTPCITSO2 = trkITSTPC.data(); } - //LOG(info) << "Got " << ioPtr.nTracksTPCITSO2 << " ITS-TPC Tracks"; + // LOG(info) << "Got " << ioPtr.nTracksTPCITSO2 << " ITS-TPC Tracks"; } if (maskCl[GID::TOF] && ioPtr.nTOFClusters == 0) { @@ -86,7 +86,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTOFClusters = tofClusters.size(); ioPtr.tofClusters = tofClusters.data(); } - //LOG(info) << "Got " << ioPtr.nTOFClusters << " TOF Clusters"; + // LOG(info) << "Got " << ioPtr.nTOFClusters << " TOF Clusters"; } if ((maskMatch[GID::TOF] || maskMatch[GID::ITSTPCTOF]) && ioPtr.nITSTPCTOFMatches == 0) { @@ -95,7 +95,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nITSTPCTOFMatches = itstpctofMatches.size(); ioPtr.itstpctofMatches = itstpctofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nITSTPCTOFMatches << " ITS-TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nITSTPCTOFMatches << " ITS-TPC-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::ITSTPCTRDTOF]) && ioPtr.nITSTPCTRDTOFMatches == 0) { @@ -104,7 +104,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nITSTPCTRDTOFMatches = itstpctrdtofMatches.size(); ioPtr.itstpctrdtofMatches = itstpctrdtofMatches.data(); } - //LOG(info) << "Got " << 
ioPtr.nITSTPCTRDTOFMatches << " ITS-TPC-TRD-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nITSTPCTRDTOFMatches << " ITS-TPC-TRD-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::TPCTOF]) && ioPtr.nTPCTOFMatches == 0) { @@ -113,7 +113,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTPCTOFMatches = tpctofMatches.size(); ioPtr.tpctofMatches = tpctofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::TPCTRDTOF]) && ioPtr.nTPCTRDTOFMatches == 0) { @@ -122,12 +122,12 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTPCTRDTOFMatches = tpctrdtofMatches.size(); ioPtr.tpctrdtofMatches = tpctrdtofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; } if (maskCl[GID::TRD]) { recoCont.inputsTRD->fillGPUIOPtr(&ioPtr); - //LOG(info) << "Got " << ioPtr.nTRDTracklets << " TRD Tracklets"; + // LOG(info) << "Got " << ioPtr.nTRDTracklets << " TRD Tracklets"; } if (maskTrk[GID::ITSTPCTRD] && ioPtr.nTRDTracksITSTPCTRD == 0) { @@ -136,7 +136,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTRDTracksITSTPCTRD = trdTracks.size(); ioPtr.trdTracksITSTPCTRD = trdTracks.data(); } - //LOG(info) << "Got " << ioPtr.nTRDTracksITSTPCTRD << " ITS-TPC-TRD Tracks"; + // LOG(info) << "Got " << ioPtr.nTRDTracksITSTPCTRD << " ITS-TPC-TRD Tracks"; } if (maskTrk[GID::TPCTRD] && ioPtr.nTRDTracksTPCTRD == 0) { @@ -145,12 +145,12 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTRDTracksTPCTRD = trdTracks.size(); ioPtr.trdTracksTPCTRD = trdTracks.data(); } - //LOG(info) << "Got " << ioPtr.nTRDTracksTPCTRD << " TPC-TRD Tracks"; + // LOG(info) << "Got " << ioPtr.nTRDTracksTPCTRD << " TPC-TRD Tracks"; } if (maskCl[GID::TPC] && ioPtr.clustersNative == nullptr) { ioPtr.clustersNative = &recoCont.getTPCClusters(); - //LOG(info) << "Got " << ioPtr.clustersNative->nClustersTotal << " TPC Clusters"; + // LOG(info) << "Got " << ioPtr.clustersNative->nClustersTotal << " TPC Clusters"; } if (maskTrk[GID::TPC] && ioPtr.nOutputTracksTPCO2 == 0) { @@ -176,7 +176,7 @@ std::shared_ptr GPUWorkflowHelper::fi retVal->tpcLinkTRD.resize(ioPtr.nOutputTracksTPCO2, -1); ioPtr.tpcLinkTRD = retVal->tpcLinkTRD.data(); } - //LOG(info) << "Got " << ioPtr.nOutputTracksTPCO2 << " TPC Tracks"; + // LOG(info) << "Got " << ioPtr.nOutputTracksTPCO2 << " TPC Tracks"; } auto creator = [maskTrk, &ioPtr, &recoCont, &retVal](auto& trk, GID gid, float time, float) { From 8c0bc0442ec796d002914a57cd2d53d906fb5a02 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Fri, 14 Mar 2025 19:16:01 +0000 Subject: [PATCH 8/8] Please consider the following formatting changes --- GPU/GPUTracking/Base/GPUReconstructionConvert.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstructionProcessing.h | 2 +- GPU/GPUTracking/DataTypes/GPUOutputControl.h | 2 +- GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 +- .../Interface/GPUO2InterfaceConfigurableParam.h | 2 +- GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h | 2 +- GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h | 2 +- GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h | 2 +- GPU/GPUTracking/qa/GPUQAHelper.h | 4 ++-- GPU/GPUTracking/utils/qconfig.cxx | 8 +++++--- 10 files changed, 15 insertions(+), 13 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index bc760f6188caa..8f5cab6807050 100644 --- 
a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -923,7 +923,7 @@ void zsEncoderDenseLinkBased::decodePage(std::vector& outputBuff if (decLinkX & 0b00100000) { bitmaskL2.set(); } else { - bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t)*((const uint8_t*)decPagePtr)); + bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t) * ((const uint8_t*)decPagePtr)); decPagePtr += sizeof(uint8_t); } diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 43560616782db..4ccfb9ff10311 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -28,7 +28,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent { constexpr deviceEvent() = default; - constexpr deviceEvent(std::nullptr_t p) : v(nullptr) {}; + constexpr deviceEvent(std::nullptr_t p) : v(nullptr){}; template void set(T val) { diff --git a/GPU/GPUTracking/DataTypes/GPUOutputControl.h b/GPU/GPUTracking/DataTypes/GPUOutputControl.h index 0495f7ed1d0ff..799fd25330ab4 100644 --- a/GPU/GPUTracking/DataTypes/GPUOutputControl.h +++ b/GPU/GPUTracking/DataTypes/GPUOutputControl.h @@ -78,7 +78,7 @@ struct GPUTrackingOutputs { static constexpr size_t count() { return sizeof(GPUTrackingOutputs) / sizeof(GPUOutputControl); } GPUOutputControl* asArray() { return (GPUOutputControl*)this; } size_t getIndex(const GPUOutputControl& v) { return &v - (const GPUOutputControl*)this; } - static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::* v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } + static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::*v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 4dac56afed671..34cd5b7280dc3 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -46,7 +46,7 @@ struct GPUO2Interface_Internals { }; } // namespace o2::gpu -GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals) {}; +GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals){}; GPUO2Interface::~GPUO2Interface() { Deinitialize(); } diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h index ebb426b7a8cfe..425c8b880b4e3 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h @@ -50,7 +50,7 @@ #define AddSubConfig(name, instance) #define BeginSubConfig(name, instance, parent, preoptname, preoptnameshort, descr, o2prefix) \ struct GPUCA_M_CAT(GPUConfigurableParam, name) : public o2::conf::ConfigurableParamHelper { \ - O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: + O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: #define BeginHiddenConfig(name, instance) struct GPUCA_M_CAT(GPUConfigurableParam, name) { #define EndConfig() \ } \ diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h index 5bb63d6a10254..10ff0a32aeaf3 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h +++ 
b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h @@ -30,7 +30,7 @@ class GPUTPCTracklet { public: #if !defined(GPUCA_GPUCODE) - GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0) {}; + GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0){}; #endif //! GPUCA_GPUCODE GPUhd() int32_t FirstRow() const { return mFirstRow; } diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index f6b8bea29822a..c8b5b4ba93c92 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -45,7 +45,7 @@ class propagatorInterface { public: typedef o2::base::Propagator propagatorParam; - GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop) {}; + GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop){}; GPUd() propagatorInterface(const propagatorInterface&) = delete; GPUd() propagatorInterface& operator=(const propagatorInterface&) = delete; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index cd7dfb9432b93..fc874070ec9b8 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -82,7 +82,7 @@ namespace o2::gpu class GPUTRDTrackletWord : private o2::trd::Tracklet64 { public: - GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord) {}; + GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord){}; GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() GPUTRDTrackletWord& operator=(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() ~GPUTRDTrackletWord() = default; diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index a7811c6fd55ed..c6ea27e835909 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -133,10 +133,10 @@ struct GPUTPCTrkLbl_ret { template GPUTPCTrkLbl_ret(T){}; #ifdef GPUCA_TPC_GEOMETRY_O2 - GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()) {}; + GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()){}; #endif #ifdef GPUCA_STANDALONE - GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID) {}; + GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID){}; #endif void setFakeFlag() { diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cdb41ec5813f2..cd6267179c844 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -32,7 +32,8 @@ namespace qConfig { #define QCONFIG_SETTING(name, type) \ - struct qon_mxcat3(q, name, _t) { \ + struct qon_mxcat3(q, name, _t) \ + { \ type v; \ constexpr qon_mxcat3(q, name, _t)(type s) : v(s) {} \ }; \ @@ -40,7 +41,8 @@ namespace qConfig #define QCONFIG_SETTING_TEMPLATE(name) \ template \ - struct qon_mxcat3(q, name, _t) { \ + struct qon_mxcat3(q, name, _t) \ + { \ T v; \ constexpr qon_mxcat3(q, name, _t)(const T& s) : v(s) {} \ }; \ @@ -66,7 +68,7 @@ static inline const char* getOptName(const char** argv, int32_t i) template struct qConfigSettings { - qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false) {}; + qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false){}; template 
qConfigSettings(const qConfigSettings v) : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(v.message), allowEmpty(v.allowEmpty){}; bool checkMin, checkMax;