From fc2133ecce1020ad9b915cbed98079240b61d593 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 10:21:46 +0100 Subject: [PATCH 1/7] GPU Display: suppress compiler warning with sanitizers enabled --- GPU/GPUTracking/display/GPUDisplay.h | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 73f65b6b24241..bb270cda23565 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -161,6 +161,7 @@ class GPUDisplay : public GPUDisplayInterface { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-security" +#pragma GCC diagnostic ignored "-Wformat-truncation" snprintf(mInfoText2, 1024, args...); #pragma GCC diagnostic pop GPUInfo("%s", mInfoText2); From e896718ed5dd16593020c730e7afad04d193ca30 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 10:22:09 +0100 Subject: [PATCH 2/7] GPU Standalone can compile with sanitizers also without debug build --- GPU/GPUTracking/Standalone/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index b9620b9385c73..de245a71845c3 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -50,9 +50,6 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(GPUCA_BUILD_DEBUG) set(CMAKE_CXX_FLAGS "-O0 -ggdb") - if (GPUCA_BUILD_DEBUG_SANITIZE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang - endif() set(CMAKE_BUILD_TYPE DEBUG) else() set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb -minline-all-stringops -funroll-loops -fno-stack-protector") @@ -67,6 +64,9 @@ else() set(CMAKE_BUILD_TYPE RELEASE) add_definitions(-DNDEBUG) endif() +if (GPUCA_BUILD_DEBUG_SANITIZE) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang +endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") From e1791e1a20c68d177ec349fb565de7ca33390f75 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 11:05:52 +0100 Subject: [PATCH 3/7] GPU: Disable clang warnings when using C variable length array extension in C++ --- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index de245a71845c3..8fa8f0c2b68c9 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -67,7 +67,7 @@ endif() if (GPUCA_BUILD_DEBUG_SANITIZE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings -Wno-vla-cxx-extension") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") # Find mandatory packages From 08cead5f2ff6277ea6ffdd7255e31c56f8c2ea4c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 11:20:26 +0100 Subject: [PATCH 4/7] GPU Standalone: Fix build using clang compiler with sanitizers --- GPU/GPUTracking/Standalone/CMakeLists.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 8fa8f0c2b68c9..6e536727a0c67 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -65,7 +65,10 @@ else() add_definitions(-DNDEBUG) endif() if (GPUCA_BUILD_DEBUG_SANITIZE) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") + if(CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") + endif() endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings -Wno-vla-cxx-extension") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") @@ -247,6 +250,13 @@ if(GPUCA_CONFIG_ROOT) endif() target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation) +if (GPUCA_BUILD_DEBUG_SANITIZE AND CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libclang_rt.asan-x86_64.so OUTPUT_VARIABLE CLANG_ASAN_SO_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) + get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" DIRECTORY) + get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" ABSOLUTE) + target_link_options(ca PRIVATE "-Wl,-rpath,${CLANG_ASAN_SO_PATH}") +endif() + # Installation install(TARGETS ca TPCFastTransformation standalone_support) install(FILES "cmake/makefile" DESTINATION "${CMAKE_INSTALL_PREFIX}") From 8517baf29d82e1b8146931a3c0c6a3d225a55dda Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 11:38:06 +0100 Subject: [PATCH 5/7] GPU: Suppress another clang warning --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index a4074282da30f..a8a83fdbd9203 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -112,7 +112,12 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args) { +#pragma GCC diagnostic push +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wunused-lambda-capture" // this is not alway captured below +#endif std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); +#pragma GCC diagnostic push } template From ec9171b22380f722ad719faa0a415605d0ad1255 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 13:11:30 +0100 Subject: [PATCH 6/7] GPU: Fix some minor issues indicated by clang sanitizer --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 22 +++++++++++++------ .../Global/GPUChainTrackingSectorTracker.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 11 +++++----- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 35e44d99d5c0c..8bae1df267412 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -147,8 +147,12 @@ int32_t GPUReconstruction::Init() if (InitDevice()) { return 1; } - mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; - mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; + if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; + mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; + } else { + mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr; + } if (InitPhasePermanentMemory()) { return 1; } @@ -860,14 +864,18 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs) FreeRegisteredMemory(i); } } - mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); - mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); mUnmanagedChunks.clear(); - mVolatileMemoryStart = nullptr; mNonPersistentMemoryStack.clear(); mNonPersistentIndividualAllocations.clear(); - mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); - mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize); + mVolatileMemoryStart = nullptr; + if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); + mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); + mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); + mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize); + } else { + mHostMemoryPool = mDeviceMemoryPool = mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr; + } } void GPUReconstruction::UpdateMaxMemoryUsed() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index dd71a797f2744..e161f74a31032 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -150,7 +150,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap; occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128)); - mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); + mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, doGPU && param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); } int32_t streamMap[NSECTORS]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f373d56ea0395..eb1df3f37b6b5 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1706,20 +1706,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = nFilteredHits; } - uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); + const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) { raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters); CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); continue; } - GPUTPCGMMergedTrackHit* cl = mClusters + iOutTrackFirstCluster; - GPUTPCGMMergedTrackHitXYZ* clXYZ = mClustersXYZ + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster; for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; + GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1760,7 +1760,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : GPUTPCGeometry::Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; @@ -1791,12 +1791,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (Param().rec.tpc.mergeCE) { bool CEside; if (Param().par.earlyTpcTransform) { + const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); } else { auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], &clXYZ[CEside ? (nHits - 1) : 0], iOutputTrack); + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); } } // itr } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 3bd2257d02e01..d235b3398c062 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -1091,7 +1091,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr GPUTPCGMTrackParam t = track.Param(); float Alpha = track.Alpha(); CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt()); - bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->ClustersXYZ() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam()); + bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->Param().par.earlyTpcTransform ? merger->ClustersXYZ() + track.FirstClusterRef() : nullptr, nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam()); CADEBUG(printf("Finished Fit Track %d\n", iTrk)); CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, ok %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits))); From f047cc56da0bfd40f5e4a7d466da6069b354b1d5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 13:28:56 +0100 Subject: [PATCH 7/7] With -ffast-math, std::finite is UB and one shoult assume all float to be finite --- Common/MathUtils/include/MathUtils/detail/basicMath.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Common/MathUtils/include/MathUtils/detail/basicMath.h b/Common/MathUtils/include/MathUtils/detail/basicMath.h index 3fc3fe374b380..3565764435a68 100644 --- a/Common/MathUtils/include/MathUtils/detail/basicMath.h +++ b/Common/MathUtils/include/MathUtils/detail/basicMath.h @@ -113,7 +113,11 @@ GPUdi() int nint(double x) template <> GPUdi() bool finite(double x) { +#ifdef __FAST_MATH__ + return false; +#else return std::isfinite(x); +#endif } template <> GPUdi() double log(double x)