diff --git a/Common/MathUtils/include/MathUtils/detail/basicMath.h b/Common/MathUtils/include/MathUtils/detail/basicMath.h index 3fc3fe374b380..3565764435a68 100644 --- a/Common/MathUtils/include/MathUtils/detail/basicMath.h +++ b/Common/MathUtils/include/MathUtils/detail/basicMath.h @@ -113,7 +113,11 @@ GPUdi() int nint(double x) template <> GPUdi() bool finite(double x) { +#ifdef __FAST_MATH__ + return false; +#else return std::isfinite(x); +#endif } template <> GPUdi() double log(double x) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 35e44d99d5c0c..8bae1df267412 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -147,8 +147,12 @@ int32_t GPUReconstruction::Init() if (InitDevice()) { return 1; } - mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; - mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; + if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; + mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; + } else { + mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr; + } if (InitPhasePermanentMemory()) { return 1; } @@ -860,14 +864,18 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs) FreeRegisteredMemory(i); } } - mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); - mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); mUnmanagedChunks.clear(); - mVolatileMemoryStart = nullptr; mNonPersistentMemoryStack.clear(); mNonPersistentIndividualAllocations.clear(); - mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); - mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize); + mVolatileMemoryStart = nullptr; + if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); + mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); + mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); + mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize); + } else { + mHostMemoryPool = mDeviceMemoryPool = mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr; + } } void GPUReconstruction::UpdateMaxMemoryUsed() diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index a4074282da30f..a8a83fdbd9203 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -112,7 +112,12 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args) { +#pragma GCC diagnostic push +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wunused-lambda-capture" // this is not alway captured below +#endif std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); +#pragma GCC diagnostic push } template diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index dd71a797f2744..e161f74a31032 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -150,7 +150,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap; occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128)); - mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); + mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, doGPU && param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); } int32_t streamMap[NSECTORS]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f373d56ea0395..eb1df3f37b6b5 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1706,20 +1706,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = nFilteredHits; } - uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); + const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) { raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters); CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); continue; } - GPUTPCGMMergedTrackHit* cl = mClusters + iOutTrackFirstCluster; - GPUTPCGMMergedTrackHitXYZ* clXYZ = mClustersXYZ + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster; for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; + GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1760,7 +1760,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : GPUTPCGeometry::Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; @@ -1791,12 +1791,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (Param().rec.tpc.mergeCE) { bool CEside; if (Param().par.earlyTpcTransform) { + const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); } else { auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], &clXYZ[CEside ? (nHits - 1) : 0], iOutputTrack); + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); } } // itr } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 3bd2257d02e01..d235b3398c062 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -1091,7 +1091,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr GPUTPCGMTrackParam t = track.Param(); float Alpha = track.Alpha(); CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt()); - bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->ClustersXYZ() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam()); + bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->Param().par.earlyTpcTransform ? merger->ClustersXYZ() + track.FirstClusterRef() : nullptr, nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam()); CADEBUG(printf("Finished Fit Track %d\n", iTrk)); CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, ok %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits))); diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index b9620b9385c73..6e536727a0c67 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -50,9 +50,6 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(GPUCA_BUILD_DEBUG) set(CMAKE_CXX_FLAGS "-O0 -ggdb") - if (GPUCA_BUILD_DEBUG_SANITIZE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang - endif() set(CMAKE_BUILD_TYPE DEBUG) else() set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb -minline-all-stringops -funroll-loops -fno-stack-protector") @@ -67,7 +64,13 @@ else() set(CMAKE_BUILD_TYPE RELEASE) add_definitions(-DNDEBUG) endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") +if (GPUCA_BUILD_DEBUG_SANITIZE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") + if(CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") + endif() +endif() +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings -Wno-vla-cxx-extension") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") # Find mandatory packages @@ -247,6 +250,13 @@ if(GPUCA_CONFIG_ROOT) endif() target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation) +if (GPUCA_BUILD_DEBUG_SANITIZE AND CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libclang_rt.asan-x86_64.so OUTPUT_VARIABLE CLANG_ASAN_SO_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) + get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" DIRECTORY) + get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" ABSOLUTE) + target_link_options(ca PRIVATE "-Wl,-rpath,${CLANG_ASAN_SO_PATH}") +endif() + # Installation install(TARGETS ca TPCFastTransformation standalone_support) install(FILES "cmake/makefile" DESTINATION "${CMAKE_INSTALL_PREFIX}") diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 73f65b6b24241..bb270cda23565 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -161,6 +161,7 @@ class GPUDisplay : public GPUDisplayInterface { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-security" +#pragma GCC diagnostic ignored "-Wformat-truncation" snprintf(mInfoText2, 1024, args...); #pragma GCC diagnostic pop GPUInfo("%s", mInfoText2);