From e20648ccd1dabd3d7ba6716399290917c8f98c68 Mon Sep 17 00:00:00 2001 From: Gabriele Cimador Date: Thu, 16 Oct 2025 21:56:43 +0200 Subject: [PATCH 1/3] GPU TPC: added dynamic buffer allocation during track-model decoding --- GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx index fd0c929dd2ba7..036eb7a92ec1d 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx @@ -18,6 +18,7 @@ #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" #include "GPULogging.h" +#include <algorithm> using namespace o2::gpu; @@ -116,5 +117,8 @@ void GPUTPCDecompression::RegisterMemoryAllocation() void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io) { - mMaxNativeClustersPerBuffer = mRec->GetProcessingSettings().tpcMaxAttachedClustersPerSectorRow; + mMaxNativeClustersPerBuffer = *std::max_element(mInputGPU.nSliceRowClusters, mInputGPU.nSliceRowClusters + mInputGPU.nSliceRows); + float clsRatio = mInputGPU.nUnattachedClusters > 0 ? 
float(mInputGPU.nAttachedClusters) / float(mInputGPU.nUnattachedClusters) : 1.0f; + mMaxNativeClustersPerBuffer *= clsRatio; + mMaxNativeClustersPerBuffer = std::min(mMaxNativeClustersPerBuffer, mRec->GetProcessingSettings().tpcMaxAttachedClustersPerSectorRow); } From 0d687166e4eabe8a69c96234bcdcc122492c9af1 Mon Sep 17 00:00:00 2001 From: cima22 Date: Fri, 17 Oct 2025 15:34:34 +0200 Subject: [PATCH 2/3] GPU TPC: improvement for dynamic buffer size for track-model decoding --- .../DataCompression/GPUTPCDecompression.cxx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx index 036eb7a92ec1d..533e76d26c583 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx @@ -117,8 +117,10 @@ void GPUTPCDecompression::RegisterMemoryAllocation() void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io) { - mMaxNativeClustersPerBuffer = *std::max_element(mInputGPU.nSliceRowClusters, mInputGPU.nSliceRowClusters + mInputGPU.nSliceRows); - float clsRatio = mInputGPU.nUnattachedClusters > 0 ? float(mInputGPU.nAttachedClusters) / float(mInputGPU.nUnattachedClusters) : 1.0f; - mMaxNativeClustersPerBuffer *= clsRatio; - mMaxNativeClustersPerBuffer = std::min(mMaxNativeClustersPerBuffer, mRec->GetProcessingSettings().tpcMaxAttachedClustersPerSectorRow); + uint32_t maxAttachedClsMargin1 = *std::max_element(mInputGPU.nSliceRowClusters, mInputGPU.nSliceRowClusters + mInputGPU.nSliceRows); + float clsRatio1 = std::max(mInputGPU.nUnattachedClusters > 0 ? 
float(mInputGPU.nAttachedClusters) / float(mInputGPU.nUnattachedClusters) : 1.5f, 1.5f); // minimum raio = 1.5 + maxAttachedClsMargin1 *= clsRatio1; + uint32_t maxAttachedClsMargin2 = mInputGPU.nAttachedClusters / mInputGPU.nSliceRows * 3; // mean #attached cls per SectorRow multiplied by tuned number 3 + mMaxNativeClustersPerBuffer = std::max({maxAttachedClsMargin1, maxAttachedClsMargin2, 1000u}); // take biggest margin, 1000 clusters minimum + mMaxNativeClustersPerBuffer = std::min(mMaxNativeClustersPerBuffer, mRec->GetProcessingSettings().tpcMaxAttachedClustersPerSectorRow); // upperbound given by configurable param } From 89dea6f55dcdfd359456973b3149733c3a7db9ae Mon Sep 17 00:00:00 2001 From: cima22 Date: Sat, 18 Oct 2025 00:15:36 +0200 Subject: [PATCH 3/3] GPU TPC: increased margins for track-model decoding buffers --- GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx index 533e76d26c583..7e7ee86623099 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx @@ -118,9 +118,9 @@ void GPUTPCDecompression::RegisterMemoryAllocation() void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io) { uint32_t maxAttachedClsMargin1 = *std::max_element(mInputGPU.nSliceRowClusters, mInputGPU.nSliceRowClusters + mInputGPU.nSliceRows); - float clsRatio1 = std::max(mInputGPU.nUnattachedClusters > 0 ? float(mInputGPU.nAttachedClusters) / float(mInputGPU.nUnattachedClusters) : 1.5f, 1.5f); // minimum raio = 1.5 + float clsRatio1 = (mInputGPU.nUnattachedClusters > 0 ? 
float(mInputGPU.nAttachedClusters) / float(mInputGPU.nUnattachedClusters) : 1.0f) * 1.5f; maxAttachedClsMargin1 *= clsRatio1; - uint32_t maxAttachedClsMargin2 = mInputGPU.nAttachedClusters / mInputGPU.nSliceRows * 3; // mean #attached cls per SectorRow multiplied by tuned number 3 + uint32_t maxAttachedClsMargin2 = mInputGPU.nAttachedClusters / mInputGPU.nSliceRows * 3.5; // mean #attached cls per SectorRow multiplied by 3.5 (tuned) mMaxNativeClustersPerBuffer = std::max({maxAttachedClsMargin1, maxAttachedClsMargin2, 1000u}); // take biggest margin, 1000 clusters minimum mMaxNativeClustersPerBuffer = std::min(mMaxNativeClustersPerBuffer, mRec->GetProcessingSettings().tpcMaxAttachedClustersPerSectorRow); // upperbound given by configurable param }