diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h index f9583d97ca030..21eea4505cdea 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h @@ -17,6 +17,7 @@ #include "ITStracking/Definitions.h" #include "ITStracking/TrackerTraits.h" #include "ITStrackingGPU/TimeFrameGPU.h" +#include "Framework/Logger.h" namespace o2 { @@ -24,28 +25,28 @@ namespace its { template -class TrackerTraitsGPU : public TrackerTraits +class TrackerTraitsGPU final : public TrackerTraits { public: TrackerTraitsGPU() = default; ~TrackerTraitsGPU() override = default; - // void computeLayerCells() final; - void adoptTimeFrame(TimeFrame* tf) override; - void initialiseTimeFrame(const int iteration) override; - void computeLayerTracklets(const int iteration, int, int) final; - void computeLayerCells(const int iteration) override; - void setBz(float) override; - void findCellsNeighbours(const int iteration) override; - void findRoads(const int iteration) override; + void adoptTimeFrame(TimeFrame* tf) final; + void initialiseTimeFrame(const int iteration) final; + void setBz(float) final; - // Methods to get CPU execution from traits - void initialiseTimeFrameHybrid(const int iteration) override { initialiseTimeFrame(iteration); }; - void computeTrackletsHybrid(const int iteration, int, int) override; - void computeCellsHybrid(const int iteration) override; - void findCellsNeighboursHybrid(const int iteration) override; + void computeLayerTracklets(const int iteration, int, int) final { LOGP(fatal, "computeLayerTracklers must never be called from Hybrid traits!"); }; + void computeLayerCells(const int iteration) final { LOGP(fatal, "computeLayerCells must never be called from Hybrid traits!"); }; + void findCellsNeighbours(const int iteration) final { LOGP(fatal, "findCellsNeighbours must never be called from Hybrid traits!"); }; + void findRoads(const int iteration) final { LOGP(fatal, "findRoads must never be called from Hybrid traits!"); }; + void extendTracks(const int iteration) final { LOGP(fatal, "extendTracks must never be called from Hybrid traits!"); }; + void findShortPrimaries() final { LOGP(fatal, "findShortPrimaries must never be called from Hybrid traits!"); }; - void extendTracks(const int iteration) override; + void initialiseTimeFrameHybrid(const int iteration) final { initialiseTimeFrame(iteration); }; + void computeTrackletsHybrid(const int iteration, int, int) final; + void computeCellsHybrid(const int iteration) final; + void findCellsNeighboursHybrid(const int iteration) final; + void findRoadsHybrid(const int iteration) final; // TimeFrameGPU information forwarding int getTFNumberOfClusters() const override; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 3c65faddcff71..7a9b34b2e7de1 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -11,10 +11,7 @@ /// #include -#include -#include #include -#include #include "DataFormatsITS/TrackITS.h" @@ -40,26 +37,6 @@ void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) mTimeFrameGPU->loadIndexTableUtils(iteration); } -template -void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int, int) -{ -} - -template -void TrackerTraitsGPU::computeLayerCells(const int iteration) -{ -} - -template -void TrackerTraitsGPU::findCellsNeighbours(const int iteration) -{ -} - -template -void TrackerTraitsGPU::extendTracks(const int iteration) -{ -} - template void TrackerTraitsGPU::setBz(float bz) { @@ -260,7 +237,7 @@ void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) }; template -void TrackerTraitsGPU::findRoads(const int iteration) +void TrackerTraitsGPU::findRoadsHybrid(const int iteration) { auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); for (int startLevel{mTrkParams[iteration].CellsPerRoad()}; startLevel >= mTrkParams[iteration].CellMinimumLevel(); --startLevel) { diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index a1c65f67738dd..b1cd6725f3003 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -12,11 +12,9 @@ #include #include -#include -#include #include -#include +#include #include #include #include @@ -28,7 +26,6 @@ #include #include "ITStracking/Constants.h" -#include "ITStracking/Configuration.h" #include "ITStracking/IndexTableUtils.h" #include "ITStracking/MathUtils.h" #include "DataFormatsITS/TrackITS.h" @@ -59,7 +56,7 @@ namespace o2::its using namespace constants::its2; using Vertex = o2::dataformats::Vertex>; -GPUd() float Sq(float v) +GPUdii() float Sq(float v) { return v * v; } @@ -76,15 +73,15 @@ GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerInde const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz; const float phiRangeMax = (maxdeltaphi > constants::math::Pi) ? constants::math::TwoPi : currentCluster.phi + maxdeltaphi; - if (zRangeMax < -LayersZCoordinate()[layerIndex + 1] || - zRangeMin > LayersZCoordinate()[layerIndex + 1] || zRangeMin > zRangeMax) { + if (zRangeMax < -utils.getLayerZ(layerIndex) || + zRangeMin > utils.getLayerZ(layerIndex) || zRangeMin > zRangeMax) { return getEmptyBinsRect(); } - return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex + 1, zRangeMin)), + return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex, zRangeMin)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)), - o2::gpu::CAMath::Min(utils.getNzBins() - 1, utils.getZBinIndex(layerIndex + 1, zRangeMax)), + o2::gpu::CAMath::Min(utils.getNzBins() - 1, utils.getZBinIndex(layerIndex, zRangeMax)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))}; } @@ -522,7 +519,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel( const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate}; const float sqInverseDeltaZ0{1.f / (Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution const float sigmaZ{o2::gpu::CAMath::Sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) + Sq(meanDeltaR * MSAngle))}; - const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; + const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex + 1, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { continue; } @@ -800,6 +797,44 @@ GPUg() void printCellSeeds(CellSeed* seed, int nCells, const unsigned int tId = } } } + +template +GPUhi() void cubExclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stream = nullptr) +{ + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaFree(d_temp_storage)); +} + +template +GPUhi() void cubExclusiveScanInPlace(Vector& in_out, int num_items, cudaStream_t stream = nullptr) +{ + cubExclusiveScanInPlace(thrust::raw_pointer_cast(in_out.data()), num_items, stream); +} + +template +GPUhi() void cubInclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stream = nullptr) +{ + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaFree(d_temp_storage)); +} + +template +GPUhi() void cubInclusiveScanInPlace(Vector& in_out, int num_items, cudaStream_t stream = nullptr) +{ + cubInclusiveScanInPlace(thrust::raw_pointer_cast(in_out.data()), num_items, stream); +} } // namespace gpu template @@ -833,7 +868,8 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, const int nThreads) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>( + gpu::computeLayerTrackletsMultiROFKernel<<>>( utils, multMask, iLayer, @@ -860,22 +896,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, resolutions[iLayer], radii[iLayer + 1] - radii[iLayer], mulScatAng[iLayer]); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::cubExclusiveScanInPlace(trackletsLUTsHost[iLayer], nClusters[iLayer] + 1); } } @@ -913,55 +934,42 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, const int nThreads) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>(utils, - multMask, - iLayer, - startROF, - endROF, - maxROF, - deltaROF, - vertices, - rofPV, - nVertices, - vertexId, - clusters, - ROFClusters, - usedClusters, - clustersIndexTables, - tracklets, - trackletsLUTs, - iteration, - NSigmaCut, - phiCuts[iLayer], - resolutionPV, - minRs[iLayer + 1], - maxRs[iLayer + 1], - resolutions[iLayer], - radii[iLayer + 1] - radii[iLayer], - mulScatAng[iLayer]); + gpu::computeLayerTrackletsMultiROFKernel<<>>(utils, + multMask, + iLayer, + startROF, + endROF, + maxROF, + deltaROF, + vertices, + rofPV, + nVertices, + vertexId, + clusters, + ROFClusters, + usedClusters, + clustersIndexTables, + tracklets, + trackletsLUTs, + iteration, + NSigmaCut, + phiCuts[iLayer], + resolutionPV, + minRs[iLayer + 1], + maxRs[iLayer + 1], + resolutions[iLayer], + radii[iLayer + 1] - radii[iLayer], + mulScatAng[iLayer]); thrust::device_ptr tracklets_ptr(spanTracklets[iLayer]); thrust::sort(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::sort_tracklets()); auto unique_end = thrust::unique(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::equal_tracklets()); nTracklets[iLayer] = unique_end - tracklets_ptr; if (iLayer > 0) { GPUChkErrS(cudaMemset(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int))); - gpu::compileTrackletsLookupTableKernel<<>>(spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::compileTrackletsLookupTableKernel<<>>( + spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]); + gpu::cubExclusiveScanInPlace(trackletsLUTsHost[iLayer], nClusters[iLayer] + 1); } } } @@ -984,7 +992,8 @@ void countCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -998,22 +1007,7 @@ void countCellsHandler( maxChi2ClusterAttachment, // const float cellDeltaTanLambdaSigma, // const float nSigmaCut); // const float - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - cellsLUTsHost, // d_in - cellsLUTsHost, // d_out - nTracklets + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - cellsLUTsHost, // d_in - cellsLUTsHost, // d_out - nTracklets + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::cubExclusiveScanInPlace(cellsLUTsHost, nTracklets + 1); } void computeCellsHandler( @@ -1034,7 +1028,8 @@ void computeCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -1064,7 +1059,8 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nBlocks, const int nThreads) { - gpu::computeLayerCellNeighboursKernel<<>>( + gpu::computeLayerCellNeighboursKernel<<>>( cellsLayersDevice, neighboursLUT, neighboursIndexTable, @@ -1076,39 +1072,10 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, nCells, maxCellNeighbours); - void *d_temp_storage = nullptr, *d_temp_storage_2 = nullptr; - size_t temp_storage_bytes = 0, temp_storage_bytes_2 = 0; - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - neighboursLUT, // d_in - neighboursLUT, // d_out - nCellsNext)); // num_items - - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - neighboursLUT, // d_in - neighboursLUT, // d_out - nCellsNext)); // num_items - - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - temp_storage_bytes_2, // temp_storage_bytes - neighboursIndexTable, // d_in - neighboursIndexTable, // d_out - nCells + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - - GPUChkErrS(cudaMalloc(&d_temp_storage_2, temp_storage_bytes_2)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - temp_storage_bytes_2, // temp_storage_bytes - neighboursIndexTable, // d_in - neighboursIndexTable, // d_out - nCells + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer + gpu::cubInclusiveScanInPlace(neighboursLUT, nCellsNext); + gpu::cubExclusiveScanInPlace(neighboursIndexTable, nCells + 1); unsigned int nNeighbours; GPUChkErrS(cudaMemcpy(&nNeighbours, &neighboursLUT[nCellsNext - 1], sizeof(unsigned int), cudaMemcpyDeviceToHost)); - GPUChkErrS(cudaFree(d_temp_storage)); - GPUChkErrS(cudaFree(d_temp_storage_2)); return nNeighbours; } @@ -1190,69 +1157,56 @@ void processNeighboursHandler(const int startLayer, const int nBlocks, const int nThreads) { - thrust::device_vector foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. TODO: fix this. - // thrust::device_vector lastCellIds(lastCellIdHost); - // thrust::device_vector lastCellSeed(lastCellSeedHost); - thrust::device_vector lastCellId, updatedCellId; - thrust::device_vector lastCellSeed, updatedCellSeed; - gpu::processNeighboursKernel<<>>(startLayer, - startLevel, - allCellSeeds, - currentCellSeeds, - nullptr, - nCells[startLayer], - nullptr, - nullptr, - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[startLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[startLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - - updatedCellId.resize(foundSeedsTable.back()); - updatedCellSeed.resize(foundSeedsTable.back()); - - gpu::processNeighboursKernel<<>>(startLayer, - startLevel, - allCellSeeds, - currentCellSeeds, - nullptr, - nCells[startLayer], - thrust::raw_pointer_cast(&updatedCellSeed[0]), - thrust::raw_pointer_cast(&updatedCellId[0]), - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); + thrust::device_vector foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. + // TODO: fix this. + + gpu::processNeighboursKernel<<>>( + startLayer, + startLevel, + allCellSeeds, + currentCellSeeds, + nullptr, + nCells[startLayer], + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], + neighboursDeviceLUTs[startLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpu::cubExclusiveScanInPlace(foundSeedsTable, nCells[startLayer] + 1); + + thrust::device_vector updatedCellId(foundSeedsTable.back()); + thrust::device_vector updatedCellSeed(foundSeedsTable.back()); + gpu::processNeighboursKernel<<>>( + startLayer, + startLevel, + allCellSeeds, + currentCellSeeds, + nullptr, + nCells[startLayer], + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], + neighboursDeviceLUTs[startLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); - GPUChkErrS(cudaFree(d_temp_storage)); int level = startLevel; + thrust::device_vector lastCellId; + thrust::device_vector lastCellSeed; for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { - temp_storage_bytes = 0; lastCellSeed.swap(updatedCellSeed); lastCellId.swap(updatedCellId); thrust::device_vector().swap(updatedCellSeed); @@ -1260,62 +1214,55 @@ void processNeighboursHandler(const int startLayer, auto lastCellSeedSize{lastCellSeed.size()}; foundSeedsTable.resize(lastCellSeedSize + 1); thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0); - --level; - gpu::processNeighboursKernel<<>>(iLayer, - level, - allCellSeeds, - thrust::raw_pointer_cast(&lastCellSeed[0]), - thrust::raw_pointer_cast(&lastCellId[0]), - lastCellSeedSize, - nullptr, - nullptr, - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - foundSeedsTable.size(), // num_i_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - foundSeedsTable.size(), // num_i_items - 0)); // NOLINT: this is the offset of the sum, not a pointer + + gpu::processNeighboursKernel<<>>( + iLayer, + --level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpu::cubExclusiveScanInPlace(foundSeedsTable, foundSeedsTable.size()); + auto foundSeeds{foundSeedsTable.back()}; updatedCellId.resize(foundSeeds); thrust::fill(updatedCellId.begin(), updatedCellId.end(), 0); updatedCellSeed.resize(foundSeeds); thrust::fill(updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed()); - gpu::processNeighboursKernel<<>>(iLayer, - level, - allCellSeeds, - thrust::raw_pointer_cast(&lastCellSeed[0]), - thrust::raw_pointer_cast(&lastCellId[0]), - lastCellSeedSize, - thrust::raw_pointer_cast(&updatedCellSeed[0]), - thrust::raw_pointer_cast(&updatedCellId[0]), - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::processNeighboursKernel<<>>( + iLayer, + level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); } + thrust::device_vector outSeeds(updatedCellSeed.size()); auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); auto s{end - outSeeds.begin()}; @@ -1339,7 +1286,8 @@ void trackSeedHandler(CellSeed* trackSeeds, const int nThreads) { thrust::device_vector minPts(minPtsHost); - gpu::fitTrackSeedsKernel<<>>( + gpu::fitTrackSeedsKernel<<>>( trackSeeds, // CellSeed* foundTrackingFrameInfo, // TrackingFrameInfo** tracks, // TrackITSExt* diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index 58483e4aa9f6f..b63e61f0b76f4 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -50,20 +50,17 @@ class TrackerTraits; class Tracker { + using LogFunc = std::function; public: Tracker(TrackerTraits* traits); - Tracker(const Tracker&) = delete; - Tracker& operator=(const Tracker&) = delete; - ~Tracker(); - void adoptTimeFrame(TimeFrame& tf); void clustersToTracks( - std::function = [](std::string s) { std::cout << s << std::endl; }, std::function = [](std::string s) { std::cerr << s << std::endl; }); + LogFunc = [](std::string s) { std::cout << s << std::endl; }, LogFunc = [](std::string s) { std::cerr << s << std::endl; }); void clustersToTracksHybrid( - std::function = [](std::string s) { std::cout << s << std::endl; }, std::function = [](std::string s) { std::cerr << s << std::endl; }); + LogFunc = [](std::string s) { std::cout << s << std::endl; }, LogFunc = [](std::string s) { std::cerr << s << std::endl; }); std::vector& getTracks(); void setParameters(const std::vector&); @@ -74,15 +71,25 @@ class Tracker bool isMatLUT() const; void setNThreads(int n); int getNThreads() const; - std::uint32_t mTimeFrameCounter = 0; + void printSummary() const; private: + enum TrackerType : uint8_t { CPU = 0, + Hybrid, + NSize }; + template + void clusterToTracksImpl(LogFunc, LogFunc); + static constexpr const char* sTrackerNames[TrackerType::NSize] = {"CPU", "Hybrid"}; + + // CPU void initialiseTimeFrame(int& iteration); void computeTracklets(int& iteration, int& iROFslice, int& iVertex); void computeCells(int& iteration); void findCellsNeighbours(int& iteration); void findRoads(int& iteration); - + void findShortPrimaries(); + void extendTracks(int& iteration); + // Hyrbid void initialiseTimeFrameHybrid(int& iteration); void computeTrackletsHybrid(int& iteration, int& iROFslice, int& iVertex); void computeCellsHybrid(int& iteration); @@ -90,17 +97,13 @@ class Tracker void findRoadsHybrid(int& iteration); void findTracksHybrid(int& iteration); - void findShortPrimaries(); - void findTracks(); - void extendTracks(int& iteration); - // MC interaction void computeRoadsMClabels(); void computeTracksMClabels(); void rectifyClusterIndices(); template - float evaluateTask(void (Tracker::*)(T...), const char*, std::function logger, T&&... args); + float evaluateTask(void (Tracker::*)(T...), const char*, LogFunc logger, T&&... args); TrackerTraits* mTraits = nullptr; /// Observer pointer, not owned by this class TimeFrame* mTimeFrame = nullptr; /// Observer pointer, not owned by this class @@ -108,7 +111,8 @@ class Tracker std::vector mTrkParams; o2::gpu::GPUChainITS* mRecoChain = nullptr; - unsigned int mNumberOfRuns{0}; + unsigned int mNumberOfDroppedTFs{0}; + unsigned int mTimeFrameCounter{0}; }; inline void Tracker::setParameters(const std::vector& trkPars) @@ -117,8 +121,7 @@ inline void Tracker::setParameters(const std::vector& trkPar } template -float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, std::function logger, - T&&... args) +float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, LogFunc logger, T&&... args) { float diff{0.f}; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index 46499db92d4d5..f8d593fbf2480 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -51,20 +51,21 @@ class TrackerTraits public: virtual ~TrackerTraits() = default; virtual void adoptTimeFrame(TimeFrame* tf); + virtual void initialiseTimeFrame(const int iteration); virtual void computeLayerTracklets(const int iteration, int iROFslice, int iVertex); virtual void computeLayerCells(const int iteration); virtual void findCellsNeighbours(const int iteration); virtual void findRoads(const int iteration); - virtual void initialiseTimeFrameHybrid(const int iteration) { LOGP(error, "initialiseTimeFrameHybrid: this method should never be called with CPU traits"); } - virtual void computeTrackletsHybrid(const int iteration, int, int) { LOGP(error, "computeTrackletsHybrid: this method should never be called with CPU traits"); } - virtual void computeCellsHybrid(const int iteration) { LOGP(error, "computeCellsHybrid: this method should never be called with CPU traits"); } - virtual void findCellsNeighboursHybrid(const int iteration) { LOGP(error, "findCellsNeighboursHybrid: this method should never be called with CPU traits"); } - virtual void findRoadsHybrid(const int iteration) { LOGP(error, "findRoadsHybrid: this method should never be called with CPU traits"); } - virtual void findTracksHybrid(const int iteration) { LOGP(error, "findTracksHybrid: this method should never be called with CPU traits"); } - virtual void findTracks() { LOGP(error, "findTracks: this method is deprecated."); } virtual void extendTracks(const int iteration); virtual void findShortPrimaries(); + + virtual void initialiseTimeFrameHybrid(const int iteration) { LOGP(fatal, "initialiseTimeFrameHybrid: this method should never be called with CPU traits"); } + virtual void computeTrackletsHybrid(const int iteration, int, int) { LOGP(fatal, "computeTrackletsHybrid: this method should never be called with CPU traits"); } + virtual void computeCellsHybrid(const int iteration) { LOGP(fatal, "computeCellsHybrid: this method should never be called with CPU traits"); } + virtual void findCellsNeighboursHybrid(const int iteration) { LOGP(fatal, "findCellsNeighboursHybrid: this method should never be called with CPU traits"); } + virtual void findRoadsHybrid(const int iteration) { LOGP(fatal, "findRoadsHybrid: this method should never be called with CPU traits"); } + virtual void setBz(float bz); virtual bool trackFollowing(TrackITSExt* track, int rof, bool outward, const int iteration); virtual void processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeed, std::vector& updatedCellId); diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h index b584bf6b8008b..6eacb94ebb1ea 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h @@ -37,9 +37,7 @@ class ITSTrackingInterface const bool overrBeamEst) : mIsMC{isMC}, mUseTriggers{trgType}, - mOverrideBeamEstimation{overrBeamEst} - { - } + mOverrideBeamEstimation{overrBeamEst} {} void setClusterDictionary(const o2::itsmft::TopologyDictionary* d) { mDict = d; } void setMeanVertex(const o2::dataformats::MeanVertexObject* v) @@ -56,6 +54,7 @@ class ITSTrackingInterface void initialise(); template void run(framework::ProcessingContext& pc); + void printSummary() const; virtual void updateTimeDependentParams(framework::ProcessingContext& pc); virtual void finaliseCCDB(framework::ConcreteDataMatcher& matcher, void* obj); diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index 50dc1f5dfd039..68d5952c41ec6 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -25,8 +25,7 @@ #include "ReconstructionDataFormats/Track.h" #include -#include -#include +#include #include #include #include @@ -37,17 +36,33 @@ namespace its { using o2::its::constants::GB; -Tracker::Tracker(o2::its::TrackerTraits* traits) +Tracker::Tracker(o2::its::TrackerTraits* traits) : mTraits(traits) { /// Initialise standard configuration with 1 iteration mTrkParams.resize(1); - mTraits = traits; } -Tracker::~Tracker() = default; - -void Tracker::clustersToTracks(std::function logger, std::function error) +template +void Tracker::clusterToTracksImpl(LogFunc logger, LogFunc error) { + constexpr auto pickFunc = [](F1&& cpu, F2&& hybrid) { + if constexpr (T == TrackerType::CPU) { + return std::forward(cpu); + } else if constexpr (T == TrackerType::Hybrid) { + return std::forward(hybrid); + } else { + static_assert(false, "Wrong TrackerType!"); + } + }; + constexpr auto initialiseTimeFrame = pickFunc(&Tracker::initialiseTimeFrame, &Tracker::initialiseTimeFrameHybrid); + constexpr auto computeTracklets = pickFunc(&Tracker::computeTracklets, &Tracker::computeTrackletsHybrid); + constexpr auto computeCells = pickFunc(&Tracker::computeCells, &Tracker::computeCellsHybrid); + constexpr auto findCellsNeighbours = pickFunc(&Tracker::findCellsNeighbours, &Tracker::findCellsNeighboursHybrid); + constexpr auto findRoads = pickFunc(&Tracker::findRoads, &Tracker::findRoadsHybrid); + constexpr auto extendTracks = pickFunc(&Tracker::extendTracks, nullptr); + constexpr auto findShortPrimaries = pickFunc(&Tracker::findShortPrimaries, nullptr); + LogFunc evalLog = [](const std::string&) {}; + double total{0}; mTraits->UpdateTrackingParameters(mTrkParams); int maxNvertices{-1}; @@ -62,22 +77,20 @@ void Tracker::clustersToTracks(std::function logger, std::f if (iteration == 3 && mTrkParams[0].DoUPCIteration) { mTimeFrame->swapMasks(); } - logger(fmt::format("ITS Tracking iteration {} summary:", iteration)); double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; - - total += evaluateTask(&Tracker::initialiseTimeFrame, "Timeframe initialisation", logger, iteration); int nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; int iVertex{std::min(maxNvertices, 0)}; + logger(std::format("==== ITS {} Tracking iteration {} === nRofsIter {} maxNVert {} === summary ====", sTrackerNames[T], iteration, nROFsIterations, maxNvertices)); + total += evaluateTask(initialiseTimeFrame, "Timeframe initialisation", logger, iteration); do { for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask( - &Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex); + timeTracklets += evaluateTask(computeTracklets, "Tracklet finding", evalLog, iteration, iROFs, iVertex); nTracklets += mTraits->getTFNumberOfTracklets(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + error(std::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); if (mTrkParams[iteration].DropTFUponFailure) { dropTF = true; @@ -86,17 +99,16 @@ void Tracker::clustersToTracks(std::function logger, std::f } float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { - error(fmt::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}", + error(std::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit)); break; } - timeCells += evaluateTask( - &Tracker::computeCells, "Cell finding", [](std::string) {}, iteration); + timeCells += evaluateTask(computeCells, "Cell finding", evalLog, iteration); nCells += mTraits->getTFNumberOfCells(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + error(std::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); if (mTrkParams[iteration].DropTFUponFailure) { dropTF = true; @@ -105,131 +117,67 @@ void Tracker::clustersToTracks(std::function logger, std::f } float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(fmt::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}", + error(std::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit)); break; } - timeNeighbours += evaluateTask( - &Tracker::findCellsNeighbours, "Neighbour finding", [](std::string) {}, iteration); + timeNeighbours += evaluateTask(findCellsNeighbours, "Neighbour finding", evalLog, iteration); nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask( - &Tracker::findRoads, "Road finding", [](std::string) {}, iteration); + timeRoads += evaluateTask(findRoads, "Road finding", evalLog, iteration); } iVertex++; } while (iVertex < maxNvertices && !dropTF); - logger(fmt::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); - logger(fmt::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(fmt::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); - logger(fmt::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); + logger(std::format(" - {} tracklet finding: {} tracklets found in {:.2f} ms", sTrackerNames[T], nTracklets, timeTracklets)); + logger(std::format(" - {} cell finding: {} cells found in {:.2f} ms", sTrackerNames[T], nCells, timeCells)); + logger(std::format(" - {} neighbours finding: {} neighbours found in {:.2f} ms", sTrackerNames[T], nNeighbours, timeNeighbours)); + logger(std::format(" - {} track finding: {} tracks found in {:.2f} ms", sTrackerNames[T], nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); total += timeTracklets + timeCells + timeNeighbours + timeRoads; - if (mTrkParams[iteration].UseTrackFollower) { - int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; - auto timeExtending = evaluateTask(&Tracker::extendTracks, "Extending tracks", [](const std::string&) {}, iteration); - total += timeExtending; - logger(fmt::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); + if constexpr (extendTracks != nullptr) { + if (mTrkParams[iteration].UseTrackFollower && !dropTF) { + int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; + auto timeExtending = evaluateTask( + extendTracks, "Extending tracks", [](const std::string&) {}, iteration); + total += timeExtending; + logger(std::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); + } } if (dropTF) { - error(fmt::format("...Dropping Timeframe...")); + error("...Dropping Timeframe..."); mTimeFrame->dropTracks(); - break; // breaking out the iterations loop + ++mNumberOfDroppedTFs; + return; } } - total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger); + if constexpr (findShortPrimaries != nullptr) { + if (mTrkParams[0].FindShortTracks) { + auto nTracksB = mTimeFrame->getNumberOfTracks(); + total += evaluateTask(findShortPrimaries, "Short primaries finding", logger); + auto nTracksA = mTimeFrame->getNumberOfTracks(); + logger(std::format(" `-> found {} additional tracks", nTracksA - nTracksB)); + } + } - std::stringstream sstream; if constexpr (constants::DoTimeBenchmarks) { - sstream << std::setw(2) << " - " - << "Timeframe " << mTimeFrameCounter++ << " processing completed in: " << total << "ms using " << mTraits->getNThreads() << " threads."; + logger(std::format("=== TimeFrame {} processing completed in: {:.2f} ms using {} threads ===", mTimeFrameCounter, total, mTraits->getNThreads())); } - logger(sstream.str()); if (mTimeFrame->hasMCinformation()) { computeTracksMClabels(); } rectifyClusterIndices(); - mNumberOfRuns++; + ++mTimeFrameCounter; } -void Tracker::clustersToTracksHybrid(std::function logger, std::function error) +void Tracker::clustersToTracks(LogFunc logger, LogFunc error) { - double total{0.}; - mTraits->UpdateTrackingParameters(mTrkParams); - int maxNvertices{-1}; - if (mTrkParams[0].PerPrimaryVertexProcessing) { - for (int iROF{0}; iROF < mTimeFrame->getNrof(); ++iROF) { - maxNvertices = std::max(maxNvertices, (int)mTimeFrame->getPrimaryVertices(iROF).size()); - } - } - - for (int iteration = 0; iteration < (int)mTrkParams.size(); ++iteration) { - int nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; - logger(fmt::format("=========== ITS Hybrid Tracking iteration {} summary ===========", iteration, nROFsIterations, maxNvertices)); - double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; - int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; - - total += evaluateTask(&Tracker::initialiseTimeFrameHybrid, "Hybrid Timeframe initialisation", logger, iteration); - int iVertex{std::min(maxNvertices, 0)}; - - do { - for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask( - &Tracker::computeTrackletsHybrid, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex); - nTracklets += mTraits->getTFNumberOfTracklets(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - error(fmt::format("Too much memory used during trackleting in iteration {}, check the detector status and/or the selections.", iteration)); - break; - } - float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; - if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { - error(fmt::format("Too many tracklets per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, mTrkParams[iteration].TrackletsPerClusterLimit)); - break; - } - - timeCells += evaluateTask( - &Tracker::computeCellsHybrid, "Cell finding", [](std::string) {}, iteration); - nCells += mTraits->getTFNumberOfCells(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - error(fmt::format("Too much memory used during cell finding in iteration {}, check the detector status and/or the selections.", iteration)); - break; - } - float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; - if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(fmt::format("Too many cells per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, mTrkParams[iteration].CellsPerClusterLimit)); - break; - } - - timeNeighbours += evaluateTask( - &Tracker::findCellsNeighboursHybrid, "Neighbour finding", [](std::string) {}, iteration); - nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask( - &Tracker::findRoads, "Road finding", [](std::string) {}, iteration); - } - iVertex++; - } while (iVertex < maxNvertices); - logger(fmt::format(" - Hybrid tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); - logger(fmt::format(" - Hybrid cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(fmt::format(" - Hybrid neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); - logger(fmt::format(" - Hybrid track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); - total += timeTracklets + timeCells + timeNeighbours + timeRoads; - // total += evaluateTask(&Tracker::extendTracks, "Hybrid extending tracks", logger, iteration); - } - - // total += evaluateTask(&Tracker::findShortPrimaries, "Hybrid short primaries finding", logger); - - std::stringstream sstream; - if constexpr (constants::DoTimeBenchmarks) { - sstream << std::setw(2) << " - " - << "Timeframe " << mTimeFrameCounter++ << " processing completed in: " << total << "ms using " << mTraits->getNThreads() << " threads."; - } - logger(sstream.str()); + clusterToTracksImpl(logger, error); +} - if (mTimeFrame->hasMCinformation()) { - computeTracksMClabels(); - } - rectifyClusterIndices(); - mNumberOfRuns++; +void Tracker::clustersToTracksHybrid(LogFunc logger, LogFunc error) +{ + clusterToTracksImpl(logger, error); } void Tracker::initialiseTimeFrame(int& iteration) @@ -257,6 +205,16 @@ void Tracker::findRoads(int& iteration) mTraits->findRoads(iteration); } +void Tracker::extendTracks(int& iteration) +{ + mTraits->extendTracks(iteration); +} + +void Tracker::findShortPrimaries() +{ + mTraits->findShortPrimaries(); +} + void Tracker::initialiseTimeFrameHybrid(int& iteration) { mTraits->initialiseTimeFrameHybrid(iteration); @@ -282,26 +240,6 @@ void Tracker::findRoadsHybrid(int& iteration) mTraits->findRoadsHybrid(iteration); } -void Tracker::findTracksHybrid(int& iteration) -{ - mTraits->findTracksHybrid(iteration); -} - -void Tracker::findTracks() -{ - mTraits->findTracks(); -} - -void Tracker::extendTracks(int& iteration) -{ - mTraits->extendTracks(iteration); -} - -void Tracker::findShortPrimaries() -{ - mTraits->findShortPrimaries(); -} - void Tracker::computeRoadsMClabels() { /// Moore's Voting Algorithm @@ -575,5 +513,11 @@ int Tracker::getNThreads() const { return mTraits->getNThreads(); } + +void Tracker::printSummary() const +{ + LOGP(info, "Tracker summary: Processed {} TFs (dropped {})", mTimeFrameCounter, mNumberOfDroppedTFs); +} + } // namespace its } // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 8dcb7bfd315c1..c2ee3d3ed6111 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -723,10 +723,7 @@ void TrackerTraits::extendTracks(const int iteration) void TrackerTraits::findShortPrimaries() { - if (!mTrkParams[0].FindShortTracks) { - return; - } - auto propagator = o2::base::Propagator::Instance(); + const auto propagator = o2::base::Propagator::Instance(); mTimeFrame->fillPrimaryVerticesXandAlpha(); for (auto& cell : mTimeFrame->getCells()[0]) { diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 613402ce56e97..8570d134fe30d 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -438,6 +438,11 @@ void ITSTrackingInterface::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) } } +void ITSTrackingInterface::printSummary() const +{ + mTracker->printSummary(); +} + void ITSTrackingInterface::setTraitsFromProvider(VertexerTraits* vertexerTraits, TrackerTraits* trackerTraits, TimeFrame* frame) diff --git a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx index 9e4c98ad6e9a1..abbb88aea42fa 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx @@ -48,6 +48,7 @@ void TrackerDPL::init(InitContext& ic) void TrackerDPL::stop() { + mITSTrackingInterface.printSummary(); LOGF(info, "CPU Reconstruction total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); } @@ -69,6 +70,7 @@ void TrackerDPL::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) void TrackerDPL::endOfStream(EndOfStreamContext& ec) { + mITSTrackingInterface.printSummary(); LOGF(info, "ITS CA-Tracker total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); }