diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 94c6610ab9430..0e5fd21e5e90e 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -311,7 +311,7 @@ GPUdii() gpuSpan getClustersOnLayer(const int rof, } template -GPUg() void fitTrackSeedsKernel( +GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel( CellSeed* trackSeeds, const TrackingFrameInfo** foundTrackingFrameInfo, o2::its::TrackITSExt* tracks, @@ -374,7 +374,7 @@ GPUg() void fitTrackSeedsKernel( } template -GPUg() void computeLayerCellNeighboursKernel( +GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel( CellSeed** cellSeedArray, int* neighboursLUT, int* neighboursIndexTable, @@ -438,7 +438,7 @@ GPUg() void computeLayerCellNeighboursKernel( } template -GPUg() void computeLayerCellsKernel( +GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( const Cluster** sortedClusters, const Cluster** unsortedClusters, const TrackingFrameInfo** tfInfo, @@ -525,7 +525,7 @@ GPUg() void computeLayerCellsKernel( } template -GPUg() void computeLayerTrackletsMultiROFKernel( +GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( const IndexTableUtils* utils, const uint8_t* multMask, const int layerIndex, @@ -652,9 +652,10 @@ GPUg() void computeLayerTrackletsMultiROFKernel( } } -GPUg() void compileTrackletsLookupTableKernel(const Tracklet* tracklets, - int* trackletsLookUpTable, - const int nTracklets) +GPUg() void __launch_bounds__(256, 1) compileTrackletsLookupTableKernel( + const Tracklet* tracklets, + int* trackletsLookUpTable, + const int nTracklets) { for (int currentTrackletIndex = blockIdx.x * blockDim.x + threadIdx.x; currentTrackletIndex < nTracklets; currentTrackletIndex += blockDim.x * gridDim.x) { atomicAdd(&trackletsLookUpTable[tracklets[currentTrackletIndex].firstClusterIndex], 1); @@ -662,23 +663,24 @@ GPUg() void compileTrackletsLookupTableKernel(const Tracklet* tracklets, } template -GPUg() void processNeighboursKernel(const int layer, - const int level, - CellSeed** allCellSeeds, - CellSeed* currentCellSeeds, - const int* currentCellIds, - const unsigned int nCurrentCells, - CellSeed* updatedCellSeeds, - int* updatedCellsIds, - int* foundSeedsTable, // auxiliary only in GPU code to compute the number of cells per iteration - const unsigned char** usedClusters, // Used clusters - int* neighbours, - int* neighboursLUT, - const TrackingFrameInfo** foundTrackingFrameInfo, - const float bz, - const float maxChi2ClusterAttachment, - const o2::base::Propagator* propagator, - const o2::base::PropagatorF::MatCorrType matCorrType) +GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( + const int layer, + const int level, + CellSeed** allCellSeeds, + CellSeed* currentCellSeeds, + const int* currentCellIds, + const unsigned int nCurrentCells, + CellSeed* updatedCellSeeds, + int* updatedCellsIds, + int* foundSeedsTable, // auxiliary only in GPU code to compute the number of cells per iteration + const unsigned char** usedClusters, // Used clusters + int* neighbours, + int* neighboursLUT, + const TrackingFrameInfo** foundTrackingFrameInfo, + const float bz, + const float maxChi2ClusterAttachment, + const o2::base::Propagator* propagator, + const o2::base::PropagatorF::MatCorrType matCorrType) { constexpr float layerxX0[7] = {5.e-3f, 5.e-3f, 5.e-3f, 1.e-2f, 1.e-2f, 1.e-2f, 1.e-2f}; // Hardcoded here for the moment. for (unsigned int iCurrentCell = blockIdx.x * blockDim.x + threadIdx.x; iCurrentCell < nCurrentCells; iCurrentCell += blockDim.x * gridDim.x) { diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index e8d3692909d05..4c445bdbbfa16 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -121,19 +121,19 @@ struct ITSGpuTrackingParamConfig : public o2::conf::ConfigurableParamHelper