Skip to content

Commit 29435a5

Browse files
committed
ITS: simplify configuration of iterations
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 0528e00 commit 29435a5

16 files changed

Lines changed: 192 additions & 167 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -42,30 +42,30 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
4242
void popMemoryStack(const int);
4343
void registerHostMemory(const int);
4444
void unregisterHostMemory(const int);
45-
void initialise(const int, const TrackingParameters&, const int);
46-
void loadIndexTableUtils(const int);
47-
void loadTrackingFrameInfoDevice(const int, const int);
48-
void createTrackingFrameInfoDeviceArray(const int);
49-
void loadUnsortedClustersDevice(const int, const int);
50-
void createUnsortedClustersDeviceArray(const int, const int = NLayers);
51-
void loadClustersDevice(const int, const int);
52-
void createClustersDeviceArray(const int, const int = NLayers);
53-
void loadClustersIndexTables(const int, const int);
54-
void createClustersIndexTablesArray(const int);
55-
void createUsedClustersDevice(const int, const int);
56-
void createUsedClustersDeviceArray(const int, const int = NLayers);
45+
void initialise(const TrackingParameters&, int maxLayers);
46+
void loadIndexTableUtils();
47+
void loadTrackingFrameInfoDevice(const int);
48+
void createTrackingFrameInfoDeviceArray();
49+
void loadUnsortedClustersDevice(const int);
50+
void createUnsortedClustersDeviceArray(const int = NLayers);
51+
void loadClustersDevice(const int);
52+
void createClustersDeviceArray(const int = NLayers);
53+
void loadClustersIndexTables(const int);
54+
void createClustersIndexTablesArray();
55+
void createUsedClustersDevice(const int);
56+
void createUsedClustersDeviceArray(const int = NLayers);
5757
void loadUsedClustersDevice();
58-
void loadROFrameClustersDevice(const int, const int);
59-
void createROFrameClustersDeviceArray(const int);
58+
void loadROFrameClustersDevice(const int);
59+
void createROFrameClustersDeviceArray();
6060
void loadROFCutMask(const int);
61-
void loadVertices(const int);
62-
void loadROFOverlapTable(const int);
63-
void loadROFVertexLookupTable(const int);
64-
void updateROFVertexLookupTable(const int);
61+
void loadVertices();
62+
void loadROFOverlapTable();
63+
void loadROFVertexLookupTable();
64+
void updateROFVertexLookupTable();
6565

6666
///
67-
void createTrackletsLUTDevice(const int, const int);
68-
void createTrackletsLUTDeviceArray(const int);
67+
void createTrackletsLUTDevice(bool, const int);
68+
void createTrackletsLUTDeviceArray();
6969
void loadTrackletsDevice();
7070
void loadTrackletsLUTDevice();
7171
void loadCellsDevice();
@@ -74,12 +74,12 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
7474
void loadTrackSeedsChi2Device();
7575
void loadTrackSeedsDevice(bounded_vector<TrackSeedN>&);
7676
void createTrackletsBuffers(const int);
77-
void createTrackletsBuffersArray(const int);
77+
void createTrackletsBuffersArray();
7878
void createCellsBuffers(const int);
79-
void createCellsBuffersArray(const int);
79+
void createCellsBuffersArray();
8080
void createCellsDevice();
8181
void createCellsLUTDevice(const int);
82-
void createCellsLUTDeviceArray(const int);
82+
void createCellsLUTDeviceArray();
8383
void createNeighboursIndexTablesDevice(const int);
8484
void createNeighboursDevice(const unsigned int layer);
8585
void createNeighboursLUTDevice(const int, const unsigned int);

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
5151
const int** clustersIndexTables,
5252
int** trackletsLUTs,
5353
gsl::span<int*> trackletsLUTsHost,
54-
const int iteration,
54+
const bool selectUPCVertices,
5555
const float NSigmaCut,
5656
bounded_vector<float>& phiCuts,
5757
const float resolutionPV,
@@ -82,7 +82,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
8282
gsl::span<int> nTracklets,
8383
int** trackletsLUTs,
8484
gsl::span<int*> trackletsLUTsHost,
85-
const int iteration,
85+
const bool selectUPCVertices,
8686
const float NSigmaCut,
8787
bounded_vector<float>& phiCuts,
8888
const float resolutionPV,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 47 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@ void TimeFrameGPU<NLayers>::allocMem(void** ptr, size_t size, bool extAllocator,
5252
}
5353

5454
template <int NLayers>
55-
void TimeFrameGPU<NLayers>::loadIndexTableUtils(const int iteration)
55+
void TimeFrameGPU<NLayers>::loadIndexTableUtils()
5656
{
5757
GPUTimer timer("loading indextable utils");
58-
if (!iteration) {
58+
{
5959
GPULog("gpu-allocation: allocating IndexTableUtils buffer, for {:.2f} MB.", sizeof(IndexTableUtilsN) / constants::MB);
6060
allocMem(reinterpret_cast<void**>(&mIndexTableUtilsDevice), sizeof(IndexTableUtilsN), this->hasFrameworkAllocator());
6161
}
@@ -64,9 +64,9 @@ void TimeFrameGPU<NLayers>::loadIndexTableUtils(const int iteration)
6464
}
6565

6666
template <int NLayers>
67-
void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int iteration, const int maxLayers)
67+
void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int maxLayers)
6868
{
69-
if (!iteration) {
69+
{
7070
GPUTimer timer("creating unsorted clusters array");
7171
allocMem(reinterpret_cast<void**>(&mUnsortedClustersDeviceArray), NLayers * sizeof(Cluster*), this->hasFrameworkAllocator());
7272
GPUChkErrS(cudaHostRegister(mUnsortedClustersDevice.data(), NLayers * sizeof(Cluster*), cudaHostRegisterPortable));
@@ -81,9 +81,9 @@ void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int iteratio
8181
}
8282

8383
template <int NLayers>
84-
void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int iteration, const int layer)
84+
void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int layer)
8585
{
86-
if (!iteration) {
86+
{
8787
GPUTimer timer(mGpuStreams[layer], "loading unsorted clusters", layer);
8888
GPULog("gpu-transfer: loading {} unsorted clusters on layer {}, for {:.2f} MB.", this->mUnsortedClusters[layer].size(), layer, this->mUnsortedClusters[layer].size() * sizeof(Cluster) / constants::MB);
8989
allocMemAsync(reinterpret_cast<void**>(&mUnsortedClustersDevice[layer]), this->mUnsortedClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -93,9 +93,9 @@ void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int iteration, cons
9393
}
9494

9595
template <int NLayers>
96-
void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int iteration, const int maxLayers)
96+
void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int maxLayers)
9797
{
98-
if (!iteration) {
98+
{
9999
GPUTimer timer("creating sorted clusters array");
100100
allocMem(reinterpret_cast<void**>(&mClustersDeviceArray), NLayers * sizeof(Cluster*), this->hasFrameworkAllocator());
101101
GPUChkErrS(cudaHostRegister(mClustersDevice.data(), NLayers * sizeof(Cluster*), cudaHostRegisterPortable));
@@ -110,9 +110,9 @@ void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int iteration, const
110110
}
111111

112112
template <int NLayers>
113-
void TimeFrameGPU<NLayers>::loadClustersDevice(const int iteration, const int layer)
113+
void TimeFrameGPU<NLayers>::loadClustersDevice(const int layer)
114114
{
115-
if (!iteration) {
115+
{
116116
GPUTimer timer(mGpuStreams[layer], "loading sorted clusters", layer);
117117
GPULog("gpu-transfer: loading {} clusters on layer {}, for {:.2f} MB.", this->mClusters[layer].size(), layer, this->mClusters[layer].size() * sizeof(Cluster) / constants::MB);
118118
allocMemAsync(reinterpret_cast<void**>(&mClustersDevice[layer]), this->mClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -122,9 +122,9 @@ void TimeFrameGPU<NLayers>::loadClustersDevice(const int iteration, const int la
122122
}
123123

124124
template <int NLayers>
125-
void TimeFrameGPU<NLayers>::createClustersIndexTablesArray(const int iteration)
125+
void TimeFrameGPU<NLayers>::createClustersIndexTablesArray()
126126
{
127-
if (!iteration) {
127+
{
128128
GPUTimer timer("creating clustersindextable array");
129129
allocMem(reinterpret_cast<void**>(&mClustersIndexTablesDeviceArray), NLayers * sizeof(int*), this->hasFrameworkAllocator());
130130
GPUChkErrS(cudaHostRegister(mClustersIndexTablesDevice.data(), NLayers * sizeof(int*), cudaHostRegisterPortable));
@@ -139,9 +139,9 @@ void TimeFrameGPU<NLayers>::createClustersIndexTablesArray(const int iteration)
139139
}
140140

141141
template <int NLayers>
142-
void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int iteration, const int layer)
142+
void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int layer)
143143
{
144-
if (!iteration) {
144+
{
145145
GPUTimer timer(mGpuStreams[layer], "loading sorted clusters", layer);
146146
GPULog("gpu-transfer: loading clusters indextable for layer {} with {} elements, for {:.2f} MB.", layer, this->mIndexTables[layer].size(), this->mIndexTables[layer].size() * sizeof(int) / constants::MB);
147147
allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDevice[layer]), this->mIndexTables[layer].size() * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -151,9 +151,9 @@ void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int iteration, const i
151151
}
152152

153153
template <int NLayers>
154-
void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int iteration, const int maxLayers)
154+
void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int maxLayers)
155155
{
156-
if (!iteration) {
156+
{
157157
GPUTimer timer("creating used clusters flags");
158158
allocMem(reinterpret_cast<void**>(&mUsedClustersDeviceArray), NLayers * sizeof(uint8_t*), this->hasFrameworkAllocator());
159159
GPUChkErrS(cudaHostRegister(mUsedClustersDevice.data(), NLayers * sizeof(uint8_t*), cudaHostRegisterPortable));
@@ -168,9 +168,9 @@ void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int iteration, c
168168
}
169169

170170
template <int NLayers>
171-
void TimeFrameGPU<NLayers>::createUsedClustersDevice(const int iteration, const int layer)
171+
void TimeFrameGPU<NLayers>::createUsedClustersDevice(const int layer)
172172
{
173-
if (!iteration) {
173+
{
174174
GPUTimer timer(mGpuStreams[layer], "creating used clusters flags", layer);
175175
GPULog("gpu-transfer: creating {} used clusters flags on layer {}, for {:.2f} MB.", this->mUsedClusters[layer].size(), layer, this->mUsedClusters[layer].size() * sizeof(unsigned char) / constants::MB);
176176
allocMemAsync(reinterpret_cast<void**>(&mUsedClustersDevice[layer]), this->mUsedClusters[layer].size() * sizeof(unsigned char), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -190,9 +190,9 @@ void TimeFrameGPU<NLayers>::loadUsedClustersDevice()
190190
}
191191

192192
template <int NLayers>
193-
void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray(const int iteration)
193+
void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray()
194194
{
195-
if (!iteration) {
195+
{
196196
GPUTimer timer("creating ROFrame clusters array");
197197
allocMem(reinterpret_cast<void**>(&mROFramesClustersDeviceArray), NLayers * sizeof(int*), this->hasFrameworkAllocator());
198198
GPUChkErrS(cudaHostRegister(mROFramesClustersDevice.data(), NLayers * sizeof(int*), cudaHostRegisterPortable));
@@ -207,9 +207,9 @@ void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray(const int iteration
207207
}
208208

209209
template <int NLayers>
210-
void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int iteration, const int layer)
210+
void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int layer)
211211
{
212-
if (!iteration) {
212+
{
213213
GPUTimer timer(mGpuStreams[layer], "loading ROframe clusters", layer);
214214
GPULog("gpu-transfer: loading {} ROframe clusters info on layer {}, for {:.2f} MB.", this->mROFramesClusters[layer].size(), layer, this->mROFramesClusters[layer].size() * sizeof(int) / constants::MB);
215215
allocMemAsync(reinterpret_cast<void**>(&mROFramesClustersDevice[layer]), this->mROFramesClusters[layer].size() * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -219,9 +219,9 @@ void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int iteration, const
219219
}
220220

221221
template <int NLayers>
222-
void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray(const int iteration)
222+
void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray()
223223
{
224-
if (!iteration) {
224+
{
225225
GPUTimer timer("creating trackingframeinfo array");
226226
allocMem(reinterpret_cast<void**>(&mTrackingFrameInfoDeviceArray), NLayers * sizeof(TrackingFrameInfo*), this->hasFrameworkAllocator());
227227
GPUChkErrS(cudaHostRegister(mTrackingFrameInfoDevice.data(), NLayers * sizeof(TrackingFrameInfo*), cudaHostRegisterPortable));
@@ -236,9 +236,9 @@ void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray(const int iterati
236236
}
237237

238238
template <int NLayers>
239-
void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int iteration, const int layer)
239+
void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int layer)
240240
{
241-
if (!iteration) {
241+
{
242242
GPUTimer timer(mGpuStreams[layer], "loading trackingframeinfo", layer);
243243
GPULog("gpu-transfer: loading {} tfinfo on layer {}, for {:.2f} MB.", this->mTrackingFrameInfo[layer].size(), layer, this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo) / constants::MB);
244244
allocMemAsync(reinterpret_cast<void**>(&mTrackingFrameInfoDevice[layer]), this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -250,7 +250,7 @@ void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int iteration, con
250250
template <int NLayers>
251251
void TimeFrameGPU<NLayers>::loadROFCutMask(const int iteration)
252252
{
253-
if (!iteration || iteration == 3) { // we need to re-load the swapped mult-mask in upc iteration
253+
{
254254
GPUTimer timer("loading multiplicity cut mask");
255255
const auto& hostTable = *(this->mROFMask);
256256
const auto hostView = hostTable.getView();
@@ -270,9 +270,9 @@ void TimeFrameGPU<NLayers>::loadROFCutMask(const int iteration)
270270
}
271271

272272
template <int NLayers>
273-
void TimeFrameGPU<NLayers>::loadVertices(const int iteration)
273+
void TimeFrameGPU<NLayers>::loadVertices()
274274
{
275-
if (!iteration) {
275+
{
276276
GPUTimer timer("loading seeding vertices");
277277
GPULog("gpu-transfer: loading {} seeding vertices, for {:.2f} MB.", this->mPrimaryVertices.size(), this->mPrimaryVertices.size() * sizeof(Vertex) / constants::MB);
278278
allocMem(reinterpret_cast<void**>(&mPrimaryVerticesDevice), this->mPrimaryVertices.size() * sizeof(Vertex), this->hasFrameworkAllocator());
@@ -281,9 +281,9 @@ void TimeFrameGPU<NLayers>::loadVertices(const int iteration)
281281
}
282282

283283
template <int NLayers>
284-
void TimeFrameGPU<NLayers>::loadROFOverlapTable(const int iteration)
284+
void TimeFrameGPU<NLayers>::loadROFOverlapTable()
285285
{
286-
if (!iteration) {
286+
{
287287
GPUTimer timer("initialising device view of ROFOverlapTable");
288288
const auto& hostTable = this->getROFOverlapTable();
289289
const auto& hostView = this->getROFOverlapTableView();
@@ -305,9 +305,9 @@ void TimeFrameGPU<NLayers>::loadROFOverlapTable(const int iteration)
305305
}
306306

307307
template <int NLayers>
308-
void TimeFrameGPU<NLayers>::loadROFVertexLookupTable(const int iteration)
308+
void TimeFrameGPU<NLayers>::loadROFVertexLookupTable()
309309
{
310-
if (!iteration) {
310+
{
311311
GPUTimer timer("initialising device view of ROFVertexLookupTable");
312312
const auto& hostTable = this->getROFVertexLookupTable();
313313
const auto& hostView = this->getROFVertexLookupTableView();
@@ -329,10 +329,10 @@ void TimeFrameGPU<NLayers>::loadROFVertexLookupTable(const int iteration)
329329
}
330330

331331
template <int NLayers>
332-
void TimeFrameGPU<NLayers>::updateROFVertexLookupTable(const int iteration)
332+
void TimeFrameGPU<NLayers>::updateROFVertexLookupTable()
333333
{
334334
const auto& hostTable = this->getROFVertexLookupTable();
335-
if (!iteration) {
335+
{
336336
GPUTimer timer("updating device view of ROFVertexLookupTable");
337337
const auto& hostView = this->getROFVertexLookupTableView();
338338
using TableEntry = ROFVertexLookupTable<NLayers>::TableEntry;
@@ -345,19 +345,19 @@ void TimeFrameGPU<NLayers>::updateROFVertexLookupTable(const int iteration)
345345
}
346346

347347
template <int NLayers>
348-
void TimeFrameGPU<NLayers>::createTrackletsLUTDeviceArray(const int iteration)
348+
void TimeFrameGPU<NLayers>::createTrackletsLUTDeviceArray()
349349
{
350-
if (!iteration) {
350+
{
351351
allocMem(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (NLayers - 1) * sizeof(int*), this->hasFrameworkAllocator());
352352
}
353353
}
354354

355355
template <int NLayers>
356-
void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(const int iteration, const int layer)
356+
void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(bool allocate, const int layer)
357357
{
358358
GPUTimer timer(mGpuStreams[layer], "creating tracklets LUTs", layer);
359359
const int ncls = this->mClusters[layer].size() + 1;
360-
if (!iteration) {
360+
if (allocate) {
361361
GPULog("gpu-allocation: creating tracklets LUT for {} elements on layer {}, for {:.2f} MB.", ncls, layer, ncls * sizeof(int) / constants::MB);
362362
allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[layer]), ncls * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());
363363
GPUChkErrS(cudaMemcpyAsync(&mTrackletsLUTDeviceArray[layer], &mTrackletsLUTDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
@@ -366,9 +366,9 @@ void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(const int iteration, const
366366
}
367367

368368
template <int NLayers>
369-
void TimeFrameGPU<NLayers>::createTrackletsBuffersArray(const int iteration)
369+
void TimeFrameGPU<NLayers>::createTrackletsBuffersArray()
370370
{
371-
if (!iteration) {
371+
{
372372
GPUTimer timer("creating tracklet buffers array");
373373
allocMem(reinterpret_cast<void**>(&mTrackletsDeviceArray), (NLayers - 1) * sizeof(Tracklet*), this->hasFrameworkAllocator());
374374
}
@@ -442,9 +442,9 @@ void TimeFrameGPU<NLayers>::loadCellsDevice()
442442
}
443443

444444
template <int NLayers>
445-
void TimeFrameGPU<NLayers>::createCellsLUTDeviceArray(const int iteration)
445+
void TimeFrameGPU<NLayers>::createCellsLUTDeviceArray()
446446
{
447-
if (!iteration) {
447+
{
448448
GPUTimer timer("creating cells LUTs array");
449449
allocMem(reinterpret_cast<void**>(&mCellsLUTDeviceArray), (NLayers - 2) * sizeof(int*), this->hasFrameworkAllocator());
450450
}
@@ -461,9 +461,9 @@ void TimeFrameGPU<NLayers>::createCellsLUTDevice(const int layer)
461461
}
462462

463463
template <int NLayers>
464-
void TimeFrameGPU<NLayers>::createCellsBuffersArray(const int iteration)
464+
void TimeFrameGPU<NLayers>::createCellsBuffersArray()
465465
{
466-
if (!iteration) {
466+
{
467467
GPUTimer timer("creating cells buffers array");
468468
allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeed*), this->hasFrameworkAllocator());
469469
GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice));
@@ -646,12 +646,10 @@ void TimeFrameGPU<NLayers>::popMemoryStack(const int iteration)
646646
}
647647

648648
template <int NLayers>
649-
void TimeFrameGPU<NLayers>::initialise(const int iteration,
650-
const TrackingParameters& trkParam,
651-
const int maxLayers)
649+
void TimeFrameGPU<NLayers>::initialise(const TrackingParameters& trkParam, int maxLayers)
652 650
{
653 651
mGpuStreams.resize(NLayers);
654-
o2::its::TimeFrame<NLayers>::initialise(iteration, trkParam, maxLayers, false);
652+
o2::its::TimeFrame<NLayers>::initialise(trkParam, maxLayers);
655 653
}
656 654

657 655
template <int NLayers>

0 commit comments

Comments
 (0)