@@ -52,10 +52,10 @@ void TimeFrameGPU<NLayers>::allocMem(void** ptr, size_t size, bool extAllocator,
5252}
5353
5454template <int NLayers>
55- void TimeFrameGPU<NLayers>::loadIndexTableUtils(const int iteration )
55+ void TimeFrameGPU<NLayers>::loadIndexTableUtils()
5656{
5757 GPUTimer timer (" loading indextable utils" );
58- if (!iteration) {
58+ {
5959 GPULog (" gpu-allocation: allocating IndexTableUtils buffer, for {:.2f} MB." , sizeof (IndexTableUtilsN) / constants::MB);
6060 allocMem (reinterpret_cast <void **>(&mIndexTableUtilsDevice ), sizeof (IndexTableUtilsN), this ->hasFrameworkAllocator ());
6161 }
@@ -64,9 +64,9 @@ void TimeFrameGPU<NLayers>::loadIndexTableUtils(const int iteration)
6464}
6565
6666template <int NLayers>
67- void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int iteration, const int maxLayers)
67+ void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int maxLayers)
6868{
69- if (!iteration) {
69+ {
7070 GPUTimer timer (" creating unsorted clusters array" );
7171 allocMem (reinterpret_cast <void **>(&mUnsortedClustersDeviceArray ), NLayers * sizeof (Cluster*), this ->hasFrameworkAllocator ());
7272 GPUChkErrS (cudaHostRegister (mUnsortedClustersDevice .data (), NLayers * sizeof (Cluster*), cudaHostRegisterPortable));
@@ -81,9 +81,9 @@ void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int iteratio
8181}
8282
8383template <int NLayers>
84- void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int iteration, const int layer)
84+ void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int layer)
8585{
86- if (!iteration) {
86+ {
8787 GPUTimer timer (mGpuStreams [layer], " loading unsorted clusters" , layer);
8888 GPULog (" gpu-transfer: loading {} unsorted clusters on layer {}, for {:.2f} MB." , this ->mUnsortedClusters [layer].size (), layer, this ->mUnsortedClusters [layer].size () * sizeof (Cluster) / constants::MB);
8989 allocMemAsync (reinterpret_cast <void **>(&mUnsortedClustersDevice [layer]), this ->mUnsortedClusters [layer].size () * sizeof (Cluster), mGpuStreams [layer], this ->hasFrameworkAllocator ());
@@ -93,9 +93,9 @@ void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int iteration, cons
9393}
9494
9595template <int NLayers>
96- void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int iteration, const int maxLayers)
96+ void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int maxLayers)
9797{
98- if (!iteration) {
98+ {
9999 GPUTimer timer (" creating sorted clusters array" );
100100 allocMem (reinterpret_cast <void **>(&mClustersDeviceArray ), NLayers * sizeof (Cluster*), this ->hasFrameworkAllocator ());
101101 GPUChkErrS (cudaHostRegister (mClustersDevice .data (), NLayers * sizeof (Cluster*), cudaHostRegisterPortable));
@@ -110,9 +110,9 @@ void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int iteration, const
110110}
111111
112112template <int NLayers>
113- void TimeFrameGPU<NLayers>::loadClustersDevice(const int iteration, const int layer)
113+ void TimeFrameGPU<NLayers>::loadClustersDevice(const int layer)
114114{
115- if (!iteration) {
115+ {
116116 GPUTimer timer (mGpuStreams [layer], " loading sorted clusters" , layer);
117117 GPULog (" gpu-transfer: loading {} clusters on layer {}, for {:.2f} MB." , this ->mClusters [layer].size (), layer, this ->mClusters [layer].size () * sizeof (Cluster) / constants::MB);
118118 allocMemAsync (reinterpret_cast <void **>(&mClustersDevice [layer]), this ->mClusters [layer].size () * sizeof (Cluster), mGpuStreams [layer], this ->hasFrameworkAllocator ());
@@ -122,9 +122,9 @@ void TimeFrameGPU<NLayers>::loadClustersDevice(const int iteration, const int la
122122}
123123
124124template <int NLayers>
125- void TimeFrameGPU<NLayers>::createClustersIndexTablesArray(const int iteration )
125+ void TimeFrameGPU<NLayers>::createClustersIndexTablesArray()
126126{
127- if (!iteration) {
127+ {
128128 GPUTimer timer (" creating clustersindextable array" );
129129 allocMem (reinterpret_cast <void **>(&mClustersIndexTablesDeviceArray ), NLayers * sizeof (int *), this ->hasFrameworkAllocator ());
130130 GPUChkErrS (cudaHostRegister (mClustersIndexTablesDevice .data (), NLayers * sizeof (int *), cudaHostRegisterPortable));
@@ -139,9 +139,9 @@ void TimeFrameGPU<NLayers>::createClustersIndexTablesArray(const int iteration)
139139}
140140
141141template <int NLayers>
142- void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int iteration, const int layer)
142+ void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int layer)
143143{
144- if (!iteration) {
144+ {
145145 GPUTimer timer (mGpuStreams [layer], " loading sorted clusters" , layer);
146146 GPULog (" gpu-transfer: loading clusters indextable for layer {} with {} elements, for {:.2f} MB." , layer, this ->mIndexTables [layer].size (), this ->mIndexTables [layer].size () * sizeof (int ) / constants::MB);
147147 allocMemAsync (reinterpret_cast <void **>(&mClustersIndexTablesDevice [layer]), this ->mIndexTables [layer].size () * sizeof (int ), mGpuStreams [layer], this ->hasFrameworkAllocator ());
@@ -151,9 +151,9 @@ void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int iteration, const i
151151}
152152
153153template <int NLayers>
154- void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int iteration, const int maxLayers)
154+ void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int maxLayers)
155155{
156- if (!iteration) {
156+ {
157157 GPUTimer timer (" creating used clusters flags" );
158158 allocMem (reinterpret_cast <void **>(&mUsedClustersDeviceArray ), NLayers * sizeof (uint8_t *), this ->hasFrameworkAllocator ());
159159 GPUChkErrS (cudaHostRegister (mUsedClustersDevice .data (), NLayers * sizeof (uint8_t *), cudaHostRegisterPortable));
@@ -168,9 +168,9 @@ void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int iteration, c
168168}
169169
170170template <int NLayers>
171- void TimeFrameGPU<NLayers>::createUsedClustersDevice(const int iteration, const int layer)
171+ void TimeFrameGPU<NLayers>::createUsedClustersDevice(const int layer)
172172{
173- if (!iteration) {
173+ {
174174 GPUTimer timer (mGpuStreams [layer], " creating used clusters flags" , layer);
175175 GPULog (" gpu-transfer: creating {} used clusters flags on layer {}, for {:.2f} MB." , this ->mUsedClusters [layer].size (), layer, this ->mUsedClusters [layer].size () * sizeof (unsigned char ) / constants::MB);
176176 allocMemAsync (reinterpret_cast <void **>(&mUsedClustersDevice [layer]), this ->mUsedClusters [layer].size () * sizeof (unsigned char ), mGpuStreams [layer], this ->hasFrameworkAllocator ());
@@ -190,9 +190,9 @@ void TimeFrameGPU<NLayers>::loadUsedClustersDevice()
190190}
191191
192192template <int NLayers>
193- void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray(const int iteration )
193+ void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray()
194194{
195- if (!iteration) {
195+ {
196196 GPUTimer timer (" creating ROFrame clusters array" );
197197 allocMem (reinterpret_cast <void **>(&mROFramesClustersDeviceArray ), NLayers * sizeof (int *), this ->hasFrameworkAllocator ());
198198 GPUChkErrS (cudaHostRegister (mROFramesClustersDevice .data (), NLayers * sizeof (int *), cudaHostRegisterPortable));
@@ -207,9 +207,9 @@ void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray(const int iteration
207207}
208208
209209template <int NLayers>
210- void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int iteration, const int layer)
210+ void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int layer)
211211{
212- if (!iteration) {
212+ {
213213 GPUTimer timer (mGpuStreams [layer], " loading ROframe clusters" , layer);
214214 GPULog (" gpu-transfer: loading {} ROframe clusters info on layer {}, for {:.2f} MB." , this ->mROFramesClusters [layer].size (), layer, this ->mROFramesClusters [layer].size () * sizeof (int ) / constants::MB);
215215 allocMemAsync (reinterpret_cast <void **>(&mROFramesClustersDevice [layer]), this ->mROFramesClusters [layer].size () * sizeof (int ), mGpuStreams [layer], this ->hasFrameworkAllocator ());
@@ -219,9 +219,9 @@ void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int iteration, const
219219}
220220
221221template <int NLayers>
222- void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray(const int iteration )
222+ void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray()
223223{
224- if (!iteration) {
224+ {
225225 GPUTimer timer (" creating trackingframeinfo array" );
226226 allocMem (reinterpret_cast <void **>(&mTrackingFrameInfoDeviceArray ), NLayers * sizeof (TrackingFrameInfo*), this ->hasFrameworkAllocator ());
227227 GPUChkErrS (cudaHostRegister (mTrackingFrameInfoDevice .data (), NLayers * sizeof (TrackingFrameInfo*), cudaHostRegisterPortable));
@@ -236,9 +236,9 @@ void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray(const int iterati
236236}
237237
238238template <int NLayers>
239- void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int iteration, const int layer)
239+ void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int layer)
240240{
241- if (!iteration) {
241+ {
242242 GPUTimer timer (mGpuStreams [layer], " loading trackingframeinfo" , layer);
243243 GPULog (" gpu-transfer: loading {} tfinfo on layer {}, for {:.2f} MB." , this ->mTrackingFrameInfo [layer].size (), layer, this ->mTrackingFrameInfo [layer].size () * sizeof (TrackingFrameInfo) / constants::MB);
244244 allocMemAsync (reinterpret_cast <void **>(&mTrackingFrameInfoDevice [layer]), this ->mTrackingFrameInfo [layer].size () * sizeof (TrackingFrameInfo), mGpuStreams [layer], this ->hasFrameworkAllocator ());
@@ -250,7 +250,7 @@ void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int iteration, con
250250template <int NLayers>
251251void TimeFrameGPU<NLayers>::loadROFCutMask(const int iteration)
252252{
253- if (!iteration || iteration == 3 ) { // we need to re-load the swapped mult-mask in upc iteration
253+ {
254254 GPUTimer timer (" loading multiplicity cut mask" );
255255 const auto & hostTable = *(this ->mROFMask );
256256 const auto hostView = hostTable.getView ();
@@ -270,9 +270,9 @@ void TimeFrameGPU<NLayers>::loadROFCutMask(const int iteration)
270270}
271271
272272template <int NLayers>
273- void TimeFrameGPU<NLayers>::loadVertices(const int iteration )
273+ void TimeFrameGPU<NLayers>::loadVertices()
274274{
275- if (!iteration) {
275+ {
276276 GPUTimer timer (" loading seeding vertices" );
277277 GPULog (" gpu-transfer: loading {} seeding vertices, for {:.2f} MB." , this ->mPrimaryVertices .size (), this ->mPrimaryVertices .size () * sizeof (Vertex) / constants::MB);
278278 allocMem (reinterpret_cast <void **>(&mPrimaryVerticesDevice ), this ->mPrimaryVertices .size () * sizeof (Vertex), this ->hasFrameworkAllocator ());
@@ -281,9 +281,9 @@ void TimeFrameGPU<NLayers>::loadVertices(const int iteration)
281281}
282282
283283template <int NLayers>
284- void TimeFrameGPU<NLayers>::loadROFOverlapTable(const int iteration )
284+ void TimeFrameGPU<NLayers>::loadROFOverlapTable()
285285{
286- if (!iteration) {
286+ {
287287 GPUTimer timer (" initialising device view of ROFOverlapTable" );
288288 const auto & hostTable = this ->getROFOverlapTable ();
289289 const auto & hostView = this ->getROFOverlapTableView ();
@@ -305,9 +305,9 @@ void TimeFrameGPU<NLayers>::loadROFOverlapTable(const int iteration)
305305}
306306
307307template <int NLayers>
308- void TimeFrameGPU<NLayers>::loadROFVertexLookupTable(const int iteration )
308+ void TimeFrameGPU<NLayers>::loadROFVertexLookupTable()
309309{
310- if (!iteration) {
310+ {
311311 GPUTimer timer (" initialising device view of ROFVertexLookupTable" );
312312 const auto & hostTable = this ->getROFVertexLookupTable ();
313313 const auto & hostView = this ->getROFVertexLookupTableView ();
@@ -329,10 +329,10 @@ void TimeFrameGPU<NLayers>::loadROFVertexLookupTable(const int iteration)
329329}
330330
331331template <int NLayers>
332- void TimeFrameGPU<NLayers>::updateROFVertexLookupTable(const int iteration )
332+ void TimeFrameGPU<NLayers>::updateROFVertexLookupTable()
333333{
334334 const auto & hostTable = this ->getROFVertexLookupTable ();
335- if (!iteration) {
335+ {
336336 GPUTimer timer (" updating device view of ROFVertexLookupTable" );
337337 const auto & hostView = this ->getROFVertexLookupTableView ();
338338 using TableEntry = ROFVertexLookupTable<NLayers>::TableEntry;
@@ -345,19 +345,19 @@ void TimeFrameGPU<NLayers>::updateROFVertexLookupTable(const int iteration)
345345}
346346
347347template <int NLayers>
348- void TimeFrameGPU<NLayers>::createTrackletsLUTDeviceArray(const int iteration )
348+ void TimeFrameGPU<NLayers>::createTrackletsLUTDeviceArray()
349349{
350- if (!iteration) {
350+ {
351351 allocMem (reinterpret_cast <void **>(&mTrackletsLUTDeviceArray ), (NLayers - 1 ) * sizeof (int *), this ->hasFrameworkAllocator ());
352352 }
353353}
354354
355355template <int NLayers>
356- void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(const int iteration , const int layer)
356+ void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(bool allocate , const int layer)
357357{
358358 GPUTimer timer (mGpuStreams [layer], " creating tracklets LUTs" , layer);
359359 const int ncls = this ->mClusters [layer].size () + 1 ;
360- if (!iteration ) {
360+ if (allocate ) {
361361 GPULog (" gpu-allocation: creating tracklets LUT for {} elements on layer {}, for {:.2f} MB." , ncls, layer, ncls * sizeof (int ) / constants::MB);
362362 allocMemAsync (reinterpret_cast <void **>(&mTrackletsLUTDevice [layer]), ncls * sizeof (int ), mGpuStreams [layer], this ->hasFrameworkAllocator ());
363363 GPUChkErrS (cudaMemcpyAsync (&mTrackletsLUTDeviceArray [layer], &mTrackletsLUTDevice [layer], sizeof (int *), cudaMemcpyHostToDevice, mGpuStreams [layer].get ()));
@@ -366,9 +366,9 @@ void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(const int iteration, const
366366}
367367
368368template <int NLayers>
369- void TimeFrameGPU<NLayers>::createTrackletsBuffersArray(const int iteration )
369+ void TimeFrameGPU<NLayers>::createTrackletsBuffersArray()
370370{
371- if (!iteration) {
371+ {
372372 GPUTimer timer (" creating tracklet buffers array" );
373373 allocMem (reinterpret_cast <void **>(&mTrackletsDeviceArray ), (NLayers - 1 ) * sizeof (Tracklet*), this ->hasFrameworkAllocator ());
374374 }
@@ -442,9 +442,9 @@ void TimeFrameGPU<NLayers>::loadCellsDevice()
442442}
443443
444444template <int NLayers>
445- void TimeFrameGPU<NLayers>::createCellsLUTDeviceArray(const int iteration )
445+ void TimeFrameGPU<NLayers>::createCellsLUTDeviceArray()
446446{
447- if (!iteration) {
447+ {
448448 GPUTimer timer (" creating cells LUTs array" );
449449 allocMem (reinterpret_cast <void **>(&mCellsLUTDeviceArray ), (NLayers - 2 ) * sizeof (int *), this ->hasFrameworkAllocator ());
450450 }
@@ -461,9 +461,9 @@ void TimeFrameGPU<NLayers>::createCellsLUTDevice(const int layer)
461461}
462462
463463template <int NLayers>
464- void TimeFrameGPU<NLayers>::createCellsBuffersArray(const int iteration )
464+ void TimeFrameGPU<NLayers>::createCellsBuffersArray()
465465{
466- if (!iteration) {
466+ {
467467 GPUTimer timer (" creating cells buffers array" );
468468 allocMem (reinterpret_cast <void **>(&mCellsDeviceArray ), (NLayers - 2 ) * sizeof (CellSeed*), this ->hasFrameworkAllocator ());
469469 GPUChkErrS (cudaMemcpy (mCellsDeviceArray , mCellsDevice .data (), mCellsDevice .size () * sizeof (CellSeed*), cudaMemcpyHostToDevice));
@@ -646,12 +646,10 @@ void TimeFrameGPU<NLayers>::popMemoryStack(const int iteration)
646646}
647647
648648template <int NLayers>
649- void TimeFrameGPU<NLayers>::initialise(const int iteration,
650- const TrackingParameters& trkParam,
651- const int maxLayers)
649+ void TimeFrameGPU<NLayers>::initialise(const TrackingParameters& trkParam, int maxLayers)
652650{
653651 mGpuStreams .resize (NLayers);
654- o2::its::TimeFrame<NLayers>::initialise (iteration, trkParam, maxLayers, false );
652+ o2::its::TimeFrame<NLayers>::initialise (trkParam, maxLayers);
655653}
656654
657655template <int NLayers>
0 commit comments