AliceO2Group
diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
Lines changed: 23 additions & 23 deletions
diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h
Lines changed: 2 additions & 2 deletions
diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
Lines changed: 47 additions & 49 deletions
@@ -42,30 +42,30 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
   void popMemoryStack(const int);
   void registerHostMemory(const int);
   void unregisterHostMemory(const int);
-  void initialise(const int, const TrackingParameters&, const int);
-  void loadIndexTableUtils(const int);
-  void loadTrackingFrameInfoDevice(const int, const int);
-  void createTrackingFrameInfoDeviceArray(const int);
-  void loadUnsortedClustersDevice(const int, const int);
-  void createUnsortedClustersDeviceArray(const int, const int = NLayers);
-  void loadClustersDevice(const int, const int);
-  void createClustersDeviceArray(const int, const int = NLayers);
-  void loadClustersIndexTables(const int, const int);
-  void createClustersIndexTablesArray(const int);
-  void createUsedClustersDevice(const int, const int);
-  void createUsedClustersDeviceArray(const int, const int = NLayers);
+  void initialise(const TrackingParameters&, int maxLayers);
+  void loadIndexTableUtils();
+  void loadTrackingFrameInfoDevice(const int);
+  void createTrackingFrameInfoDeviceArray();
+  void loadUnsortedClustersDevice(const int);
+  void createUnsortedClustersDeviceArray(const int = NLayers);
+  void loadClustersDevice(const int);
+  void createClustersDeviceArray(const int = NLayers);
+  void loadClustersIndexTables(const int);
+  void createClustersIndexTablesArray();
+  void createUsedClustersDevice(const int);
+  void createUsedClustersDeviceArray(const int = NLayers);
   void loadUsedClustersDevice();
-  void loadROFrameClustersDevice(const int, const int);
-  void createROFrameClustersDeviceArray(const int);
+  void loadROFrameClustersDevice(const int);
+  void createROFrameClustersDeviceArray();
   void loadROFCutMask(const int);
-  void loadVertices(const int);
-  void loadROFOverlapTable(const int);
-  void loadROFVertexLookupTable(const int);
-  void updateROFVertexLookupTable(const int);
+  void loadVertices();
+  void loadROFOverlapTable();
+  void loadROFVertexLookupTable();
+  void updateROFVertexLookupTable();
 
   ///
-  void createTrackletsLUTDevice(const int, const int);
-  void createTrackletsLUTDeviceArray(const int);
+  void createTrackletsLUTDevice(bool, const int);
+  void createTrackletsLUTDeviceArray();
   void loadTrackletsDevice();
   void loadTrackletsLUTDevice();
   void loadCellsDevice();
@@ -74,12 +74,12 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
   void loadTrackSeedsChi2Device();
   void loadTrackSeedsDevice(bounded_vector<TrackSeedN>&);
   void createTrackletsBuffers(const int);
-  void createTrackletsBuffersArray(const int);
+  void createTrackletsBuffersArray();
   void createCellsBuffers(const int);
-  void createCellsBuffersArray(const int);
+  void createCellsBuffersArray();
   void createCellsDevice();
   void createCellsLUTDevice(const int);
-  void createCellsLUTDeviceArray(const int);
+  void createCellsLUTDeviceArray();
   void createNeighboursIndexTablesDevice(const int);
   void createNeighboursDevice(const unsigned int layer);
   void createNeighboursLUTDevice(const int, const unsigned int);
 
@@ -51,7 +51,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
                                  const int** clustersIndexTables,
                                  int** trackletsLUTs,
                                  gsl::span<int*> trackletsLUTsHost,
-                                 const int iteration,
+                                 const bool selectUPCVertices,
                                  const float NSigmaCut,
                                  bounded_vector<float>& phiCuts,
                                  const float resolutionPV,
@@ -82,7 +82,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
                                    gsl::span<int> nTracklets,
                                    int** trackletsLUTs,
                                    gsl::span<int*> trackletsLUTsHost,
-                                   const int iteration,
+                                   const bool selectUPCVertices,
                                    const float NSigmaCut,
                                    bounded_vector<float>& phiCuts,
                                    const float resolutionPV,
 
@@ -52,10 +52,10 @@ void TimeFrameGPU<NLayers>::allocMem(void** ptr, size_t size, bool extAllocator,
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::loadIndexTableUtils(const int iteration)
+void TimeFrameGPU<NLayers>::loadIndexTableUtils()
 {
   GPUTimer timer("loading indextable utils");
-  if (!iteration) {
+  {
     GPULog("gpu-allocation: allocating IndexTableUtils buffer, for {:.2f} MB.", sizeof(IndexTableUtilsN) / constants::MB);
     allocMem(reinterpret_cast<void**>(&mIndexTableUtilsDevice), sizeof(IndexTableUtilsN), this->hasFrameworkAllocator());
   }
@@ -64,9 +64,9 @@ void TimeFrameGPU<NLayers>::loadIndexTableUtils(const int iteration)
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int iteration, const int maxLayers)
+void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int maxLayers)
 {
-  if (!iteration) {
+  {
     GPUTimer timer("creating unsorted clusters array");
     allocMem(reinterpret_cast<void**>(&mUnsortedClustersDeviceArray), NLayers * sizeof(Cluster*), this->hasFrameworkAllocator());
     GPUChkErrS(cudaHostRegister(mUnsortedClustersDevice.data(), NLayers * sizeof(Cluster*), cudaHostRegisterPortable));
@@ -81,9 +81,9 @@ void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int iteratio
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int iteration, const int layer)
+void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int layer)
 {
-  if (!iteration) {
+  {
     GPUTimer timer(mGpuStreams[layer], "loading unsorted clusters", layer);
     GPULog("gpu-transfer: loading {} unsorted clusters on layer {}, for {:.2f} MB.", this->mUnsortedClusters[layer].size(), layer, this->mUnsortedClusters[layer].size() * sizeof(Cluster) / constants::MB);
     allocMemAsync(reinterpret_cast<void**>(&mUnsortedClustersDevice[layer]), this->mUnsortedClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -93,9 +93,9 @@ void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int iteration, cons
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int iteration, const int maxLayers)
+void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int maxLayers)
 {
-  if (!iteration) {
+  {
     GPUTimer timer("creating sorted clusters array");
     allocMem(reinterpret_cast<void**>(&mClustersDeviceArray), NLayers * sizeof(Cluster*), this->hasFrameworkAllocator());
     GPUChkErrS(cudaHostRegister(mClustersDevice.data(), NLayers * sizeof(Cluster*), cudaHostRegisterPortable));
@@ -110,9 +110,9 @@ void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int iteration, const
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::loadClustersDevice(const int iteration, const int layer)
+void TimeFrameGPU<NLayers>::loadClustersDevice(const int layer)
 {
-  if (!iteration) {
+  {
     GPUTimer timer(mGpuStreams[layer], "loading sorted clusters", layer);
     GPULog("gpu-transfer: loading {} clusters on layer {}, for {:.2f} MB.", this->mClusters[layer].size(), layer, this->mClusters[layer].size() * sizeof(Cluster) / constants::MB);
     allocMemAsync(reinterpret_cast<void**>(&mClustersDevice[layer]), this->mClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -122,9 +122,9 @@ void TimeFrameGPU<NLayers>::loadClustersDevice(const int iteration, const int la
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createClustersIndexTablesArray(const int iteration)
+void TimeFrameGPU<NLayers>::createClustersIndexTablesArray()
 {
-  if (!iteration) {
+  {
     GPUTimer timer("creating clustersindextable array");
     allocMem(reinterpret_cast<void**>(&mClustersIndexTablesDeviceArray), NLayers * sizeof(int*), this->hasFrameworkAllocator());
     GPUChkErrS(cudaHostRegister(mClustersIndexTablesDevice.data(), NLayers * sizeof(int*), cudaHostRegisterPortable));
@@ -139,9 +139,9 @@ void TimeFrameGPU<NLayers>::createClustersIndexTablesArray(const int iteration)
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int iteration, const int layer)
+void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int layer)
 {
-  if (!iteration) {
+  {
     GPUTimer timer(mGpuStreams[layer], "loading sorted clusters", layer);
     GPULog("gpu-transfer: loading clusters indextable for layer {} with {} elements, for {:.2f} MB.", layer, this->mIndexTables[layer].size(), this->mIndexTables[layer].size() * sizeof(int) / constants::MB);
     allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDevice[layer]), this->mIndexTables[layer].size() * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -151,9 +151,9 @@ void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int iteration, const i
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int iteration, const int maxLayers)
+void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int maxLayers)
 {
-  if (!iteration) {
+  {
     GPUTimer timer("creating used clusters flags");
     allocMem(reinterpret_cast<void**>(&mUsedClustersDeviceArray), NLayers * sizeof(uint8_t*), this->hasFrameworkAllocator());
     GPUChkErrS(cudaHostRegister(mUsedClustersDevice.data(), NLayers * sizeof(uint8_t*), cudaHostRegisterPortable));
@@ -168,9 +168,9 @@ void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int iteration, c
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createUsedClustersDevice(const int iteration, const int layer)
+void TimeFrameGPU<NLayers>::createUsedClustersDevice(const int layer)
 {
-  if (!iteration) {
+  {
     GPUTimer timer(mGpuStreams[layer], "creating used clusters flags", layer);
     GPULog("gpu-transfer: creating {} used clusters flags on layer {}, for {:.2f} MB.", this->mUsedClusters[layer].size(), layer, this->mUsedClusters[layer].size() * sizeof(unsigned char) / constants::MB);
     allocMemAsync(reinterpret_cast<void**>(&mUsedClustersDevice[layer]), this->mUsedClusters[layer].size() * sizeof(unsigned char), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -190,9 +190,9 @@ void TimeFrameGPU<NLayers>::loadUsedClustersDevice()
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray(const int iteration)
+void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray()
 {
-  if (!iteration) {
+  {
     GPUTimer timer("creating ROFrame clusters array");
     allocMem(reinterpret_cast<void**>(&mROFramesClustersDeviceArray), NLayers * sizeof(int*), this->hasFrameworkAllocator());
     GPUChkErrS(cudaHostRegister(mROFramesClustersDevice.data(), NLayers * sizeof(int*), cudaHostRegisterPortable));
@@ -207,9 +207,9 @@ void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray(const int iteration
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int iteration, const int layer)
+void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int layer)
 {
-  if (!iteration) {
+  {
     GPUTimer timer(mGpuStreams[layer], "loading ROframe clusters", layer);
     GPULog("gpu-transfer: loading {} ROframe clusters info on layer {}, for {:.2f} MB.", this->mROFramesClusters[layer].size(), layer, this->mROFramesClusters[layer].size() * sizeof(int) / constants::MB);
     allocMemAsync(reinterpret_cast<void**>(&mROFramesClustersDevice[layer]), this->mROFramesClusters[layer].size() * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -219,9 +219,9 @@ void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int iteration, const
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray(const int iteration)
+void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray()
 {
-  if (!iteration) {
+  {
     GPUTimer timer("creating trackingframeinfo array");
     allocMem(reinterpret_cast<void**>(&mTrackingFrameInfoDeviceArray), NLayers * sizeof(TrackingFrameInfo*), this->hasFrameworkAllocator());
     GPUChkErrS(cudaHostRegister(mTrackingFrameInfoDevice.data(), NLayers * sizeof(TrackingFrameInfo*), cudaHostRegisterPortable));
@@ -236,9 +236,9 @@ void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray(const int iterati
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int iteration, const int layer)
+void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int layer)
 {
-  if (!iteration) {
+  {
     GPUTimer timer(mGpuStreams[layer], "loading trackingframeinfo", layer);
     GPULog("gpu-transfer: loading {} tfinfo on layer {}, for {:.2f} MB.", this->mTrackingFrameInfo[layer].size(), layer, this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo) / constants::MB);
     allocMemAsync(reinterpret_cast<void**>(&mTrackingFrameInfoDevice[layer]), this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo), mGpuStreams[layer], this->hasFrameworkAllocator());
@@ -250,7 +250,7 @@ void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int iteration, con
 template <int NLayers>
 void TimeFrameGPU<NLayers>::loadROFCutMask(const int iteration)
 {
-  if (!iteration || iteration == 3) { // we need to re-load the swapped mult-mask in upc iteration
+  {
     GPUTimer timer("loading multiplicity cut mask");
     const auto& hostTable = *(this->mROFMask);
     const auto hostView = hostTable.getView();
@@ -270,9 +270,9 @@ void TimeFrameGPU<NLayers>::loadROFCutMask(const int iteration)
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::loadVertices(const int iteration)
+void TimeFrameGPU<NLayers>::loadVertices()
 {
-  if (!iteration) {
+  {
     GPUTimer timer("loading seeding vertices");
     GPULog("gpu-transfer: loading {} seeding vertices, for {:.2f} MB.", this->mPrimaryVertices.size(), this->mPrimaryVertices.size() * sizeof(Vertex) / constants::MB);
     allocMem(reinterpret_cast<void**>(&mPrimaryVerticesDevice), this->mPrimaryVertices.size() * sizeof(Vertex), this->hasFrameworkAllocator());
@@ -281,9 +281,9 @@ void TimeFrameGPU<NLayers>::loadVertices(const int iteration)
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::loadROFOverlapTable(const int iteration)
+void TimeFrameGPU<NLayers>::loadROFOverlapTable()
 {
-  if (!iteration) {
+  {
     GPUTimer timer("initialising device view of ROFOverlapTable");
     const auto& hostTable = this->getROFOverlapTable();
     const auto& hostView = this->getROFOverlapTableView();
@@ -305,9 +305,9 @@ void TimeFrameGPU<NLayers>::loadROFOverlapTable(const int iteration)
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::loadROFVertexLookupTable(const int iteration)
+void TimeFrameGPU<NLayers>::loadROFVertexLookupTable()
 {
-  if (!iteration) {
+  {
     GPUTimer timer("initialising device view of ROFVertexLookupTable");
     const auto& hostTable = this->getROFVertexLookupTable();
     const auto& hostView = this->getROFVertexLookupTableView();
@@ -329,10 +329,10 @@ void TimeFrameGPU<NLayers>::loadROFVertexLookupTable(const int iteration)
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::updateROFVertexLookupTable(const int iteration)
+void TimeFrameGPU<NLayers>::updateROFVertexLookupTable()
 {
   const auto& hostTable = this->getROFVertexLookupTable();
-  if (!iteration) {
+  {
     GPUTimer timer("updating device view of ROFVertexLookupTable");
     const auto& hostView = this->getROFVertexLookupTableView();
     using TableEntry = ROFVertexLookupTable<NLayers>::TableEntry;
@@ -345,19 +345,19 @@ void TimeFrameGPU<NLayers>::updateROFVertexLookupTable(const int iteration)
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createTrackletsLUTDeviceArray(const int iteration)
+void TimeFrameGPU<NLayers>::createTrackletsLUTDeviceArray()
 {
-  if (!iteration) {
+  {
     allocMem(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (NLayers - 1) * sizeof(int*), this->hasFrameworkAllocator());
   }
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(const int iteration, const int layer)
+void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(bool allocate, const int layer)
 {
   GPUTimer timer(mGpuStreams[layer], "creating tracklets LUTs", layer);
   const int ncls = this->mClusters[layer].size() + 1;
-  if (!iteration) {
+  if (allocate) {
     GPULog("gpu-allocation: creating tracklets LUT for {} elements on layer {}, for {:.2f} MB.", ncls, layer, ncls * sizeof(int) / constants::MB);
     allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[layer]), ncls * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());
     GPUChkErrS(cudaMemcpyAsync(&mTrackletsLUTDeviceArray[layer], &mTrackletsLUTDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
@@ -366,9 +366,9 @@ void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(const int iteration, const
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createTrackletsBuffersArray(const int iteration)
+void TimeFrameGPU<NLayers>::createTrackletsBuffersArray()
 {
-  if (!iteration) {
+  {
     GPUTimer timer("creating tracklet buffers array");
     allocMem(reinterpret_cast<void**>(&mTrackletsDeviceArray), (NLayers - 1) * sizeof(Tracklet*), this->hasFrameworkAllocator());
   }
@@ -442,9 +442,9 @@ void TimeFrameGPU<NLayers>::loadCellsDevice()
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createCellsLUTDeviceArray(const int iteration)
+void TimeFrameGPU<NLayers>::createCellsLUTDeviceArray()
 {
-  if (!iteration) {
+  {
     GPUTimer timer("creating cells LUTs array");
     allocMem(reinterpret_cast<void**>(&mCellsLUTDeviceArray), (NLayers - 2) * sizeof(int*), this->hasFrameworkAllocator());
   }
@@ -461,9 +461,9 @@ void TimeFrameGPU<NLayers>::createCellsLUTDevice(const int layer)
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::createCellsBuffersArray(const int iteration)
+void TimeFrameGPU<NLayers>::createCellsBuffersArray()
 {
-  if (!iteration) {
+  {
     GPUTimer timer("creating cells buffers array");
     allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeed*), this->hasFrameworkAllocator());
     GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice));
@@ -646,12 +646,10 @@ void TimeFrameGPU<NLayers>::popMemoryStack(const int iteration)
 }
 
 template <int NLayers>
-void TimeFrameGPU<NLayers>::initialise(const int iteration,
-                                       const TrackingParameters& trkParam,
-                                       const int maxLayers)
+void TimeFrameGPU<NLayers>::initialise(const TrackingParameters& trkParam, int maxLayers)
 {
   mGpuStreams.resize(NLayers);
-  o2::its::TimeFrame<NLayers>::initialise(iteration, trkParam, maxLayers, false);
+  o2::its::TimeFrame<NLayers>::initialise(trkParam, maxLayers);
 }
 
 template <int NLayers>
Original file line number	Diff line number	Diff line change
`@@ -52,10 +52,10 @@ void TimeFrameGPU<NLayers>::allocMem(void** ptr, size_t size, bool extAllocator,`
`52`	`52`	`}`
`53`	`53`
`54`	`54`	`template <int NLayers>`
`55`		`-void TimeFrameGPU<NLayers>::loadIndexTableUtils(const int iteration)`
	`55`	`+void TimeFrameGPU<NLayers>::loadIndexTableUtils()`
`56`	`56`	`{`
`57`	`57`	`GPUTimer timer("loading indextable utils");`
`58`		`- if (!iteration) {`
	`58`	`+ {`
`59`	`59`	`GPULog("gpu-allocation: allocating IndexTableUtils buffer, for {:.2f} MB.", sizeof(IndexTableUtilsN) / constants::MB);`
`60`	`60`	`allocMem(reinterpret_cast<void**>(&mIndexTableUtilsDevice), sizeof(IndexTableUtilsN), this->hasFrameworkAllocator());`
`61`	`61`	`}`
`@@ -64,9 +64,9 @@ void TimeFrameGPU<NLayers>::loadIndexTableUtils(const int iteration)`
`64`	`64`	`}`
`65`	`65`
`66`	`66`	`template <int NLayers>`
`67`		`-void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int iteration, const int maxLayers)`
	`67`	`+void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int maxLayers)`
`68`	`68`	`{`
`69`		`- if (!iteration) {`
	`69`	`+ {`
`70`	`70`	`GPUTimer timer("creating unsorted clusters array");`
`71`	`71`	`allocMem(reinterpret_cast<void**>(&mUnsortedClustersDeviceArray), NLayers * sizeof(Cluster*), this->hasFrameworkAllocator());`
`72`	`72`	`GPUChkErrS(cudaHostRegister(mUnsortedClustersDevice.data(), NLayers * sizeof(Cluster*), cudaHostRegisterPortable));`
`@@ -81,9 +81,9 @@ void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int iteratio`
`81`	`81`	`}`
`82`	`82`
`83`	`83`	`template <int NLayers>`
`84`		`-void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int iteration, const int layer)`
	`84`	`+void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int layer)`
`85`	`85`	`{`
`86`		`- if (!iteration) {`
	`86`	`+ {`
`87`	`87`	`GPUTimer timer(mGpuStreams[layer], "loading unsorted clusters", layer);`
`88`	`88`	`GPULog("gpu-transfer: loading {} unsorted clusters on layer {}, for {:.2f} MB.", this->mUnsortedClusters[layer].size(), layer, this->mUnsortedClusters[layer].size() * sizeof(Cluster) / constants::MB);`
`89`	`89`	`allocMemAsync(reinterpret_cast<void**>(&mUnsortedClustersDevice[layer]), this->mUnsortedClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->hasFrameworkAllocator());`
`@@ -93,9 +93,9 @@ void TimeFrameGPU<NLayers>::loadUnsortedClustersDevice(const int iteration, cons`
`93`	`93`	`}`
`94`	`94`
`95`	`95`	`template <int NLayers>`
`96`		`-void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int iteration, const int maxLayers)`
	`96`	`+void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int maxLayers)`
`97`	`97`	`{`
`98`		`- if (!iteration) {`
	`98`	`+ {`
`99`	`99`	`GPUTimer timer("creating sorted clusters array");`
`100`	`100`	`allocMem(reinterpret_cast<void**>(&mClustersDeviceArray), NLayers * sizeof(Cluster*), this->hasFrameworkAllocator());`
`101`	`101`	`GPUChkErrS(cudaHostRegister(mClustersDevice.data(), NLayers * sizeof(Cluster*), cudaHostRegisterPortable));`
`@@ -110,9 +110,9 @@ void TimeFrameGPU<NLayers>::createClustersDeviceArray(const int iteration, const`
`110`	`110`	`}`
`111`	`111`
`112`	`112`	`template <int NLayers>`
`113`		`-void TimeFrameGPU<NLayers>::loadClustersDevice(const int iteration, const int layer)`
	`113`	`+void TimeFrameGPU<NLayers>::loadClustersDevice(const int layer)`
`114`	`114`	`{`
`115`		`- if (!iteration) {`
	`115`	`+ {`
`116`	`116`	`GPUTimer timer(mGpuStreams[layer], "loading sorted clusters", layer);`
`117`	`117`	`GPULog("gpu-transfer: loading {} clusters on layer {}, for {:.2f} MB.", this->mClusters[layer].size(), layer, this->mClusters[layer].size() * sizeof(Cluster) / constants::MB);`
`118`	`118`	`allocMemAsync(reinterpret_cast<void**>(&mClustersDevice[layer]), this->mClusters[layer].size() * sizeof(Cluster), mGpuStreams[layer], this->hasFrameworkAllocator());`
`@@ -122,9 +122,9 @@ void TimeFrameGPU<NLayers>::loadClustersDevice(const int iteration, const int la`
`122`	`122`	`}`
`123`	`123`
`124`	`124`	`template <int NLayers>`
`125`		`-void TimeFrameGPU<NLayers>::createClustersIndexTablesArray(const int iteration)`
	`125`	`+void TimeFrameGPU<NLayers>::createClustersIndexTablesArray()`
`126`	`126`	`{`
`127`		`- if (!iteration) {`
	`127`	`+ {`
`128`	`128`	`GPUTimer timer("creating clustersindextable array");`
`129`	`129`	`allocMem(reinterpret_cast<void**>(&mClustersIndexTablesDeviceArray), NLayers * sizeof(int*), this->hasFrameworkAllocator());`
`130`	`130`	`GPUChkErrS(cudaHostRegister(mClustersIndexTablesDevice.data(), NLayers * sizeof(int*), cudaHostRegisterPortable));`
`@@ -139,9 +139,9 @@ void TimeFrameGPU<NLayers>::createClustersIndexTablesArray(const int iteration)`
`139`	`139`	`}`
`140`	`140`
`141`	`141`	`template <int NLayers>`
`142`		`-void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int iteration, const int layer)`
	`142`	`+void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int layer)`
`143`	`143`	`{`
`144`		`- if (!iteration) {`
	`144`	`+ {`
`145`	`145`	`GPUTimer timer(mGpuStreams[layer], "loading sorted clusters", layer);`
`146`	`146`	`GPULog("gpu-transfer: loading clusters indextable for layer {} with {} elements, for {:.2f} MB.", layer, this->mIndexTables[layer].size(), this->mIndexTables[layer].size() * sizeof(int) / constants::MB);`
`147`	`147`	`allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDevice[layer]), this->mIndexTables[layer].size() * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());`
`@@ -151,9 +151,9 @@ void TimeFrameGPU<NLayers>::loadClustersIndexTables(const int iteration, const i`
`151`	`151`	`}`
`152`	`152`
`153`	`153`	`template <int NLayers>`
`154`		`-void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int iteration, const int maxLayers)`
	`154`	`+void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int maxLayers)`
`155`	`155`	`{`
`156`		`- if (!iteration) {`
	`156`	`+ {`
`157`	`157`	`GPUTimer timer("creating used clusters flags");`
`158`	`158`	`allocMem(reinterpret_cast<void**>(&mUsedClustersDeviceArray), NLayers * sizeof(uint8_t*), this->hasFrameworkAllocator());`
`159`	`159`	`GPUChkErrS(cudaHostRegister(mUsedClustersDevice.data(), NLayers * sizeof(uint8_t*), cudaHostRegisterPortable));`
`@@ -168,9 +168,9 @@ void TimeFrameGPU<NLayers>::createUsedClustersDeviceArray(const int iteration, c`
`168`	`168`	`}`
`169`	`169`
`170`	`170`	`template <int NLayers>`
`171`		`-void TimeFrameGPU<NLayers>::createUsedClustersDevice(const int iteration, const int layer)`
	`171`	`+void TimeFrameGPU<NLayers>::createUsedClustersDevice(const int layer)`
`172`	`172`	`{`
`173`		`- if (!iteration) {`
	`173`	`+ {`
`174`	`174`	`GPUTimer timer(mGpuStreams[layer], "creating used clusters flags", layer);`
`175`	`175`	`GPULog("gpu-transfer: creating {} used clusters flags on layer {}, for {:.2f} MB.", this->mUsedClusters[layer].size(), layer, this->mUsedClusters[layer].size() * sizeof(unsigned char) / constants::MB);`
`176`	`176`	`allocMemAsync(reinterpret_cast<void**>(&mUsedClustersDevice[layer]), this->mUsedClusters[layer].size() * sizeof(unsigned char), mGpuStreams[layer], this->hasFrameworkAllocator());`
`@@ -190,9 +190,9 @@ void TimeFrameGPU<NLayers>::loadUsedClustersDevice()`
`190`	`190`	`}`
`191`	`191`
`192`	`192`	`template <int NLayers>`
`193`		`-void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray(const int iteration)`
	`193`	`+void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray()`
`194`	`194`	`{`
`195`		`- if (!iteration) {`
	`195`	`+ {`
`196`	`196`	`GPUTimer timer("creating ROFrame clusters array");`
`197`	`197`	`allocMem(reinterpret_cast<void**>(&mROFramesClustersDeviceArray), NLayers * sizeof(int*), this->hasFrameworkAllocator());`
`198`	`198`	`GPUChkErrS(cudaHostRegister(mROFramesClustersDevice.data(), NLayers * sizeof(int*), cudaHostRegisterPortable));`
`@@ -207,9 +207,9 @@ void TimeFrameGPU<NLayers>::createROFrameClustersDeviceArray(const int iteration`
`207`	`207`	`}`
`208`	`208`
`209`	`209`	`template <int NLayers>`
`210`		`-void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int iteration, const int layer)`
	`210`	`+void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int layer)`
`211`	`211`	`{`
`212`		`- if (!iteration) {`
	`212`	`+ {`
`213`	`213`	`GPUTimer timer(mGpuStreams[layer], "loading ROframe clusters", layer);`
`214`	`214`	`GPULog("gpu-transfer: loading {} ROframe clusters info on layer {}, for {:.2f} MB.", this->mROFramesClusters[layer].size(), layer, this->mROFramesClusters[layer].size() * sizeof(int) / constants::MB);`
`215`	`215`	`allocMemAsync(reinterpret_cast<void**>(&mROFramesClustersDevice[layer]), this->mROFramesClusters[layer].size() * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());`
`@@ -219,9 +219,9 @@ void TimeFrameGPU<NLayers>::loadROFrameClustersDevice(const int iteration, const`
`219`	`219`	`}`
`220`	`220`
`221`	`221`	`template <int NLayers>`
`222`		`-void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray(const int iteration)`
	`222`	`+void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray()`
`223`	`223`	`{`
`224`		`- if (!iteration) {`
	`224`	`+ {`
`225`	`225`	`GPUTimer timer("creating trackingframeinfo array");`
`226`	`226`	`allocMem(reinterpret_cast<void**>(&mTrackingFrameInfoDeviceArray), NLayers * sizeof(TrackingFrameInfo*), this->hasFrameworkAllocator());`
`227`	`227`	`GPUChkErrS(cudaHostRegister(mTrackingFrameInfoDevice.data(), NLayers * sizeof(TrackingFrameInfo*), cudaHostRegisterPortable));`
`@@ -236,9 +236,9 @@ void TimeFrameGPU<NLayers>::createTrackingFrameInfoDeviceArray(const int iterati`
`236`	`236`	`}`
`237`	`237`
`238`	`238`	`template <int NLayers>`
`239`		`-void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int iteration, const int layer)`
	`239`	`+void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int layer)`
`240`	`240`	`{`
`241`		`- if (!iteration) {`
	`241`	`+ {`
`242`	`242`	`GPUTimer timer(mGpuStreams[layer], "loading trackingframeinfo", layer);`
`243`	`243`	`GPULog("gpu-transfer: loading {} tfinfo on layer {}, for {:.2f} MB.", this->mTrackingFrameInfo[layer].size(), layer, this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo) / constants::MB);`
`244`	`244`	`allocMemAsync(reinterpret_cast<void**>(&mTrackingFrameInfoDevice[layer]), this->mTrackingFrameInfo[layer].size() * sizeof(TrackingFrameInfo), mGpuStreams[layer], this->hasFrameworkAllocator());`
`@@ -250,7 +250,7 @@ void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int iteration, con`
`250`	`250`	`template <int NLayers>`
`251`	`251`	`void TimeFrameGPU<NLayers>::loadROFCutMask(const int iteration)`
`252`	`252`	`{`
`253`		`- if (!iteration \|\| iteration == 3) { // we need to re-load the swapped mult-mask in upc iteration`
	`253`	`+ {`
`254`	`254`	`GPUTimer timer("loading multiplicity cut mask");`
`255`	`255`	`const auto& hostTable = *(this->mROFMask);`
`256`	`256`	`const auto hostView = hostTable.getView();`
`@@ -270,9 +270,9 @@ void TimeFrameGPU<NLayers>::loadROFCutMask(const int iteration)`
`270`	`270`	`}`
`271`	`271`
`272`	`272`	`template <int NLayers>`
`273`		`-void TimeFrameGPU<NLayers>::loadVertices(const int iteration)`
	`273`	`+void TimeFrameGPU<NLayers>::loadVertices()`
`274`	`274`	`{`
`275`		`- if (!iteration) {`
	`275`	`+ {`
`276`	`276`	`GPUTimer timer("loading seeding vertices");`
`277`	`277`	`GPULog("gpu-transfer: loading {} seeding vertices, for {:.2f} MB.", this->mPrimaryVertices.size(), this->mPrimaryVertices.size() * sizeof(Vertex) / constants::MB);`
`278`	`278`	`allocMem(reinterpret_cast<void**>(&mPrimaryVerticesDevice), this->mPrimaryVertices.size() * sizeof(Vertex), this->hasFrameworkAllocator());`
`@@ -281,9 +281,9 @@ void TimeFrameGPU<NLayers>::loadVertices(const int iteration)`
`281`	`281`	`}`
`282`	`282`
`283`	`283`	`template <int NLayers>`
`284`		`-void TimeFrameGPU<NLayers>::loadROFOverlapTable(const int iteration)`
	`284`	`+void TimeFrameGPU<NLayers>::loadROFOverlapTable()`
`285`	`285`	`{`
`286`		`- if (!iteration) {`
	`286`	`+ {`
`287`	`287`	`GPUTimer timer("initialising device view of ROFOverlapTable");`
`288`	`288`	`const auto& hostTable = this->getROFOverlapTable();`
`289`	`289`	`const auto& hostView = this->getROFOverlapTableView();`
`@@ -305,9 +305,9 @@ void TimeFrameGPU<NLayers>::loadROFOverlapTable(const int iteration)`
`305`	`305`	`}`
`306`	`306`
`307`	`307`	`template <int NLayers>`
`308`		`-void TimeFrameGPU<NLayers>::loadROFVertexLookupTable(const int iteration)`
	`308`	`+void TimeFrameGPU<NLayers>::loadROFVertexLookupTable()`
`309`	`309`	`{`
`310`		`- if (!iteration) {`
	`310`	`+ {`
`311`	`311`	`GPUTimer timer("initialising device view of ROFVertexLookupTable");`
`312`	`312`	`const auto& hostTable = this->getROFVertexLookupTable();`
`313`	`313`	`const auto& hostView = this->getROFVertexLookupTableView();`
`@@ -329,10 +329,10 @@ void TimeFrameGPU<NLayers>::loadROFVertexLookupTable(const int iteration)`
`329`	`329`	`}`
`330`	`330`
`331`	`331`	`template <int NLayers>`
`332`		`-void TimeFrameGPU<NLayers>::updateROFVertexLookupTable(const int iteration)`
	`332`	`+void TimeFrameGPU<NLayers>::updateROFVertexLookupTable()`
`333`	`333`	`{`
`334`	`334`	`const auto& hostTable = this->getROFVertexLookupTable();`
`335`		`- if (!iteration) {`
	`335`	`+ {`
`336`	`336`	`GPUTimer timer("updating device view of ROFVertexLookupTable");`
`337`	`337`	`const auto& hostView = this->getROFVertexLookupTableView();`
`338`	`338`	`using TableEntry = ROFVertexLookupTable<NLayers>::TableEntry;`
`@@ -345,19 +345,19 @@ void TimeFrameGPU<NLayers>::updateROFVertexLookupTable(const int iteration)`
`345`	`345`	`}`
`346`	`346`
`347`	`347`	`template <int NLayers>`
`348`		`-void TimeFrameGPU<NLayers>::createTrackletsLUTDeviceArray(const int iteration)`
	`348`	`+void TimeFrameGPU<NLayers>::createTrackletsLUTDeviceArray()`
`349`	`349`	`{`
`350`		`- if (!iteration) {`
	`350`	`+ {`
`351`	`351`	`allocMem(reinterpret_cast<void*>(&mTrackletsLUTDeviceArray), (NLayers - 1) * sizeof(int*), this->hasFrameworkAllocator());`
`352`	`352`	`}`
`353`	`353`	`}`
`354`	`354`
`355`	`355`	`template <int NLayers>`
`356`		`-void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(const int iteration, const int layer)`
	`356`	`+void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(bool allocate, const int layer)`
`357`	`357`	`{`
`358`	`358`	`GPUTimer timer(mGpuStreams[layer], "creating tracklets LUTs", layer);`
`359`	`359`	`const int ncls = this->mClusters[layer].size() + 1;`
`360`		`- if (!iteration) {`
	`360`	`+ if (allocate) {`
`361`	`361`	`GPULog("gpu-allocation: creating tracklets LUT for {} elements on layer {}, for {:.2f} MB.", ncls, layer, ncls * sizeof(int) / constants::MB);`
`362`	`362`	`allocMemAsync(reinterpret_cast<void*>(&mTrackletsLUTDevice[layer]), ncls * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());`
`363`	`363`	`GPUChkErrS(cudaMemcpyAsync(&mTrackletsLUTDeviceArray[layer], &mTrackletsLUTDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));`
`@@ -366,9 +366,9 @@ void TimeFrameGPU<NLayers>::createTrackletsLUTDevice(const int iteration, const`
`366`	`366`	`}`
`367`	`367`
`368`	`368`	`template <int NLayers>`
`369`		`-void TimeFrameGPU<NLayers>::createTrackletsBuffersArray(const int iteration)`
	`369`	`+void TimeFrameGPU<NLayers>::createTrackletsBuffersArray()`
`370`	`370`	`{`
`371`		`- if (!iteration) {`
	`371`	`+ {`
`372`	`372`	`GPUTimer timer("creating tracklet buffers array");`
`373`	`373`	`allocMem(reinterpret_cast<void*>(&mTrackletsDeviceArray), (NLayers - 1) * sizeof(Tracklet*), this->hasFrameworkAllocator());`
`374`	`374`	`}`
`@@ -442,9 +442,9 @@ void TimeFrameGPU<NLayers>::loadCellsDevice()`
`442`	`442`	`}`
`443`	`443`
`444`	`444`	`template <int NLayers>`
`445`		`-void TimeFrameGPU<NLayers>::createCellsLUTDeviceArray(const int iteration)`
	`445`	`+void TimeFrameGPU<NLayers>::createCellsLUTDeviceArray()`
`446`	`446`	`{`
`447`		`- if (!iteration) {`
	`447`	`+ {`
`448`	`448`	`GPUTimer timer("creating cells LUTs array");`
`449`	`449`	`allocMem(reinterpret_cast<void*>(&mCellsLUTDeviceArray), (NLayers - 2) * sizeof(int*), this->hasFrameworkAllocator());`
`450`	`450`	`}`
`@@ -461,9 +461,9 @@ void TimeFrameGPU<NLayers>::createCellsLUTDevice(const int layer)`
`461`	`461`	`}`
`462`	`462`
`463`	`463`	`template <int NLayers>`
`464`		`-void TimeFrameGPU<NLayers>::createCellsBuffersArray(const int iteration)`
	`464`	`+void TimeFrameGPU<NLayers>::createCellsBuffersArray()`
`465`	`465`	`{`
`466`		`- if (!iteration) {`
	`466`	`+ {`
`467`	`467`	`GPUTimer timer("creating cells buffers array");`
`468`	`468`	`allocMem(reinterpret_cast<void*>(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeed*), this->hasFrameworkAllocator());`
`469`	`469`	`GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice));`
`@@ -646,12 +646,10 @@ void TimeFrameGPU<NLayers>::popMemoryStack(const int iteration)`
`646`	`646`	`}`
`647`	`647`
`648`	`648`	`template <int NLayers>`
`649`		`-void TimeFrameGPU<NLayers>::initialise(const int iteration,`
`650`		`- const TrackingParameters& trkParam,`
`651`		`- const int maxLayers)`
	`649`	`+void TimeFrameGPU<NLayers>::initialise(const TrackingParameters& trkParam, int maxLayers)`
`652`	`650`	`{`
`653`	`651`	`mGpuStreams.resize(NLayers);`
`654`		`- o2::its::TimeFrame<NLayers>::initialise(iteration, trkParam, maxLayers, false);`
	`652`	`+ o2::its::TimeFrame<NLayers>::initialise(trkParam, maxLayers);`
`655`	`653`	`}`
`656`	`654`
`657`	`655`	`template <int NLayers>`