From cf8bafed689cc87af12420da5ed3c0f8aa1c5eb1 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 27 Aug 2025 13:26:49 +0200 Subject: [PATCH] DPL Analysis: percolate DataOrigin so that we can use it for multiple files reading. --- .../AnalysisSupport/src/DataInputDirector.cxx | 38 ++++++++++++------- .../AnalysisSupport/src/DataInputDirector.h | 8 ++-- .../Framework/AnalysisSupportHelpers.h | 4 +- .../Core/src/AnalysisDataModelHelpers.cxx | 10 +++-- .../TestWorkflows/src/o2TestHistograms.cxx | 5 ++- 5 files changed, 41 insertions(+), 24 deletions(-) diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index 590329de146f7..7cc0134a27968 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -124,15 +124,22 @@ void DataInputDescriptor::addFileNameHolder(FileNameHolder* fn) mfilenames.emplace_back(fn); } -bool DataInputDescriptor::setFile(int counter) +bool DataInputDescriptor::setFile(int counter, std::string_view origin) { // no files left if (counter >= getNumberInputfiles()) { return false; } + // In case the origin starts with anything but AOD, we add the origin as the suffix + // of the filename. In the future we might expand this for proper rewriting of the + // filename based on the origin and the original file information. 
+ std::string filename = mfilenames[counter]->fileName; + if (!origin.starts_with("AOD")) { + filename = std::regex_replace(filename, std::regex("[.]root$"), fmt::format("_{}.root", origin)); + } + // open file - auto filename = mfilenames[counter]->fileName; auto rootFS = std::dynamic_pointer_cast(mCurrentFilesystem); if (rootFS.get()) { if (rootFS->GetFile()->GetName() == filename) { @@ -213,11 +220,11 @@ bool DataInputDescriptor::setFile(int counter) return true; } -uint64_t DataInputDescriptor::getTimeFrameNumber(int counter, int numTF) +uint64_t DataInputDescriptor::getTimeFrameNumber(int counter, int numTF, std::string_view origin) { // open file - if (!setFile(counter)) { + if (!setFile(counter, origin)) { return 0ul; } @@ -229,10 +236,10 @@ uint64_t DataInputDescriptor::getTimeFrameNumber(int counter, int numTF) return (mfilenames[counter]->listOfTimeFrameNumbers)[numTF]; } -arrow::dataset::FileSource DataInputDescriptor::getFileFolder(int counter, int numTF) +arrow::dataset::FileSource DataInputDescriptor::getFileFolder(int counter, int numTF, std::string_view origin) { // open file - if (!setFile(counter)) { + if (!setFile(counter, origin)) { return {}; } @@ -246,7 +253,7 @@ arrow::dataset::FileSource DataInputDescriptor::getFileFolder(int counter, int n return {fmt::format("DF_{}", mfilenames[counter]->listOfTimeFrameNumbers[numTF]), mCurrentFilesystem}; } -DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF, std::string treename) +DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF, std::string treename, std::string_view origin) { if (!mParentFileMap) { // This file has no parent map @@ -283,7 +290,7 @@ DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF, mParentFile->mdefaultFilenamesPtr = new std::vector; mParentFile->mdefaultFilenamesPtr->emplace_back(makeFileNameHolder(parentFileName->GetString().Data())); mParentFile->fillInputfiles(); - mParentFile->setFile(0); + 
mParentFile->setFile(0, origin); return mParentFile; } @@ -427,7 +434,8 @@ struct CalculateDelta { mTarget += (uv_hrtime() - start); } - void deactivate() { + void deactivate() + { active = false; } @@ -440,7 +448,8 @@ struct CalculateDelta { bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t& totalSizeCompressed, size_t& totalSizeUncompressed) { CalculateDelta t(mIOTime); - auto folder = getFileFolder(counter, numTF); + std::string origin = dh.dataOrigin.as(); + auto folder = getFileFolder(counter, numTF, origin); if (!folder.filesystem()) { t.deactivate(); return false; @@ -473,7 +482,7 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh if (!format) { t.deactivate(); LOGP(debug, "Could not find tree {}. Trying in parent file.", fullpath.path()); - auto parentFile = getParentFile(counter, numTF, treename); + auto parentFile = getParentFile(counter, numTF, treename, origin); if (parentFile != nullptr) { int parentNumTF = parentFile->findDFNumber(0, folder.path()); if (parentNumTF == -1) { @@ -817,8 +826,9 @@ arrow::dataset::FileSource DataInputDirector::getFileFolder(header::DataHeader d if (!didesc) { didesc = mdefaultDataInputDescriptor; } + std::string origin = dh.dataOrigin.as(); - return didesc->getFileFolder(counter, numTF); + return didesc->getFileFolder(counter, numTF, origin); } int DataInputDirector::getTimeFramesInFile(header::DataHeader dh, int counter) @@ -839,8 +849,9 @@ uint64_t DataInputDirector::getTimeFrameNumber(header::DataHeader dh, int counte if (!didesc) { didesc = mdefaultDataInputDescriptor; } + std::string origin = dh.dataOrigin.as(); - return didesc->getTimeFrameNumber(counter, numTF); + return didesc->getTimeFrameNumber(counter, numTF, origin); } bool DataInputDirector::readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, size_t& totalSizeCompressed, size_t& totalSizeUncompressed) @@ -858,6 
+869,7 @@ bool DataInputDirector::readTree(DataAllocator& outputs, header::DataHeader dh, didesc = mdefaultDataInputDescriptor; treename = aod::datamodel::getTreeName(dh); } + std::string origin = dh.dataOrigin.as(); auto result = didesc->readTree(outputs, dh, counter, numTF, treename, totalSizeCompressed, totalSizeUncompressed); return result; diff --git a/Framework/AnalysisSupport/src/DataInputDirector.h b/Framework/AnalysisSupport/src/DataInputDirector.h index 94bdcf2c9368e..61b477bd8522d 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.h +++ b/Framework/AnalysisSupport/src/DataInputDirector.h @@ -64,7 +64,7 @@ class DataInputDescriptor void addFileNameHolder(FileNameHolder* fn); int fillInputfiles(); - bool setFile(int counter); + bool setFile(int counter, std::string_view origin); // getters std::string getInputfilesFilename(); @@ -74,9 +74,9 @@ class DataInputDescriptor int getNumberTimeFrames() { return mtotalNumberTimeFrames; } int findDFNumber(int file, std::string dfName); - uint64_t getTimeFrameNumber(int counter, int numTF); - arrow::dataset::FileSource getFileFolder(int counter, int numTF); - DataInputDescriptor* getParentFile(int counter, int numTF, std::string treename); + uint64_t getTimeFrameNumber(int counter, int numTF, std::string_view origin); + arrow::dataset::FileSource getFileFolder(int counter, int numTF, std::string_view origin); + DataInputDescriptor* getParentFile(int counter, int numTF, std::string treename, std::string_view origin); int getTimeFramesInFile(int counter); int getReadTimeFramesInFile(int counter); diff --git a/Framework/Core/include/Framework/AnalysisSupportHelpers.h b/Framework/Core/include/Framework/AnalysisSupportHelpers.h index a4e80decf2bbe..cc4d45a46c8bc 100644 --- a/Framework/Core/include/Framework/AnalysisSupportHelpers.h +++ b/Framework/Core/include/Framework/AnalysisSupportHelpers.h @@ -20,8 +20,8 @@ namespace o2::framework { -static constexpr std::array AODOrigins{header::DataOrigin{"AOD"}, 
header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}}; -static constexpr std::array extendedAODOrigins{header::DataOrigin{"AOD"}, header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}, header::DataOrigin{"DYN"}, header::DataOrigin{"AMD"}}; +static constexpr std::array AODOrigins{header::DataOrigin{"AOD"}, header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}, header::DataOrigin{"EMB"}}; +static constexpr std::array extendedAODOrigins{header::DataOrigin{"AOD"}, header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}, header::DataOrigin{"DYN"}, header::DataOrigin{"AMD"}, header::DataOrigin{"EMB"}}; static constexpr std::array writableAODOrigins{header::DataOrigin{"AOD"}, header::DataOrigin{"AOD1"}, header::DataOrigin{"AOD2"}, header::DataOrigin{"DYN"}}; class DataOutputDirector; diff --git a/Framework/Core/src/AnalysisDataModelHelpers.cxx b/Framework/Core/src/AnalysisDataModelHelpers.cxx index bfc53c8972e2c..b7b459c89d847 100644 --- a/Framework/Core/src/AnalysisDataModelHelpers.cxx +++ b/Framework/Core/src/AnalysisDataModelHelpers.cxx @@ -11,6 +11,7 @@ #include "Framework/AnalysisDataModelHelpers.h" #include "Framework/AnalysisDataModel.h" +#include "Framework/AnalysisSupportHelpers.h" #include "Framework/StringHelpers.h" #include "Framework/Logger.h" @@ -27,7 +28,6 @@ namespace o2::aod::datamodel std::string getTreeName(header::DataHeader dh) { auto description = std::string(dh.dataDescription.str); - auto origin = std::string(dh.dataOrigin.str); auto iver = (float)dh.subSpecification; // lower case of first part of description @@ -38,11 +38,15 @@ std::string getTreeName(header::DataHeader dh) } // add prefix according to origin - if (origin == "AOD") { - treeName = "O2" + treeName; + for (auto possibleOrigin : framework::AODOrigins) { + if (dh.dataOrigin == possibleOrigin) { + treeName = "O2" + treeName; + break; + } } // exceptions from this + auto origin = std::string(dh.dataOrigin.str); if (origin == "AOD" && description == "MCCOLLISLABEL") { treeName = 
"O2mccollisionlabel"; } diff --git a/Framework/TestWorkflows/src/o2TestHistograms.cxx b/Framework/TestWorkflows/src/o2TestHistograms.cxx index 38cfc00b6df7c..61710e1f63d5f 100644 --- a/Framework/TestWorkflows/src/o2TestHistograms.cxx +++ b/Framework/TestWorkflows/src/o2TestHistograms.cxx @@ -25,6 +25,7 @@ using namespace o2::framework::expressions; namespace o2::aod { +O2ORIGIN("EMB"); namespace skimmedExampleTrack { DECLARE_SOA_COLUMN(Pt, pt, float); //! @@ -49,7 +50,7 @@ struct EtaAndClsHistogramsSimple { } } - void process(soa::Filtered const& tracks, aod::FT0s const&) + void process(soa::Filtered const& tracks, aod::FT0s const&, aod::StoredTracksFrom> const& ortherTracks) { LOGP(info, "Invoking the simple one"); for (auto& track : tracks) { @@ -72,7 +73,7 @@ struct EtaAndClsHistogramsIUSimple { } } - void process(soa::Filtered const& tracks, aod::FT0s const&) + void process(soa::Filtered const& tracks, aod::FT0s const&, aod::TracksIUFrom> const& otherTracks) { LOGP(info, "Invoking the simple one IU"); for (auto& track : tracks) {