From 8f9d3c671a32ac8f6e0bb8857b766b9f90f1743c Mon Sep 17 00:00:00 2001 From: Sandro Wenzel Date: Tue, 25 Nov 2025 14:47:50 +0100 Subject: [PATCH] CalDet: Protect against invalid reads A continuation of the CalDet saga, possibly related to https://its.cern.ch/jira/browse/O2-4671 Tests on ARM, even after deployment of the custom streamer in https://github.com/AliceO2Group/AliceO2/pull/14830, still showed segfaults in TPC digitization. With the relevant code isolated into a unit test in https://github.com/AliceO2Group/AliceO2/pull/14850, it was possible to do a valgrind study. This showed Invalid reads to the mData of CalArray. Thereafter, putting assert statements showed that we often access CalArray data slightly out of bounds - irrespective of custom streamer or not. This then either indicates a problem in the code logic or a problem with the calibration CCDB objects. This should clearly be fixed. In the meantime, this commit adds a protection against invalid accesses and returns a trivial answer as well as an error message. This is in any case better than undefined behaviour. In addition, this commit introduces possibility to switch off the custom streamer for further studies. --- Detectors/TPC/base/include/TPCBase/CalArray.h | 11 +++++++- Detectors/TPC/base/include/TPCBase/CalDet.h | 27 ++++++++++++++++++- .../base/src/TPCFlagsMemberCustomStreamer.cxx | 4 ++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/Detectors/TPC/base/include/TPCBase/CalArray.h b/Detectors/TPC/base/include/TPCBase/CalArray.h index 2679eae4e706e..c0d5a14bd86de 100644 --- a/Detectors/TPC/base/include/TPCBase/CalArray.h +++ b/Detectors/TPC/base/include/TPCBase/CalArray.h @@ -26,6 +26,11 @@ #include #endif +#ifdef NDEBUG +#undef NDEBUG +#include +#endif + namespace o2 { namespace tpc @@ -93,7 +98,11 @@ class CalArray int getPadSubsetNumber() const { return mPadSubsetNumber; } void setValue(const size_t channel, const T& value) { mData[channel] = value; } - const T getValue(const size_t channel) const { return mData[channel]; } + const T getValue(const size_t channel) const + { + assert(channel < mData.size()); + return mData[channel]; + } void setValue(const size_t row, const size_t pad, const T& value); const T getValue(const size_t row, const size_t pad) const; diff --git a/Detectors/TPC/base/include/TPCBase/CalDet.h b/Detectors/TPC/base/include/TPCBase/CalDet.h index cab1bd5757f27..76bbeaf8bebd1 100644 --- a/Detectors/TPC/base/include/TPCBase/CalDet.h +++ b/Detectors/TPC/base/include/TPCBase/CalDet.h @@ -30,6 +30,12 @@ #include "Rtypes.h" #endif +#ifndef NDEBUG +#undef NDEBUG +// always enable assert +#include +#endif + namespace o2 { namespace tpc @@ -211,7 +217,26 @@ inline const T CalDet::getValue(const ROC roc, const size_t row, const size_t } case PadSubset::Region: { const auto globalRow = roc.isOROC() ? mappedRow + mapper.getNumberOfRowsROC(ROC(0)) : mappedRow; - return mData[Mapper::REGION[globalRow] + roc.getSector() * Mapper::NREGIONS].getValue(Mapper::OFFSETCRUGLOBAL[globalRow] + mappedPad); + const auto dataRow = Mapper::REGION[globalRow] + roc.getSector() * Mapper::NREGIONS; + const auto index = Mapper::OFFSETCRUGLOBAL[globalRow] + mappedPad; + assert(dataRow < mData.size()); + if (index >= mData[dataRow].getData().size()) { + // S. Wenzel: We shouldn't come here but we do. For instance for CalDet calibrations loaded from + // creator.loadIDCPadFlags(1731274461770); + + // In this case there is an index overflow, leading to invalid reads and potentially a segfault. + // To increase stability, for now returning a trivial answer. This can be removed once either the algorithm + // or the calibration data has been fixed. +#ifndef GPUCA_ALIGPUCODE // hide from GPU standalone compilation + static bool printMsg = true; + if (printMsg) { + LOG(error) << "Out of bound access in TPC CalDet ROC " << roc << " row " << row << " pad " << pad << " (no more messages printed)"; + } + printMsg = false; +#endif + return T{}; + } + return mData[dataRow].getValue(index); break; } } diff --git a/Detectors/TPC/base/src/TPCFlagsMemberCustomStreamer.cxx b/Detectors/TPC/base/src/TPCFlagsMemberCustomStreamer.cxx index 1dfb775a14aaa..7e3499dec14d9 100644 --- a/Detectors/TPC/base/src/TPCFlagsMemberCustomStreamer.cxx +++ b/Detectors/TPC/base/src/TPCFlagsMemberCustomStreamer.cxx @@ -69,7 +69,9 @@ static __attribute__((used)) int _R__dummyStreamer_3 = ([]() { auto cl = TClass::GetClass>(); if (cl) { - cl->AdoptMemberStreamer("mData", new TMemberStreamer(MemberVectorPadFlagsStreamer)); + if (!getenv("TPC_PADFLAGS_STREAMER_OFF")) { + cl->AdoptMemberStreamer("mData", new TMemberStreamer(MemberVectorPadFlagsStreamer)); + } } else { // we should never come here ... and if we do we should assert/fail assert(false);