Skip to content

Commit db916c2

Browse files
committed
DPL Analysis: add support for BinaryView columns
The idea is to be able to have BinaryViews on top of the CCDB object blobs, which are already cached in shared memory, so that we can have a table with rows of the kind (timestamp, blob-requested-ccdb-object-1, blob-requested-ccdb-object-2). Such a table can then be joined to the timestamps to provide access to the associated CCDB objects.
1 parent 59033a5 commit db916c2

File tree

4 files changed

+86
-8
lines changed

4 files changed

+86
-8
lines changed

Framework/Core/include/Framework/ASoA.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@
2323
#include "Framework/ArrowTableSlicingCache.h" // IWYU pragma: export
2424
#include "Framework/SliceCache.h" // IWYU pragma: export
2525
#include "Framework/VariantHelpers.h" // IWYU pragma: export
26-
#include <arrow/table.h> // IWYU pragma: export
27-
#include <arrow/array.h> // IWYU pragma: export
28-
#include <arrow/util/config.h> // IWYU pragma: export
29-
#include <gandiva/selection_vector.h> // IWYU pragma: export
30-
#include <array> // IWYU pragma: export
26+
#include <arrow/array/array_binary.h>
27+
#include <arrow/table.h> // IWYU pragma: export
28+
#include <arrow/array.h> // IWYU pragma: export
29+
#include <arrow/util/config.h> // IWYU pragma: export
30+
#include <gandiva/selection_vector.h> // IWYU pragma: export
31+
#include <array> // IWYU pragma: export
3132
#include <cassert>
3233
#include <fmt/format.h>
3334
#include <concepts>
@@ -578,7 +579,15 @@ class ColumnIterator : ChunkingPolicy
578579
}
579580

580581
decltype(auto) operator*() const
581-
requires((!std::same_as<bool, std::decay_t<T>>) && !std::same_as<arrow_array_for_t<T>, arrow::ListArray>)
582+
requires((!std::same_as<bool, std::decay_t<T>>) && std::same_as<arrow_array_for_t<T>, arrow::BinaryViewArray>)
583+
{
584+
checkSkipChunk();
585+
auto array = std::static_pointer_cast<arrow::BinaryViewArray>(mColumn->chunk(mCurrentChunk));
586+
return array->GetView(*mCurrentPos - mFirstIndex);
587+
}
588+
589+
decltype(auto) operator*() const
590+
requires((!std::same_as<bool, std::decay_t<T>>) && !std::same_as<arrow_array_for_t<T>, arrow::ListArray> && !std::same_as<arrow_array_for_t<T>, arrow::BinaryViewArray>)
582591
{
583592
checkSkipChunk();
584593
return *(mCurrent + (*mCurrentPos >> SCALE_FACTOR));

Framework/Core/include/Framework/ArrowTypes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#ifndef O2_FRAMEWORK_ARROWTYPES_H
1313
#define O2_FRAMEWORK_ARROWTYPES_H
1414
#include "arrow/type_fwd.h"
15+
#include <span>
1516

1617
namespace o2::soa
1718
{
@@ -62,6 +63,10 @@ template <>
6263
struct arrow_array_for<double> {
6364
using type = arrow::DoubleArray;
6465
};
66+
// Storage mapping for binary blobs: a column declared with the C++ type
// std::span<std::byte> is associated with arrow::BinaryViewArray.
template <>
67+
struct arrow_array_for<std::span<std::byte>> {
68+
using type = arrow::BinaryViewArray;
69+
};
6570
template <int N>
6671
struct arrow_array_for<float[N]> {
6772
using type = arrow::FixedSizeListArray;

Framework/Core/include/Framework/TableBuilder.h

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ O2_ARROW_STL_CONVERSION(long unsigned, UInt64Type)
9898
O2_ARROW_STL_CONVERSION(float, FloatType)
9999
O2_ARROW_STL_CONVERSION(double, DoubleType)
100100
O2_ARROW_STL_CONVERSION(std::string, StringType)
101+
O2_ARROW_STL_CONVERSION(std::span<std::byte>, BinaryViewType)
101102
} // namespace detail
102103

103104
void addLabelToSchema(std::shared_ptr<arrow::Schema>& schema, const char* label);
@@ -274,6 +275,29 @@ struct BuilderMaker<bool> {
274275
}
275276
};
276277

278+
template <>
279+
struct BuilderMaker<std::span<std::byte>> {
280+
using FillType = std::span<std::byte>;
281+
using STLValueType = std::span<std::byte>;
282+
using ArrowType = typename detail::ConversionTraits<std::span<std::byte>>::ArrowType;
283+
using BuilderType = typename arrow::TypeTraits<ArrowType>::BuilderType;
284+
285+
static std::unique_ptr<BuilderType> make(arrow::MemoryPool* pool)
286+
{
287+
return std::make_unique<BuilderType>(pool);
288+
}
289+
290+
static std::shared_ptr<arrow::DataType> make_datatype()
291+
{
292+
return arrow::TypeTraits<ArrowType>::type_singleton();
293+
}
294+
295+
static arrow::Status append(BuilderType& builder, std::span<std::byte> value)
296+
{
297+
return builder.Append((char*)value.data(), (int64_t)value.size());
298+
}
299+
};
300+
277301
template <typename ITERATOR>
278302
struct BuilderMaker<std::pair<ITERATOR, ITERATOR>> {
279303
using FillType = std::pair<ITERATOR, ITERATOR>;
@@ -422,6 +446,13 @@ struct DirectInsertion {
422446
return builder->Append(value);
423447
}
424448

449+
template <typename BUILDER>
450+
requires std::same_as<std::span<std::byte>, T>
451+
arrow::Status append(BUILDER& builder, T value)
452+
{
453+
return builder->Append((char*)value.data(), (int64_t)value.size());
454+
}
455+
425456
template <typename BUILDER>
426457
arrow::Status flush(BUILDER&)
427458
{
@@ -569,7 +600,7 @@ template <typename... ARGS>
569600
using IndexedHoldersTuple = decltype(makeHolderTypes<ARGS...>());
570601

571602
// Satisfied when T is a bounded array, an arithmetic type, a type for which
// framework::is_base_of_template_v<std::vector, T> holds, or (in the line
// added by this change) exactly std::span<std::byte>.
// NOTE(review): going by the name, such types are presumably persisted as a
// single value rather than being split into members - confirm at the use site.
template <typename T>
572-
concept ShouldNotDeconstruct = std::is_bounded_array_v<T> || std::is_arithmetic_v<T> || framework::is_base_of_template_v<std::vector, T>;
603+
concept ShouldNotDeconstruct = std::is_bounded_array_v<T> || std::is_arithmetic_v<T> || framework::is_base_of_template_v<std::vector, T> || std::same_as<std::span<std::byte>, T>;
573604

574605
/// Helper class which creates a lambda suitable for building
575606
/// an arrow table from a tuple. This can be used, for example

Framework/Core/test/test_TableBuilder.cxx

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
1+
// Copyright 2019-2025 CERN and copyright holders of ALICE O2.
22
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
33
// All rights not expressly granted are reserved.
44
//
@@ -19,6 +19,8 @@
1919
#include <arrow/ipc/writer.h>
2020
#include <arrow/ipc/reader.h>
2121

22+
#include <string_view>
23+
2224
using namespace o2::framework;
2325

2426
// We use a different namespace to avoid clashes with the
@@ -27,10 +29,12 @@ namespace test2
2729
{
2830
DECLARE_SOA_COLUMN_FULL(X, x, uint64_t, "x");
2931
DECLARE_SOA_COLUMN_FULL(Y, y, uint64_t, "y");
32+
DECLARE_SOA_COLUMN_FULL(Blob, blob, std::span<std::byte>, "blob");
3033
DECLARE_SOA_COLUMN_FULL(Pos, pos, int[4], "pos");
3134
} // namespace test2
3235

3336
using TestTable = o2::soa::InPlaceTable<0, test2::X, test2::Y>;
37+
using SpanTable = o2::soa::InPlaceTable<0, test2::Blob>;
3438
using ArrayTable = o2::soa::InPlaceTable<0, test2::Pos>;
3539

3640
TEST_CASE("TestTableBuilder")
@@ -189,6 +193,35 @@ TEST_CASE("TestTableBuilderMore")
189193
REQUIRE(table->schema()->field(3)->type()->id() == arrow::boolean()->id());
190194
}
191195

196+
// Round-trips binary blobs through a std::span<std::byte> column: four spans
// of different lengths are written with TableBuilder, the resulting schema is
// checked, and the rows are read back through SpanTable.
TEST_CASE("TestSpan")
{
  TableBuilder builder;
  // Parentheses pick the (count, value) constructor explicitly. The braced
  // form only reaches it as a fallback because an int cannot initialise a
  // std::byte element, which is easy to misread as a two-element list.
  std::vector<std::byte> buffer(10, std::byte{'c'});
  std::vector<std::byte> buffer1(10, std::byte{'a'});

  auto rowWriter = builder.persist<std::span<std::byte>>({"blob"});
  rowWriter(0, std::span(buffer));                // all ten 'c'
  rowWriter(0, std::span(buffer.data() + 1, 9));  // nine 'c', starting at offset 1
  rowWriter(0, std::span(buffer1.data(), 3));     // three 'a'
  rowWriter(0, std::span(buffer1.data(), 1));     // a single 'a'
  auto table = builder.finalize();

  REQUIRE(table->num_columns() == 1);
  REQUIRE(table->num_rows() == 4);
  REQUIRE(table->schema()->field(0)->name() == "blob");
  REQUIRE(table->schema()->field(0)->type()->id() == arrow::binary_view()->id());

  // Read the rows back in insertion order and compare the bytes as text.
  auto readBack = SpanTable{table};
  auto row = readBack.begin();
  REQUIRE(row.blob() == "cccccccccc");
  ++row;
  REQUIRE(row.blob() == "ccccccccc");
  ++row;
  REQUIRE(row.blob() == "aaa");
  ++row;
  REQUIRE(row.blob() == "a");
}
224+
192225
TEST_CASE("TestSoAIntegration")
193226
{
194227
TableBuilder builder;

0 commit comments

Comments
 (0)