Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,7 @@ if(ARROW_COMPUTE)
compute/kernels/vector_rank.cc
compute/kernels/vector_replace.cc
compute/kernels/vector_run_end_encode.cc
compute/kernels/vector_search_sorted.cc
compute/kernels/vector_select_k.cc
compute/kernels/vector_sort.cc
compute/kernels/vector_statistics.cc
Expand Down
28 changes: 28 additions & 0 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ using compute::FilterOptions;
using compute::NullPlacement;
using compute::RankOptions;
using compute::RankQuantileOptions;
using compute::SearchSortedOptions;

template <>
struct EnumTraits<FilterOptions::NullSelectionBehavior>
Expand Down Expand Up @@ -96,6 +97,21 @@ struct EnumTraits<NullPlacement>
}
};
// EnumTraits specialization for SearchSortedOptions::Side: supplies the enum's
// display name and a printable name for each of its enumerators.
template <>
struct EnumTraits<SearchSortedOptions::Side>
    : BasicEnumTraits<SearchSortedOptions::Side, SearchSortedOptions::Left,
                      SearchSortedOptions::Right> {
  // Qualified name of the enum type itself.
  static std::string name() { return "SearchSortedOptions::Side"; }

  // Returns "Left" or "Right" for the matching enumerator, and "<INVALID>" for
  // any value that is not one of the declared enumerators.
  static std::string value_name(SearchSortedOptions::Side value) {
    const char* label = "<INVALID>";
    // No `default:` case on purpose, so the compiler can still warn when an
    // enumerator is added to Side without being handled here.
    switch (value) {
      case SearchSortedOptions::Left:
        label = "Left";
        break;
      case SearchSortedOptions::Right:
        label = "Right";
        break;
    }
    return label;
  }
};
template <>
struct EnumTraits<RankOptions::Tiebreaker>
: BasicEnumTraits<RankOptions::Tiebreaker, RankOptions::Min, RankOptions::Max,
RankOptions::First, RankOptions::Dense> {
Expand Down Expand Up @@ -137,6 +153,8 @@ static auto kRunEndEncodeOptionsType = GetFunctionOptionsType<RunEndEncodeOption
static auto kArraySortOptionsType = GetFunctionOptionsType<ArraySortOptions>(
DataMember("order", &ArraySortOptions::order),
DataMember("null_placement", &ArraySortOptions::null_placement));
// Options type descriptor for SearchSortedOptions, built from its single
// data member `side`.
static auto kSearchSortedOptionsType = GetFunctionOptionsType<SearchSortedOptions>(
DataMember("side", &SearchSortedOptions::side));
static auto kSortOptionsType = GetFunctionOptionsType<SortOptions>(
DataMember("sort_keys", &SortOptions::sort_keys),
DataMember("null_placement", &SortOptions::null_placement));
Expand Down Expand Up @@ -196,6 +214,10 @@ ArraySortOptions::ArraySortOptions(SortOrder order, NullPlacement null_placement
null_placement(null_placement) {}
constexpr char ArraySortOptions::kTypeName[];

// Constructs the options with the requested insertion side and binds them to
// the SearchSortedOptions options type.
SearchSortedOptions::SearchSortedOptions(Side side)
    : FunctionOptions(internal::kSearchSortedOptionsType),
      side(side) {}
// Out-of-class definition of the static constexpr member declared in the header.
constexpr char SearchSortedOptions::kTypeName[];

SortOptions::SortOptions(std::vector<SortKey> sort_keys, NullPlacement null_placement)
: FunctionOptions(internal::kSortOptionsType),
sort_keys(std::move(sort_keys)),
Expand Down Expand Up @@ -274,6 +296,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kDictionaryEncodeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kRunEndEncodeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kArraySortOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSearchSortedOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType));
Expand Down Expand Up @@ -315,6 +338,11 @@ Result<std::shared_ptr<Array>> SelectKUnstable(const Datum& datum,
return result.make_array();
}

// Forwards to CallFunction under the function name "search_sorted", passing
// `values` and `needles` (in that order) as the two arguments together with the
// caller's options and execution context.
Result<Datum> SearchSorted(const Datum& values, const Datum& needles,
                           const SearchSortedOptions& options, ExecContext* ctx) {
  const std::vector<Datum> call_args{values, needles};
  return CallFunction("search_sorted", call_args, &options, ctx);
}

Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
const Datum& replacements, ExecContext* ctx) {
return CallFunction("replace_with_mask", {values, mask, replacements}, ctx);
Expand Down
36 changes: 36 additions & 0 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,21 @@ class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
NullPlacement null_placement;
};

/// \brief Options for SearchSorted: selects which insertion point is reported.
class ARROW_EXPORT SearchSortedOptions : public FunctionOptions {
 public:
  /// Insertion side selector (see the `side` member below).
  enum Side {
    /// Report the leftmost insertion point.
    Left,
    /// Report the rightmost insertion point.
    Right,
  };

  /// \param[in] side insertion side to use; defaults to Left
  explicit SearchSortedOptions(Side side = Left);

  /// Name under which this options type is identified.
  static constexpr const char kTypeName[] = "SearchSortedOptions";

  /// Default-constructed options (side == Left).
  static SearchSortedOptions Defaults() { return SearchSortedOptions{}; }

  /// Whether to return the leftmost or rightmost insertion point.
  Side side;
};

class ARROW_EXPORT SortOptions : public FunctionOptions {
public:
explicit SortOptions(std::vector<SortKey> sort_keys = {},
Expand Down Expand Up @@ -515,6 +530,27 @@ Result<std::shared_ptr<Array>> SelectKUnstable(const Datum& datum,
const SelectKOptions& options,
ExecContext* ctx = NULLPTR);

/// \brief Find insertion indices that preserve sorted order.
///
/// This is a convenience wrapper that calls the "search_sorted" compute
/// function with `values` and `needles` as its two arguments.
///
/// The `values` datum must be a plain array or run-end encoded array sorted in
/// ascending order. `needles` may be a scalar, plain array, or run-end encoded
/// array whose logical value type matches `values`.
///
/// Nulls in `values` are supported when clustered entirely at the start or the
/// end of the sorted array. Non-null needles are matched only against the
/// non-null portion of `values`. Null needles yield null outputs.
///
/// \param[in] values sorted array to search within
/// \param[in] needles scalar or array-like values to search for
/// \param[in] options selects left or right insertion semantics
/// \param[in] ctx the function execution context, optional
/// \return insertion indices as uint64 scalar or array
ARROW_EXPORT
Result<Datum> SearchSorted(
const Datum& values, const Datum& needles,
const SearchSortedOptions& options = SearchSortedOptions::Defaults(),
ExecContext* ctx = NULLPTR);

/// \brief Return the indices that would sort an array.
///
/// Perform an indirect sort of array. The output array will contain
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/initialize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Status RegisterComputeKernels() {
internal::RegisterVectorNested(registry);
internal::RegisterVectorRank(registry);
internal::RegisterVectorReplace(registry);
internal::RegisterVectorSearchSorted(registry);
internal::RegisterVectorSelectK(registry);
internal::RegisterVectorSort(registry);
internal::RegisterVectorRunEndEncode(registry);
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/compute/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,13 @@ add_arrow_compute_test(vector_sort_test
arrow_compute_kernels_testing
arrow_compute_testing)

# Compute test target for the search_sorted kernel, built from
# vector_search_sorted_test.cc and linked against the shared compute testing
# libraries.
add_arrow_compute_test(vector_search_sorted_test
SOURCES
vector_search_sorted_test.cc
EXTRA_LINK_LIBS
arrow_compute_kernels_testing
arrow_compute_testing)

add_arrow_compute_test(vector_selection_test
SOURCES
vector_selection_test.cc
Expand All @@ -141,6 +148,7 @@ add_arrow_compute_benchmark(vector_sort_benchmark)
add_arrow_compute_benchmark(vector_partition_benchmark)
add_arrow_compute_benchmark(vector_topk_benchmark)
add_arrow_compute_benchmark(vector_replace_benchmark)
add_arrow_compute_benchmark(vector_search_sorted_benchmark)
add_arrow_compute_benchmark(vector_selection_benchmark)

# ----------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/kernels/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ vector_kernel_benchmarks = [
'vector_partition_benchmark',
'vector_topk_benchmark',
'vector_replace_benchmark',
'vector_search_sorted_benchmark',
'vector_selection_benchmark',
]

Expand Down
Loading
Loading