Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
227 changes: 151 additions & 76 deletions src/dispatch.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,32 +82,70 @@ end
# ===========================================================================

# Exponential search forward from `lo`, then bounded binary search inside the
# final bracket. Used internally by `ExpFromLeft`.
# final bracket. Used internally by `ExpFromLeft`. The `order` parameter
# makes the comparison polarity-aware so `ExpFromLeft` works natively under
# both `Base.Order.Forward` (ascending) and `Base.Order.Reverse` (descending)
# without falling back to plain `Base.searchsortedfirst`.
#
# Finds the smallest index `y` in `[lo, hi]` with `!lt(order, v[y], x)`
# (equivalent to `v[y] >= x` under Forward, `v[y] <= x` under Reverse).
Base.@propagate_inbounds function searchsortedfirstexp(
v::AbstractVector,
x,
lo::Integer = firstindex(v),
hi::Integer = lastindex(v),
order::Base.Order.Ordering = Base.Order.Forward,
)
# Linear search for first few elements
for i in 0:4
ind = lo + i
ind > hi && return ind
x <= v[ind] && return ind
!Base.Order.lt(order, v[ind], x) && return ind
end
# Exponential search with doubling steps
n = 3
tn2 = 2^n
tn2m1 = 2^(n - 1)
ind = lo + tn2
while ind <= hi
x <= v[ind] &&
return searchsortedfirst(v, x, lo + tn2 - tn2m1, ind, Base.Order.Forward)
!Base.Order.lt(order, v[ind], x) &&
return searchsortedfirst(v, x, lo + tn2 - tn2m1, ind, order)
tn2 *= 2
tn2m1 *= 2
ind = lo + tn2
end
return searchsortedfirst(v, x, lo + tn2 - tn2m1, hi, Base.Order.Forward)
return searchsortedfirst(v, x, lo + tn2 - tn2m1, hi, order)
end

# Sibling of `searchsortedfirstexp` for the `searchsortedlast` polarity:
# finds the largest `y` in `[lo, hi]` with `!lt(order, x, v[y])`
# (equivalent to `v[y] <= x` under Forward, `v[y] >= x` under Reverse).
# Uses the *strict* comparison `lt(order, x, v[ind])` to detect the crossing
# past `x`, then returns `ind - 1`. Without this dedicated helper, callers
# would have to post-process `searchsortedfirstexp`'s result and re-scan for
# duplicates of `x` to find the last occurrence.
Base.@propagate_inbounds function searchsortedlastexp(
v::AbstractVector,
x,
lo::Integer = firstindex(v),
hi::Integer = lastindex(v),
order::Base.Order.Ordering = Base.Order.Forward,
)
for i in 0:4
ind = lo + i
ind > hi && return hi
Base.Order.lt(order, x, v[ind]) && return ind - 1
end
n = 3
tn2 = 2^n
tn2m1 = 2^(n - 1)
ind = lo + tn2
while ind <= hi
Base.Order.lt(order, x, v[ind]) &&
return searchsortedlast(v, x, lo + tn2 - tn2m1, ind, order)
tn2 *= 2
tn2m1 *= 2
ind = lo + tn2
end
return searchsortedlast(v, x, lo + tn2 - tn2m1, hi, order)
end

# ===========================================================================
Expand Down Expand Up @@ -199,87 +237,111 @@ Base.searchsortedfirst(

# ===========================================================================
# Strategy: SIMDLinearScan — specialized forward walk for DenseVector{Int64}
# and DenseVector{Float64}. Backward walks reuse the scalar LinearScan path
# (rare from a good hint, and the SIMD primitive only exists for the
# forward-scan direction).
# and DenseVector{Float64}. Backward walks reuse a scalar walk (rare from a
# good hint). The SIMD primitive is order-aware: under `Forward`, scans for
# the first lane with `v[i] > x` (or `>= x` for searchsortedfirst); under
# `Reverse`, scans for the first lane with `v[i] < x` (or `<= x`). Any other
# ordering falls back to scalar `LinearScan`.
# ===========================================================================

@inline function _simdscan_last_specialized(
v::Union{DenseVector{Int64}, DenseVector{Float64}}, x, hint::Integer,
v::Union{DenseVector{Int64}, DenseVector{Float64}},
x, hint::Integer,
order::Base.Order.Ordering,
)
lo = firstindex(v)
hi = lastindex(v)
hi < lo && return lo - 1
i = clamp(hint, lo, hi)
@inbounds vi = v[i]
if vi > x
# Backward walk (scalar).
if Base.Order.lt(order, x, vi)
# `v[i]` is past the answer in this ordering — backward walk (scalar).
while i > lo
i -= 1
@inbounds v[i] <= x && return i
@inbounds !Base.Order.lt(order, x, v[i]) && return i
end
return lo - 1
end
i == hi && return hi
start = i + 1
len = hi - start + 1
offset = GC.@preserve v _simd_first_gt(x, pointer(v, start), Int64(len))
# SIMD forward scan for the first lane that crosses the threshold.
offset = if order === Base.Order.Forward
GC.@preserve v _simd_first_gt(x, pointer(v, start), Int64(len))
else
GC.@preserve v _simd_first_lt(x, pointer(v, start), Int64(len))
end
return offset < 0 ? hi : (start + offset) - 1
end

@inline function _simdscan_first_specialized(
v::Union{DenseVector{Int64}, DenseVector{Float64}}, x, hint::Integer,
v::Union{DenseVector{Int64}, DenseVector{Float64}},
x, hint::Integer,
order::Base.Order.Ordering,
)
lo = firstindex(v)
hi = lastindex(v)
hi < lo && return lo
i = clamp(hint, lo, hi)
@inbounds vi = v[i]
if vi < x
if Base.Order.lt(order, vi, x)
# `v[i]` is before the answer — SIMD-scan forward for the first lane
# that meets-or-passes the search target.
i == hi && return hi + 1
start = i + 1
len = hi - start + 1
offset = GC.@preserve v _simd_first_ge(x, pointer(v, start), Int64(len))
offset = if order === Base.Order.Forward
GC.@preserve v _simd_first_ge(x, pointer(v, start), Int64(len))
else
GC.@preserve v _simd_first_le(x, pointer(v, start), Int64(len))
end
return offset < 0 ? hi + 1 : start + offset
end
# `v[i]` meets or passes — retreat (scalar).
while i > lo
@inbounds v[i - 1] >= x && (i -= 1; continue)
return i
@inbounds Base.Order.lt(order, v[i - 1], x) && return i
i -= 1
end
return lo
end

# Dispatch helper: only `Forward` and `Reverse` orderings use the SIMD path;
# anything else (custom `By`, `Lt`, etc.) falls back to scalar `LinearScan`.
@inline function _simd_supported_order(order::Base.Order.Ordering)
return order === Base.Order.Forward || order === Base.Order.Reverse
end

function Base.searchsortedlast(
::SIMDLinearScan, v::DenseVector{Int64}, x::Int64, hint::Integer;
order::Base.Order.Ordering = Base.Order.Forward,
)
order === Base.Order.Forward ||
_simd_supported_order(order) ||
return searchsortedlast(LinearScan(), v, x, hint; order = order)
return _simdscan_last_specialized(v, x, hint)
return _simdscan_last_specialized(v, x, hint, order)
end
function Base.searchsortedlast(
::SIMDLinearScan, v::DenseVector{Float64}, x::Float64, hint::Integer;
order::Base.Order.Ordering = Base.Order.Forward,
)
order === Base.Order.Forward ||
_simd_supported_order(order) ||
return searchsortedlast(LinearScan(), v, x, hint; order = order)
return _simdscan_last_specialized(v, x, hint)
return _simdscan_last_specialized(v, x, hint, order)
end
function Base.searchsortedfirst(
::SIMDLinearScan, v::DenseVector{Int64}, x::Int64, hint::Integer;
order::Base.Order.Ordering = Base.Order.Forward,
)
order === Base.Order.Forward ||
_simd_supported_order(order) ||
return searchsortedfirst(LinearScan(), v, x, hint; order = order)
return _simdscan_first_specialized(v, x, hint)
return _simdscan_first_specialized(v, x, hint, order)
end
function Base.searchsortedfirst(
::SIMDLinearScan, v::DenseVector{Float64}, x::Float64, hint::Integer;
order::Base.Order.Ordering = Base.Order.Forward,
)
order === Base.Order.Forward ||
_simd_supported_order(order) ||
return searchsortedfirst(LinearScan(), v, x, hint; order = order)
return _simdscan_first_specialized(v, x, hint)
return _simdscan_first_specialized(v, x, hint, order)
end

# Other eltypes fall back to the scalar LinearScan walk.
Expand Down Expand Up @@ -352,17 +414,14 @@ function Base.searchsortedfirst(
return lo
end
h = clamp(hint, lo, hi)
# `searchsortedfirst` semantics: smallest i with v[i] >= x. We can only
# gallop forward from `h` when v[h] < x (then the answer is strictly
# > h). When v[h] >= x, the first occurrence of `x` may be at index
# ≤ h (duplicates to the left), so fall back to a full search rather
# than risk skipping past earlier duplicates.
# `searchsortedfirst` semantics: smallest i with `!lt(order, v[i], x)`.
# We can only gallop forward from `h` when `v[h]` is still "before" the
# answer in the ordering — `lt(order, v[h], x)`. Otherwise the first
# occurrence may be at index ≤ h (duplicates) and we'd skip past it.
@inbounds if !Base.Order.lt(order, v[h], x)
return searchsortedfirst(v, x, order)
end
return order === Base.Order.Forward ?
searchsortedfirstexp(v, x, h, hi) :
searchsortedfirst(v, x, h, hi, order)
return searchsortedfirstexp(v, x, h, hi, order)
end

function Base.searchsortedlast(
Expand All @@ -378,16 +437,7 @@ function Base.searchsortedlast(
@inbounds if Base.Order.lt(order, x, v[h])
return searchsortedlast(v, x, order)
end
if order === Base.Order.Forward
y = searchsortedfirstexp(v, x, h, hi)
return if y > hi
hi
else
@inbounds v[y] == x ? y : y - 1
end
else
return searchsortedlast(v, x, h, hi, order)
end
return searchsortedlastexp(v, x, h, hi, order)
end

# ExpFromLeft without a hint falls back to BinaryBracket.
Expand Down Expand Up @@ -423,12 +473,14 @@ function Base.searchsortedlast(
::InterpolationSearch, v::AbstractVector{<:Number}, x::Number;
order::Base.Order.Ordering = Base.Order.Forward,
)
if order !== Base.Order.Forward
# Linear interpolation doesn't carry over to reverse order; fall back
return searchsortedlast(v, x, order)
end
lo, hi = firstindex(v), lastindex(v)
hi < lo && return lo - 1
# `_interp_guess` works for both `Forward` and `Reverse`: for a
# `Reverse`-sorted (decreasing) vector, `vhi - vlo < 0` and
# `x - vlo < 0` for queries inside `[vhi, vlo]`, so the ratio
# `(x - vlo) / (vhi - vlo)` still lands in `[0, 1]` and gives the
# correct fractional position. Falls back to `BinaryBracket` for any
# other ordering via `BracketGallop`.
g = _interp_guess(v, x, lo, hi)
return searchsortedlast(BracketGallop(), v, x, g; order = order)
end
Expand All @@ -437,9 +489,6 @@ function Base.searchsortedfirst(
::InterpolationSearch, v::AbstractVector{<:Number}, x::Number;
order::Base.Order.Ordering = Base.Order.Forward,
)
if order !== Base.Order.Forward
return searchsortedfirst(v, x, order)
end
lo, hi = firstindex(v), lastindex(v)
hi < lo && return lo
g = _interp_guess(v, x, lo, hi)
Expand Down Expand Up @@ -477,57 +526,83 @@ Base.searchsortedfirst(
# ===========================================================================
# Strategy: BitInterpolationSearch — InterpolationSearch on the IEEE bit
# pattern of positive Float64. Cheaper than reinterpret-as-array because we
# only need two endpoint reads and one query bitcast per call.
# only need two endpoint reads and one query bitcast per call. Order-aware:
# under `Forward` (ascending bit patterns) and `Reverse` (descending bit
# patterns) the guess formula is the same fractional linear extrapolation,
# only the directionality of the comparison swaps.
# ===========================================================================

@inline function _bit_interp_guess_f64(
v::DenseVector{Float64}, x::Float64, lo::Integer, hi::Integer,
order::Base.Order.Ordering,
)
@inbounds vlo_bits = reinterpret(UInt64, v[lo])
@inbounds vhi_bits = reinterpret(UInt64, v[hi])
xu = reinterpret(UInt64, x)
span = vhi_bits - vlo_bits
iszero(span) && return lo
if xu <= vlo_bits
return lo
elseif xu >= vhi_bits
return hi
return if order === Base.Order.Forward
# Forward: vlo_bits ≤ vhi_bits. Standard fractional interp on
# unsigned bit patterns.
span = vhi_bits - vlo_bits
if iszero(span)
lo
elseif xu <= vlo_bits
lo
elseif xu >= vhi_bits
hi
else
num = xu - vlo_bits
f = Float64(num) / Float64(span)
clamp(lo + round(Int, f * (hi - lo)), lo, hi)
end
else
# Reverse: vlo_bits ≥ vhi_bits. Mirror the arithmetic.
span = vlo_bits - vhi_bits
if iszero(span)
lo
elseif xu >= vlo_bits
lo
elseif xu <= vhi_bits
hi
else
num = vlo_bits - xu
f = Float64(num) / Float64(span)
clamp(lo + round(Int, f * (hi - lo)), lo, hi)
end
end
num = xu - vlo_bits
f = Float64(num) / Float64(span)
g = lo + round(Int, f * (hi - lo))
return clamp(g, lo, hi)
end

# `Forward` requires both endpoints strictly positive (negative / subnormal
# / non-finite Float64 bit patterns are not monotonic with float value).
# `Reverse` requires the same on both endpoints. For unsupported orderings
# (custom `By`, `Lt`, …) fall back to `BinaryBracket`.
@inline function _bit_interp_eligible(v::DenseVector{Float64}, x::Float64, lo, hi, order)
_simd_supported_order(order) || return false
@inbounds return v[lo] > 0.0 && isfinite(v[lo]) &&
v[hi] > 0.0 && isfinite(v[hi]) &&
x > 0.0 && isfinite(x)
end

function Base.searchsortedlast(
::BitInterpolationSearch, v::DenseVector{Float64}, x::Float64;
order::Base.Order.Ordering = Base.Order.Forward,
)
if order !== Base.Order.Forward
return searchsortedlast(v, x, order)
end
lo, hi = firstindex(v), lastindex(v)
hi < lo && return lo - 1
@inbounds if v[lo] <= 0.0 || !isfinite(v[lo]) || x <= 0.0 || !isfinite(x)
_bit_interp_eligible(v, x, lo, hi, order) ||
return searchsortedlast(BinaryBracket(), v, x; order = order)
end
g = _bit_interp_guess_f64(v, x, lo, hi)
g = _bit_interp_guess_f64(v, x, lo, hi, order)
return searchsortedlast(BracketGallop(), v, x, g; order = order)
end

function Base.searchsortedfirst(
::BitInterpolationSearch, v::DenseVector{Float64}, x::Float64;
order::Base.Order.Ordering = Base.Order.Forward,
)
if order !== Base.Order.Forward
return searchsortedfirst(v, x, order)
end
lo, hi = firstindex(v), lastindex(v)
hi < lo && return lo
@inbounds if v[lo] <= 0.0 || !isfinite(v[lo]) || x <= 0.0 || !isfinite(x)
_bit_interp_eligible(v, x, lo, hi, order) ||
return searchsortedfirst(BinaryBracket(), v, x; order = order)
end
g = _bit_interp_guess_f64(v, x, lo, hi)
g = _bit_interp_guess_f64(v, x, lo, hi, order)
return searchsortedfirst(BracketGallop(), v, x, g; order = order)
end

Expand Down
Loading
Loading