diff --git a/src/dispatch.jl b/src/dispatch.jl index f2a5575..0851d63 100644 --- a/src/dispatch.jl +++ b/src/dispatch.jl @@ -82,32 +82,70 @@ end # =========================================================================== # Exponential search forward from `lo`, then bounded binary search inside the -# final bracket. Used internally by `ExpFromLeft`. +# final bracket. Used internally by `ExpFromLeft`. The `order` parameter +# makes the comparison polarity-aware so `ExpFromLeft` works natively under +# both `Base.Order.Forward` (ascending) and `Base.Order.Reverse` (descending) +# without falling back to plain `Base.searchsortedfirst`. +# +# Finds the smallest index `y` in `[lo, hi]` with `!lt(order, v[y], x)` +# (equivalent to `v[y] >= x` under Forward, `v[y] <= x` under Reverse). Base.@propagate_inbounds function searchsortedfirstexp( v::AbstractVector, x, lo::Integer = firstindex(v), hi::Integer = lastindex(v), + order::Base.Order.Ordering = Base.Order.Forward, ) - # Linear search for first few elements for i in 0:4 ind = lo + i ind > hi && return ind - x <= v[ind] && return ind + !Base.Order.lt(order, v[ind], x) && return ind end - # Exponential search with doubling steps n = 3 tn2 = 2^n tn2m1 = 2^(n - 1) ind = lo + tn2 while ind <= hi - x <= v[ind] && - return searchsortedfirst(v, x, lo + tn2 - tn2m1, ind, Base.Order.Forward) + !Base.Order.lt(order, v[ind], x) && + return searchsortedfirst(v, x, lo + tn2 - tn2m1, ind, order) tn2 *= 2 tn2m1 *= 2 ind = lo + tn2 end - return searchsortedfirst(v, x, lo + tn2 - tn2m1, hi, Base.Order.Forward) + return searchsortedfirst(v, x, lo + tn2 - tn2m1, hi, order) +end + +# Sibling of `searchsortedfirstexp` for the `searchsortedlast` polarity: +# finds the largest `y` in `[lo, hi]` with `!lt(order, x, v[y])` +# (equivalent to `v[y] <= x` under Forward, `v[y] >= x` under Reverse). +# Uses the *strict* comparison `lt(order, x, v[ind])` to detect the crossing +# past `x`, then returns `ind - 1`. Without this dedicated helper, callers +# would have to post-process `searchsortedfirstexp`'s result and re-scan for +# duplicates of `x` to find the last occurrence. +Base.@propagate_inbounds function searchsortedlastexp( + v::AbstractVector, + x, + lo::Integer = firstindex(v), + hi::Integer = lastindex(v), + order::Base.Order.Ordering = Base.Order.Forward, + ) + for i in 0:4 + ind = lo + i + ind > hi && return hi + Base.Order.lt(order, x, v[ind]) && return ind - 1 + end + n = 3 + tn2 = 2^n + tn2m1 = 2^(n - 1) + ind = lo + tn2 + while ind <= hi + Base.Order.lt(order, x, v[ind]) && + return searchsortedlast(v, x, lo + tn2 - tn2m1, ind, order) + tn2 *= 2 + tn2m1 *= 2 + ind = lo + tn2 + end + return searchsortedlast(v, x, lo + tn2 - tn2m1, hi, order) end # =========================================================================== @@ -199,87 +237,111 @@ Base.searchsortedfirst( # =========================================================================== # Strategy: SIMDLinearScan — specialized forward walk for DenseVector{Int64} -# and DenseVector{Float64}. Backward walks reuse the scalar LinearScan path -# (rare from a good hint, and the SIMD primitive only exists for the -# forward-scan direction). +# and DenseVector{Float64}. Backward walks reuse a scalar walk (rare from a +# good hint). The SIMD primitive is order-aware: under `Forward`, scans for +# the first lane with `v[i] > x` (or `>= x` for searchsortedfirst); under +# `Reverse`, scans for the first lane with `v[i] < x` (or `<= x`). Any other +# ordering falls back to scalar `LinearScan`. # =========================================================================== @inline function _simdscan_last_specialized( - v::Union{DenseVector{Int64}, DenseVector{Float64}}, x, hint::Integer, + v::Union{DenseVector{Int64}, DenseVector{Float64}}, + x, hint::Integer, + order::Base.Order.Ordering, ) lo = firstindex(v) hi = lastindex(v) hi < lo && return lo - 1 i = clamp(hint, lo, hi) @inbounds vi = v[i] - if vi > x - # Backward walk (scalar). + if Base.Order.lt(order, x, vi) + # `v[i]` is past the answer in this ordering — backward walk (scalar). while i > lo i -= 1 - @inbounds v[i] <= x && return i + @inbounds !Base.Order.lt(order, x, v[i]) && return i end return lo - 1 end i == hi && return hi start = i + 1 len = hi - start + 1 - offset = GC.@preserve v _simd_first_gt(x, pointer(v, start), Int64(len)) + # SIMD forward scan for the first lane that crosses the threshold. + offset = if order === Base.Order.Forward + GC.@preserve v _simd_first_gt(x, pointer(v, start), Int64(len)) + else + GC.@preserve v _simd_first_lt(x, pointer(v, start), Int64(len)) + end return offset < 0 ? hi : (start + offset) - 1 end @inline function _simdscan_first_specialized( - v::Union{DenseVector{Int64}, DenseVector{Float64}}, x, hint::Integer, + v::Union{DenseVector{Int64}, DenseVector{Float64}}, + x, hint::Integer, + order::Base.Order.Ordering, ) lo = firstindex(v) hi = lastindex(v) hi < lo && return lo i = clamp(hint, lo, hi) @inbounds vi = v[i] - if vi < x + if Base.Order.lt(order, vi, x) + # `v[i]` is before the answer — SIMD-scan forward for the first lane + # that meets-or-passes the search target. i == hi && return hi + 1 start = i + 1 len = hi - start + 1 - offset = GC.@preserve v _simd_first_ge(x, pointer(v, start), Int64(len)) + offset = if order === Base.Order.Forward + GC.@preserve v _simd_first_ge(x, pointer(v, start), Int64(len)) + else + GC.@preserve v _simd_first_le(x, pointer(v, start), Int64(len)) + end return offset < 0 ? hi + 1 : start + offset end + # `v[i]` meets or passes — retreat (scalar). while i > lo - @inbounds v[i - 1] >= x && (i -= 1; continue) - return i + @inbounds Base.Order.lt(order, v[i - 1], x) && return i + i -= 1 end return lo end +# Dispatch helper: only `Forward` and `Reverse` orderings use the SIMD path; +# anything else (custom `By`, `Lt`, etc.) falls back to scalar `LinearScan`. +@inline function _simd_supported_order(order::Base.Order.Ordering) + return order === Base.Order.Forward || order === Base.Order.Reverse +end + function Base.searchsortedlast( ::SIMDLinearScan, v::DenseVector{Int64}, x::Int64, hint::Integer; order::Base.Order.Ordering = Base.Order.Forward, ) - order === Base.Order.Forward || + _simd_supported_order(order) || return searchsortedlast(LinearScan(), v, x, hint; order = order) - return _simdscan_last_specialized(v, x, hint) + return _simdscan_last_specialized(v, x, hint, order) end function Base.searchsortedlast( ::SIMDLinearScan, v::DenseVector{Float64}, x::Float64, hint::Integer; order::Base.Order.Ordering = Base.Order.Forward, ) - order === Base.Order.Forward || + _simd_supported_order(order) || return searchsortedlast(LinearScan(), v, x, hint; order = order) - return _simdscan_last_specialized(v, x, hint) + return _simdscan_last_specialized(v, x, hint, order) end function Base.searchsortedfirst( ::SIMDLinearScan, v::DenseVector{Int64}, x::Int64, hint::Integer; order::Base.Order.Ordering = Base.Order.Forward, ) - order === Base.Order.Forward || + _simd_supported_order(order) || return searchsortedfirst(LinearScan(), v, x, hint; order = order) - return _simdscan_first_specialized(v, x, hint) + return _simdscan_first_specialized(v, x, hint, order) end function Base.searchsortedfirst( ::SIMDLinearScan, v::DenseVector{Float64}, x::Float64, hint::Integer; order::Base.Order.Ordering = Base.Order.Forward, ) - order === Base.Order.Forward || + _simd_supported_order(order) || return searchsortedfirst(LinearScan(), v, x, hint; order = order) - return _simdscan_first_specialized(v, x, hint) + return _simdscan_first_specialized(v, x, hint, order) end # Other eltypes fall back to the scalar LinearScan walk. @@ -352,17 +414,14 @@ function Base.searchsortedfirst( return lo end h = clamp(hint, lo, hi) - # `searchsortedfirst` semantics: smallest i with v[i] >= x. We can only - # gallop forward from `h` when v[h] < x (then the answer is strictly - # > h). When v[h] >= x, the first occurrence of `x` may be at index - # ≤ h (duplicates to the left), so fall back to a full search rather - # than risk skipping past earlier duplicates. + # `searchsortedfirst` semantics: smallest i with `!lt(order, v[i], x)`. + # We can only gallop forward from `h` when `v[h]` is still "before" the + # answer in the ordering — `lt(order, v[h], x)`. Otherwise the first + # occurrence may be at index ≤ h (duplicates) and we'd skip past it. @inbounds if !Base.Order.lt(order, v[h], x) return searchsortedfirst(v, x, order) end - return order === Base.Order.Forward ? - searchsortedfirstexp(v, x, h, hi) : - searchsortedfirst(v, x, h, hi, order) + return searchsortedfirstexp(v, x, h, hi, order) end function Base.searchsortedlast( @@ -378,16 +437,7 @@ function Base.searchsortedlast( @inbounds if Base.Order.lt(order, x, v[h]) return searchsortedlast(v, x, order) end - if order === Base.Order.Forward - y = searchsortedfirstexp(v, x, h, hi) - return if y > hi - hi - else - @inbounds v[y] == x ? y : y - 1 - end - else - return searchsortedlast(v, x, h, hi, order) - end + return searchsortedlastexp(v, x, h, hi, order) end # ExpFromLeft without a hint falls back to BinaryBracket. @@ -423,12 +473,14 @@ function Base.searchsortedlast( ::InterpolationSearch, v::AbstractVector{<:Number}, x::Number; order::Base.Order.Ordering = Base.Order.Forward, ) - if order !== Base.Order.Forward - # Linear interpolation doesn't carry over to reverse order; fall back - return searchsortedlast(v, x, order) - end lo, hi = firstindex(v), lastindex(v) hi < lo && return lo - 1 + # `_interp_guess` works for both `Forward` and `Reverse`: for a + # `Reverse`-sorted (decreasing) vector, `vhi - vlo < 0` and + # `x - vlo < 0` for queries inside `[vhi, vlo]`, so the ratio + # `(x - vlo) / (vhi - vlo)` still lands in `[0, 1]` and gives the + # correct fractional position. Falls back to `BinaryBracket` for any + # other ordering via `BracketGallop`. g = _interp_guess(v, x, lo, hi) return searchsortedlast(BracketGallop(), v, x, g; order = order) end @@ -437,9 +489,6 @@ function Base.searchsortedfirst( ::InterpolationSearch, v::AbstractVector{<:Number}, x::Number; order::Base.Order.Ordering = Base.Order.Forward, ) - if order !== Base.Order.Forward - return searchsortedfirst(v, x, order) - end lo, hi = firstindex(v), lastindex(v) hi < lo && return lo g = _interp_guess(v, x, lo, hi) @@ -477,41 +526,71 @@ Base.searchsortedfirst( # =========================================================================== # Strategy: BitInterpolationSearch — InterpolationSearch on the IEEE bit # pattern of positive Float64. Cheaper than reinterpret-as-array because we -# only need two endpoint reads and one query bitcast per call. +# only need two endpoint reads and one query bitcast per call. Order-aware: +# under `Forward` (ascending bit patterns) and `Reverse` (descending bit +# patterns) the guess formula is the same fractional linear extrapolation, +# only the directionality of the comparison swaps. # =========================================================================== @inline function _bit_interp_guess_f64( v::DenseVector{Float64}, x::Float64, lo::Integer, hi::Integer, + order::Base.Order.Ordering, ) @inbounds vlo_bits = reinterpret(UInt64, v[lo]) @inbounds vhi_bits = reinterpret(UInt64, v[hi]) xu = reinterpret(UInt64, x) - span = vhi_bits - vlo_bits - iszero(span) && return lo - if xu <= vlo_bits - return lo - elseif xu >= vhi_bits - return hi + return if order === Base.Order.Forward + # Forward: vlo_bits ≤ vhi_bits. Standard fractional interp on + # unsigned bit patterns. + span = vhi_bits - vlo_bits + if iszero(span) + lo + elseif xu <= vlo_bits + lo + elseif xu >= vhi_bits + hi + else + num = xu - vlo_bits + f = Float64(num) / Float64(span) + clamp(lo + round(Int, f * (hi - lo)), lo, hi) + end + else + # Reverse: vlo_bits ≥ vhi_bits. Mirror the arithmetic. + span = vlo_bits - vhi_bits + if iszero(span) + lo + elseif xu >= vlo_bits + lo + elseif xu <= vhi_bits + hi + else + num = vlo_bits - xu + f = Float64(num) / Float64(span) + clamp(lo + round(Int, f * (hi - lo)), lo, hi) + end end - num = xu - vlo_bits - f = Float64(num) / Float64(span) - g = lo + round(Int, f * (hi - lo)) - return clamp(g, lo, hi) +end + +# `Forward` requires both endpoints strictly positive (negative / subnormal +# / non-finite Float64 bit patterns are not monotonic with float value). +# `Reverse` requires the same on both endpoints. For unsupported orderings +# (custom `By`, `Lt`, …) fall back to `BinaryBracket`. +@inline function _bit_interp_eligible(v::DenseVector{Float64}, x::Float64, lo, hi, order) + _simd_supported_order(order) || return false + @inbounds return v[lo] > 0.0 && isfinite(v[lo]) && + v[hi] > 0.0 && isfinite(v[hi]) && + x > 0.0 && isfinite(x) end function Base.searchsortedlast( ::BitInterpolationSearch, v::DenseVector{Float64}, x::Float64; order::Base.Order.Ordering = Base.Order.Forward, ) - if order !== Base.Order.Forward - return searchsortedlast(v, x, order) - end lo, hi = firstindex(v), lastindex(v) hi < lo && return lo - 1 - @inbounds if v[lo] <= 0.0 || !isfinite(v[lo]) || x <= 0.0 || !isfinite(x) + _bit_interp_eligible(v, x, lo, hi, order) || return searchsortedlast(BinaryBracket(), v, x; order = order) - end - g = _bit_interp_guess_f64(v, x, lo, hi) + g = _bit_interp_guess_f64(v, x, lo, hi, order) return searchsortedlast(BracketGallop(), v, x, g; order = order) end @@ -519,15 +598,11 @@ function Base.searchsortedfirst( ::BitInterpolationSearch, v::DenseVector{Float64}, x::Float64; order::Base.Order.Ordering = Base.Order.Forward, ) - if order !== Base.Order.Forward - return searchsortedfirst(v, x, order) - end lo, hi = firstindex(v), lastindex(v) hi < lo && return lo - @inbounds if v[lo] <= 0.0 || !isfinite(v[lo]) || x <= 0.0 || !isfinite(x) + _bit_interp_eligible(v, x, lo, hi, order) || return searchsortedfirst(BinaryBracket(), v, x; order = order) - end - g = _bit_interp_guess_f64(v, x, lo, hi) + g = _bit_interp_guess_f64(v, x, lo, hi, order) return searchsortedfirst(BracketGallop(), v, x, g; order = order) end diff --git a/src/simd_ir.jl b/src/simd_ir.jl index 1c543da..3880a9a 100644 --- a/src/simd_ir.jl +++ b/src/simd_ir.jl @@ -100,6 +100,15 @@ const _SIMD_GE_I64_IR = _simd_scan_ir("i64", "sge") const _SIMD_GT_F64_IR = _simd_scan_ir("double", "ogt") const _SIMD_GE_F64_IR = _simd_scan_ir("double", "oge") +# Reverse-direction predicates: used by `SIMDLinearScan` under +# `Base.Order.Reverse` ordering, where the array is decreasing and we want +# to find the first lane where `v[i] < x` (searchsortedlast) or `v[i] <= x` +# (searchsortedfirst). +const _SIMD_LT_I64_IR = _simd_scan_ir("i64", "slt") +const _SIMD_LE_I64_IR = _simd_scan_ir("i64", "sle") +const _SIMD_LT_F64_IR = _simd_scan_ir("double", "olt") +const _SIMD_LE_F64_IR = _simd_scan_ir("double", "ole") + # Backing primitives for SIMDLinearScan. Each returns the 0-based offset of # the first lane satisfying the predicate, or -1 if none. Caveat: NaN inputs # always compare false under the ordered `o*` float predicates, so NaN in `v` @@ -133,3 +142,33 @@ function _simd_first_ge(x::Float64, ptr::Ptr{Float64}, len::Int64) x, ptr, len ) end + +# Reverse-direction primitives. +function _simd_first_lt(x::Int64, ptr::Ptr{Int64}, len::Int64) + return Base.llvmcall( + (_SIMD_LT_I64_IR, "entry"), + Int64, Tuple{Int64, Ptr{Int64}, Int64}, + x, ptr, len + ) +end +function _simd_first_le(x::Int64, ptr::Ptr{Int64}, len::Int64) + return Base.llvmcall( + (_SIMD_LE_I64_IR, "entry"), + Int64, Tuple{Int64, Ptr{Int64}, Int64}, + x, ptr, len + ) +end +function _simd_first_lt(x::Float64, ptr::Ptr{Float64}, len::Int64) + return Base.llvmcall( + (_SIMD_LT_F64_IR, "entry"), + Int64, Tuple{Float64, Ptr{Float64}, Int64}, + x, ptr, len + ) +end +function _simd_first_le(x::Float64, ptr::Ptr{Float64}, len::Int64) + return Base.llvmcall( + (_SIMD_LE_F64_IR, "entry"), + Int64, Tuple{Float64, Ptr{Float64}, Int64}, + x, ptr, len + ) +end diff --git a/test/runtests.jl b/test/runtests.jl index 45e04a7..11e4b9d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -64,6 +64,103 @@ end @test searchsortedlast(GuesserHint(guesser), v1, 42.0) == 1 end + @safetestset "Native reverse-order paths (issue #67)" begin + using FindFirstFunctions, StableRNGs + + # ExpFromLeft, InterpolationSearch, SIMDLinearScan, and + # BitInterpolationSearch all used to fall back to BinaryBracket + # for `Base.Order.Reverse`. They now have native Reverse paths; + # verify parity vs `Base.searchsorted*(v, x, Reverse)` across + # eltypes and hint positions. + + rng = StableRNG(2027) + order = Base.Order.Reverse + + @testset "Float64 reverse-sorted parity, m=200" begin + # Decreasing sorted Float64 vector. + v = sort!(randn(rng, 200); rev = true) + for _ in 1:500 + x = (rand(rng) - 0.5) * 6 + hint = rand(rng, 1:length(v)) + expected_last = searchsortedlast(v, x, order) + expected_first = searchsortedfirst(v, x, order) + for strategy in ( + BinaryBracket(), LinearScan(), SIMDLinearScan(), + BracketGallop(), ExpFromLeft(), + InterpolationSearch(), BitInterpolationSearch(), + Auto(), + ) + @test searchsortedlast(strategy, v, x, hint; order = order) == + expected_last + @test searchsortedfirst(strategy, v, x, hint; order = order) == + expected_first + end + end + end + + @testset "Int64 reverse-sorted parity, m=200" begin + v = sort!(rand(rng, Int64(-100):Int64(100), 200); rev = true) + for _ in 1:500 + x = rand(rng, Int64(-110):Int64(110)) + hint = rand(rng, 1:length(v)) + expected_last = searchsortedlast(v, x, order) + expected_first = searchsortedfirst(v, x, order) + for strategy in ( + BinaryBracket(), LinearScan(), SIMDLinearScan(), + BracketGallop(), ExpFromLeft(), + InterpolationSearch(), Auto(), + ) + @test searchsortedlast(strategy, v, x, hint; order = order) == + expected_last + @test searchsortedfirst(strategy, v, x, hint; order = order) == + expected_first + end + end + end + + @testset "BitInterp reverse-sorted positive Float64" begin + # Reverse log-spaced — exercises the bit-pattern Reverse path. + v = sort!(collect(exp.(range(0.0, log(1.0e6); length = 1024))); rev = true) + for _ in 1:200 + x = exp(rand(rng) * log(1.0e6)) + expected_last = searchsortedlast(v, x, order) + expected_first = searchsortedfirst(v, x, order) + @test searchsortedlast( + BitInterpolationSearch(), v, x; order = order + ) == expected_last + @test searchsortedfirst( + BitInterpolationSearch(), v, x; order = order + ) == expected_first + end + # Non-positive endpoint falls back to BinaryBracket cleanly. + v_signed = sort!(randn(rng, 100); rev = true) + for x in (-0.5, 0.0, 0.5, -2.0, 2.0) + @test searchsortedlast( + BitInterpolationSearch(), v_signed, x; order = order + ) == searchsortedlast(v_signed, x, order) + end + end + + @testset "Edge cases under Reverse" begin + # Out-of-range queries: x larger than first, smaller than last. + v = collect(10.0:-0.5:1.0) + # x > v[1] → searchsortedfirst returns 1, searchsortedlast returns 0. + for strategy in ( + SIMDLinearScan(), ExpFromLeft(), + InterpolationSearch(), BitInterpolationSearch(), + ) + @test searchsortedlast(strategy, v, 100.0, 5; order = order) == + searchsortedlast(v, 100.0, order) + @test searchsortedlast(strategy, v, -100.0, 5; order = order) == + searchsortedlast(v, -100.0, order) + @test searchsortedfirst(strategy, v, 100.0, 5; order = order) == + searchsortedfirst(v, 100.0, order) + @test searchsortedfirst(strategy, v, -100.0, 5; order = order) == + searchsortedfirst(v, -100.0, order) + end + end + end + @safetestset "Custom ordering for strategy dispatch" begin using FindFirstFunctions: Guesser, GuesserHint, BracketGallop, LinearScan,