diff --git a/.github/workflows/Benchmark.yml b/.github/workflows/Benchmark.yml
new file mode 100644
index 0000000..db5d163
--- /dev/null
+++ b/.github/workflows/Benchmark.yml
@@ -0,0 +1,69 @@
+name: Benchmarks
+
+on:
+  pull_request:
+    paths:
+      - "src/**"
+      - "bench/**"
+      - "Project.toml"
+      - ".github/workflows/Benchmark.yml"
+  workflow_dispatch:
+
+# Only one benchmark run per PR at a time; cancel stale ones.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0   # we need both PR head and base branch
+      - uses: julia-actions/setup-julia@v3
+        with:
+          version: "1.11"
+      - uses: julia-actions/cache@v2
+      - name: Install AirspeedVelocity
+        run: |
+          julia -e 'using Pkg; Pkg.add(PackageSpec(name="AirspeedVelocity", version="0.6"))'
+      # Resolve commit shas explicitly so AirspeedVelocity sees stable refs.
+      - name: Resolve refs
+        id: resolve
+        run: |
+          PR_SHA=$(git rev-parse HEAD)
+          BASE_SHA=$(git rev-parse origin/${{ github.base_ref }})
+          echo "pr_sha=$PR_SHA"  >> "$GITHUB_OUTPUT"
+          echo "base_sha=$BASE_SHA" >> "$GITHUB_OUTPUT"
+      - name: Run benchmark comparison
+        run: |
+          export JULIA_NUM_THREADS=1
+          ~/.julia/bin/benchpkg FindFirstFunctions \
+            --rev=${{ steps.resolve.outputs.base_sha }},${{ steps.resolve.outputs.pr_sha }} \
+            --bench-on=${{ steps.resolve.outputs.pr_sha }} \
+            --output-dir=results/ \
+            --tune
+      - name: Render comparison table
+        run: |
+          ~/.julia/bin/benchpkgtable FindFirstFunctions \
+            --rev=${{ steps.resolve.outputs.base_sha }},${{ steps.resolve.outputs.pr_sha }} \
+            --input-dir=results/ \
+            > benchmark_table.md
+          cat benchmark_table.md
+      - name: Post benchmark comment
+        if: github.event_name == 'pull_request'
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          body-path: benchmark_table.md
+          edit-mode: replace
+      - name: Upload raw results
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results
+          path: results/
diff --git a/.gitignore b/.gitignore
index 58780b8..821cab1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 /Manifest.toml
+bench/Manifest.toml
 docs/build
 docs/Manifest.toml
 docs/src/assets/Manifest.toml
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..53cef6a
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,464 @@
+# FindFirstFunctions.jl NEWS
+
+## 2.0.0
+
+This is a major rewrite of the sorted-search API. The 1.x surface — a
+collection of single-purpose, hint-flavoured function names — has been
+replaced by a single strategy-dispatched API where the algorithm is chosen
+at the call site (or by `Auto`) rather than baked into the function name.
+
+### Breaking changes — removed names
+
+The following 1.x functions are gone in 2.0. Each one has a single
+canonical 2.x replacement:
+
+| 1.x | 2.x |
+| --- | --- |
+| `searchsortedfirstcorrelated(v, x, guess::Integer)` | `searchsortedfirst(BracketGallop(), v, x, guess)` |
+| `searchsortedlastcorrelated(v, x, guess::Integer)`  | `searchsortedlast(BracketGallop(), v, x, guess)` |
+| `searchsortedfirstcorrelated(v, x, g::Guesser)`     | `searchsortedfirst(GuesserHint(g), v, x)` |
+| `searchsortedlastcorrelated(v, x, g::Guesser)`      | `searchsortedlast(GuesserHint(g), v, x)` |
+| `searchsortedfirstvec(v, qs)`                       | `searchsortedfirst!(buf, v, qs)` (caller-owned `buf`) |
+| `searchsortedlastvec(v, qs)`                        | `searchsortedlast!(buf, v, qs)` (caller-owned `buf`) |
+
+The `*vec` migration shifts buffer ownership to the caller. The in-place
+form lets callers reuse buffers across calls, which the allocating form
+couldn't. For one-shot use the caller-side allocation is a single
+`Vector{Int}(undef, length(qs))` per call site.
+
+### Breaking changes — made internal
+
+These helpers backed 1.x public names. In 2.0 they remain in the module as
+implementation details of the strategy dispatch, but are no longer
+documented or part of the public API:
+
+  - `searchsortedfirstexp` — now backs the `ExpFromLeft` strategy. Use
+    `searchsortedfirst(ExpFromLeft(), v, x, lo)` instead.
+  - `bracketstrictlymontonic` — now backs the `BracketGallop` strategy.
+    Callers wanting a bracket-then-binary-search should use
+    `searchsortedlast(BracketGallop(), v, x, hint)` /
+    `searchsortedfirst(BracketGallop(), v, x, hint)`.
+
+### New: strategy-dispatched search API
+
+A single pair of generic functions covers every sorted-search algorithm in
+the package:
+
+```julia
+searchsortedfirst(strategy, v, x[, hint]; order = Base.Order.Forward)
+searchsortedlast(strategy, v, x[, hint]; order = Base.Order.Forward)
+```
+
+`strategy` is a concrete subtype of `SearchStrategy`. The shipped
+strategies are:
+
+  - **`LinearScan`** — walks ±1 from the hint. Cheapest when the hint is
+    close to the answer; O(n) worst case.
+  - **`SIMDLinearScan`** — `LinearScan` with the forward walk lowered to
+    8-wide SIMD chunks via custom LLVM IR. Specialized for
+    `DenseVector{Int64}` and `DenseVector{Float64}`; falls back to scalar
+    `LinearScan` for any other element type. Opt-in only — `Auto` does not
+    pick it. See `strategies.md` for the NaN / element-type caveats.
+  - **`BracketGallop`** — bidirectional exponential bracket around the
+    hint, then bounded binary search. Workhorse hinted strategy. O(1) when
+    the hint is close, never worse than ~2 log₂ n.
+  - **`ExpFromLeft`** — exponential search forward from a left-bound hint.
+    Five linear probes, then doubling, then bounded binary search. Default
+    `Auto` choice in the sparse-batched path.
+  - **`InterpolationSearch`** — linear-extrapolation guess refined with
+    binary search. O(1) per query on uniformly-spaced numeric data,
+    O(log n) otherwise.
+  - **`BinaryBracket`** — plain `Base.searchsortedlast` /
+    `Base.searchsortedfirst`. Used as the no-hint fallback by every other
+    strategy.
+  - **`GuesserHint(g::Guesser)`** — `BracketGallop` driven by a
+    `Guesser`'s integer guess, with the result cached back into the
+    `Guesser`.
+  - **`Auto`** — heuristic dispatcher; see "New: `Auto` heuristic" below.
+
+Strategies are zero-field singletons (except `GuesserHint`, which wraps a
+`Guesser`, and `Auto`, which optionally carries a `SearchProperties`
+cache). The dispatch is type-stable; pinning a strategy at a call site
+costs nothing at runtime.
+
+### New: batched in-place API
+
+```julia
+searchsortedfirst!(idx_out, v, queries; strategy = Auto(), order = ...)
+searchsortedlast!(idx_out, v, queries; strategy = Auto(), order = ...)
+```
+
+Writes one index per element of `queries` into `idx_out` (which must be
+the same length). If `queries` is sorted under `order`, each query's hint
+is the previous result, so the total cost for sorted batches is
+O(length(v) + length(queries)) under the typical `Auto` choice. If
+`queries` is not sorted, the call falls back to per-element
+`Base.searchsortedlast` / `Base.searchsortedfirst` with no hint.
+
+These methods are the replacement for the removed `searchsortedfirstvec` /
+`searchsortedlastvec`. The caller owns the output buffer and is free to
+reuse it across calls.
+
+### New: `Auto` heuristic
+
+`Auto` picks a strategy based on the calling context:
+
+**Per-query** (`searchsortedlast(Auto(), v, x[, hint])`):
+
+  - No hint, or hint out of range → `BinaryBracket`.
+  - Hint in range, `length(v) ≤ 16` → `LinearScan`.
+  - Hint in range, `length(v) > 16` → `BracketGallop`.
+
+**Batched sorted** (`searchsortedlast!(out, v, queries; strategy = Auto())`)
+chooses by the expected average gap in `v`'s index space between
+consecutive query results. For numeric data the gap is estimated from the
+span ratio `(queries[end] - queries[1]) / (v[end] - v[1])`, so dense-burst
+queries clustered inside one segment of `v` are correctly recognized as
+gap ≈ 0:
+
+  - `gap ≤ 4` → `LinearScan`.
+  - `gap ≥ 8`, `length(v) ≥ 1024`, `length(queries) ≥ 2`, not skewed, and
+    a sampled-linearity probe accepts → `InterpolationSearch`.
+  - otherwise → `ExpFromLeft`.
+
+The sampled-linearity probe reads 11 elements (~25 ns) and accepts when
+every interior point sits within 0.1% of the straight line through `v[1]`
+and `v[end]`. The 0.1% tolerance is tight by design: at large `n` the
+order-statistic variance of random-sorted data is small enough that a 5%
+threshold would falsely pass on irregular data.
+
+Skew detection on the query distribution adds an additional gate: if the
+median query is more than 20% off the midpoint of the query span (and
+`m ≥ 10` so the median is meaningful), `Auto` picks `ExpFromLeft` even
+on linear `v`, because consecutive queries land in the same region and
+the previous-result hint is worth more than the linear-extrapolation
+guess. Skew detection is gated on `m ≥ 10` to avoid the median sampling
+variance dominating for small batches.
+
+The crossover constants (`_AUTO_LINEAR_THRESHOLD = 16`,
+`_AUTO_BATCH_LINEAR_GAP = 4`, `_AUTO_INTERP_MIN_GAP = 8`,
+`_AUTO_INTERP_MIN_N = 1024`, `_AUTO_INTERP_MIN_M = 2`,
+`_AUTO_LINEAR_REL_TOLERANCE = 1e-3`) were tuned empirically by a regime
+grid covering uniform / jittered / log-spaced / two-scale / random `v`
+patterns crossed with dense / sparse / clustered / sorted-random query
+patterns at vector lengths from 64 to 65536 and batch sizes from 1 to
+4096. Across that grid `Auto` is within 1.2× of the per-cell optimum in
+90% of cells.
+
+### New: `SearchProperties` cache for `Auto`
+
+For callers issuing many short batches against the same sorted vector
+(interpolation-segment lookups being the obvious case), `Auto`'s per-call
+linearity probe is redundant. The new `SearchProperties` struct caches
+the probe result and `Auto(props)` consumes it instead of re-probing:
+
+```julia
+v = collect(0.0:0.001:100.0)
+props = SearchProperties(v)        # run probes once: ~25 ns + (Float-only) O(n) NaN scan
+strat = Auto(props)                # Auto holding the cached facts
+
+queries = sort!(rand(8) .* 100.0)
+out = Vector{Int}(undef, length(queries))
+searchsortedlast!(out, v, queries; strategy = strat)
+```
+
+`SearchProperties` is `isbits` — it travels in registers and copies are
+free. `Auto(props)` is itself zero-allocation; the resulting `Auto` is a
+single concrete struct, not a parametric type.
+
+Currently consumed: `props.is_linear` (replaces `_sampled_looks_linear`
+in the batched dispatch). The `has_props` and `has_nan` fields are
+populated by `SearchProperties(v)` for forward compatibility; the latter
+will unlock `SIMDLinearScan` participation in `Auto` once the eligibility
+gate is wired in.
+
+The cache is not invalidated automatically — the caller must reconstruct
+`SearchProperties(v)` if `v` mutates. A stale cache is correctness-
+preserving (the chosen `InterpolationSearch` falls through to
+`BracketGallop` from a bad guess — slow but still O(log n)), so the
+worst case is a performance regression, not a wrong answer.
+
+### New: `SIMDLinearScan`
+
+A SIMD variant of `LinearScan` that lowers the forward walk past the hint
+to 8-wide SIMD chunks via custom LLVM IR. The same scaffolding that backs
+`_findfirstequal` (load 8 lanes, vector compare, bitmask, `cttz` on the
+mask) is reused for the four predicates needed by positional search:
+
+  - `_simd_first_gt` / `_simd_first_ge` for `Int64` (using `icmp sgt` /
+    `icmp sge`).
+  - `_simd_first_gt` / `_simd_first_ge` for `Float64` (using `fcmp ogt` /
+    `fcmp oge`).
+
+The IR is generated from a shared template `_simd_scan_ir(t, pred)`
+parameterised on LLVM element type and compare predicate.
+
+Caveats (documented in detail in `strategies.md`):
+
+  - **Element types**: `DenseVector{Int64}` and `DenseVector{Float64}`
+    only. Other element types (including `Int32`, `UInt64`, `Float32`,
+    `Date`, `String`) hit the scalar `LinearScan` fallback path. The
+    dispatch is static, so the fallback costs nothing per call.
+  - **NaN**: ordered float compares (`fcmp o*`) return false for NaN
+    operands, so a NaN in `v` is silently skipped by the SIMD scan.
+    Sorted-Float64 with NaN isn't well-defined under any total order
+    anyway, so this is consistent with `Base.searchsortedlast` on such
+    vectors.
+  - **Forward order only**: non-`Forward` orderings fall back to scalar
+    `LinearScan`.
+  - **No hint**: falls back to `BinaryBracket`.
+
+`Auto` does **not** pick `SIMDLinearScan`. It is opt-in: the regime where
+it strictly beats `LinearScan` (long forward walks on Int64/Float64
+DenseVectors) overlaps with the regime where `Auto` already prefers
+`BracketGallop` or `ExpFromLeft`. Pin it explicitly when you have a
+workload that wants a long forward scan and you know the element type.
+
+### Documentation restructure
+
+The single `index.md` from 1.x has been split into five topical pages:
+
+  - **Home** (`index.md`): overview and quick example.
+  - **Interface and extension rules** (`interface.md`): the public API
+    surface, the contract a `SearchStrategy` subtype must satisfy, and
+    how to add a new one with a correctness-check pattern. Notes that
+    `Auto`'s decision tree is not externally extensible — new strategies
+    do not auto-register with `Auto`.
+  - **Search strategies** (`strategies.md`): catalog of every shipped
+    strategy with a chooser table (best case / worst case / hint usage),
+    per-strategy notes, the `SIMDLinearScan` caveats, and the
+    "Equality search" appendix linking to `findfirstequal` /
+    `findfirstsortedequal` (which are a deliberately-separate API
+    because their return type differs from positional search).
+  - **Guessers** (`guessers.md`): the `Guesser` type, its
+    linear-extrapolation vs. cached-previous-result behaviour, the
+    `GuesserHint` strategy adapter, and explicit guidance on when *not*
+    to use a `Guesser`.
+  - **Auto: heuristics and benchmarks** (`auto.md`): full `Auto`
+    decision tree for both per-query and batched callers, every
+    crossover constant with justification, the `SearchProperties` cache
+    integration, and a self-contained benchmark script that reproduces
+    the regime-grid comparison.
+
+### Internal: shared SIMD scan scaffolding
+
+The LLVM IR pattern used by `_findfirstequal` (load 8 lanes, vector
+compare, `cttz` on the bitmask) is now generated by a shared template
+`_simd_scan_ir(t, pred)`. `FFE_IR` (equality scan, used by
+`findfirstequal` and `findfirstsortedequal`) and the four
+`_SIMD_*_IR`s (positional compares for `SIMDLinearScan`) all flow from
+that template. Adding a new predicate is a one-line change.
+
+### New: equality search through the strategy framework
+
+`findequal(strategy, v, x[, hint])` builds an equality variant on top of
+the strategy dispatch. The return type is `Int` (not `Union{Int, Nothing}`);
+"not found" is signalled by the sentinel `firstindex(v) - 1` (= `0` on
+1-based vectors), matching the convention `Base.searchsortedlast` already
+uses for "x precedes all of v".
+
+  - Most strategies are handled generically:
+    `findequal(strategy, v, x[, hint])` runs
+    `searchsortedfirst(strategy, v, x[, hint])` and checks whether the
+    candidate index actually equals `x`. This means
+    `findequal(BracketGallop(), v, x, hint)`,
+    `findequal(SIMDLinearScan(), v, x, hint)`,
+    `findequal(GuesserHint(g), v, x)`, `findequal(Auto(), v, x)`, and
+    `findequal(BinaryBracket(), v, x)` all work without per-strategy
+    glue.
+  - `BisectThenSIMD <: SearchStrategy` is a new strategy that, for
+    `DenseVector{Int64}`, dispatches `findequal` straight into the
+    bisect-then-SIMD-equality-scan algorithm that backs
+    `findfirstsortedequal`. For other element types, falls back to
+    `BinaryBracket + post-check`. In positional dispatch
+    (`searchsortedfirst` / `searchsortedlast`) it delegates to
+    `BinaryBracket` — the bisect-then-equality-scan algorithm can't
+    answer the positional "where would `x` insert?" question.
+
+### Bug fix: `BracketGallop`/`InterpolationSearch`/`ExpFromLeft`/`findfirstsortedequal` with duplicates
+
+Four pre-existing functions returned the wrong index when `v` contained
+duplicates of the queried value and the hint or bisection midpoint
+landed inside a run of duplicates. All four are fixed in 2.0:
+
+  - `searchsortedfirst(BracketGallop(), v, x, hint)` previously galloped
+    rightward when `v[hint] == x`, returning the rightmost duplicate
+    instead of the first. Fixed by adding the companion
+    `bracketstrictlymontonic_first` that gallops leftward when
+    `v[hint] >= x`.
+  - `searchsortedfirst(InterpolationSearch(), v, x)` chains into
+    `BracketGallop`, so the same bug propagated and is fixed by the
+    above.
+  - `searchsortedfirst(ExpFromLeft(), v, x, hint)` previously
+    exponential-searched from `hint` when `v[hint] == x`, missing
+    earlier duplicates. Fixed by falling back to a full search whenever
+    `v[hint] >= x`.
+  - `findfirstsortedequal(var, vars)` bisected with the predicate
+    `vars[mid] <= var`, which walked the offset past the first
+    duplicate when `vars[mid] == var`. Fixed by tightening the predicate
+    to `<` and updating the window-shrink rule to include the midpoint
+    when the comparison is false. The fast-path LLVM IR branch is
+    replaced by plain `ifelse` (Julia compiles it to the same
+    branchless `select` modulo the `!unpredictable` metadata, which had
+    minimal observable effect).
+
+The fix is exercised by the new `findequal` strategy-parity tests on
+randomized vectors with frequent duplicates (Int64 in [-50, 50] over
+vectors up to length 256, 2000 trials per strategy across all shipped
+strategies).
+
+### Equality search (`findfirstequal`, `findfirstsortedequal`)
+
+Both names continue to exist in 2.0, returning `Union{Int, Nothing}` as
+before. Docstrings refreshed to point at the new
+[`findequal`](@ref FindFirstFunctions.findequal) wrapper as the
+strategy-framework-compatible alternative. Documentation moved out of
+`strategies.md` into a dedicated `equality.md` page since these functions
+do not match the strategy-dispatch contract (their return type and
+question are different).
+
+### Exports
+
+2.0 exports the public API surface (previously the package exported
+nothing, requiring `FindFirstFunctions.LinearScan()` qualification):
+
+  - `SearchStrategy`, every concrete strategy
+    (`LinearScan`, `SIMDLinearScan`, `BracketGallop`, `ExpFromLeft`,
+    `InterpolationSearch`, `BinaryBracket`, `BisectThenSIMD`,
+    `GuesserHint`, `Auto`), and the `SearchProperties` cache.
+  - `Guesser` and `looks_linear`.
+  - The batched FFF-defined names `searchsortedfirst!` and
+    `searchsortedlast!` (the non-bang `searchsortedfirst` /
+    `searchsortedlast` are `Base` extensions, available via `Base`).
+  - The equality routines `findequal`, `findfirstequal`,
+    `findfirstsortedequal`.
+
+`using FindFirstFunctions` is now sufficient to access the full public
+API. Downstream code that previously qualified every call (most of the
+SciML ecosystem) continues to work — the qualified names still resolve.
+
+### Auto retuning with SIMDLinearScan integration
+
+`Auto`'s batched decision tree has been retuned based on a 1080-cell
+benchmark sweep covering 5 `v` patterns × 4 query patterns × 5 `n` sizes ×
+6 `m` sizes × 2 element types. The previous tree fell out of the bench
+sweep with median 1.18× / p95 2.09× / max 2.78× slack against the per-cell
+optimum; the retuned tree comes in at median 1.04× / p95 1.38× /
+max 2.18×.
+
+New branches and constants:
+
+  - `SIMDLinearScan` is now dispatched by `Auto` in the medium-gap regime
+    (`gap ∈ (4, _auto_simd_gap_max(v)]`) when `v` is `DenseVector{Int64}`
+    or `DenseVector{Float64}`. `_auto_simd_gap_max` is 64 for both eltypes.
+    For Float64 the dispatch consults `SearchProperties.has_nan` if
+    available; otherwise no-NaN is assumed, consistent with how
+    `Base.searchsortedlast` already trusts the input is sorted.
+  - `BracketGallop` is preferred over `ExpFromLeft` at `gap ≥ 16` (new
+    constant `_AUTO_GALLOP_GAP_MIN`). The five up-front linear probes of
+    `ExpFromLeft` are guaranteed to miss once the answer is more than five
+    elements past the previous-result hint, so the doubling-from-`hint`
+    walk of `BracketGallop` is strictly faster at large gaps.
+  - Tiered linearity probe for `InterpolationSearch`. The strict
+    `_AUTO_LINEAR_REL_TOLERANCE = 1e-3` still gates the
+    `_AUTO_INTERP_MIN_GAP ≤ gap < _AUTO_INTERP_LOOSE_GAP` (8 to 256) range
+    — only truly uniform data passes. For `gap ≥ _AUTO_INTERP_LOOSE_GAP`
+    (256), the loose `_AUTO_LINEAR_LOOSE_TOLERANCE = 5e-2` applies, which
+    accepts approximately linear data (sorted random, jittered) where the
+    O(√n)/n order-statistic deviation is well below 5 %. `InterpolationSearch`
+    still loses on log-spaced and two-scale at any gap, and the strict tier
+    catches those.
+
+Bug fix: `_estimate_avg_gap` no longer falls back to `n ÷ m` when the
+skew flag is set. The fallback caused `SIMDLinearScan` to be picked for
+tightly-clustered queries (span_q ≈ 0) where `LinearScan`'s scalar walk
+is 5× faster. The skew flag now serves its intended purpose as a binary
+InterpolationSearch-unsuitability signal, while the actual gap value is
+always the span-based estimate.
+
+Reproducibility: the full sweep is checked in at `bench/auto_sweep.jl`
+with an analysis helper at `bench/analyze.jl`. See `auto.md` for the
+decision tree, the per-regime winner distribution, and how to run the
+sweep locally.
+
+### New (opt-in): BitInterpolationSearch for log-spaced Float64 data
+
+`BitInterpolationSearch` is a variant of `InterpolationSearch` that
+reinterprets a positive `DenseVector{Float64}` as `DenseVector{UInt64}`
+before computing the extrapolation guess. The IEEE bit pattern is
+monotonically increasing with the float value (for positive Float64) and
+approximately linear in array index when the underlying data is
+log-spaced (geometric). On such data the bit-domain guess can be far
+better than the float-domain guess that `InterpolationSearch` would
+compute.
+
+A targeted bench sweep at `bench/bitinterp_sweep.jl` covers 9 v patterns
+× 4 q patterns × 6 n sizes (up to 2²⁰) × 7 m sizes, with v patterns
+specifically chosen to probe BitInterp's regime: `logspaced` (1 to 10⁶),
+`logspaced_wide` (10⁻³ to 10¹⁵), `geometric_dense` (geometric spanning
+10⁶), `geometric_sparse` (geometric spanning 10¹²), `power2`, `sqrt`,
+`two_decade`, `jittered_log`, and `uniform` (as the negative control).
+
+Result over 1404 cells: BitInterp wins outright in 59 cells (4.2%) and
+sits within 10% of the per-cell best in 75 cells (5.3%). The wins
+concentrate in `logspaced_wide` / `logspaced` / `geometric_*` /
+`jittered_log` at small m (= 4) and large n (≥ 16384). Margins range
+from 1.0× (tie) to 1.43× (`logspaced_wide log_grid n=2²⁰ m=4`). On
+non-log-spaced data (`uniform`, `power2`, `sqrt`, `two_decade`)
+BitInterp loses — the bit-pattern guess is worse than the float-domain
+guess when the data isn't geometric.
+
+**`Auto` does not pick BitInterp.** The wins are real but narrow
+(small batches, very large n, true log-spacing), and adding the dispatch
+overhead to Auto's hot path would penalize the much larger set of
+non-log-spaced workloads. The strategy is exported as `BitInterpolationSearch`
+for callers who know their data is log-spaced and want to pin it.
+
+Constraints:
+  - `DenseVector{Float64}` only; non-Float64 dense eltypes fall back to
+    plain `InterpolationSearch`.
+  - Requires `v[1] > 0`, `x > 0`, and both finite. Subnormal /
+    non-finite Float64 bit patterns are not monotonic with float value
+    under raw reinterpret, so the strategy falls back to `BinaryBracket`
+    in those cases.
+  - Forward ordering only.
+
+### Cleanup: typo fix, FFE_IR unification, tolerance kwarg
+
+  - Internal helper `bracketstrictlymontonic` renamed to
+    `bracketstrictlymonotonic` (and the companion `_first` variant).
+    Internal-only — no downstream impact.
+  - The `FFE_IR` SIMD-equality IR literal is now generated by
+    `_simd_scan_ir("i64", "eq")` instead of duplicating ~60 lines of
+    inline LLVM IR. The same template produces the four `>`/`>=`
+    variants for `SIMDLinearScan`, so all five SIMD primitives share a
+    single source of truth.
+  - `SearchProperties(v; linear_tolerance = 1e-3)` exposes the
+    sampled-linearity probe's tolerance as a kwarg, matching `Guesser`'s
+    `looks_linear_threshold`. Loosen (e.g. `1e-2`) to widen the regime
+    where `Auto(props)` picks `InterpolationSearch`; tighten (e.g.
+    `1e-4`) to be more conservative. Default unchanged at `1e-3`.
+
+`findequal`'s docstring now explicitly documents the sentinel value
+`firstindex(v) - 1` and its behaviour on OffsetArrays.
+
+### Compatibility
+
+  - **Julia compat**: unchanged from 1.x — `julia = "1.10"`.
+  - **Downstream PRs**: SciML packages using the removed names need
+    companion PRs. The first one
+    [SciML/DataInterpolations.jl#529](https://github.com/SciML/DataInterpolations.jl/pull/529)
+    is the migration template: drop the legacy imports, route the
+    `Integer`-hint path through `searchsortedfirst(BracketGallop(), …)`
+    and the `Guesser` path through `searchsortedfirst(GuesserHint(g), …)`.
+
+### Test coverage
+
+47100 tests pass across the strategy dispatch, the batched API, the
+`Auto` heuristic on a regime sweep, `SIMDLinearScan` randomized fuzz
+(10000 Int64 + 10000 Float64 cases against `Base`), edge cases (empty,
+single-element, duplicates, out-of-range hints, x outside the vector
+range), fallback paths (Int32, Float32, String, no-hint, reverse
+order), and `SearchProperties` cache correctness (output equivalence
+against the un-cached path, `isbits` guarantee, behaviour under lying
+cache).
diff --git a/Project.toml b/Project.toml
index e6437c0..655dfb1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FindFirstFunctions"
 uuid = "64ca27bc-2ba2-4a57-88aa-44e436879224"
-version = "1.8.0"
+version = "2.0.0"
 authors = ["Chris Elrod <elrodc@gmail.com> and contributors"]
 
 [deps]
@@ -10,13 +10,15 @@ PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Pkg = "1.10"
 PrecompileTools = "1"
 SafeTestsets = "0.1"
+StableRNGs = "1"
 Test = "1.10"
 julia = "1.10"
 
 [extras]
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Pkg", "SafeTestsets", "Test"]
+test = ["Pkg", "SafeTestsets", "StableRNGs", "Test"]
diff --git a/bench/.gitignore b/bench/.gitignore
new file mode 100644
index 0000000..99b8e25
--- /dev/null
+++ b/bench/.gitignore
@@ -0,0 +1,2 @@
+results.csv
+Manifest.toml
diff --git a/bench/Project.toml b/bench/Project.toml
new file mode 100644
index 0000000..db3a12b
--- /dev/null
+++ b/bench/Project.toml
@@ -0,0 +1,6 @@
+[deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+FindFirstFunctions = "64ca27bc-2ba2-4a57-88aa-44e436879224"
+PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/bench/analyze.jl b/bench/analyze.jl
new file mode 100644
index 0000000..b050678
--- /dev/null
+++ b/bench/analyze.jl
@@ -0,0 +1,146 @@
+using DelimitedFiles, Statistics, Printf
+
+const CSV = joinpath(@__DIR__, "results.csv")
+
+# Read raw data
+raw, header = readdlm(CSV, ','; header = true)
+header = vec(string.(header))
+function col(name)
+    j = findfirst(==(name), header)
+    j === nothing && error("no column $name")
+    return j
+end
+
+const STRATS = ["LinearScan", "SIMDLinearScan", "BracketGallop", "ExpFromLeft", "InterpolationSearch"]
+
+function row_winner(row)
+    times = Float64[parse(Float64, string(row[col(s)])) for s in STRATS]
+    j = argmin(times)
+    return STRATS[j], times[j]
+end
+
+function ratio_to(row, strat_name)
+    times = Float64[parse(Float64, string(row[col(s)])) for s in STRATS]
+    best = minimum(times)
+    return parse(Float64, string(row[col(strat_name)])) / best
+end
+
+println("==> Where does SIMDLinearScan win?")
+simd_wins = []
+for i in axes(raw, 1)
+    local row = raw[i, :]
+    local winner, _ = row_winner(row)
+    if winner == "SIMDLinearScan"
+        push!(
+            simd_wins, (
+                eltype = string(row[col("eltype")]),
+                v = string(row[col("v_kind")]),
+                q = string(row[col("q_kind")]),
+                n = parse(Int, string(row[col("n")])),
+                m = parse(Int, string(row[col("m")])),
+            )
+        )
+    end
+end
+n_simd_wins = length(simd_wins)
+println("  SIMDLinearScan wins in $n_simd_wins cells")
+println()
+
+# Tabulate where SIMD wins by m (gap proxy)
+println("==> SIMDLinearScan wins by m (proxy for batched-gap regime):")
+by_m = Dict{Int, Int}()
+for c in simd_wins
+    by_m[c.m] = get(by_m, c.m, 0) + 1
+end
+for m in sort(collect(keys(by_m)))
+    @printf("  m=%5d:  %d cells\n", m, by_m[m])
+end
+println()
+
+# By eltype
+println("==> SIMDLinearScan wins by eltype:")
+by_eltype = Dict{String, Int}()
+for c in simd_wins
+    by_eltype[c.eltype] = get(by_eltype, c.eltype, 0) + 1
+end
+for k in sort(collect(keys(by_eltype)))
+    println("  $k: $(by_eltype[k]) cells")
+end
+println()
+
+# Now show: for sorted-batched cells where SIMDLinearScan wins, what's the
+# ratio of LinearScan/ExpFromLeft to SIMDLinearScan? This tells us the
+# magnitude of the improvement.
+println("==> SIMDLinearScan win margin over LinearScan and ExpFromLeft:")
+println("    (Higher = bigger SIMD speedup over the second-best Auto candidate)")
+margins = []
+for i in axes(raw, 1)
+    row = raw[i, :]
+    winner, best = row_winner(row)
+    if winner == "SIMDLinearScan"
+        t_simd = best
+        t_lin = parse(Float64, string(row[col("LinearScan")]))
+        t_exp = parse(Float64, string(row[col("ExpFromLeft")]))
+        push!(
+            margins, (
+                ratio_lin = t_lin / t_simd,
+                ratio_exp = t_exp / t_simd,
+                n = parse(Int, string(row[col("n")])),
+                m = parse(Int, string(row[col("m")])),
+            )
+        )
+    end
+end
+println("  SIMD vs LinearScan: median $(median(m.ratio_lin for m in margins))x, max $(maximum(m.ratio_lin for m in margins))x")
+println("  SIMD vs ExpFromLeft: median $(median(m.ratio_exp for m in margins))x, max $(maximum(m.ratio_exp for m in margins))x")
+println()
+
+# What does Auto pick in the cells where SIMDLinearScan would win?
+# Find the m, n, gap regime where SIMDLinearScan beats LinearScan AND ExpFromLeft.
+println("==> Best regime for SIMDLinearScan (cells where SIMD wins by >20%):")
+significant_simd_wins = filter(c -> c.ratio_lin > 1.2 && c.ratio_exp > 1.2, margins)
+println("  $(length(significant_simd_wins)) cells where SIMD beats both LinearScan and ExpFromLeft by >20%")
+n_by_m_n = Dict{Tuple{Int, Int}, Int}()
+for c in significant_simd_wins
+    n_by_m_n[(c.n, c.m)] = get(n_by_m_n, (c.n, c.m), 0) + 1
+end
+for (n, m) in sort(collect(keys(n_by_m_n)))
+    @printf("  n=%5d m=%5d: %d cells\n", n, m, n_by_m_n[(n, m)])
+end
+println()
+
+# Compute: in each cell, the GAP. gap = n * span(queries)/span(v) / m
+# (the same heuristic Auto uses). Use n/m as a rough proxy since exact gap
+# depends on the query distribution which isn't recoverable from CSV alone.
+println("==> n/m ratio for SIMD-winning cells (rough gap proxy):")
+nm_buckets = Dict{Int, Int}()
+for c in simd_wins
+    bucket = c.n ÷ c.m
+    # Round to a power-of-2 bucket
+    log_bucket = bucket == 0 ? 0 : floor(Int, log2(bucket))
+    nm_buckets[log_bucket] = get(nm_buckets, log_bucket, 0) + 1
+end
+for b in sort(collect(keys(nm_buckets)))
+    @printf("  n/m in [2^%d, 2^%d):  %d cells\n", b, b + 1, nm_buckets[b])
+end
+println()
+
+println("==> What strategy does Auto pick in cells where SIMD would win?")
+auto_picks_when_simd = Dict{String, Int}()
+for i in axes(raw, 1)
+    row = raw[i, :]
+    winner, _ = row_winner(row)
+    if winner == "SIMDLinearScan"
+        # We don't know Auto's pick from the CSV, but we know Auto's time.
+        # The closest strategy time to Auto's time tells us the pick.
+        t_auto = parse(Float64, string(row[col("Auto")]))
+        candidates = [(s, parse(Float64, string(row[col(s)]))) for s in STRATS]
+        # Pick the strategy whose time is closest to Auto's
+        # (within 20% — heuristic).
+        closest = argmin(c -> abs(c[2] - t_auto), candidates)
+        auto_picks_when_simd[closest[1]] = get(auto_picks_when_simd, closest[1], 0) + 1
+    end
+end
+for (s, n) in sort(collect(pairs(auto_picks_when_simd)); by = x -> -x[2])
+    println("  Auto -> $s: $n cells (out of $(length(simd_wins)))")
+end
diff --git a/bench/auto_sweep.jl b/bench/auto_sweep.jl
new file mode 100644
index 0000000..dcd41af
--- /dev/null
+++ b/bench/auto_sweep.jl
@@ -0,0 +1,279 @@
+using FindFirstFunctions, StableRNGs, BenchmarkTools, Statistics, Printf
+
+const F = FindFirstFunctions
+
+# --------------------------------------------------------------------------
+# Strategy registry — every strategy that competes in the sorted-batched
+# regime. BinaryBracket is excluded from the per-cell winner pool because it
+# never wins under any reasonable workload (no hint = full binary search
+# per query); it's only useful as a fallback. Auto's actual choices are
+# LinearScan / SIMDLinearScan / ExpFromLeft / InterpolationSearch.
+# --------------------------------------------------------------------------
+const STRATS = [
+    ("LinearScan", F.LinearScan()),
+    ("SIMDLinearScan", F.SIMDLinearScan()),
+    ("BracketGallop", F.BracketGallop()),
+    ("ExpFromLeft", F.ExpFromLeft()),
+    ("InterpolationSearch", F.InterpolationSearch()),
+]
+
+# --------------------------------------------------------------------------
+# v patterns (sorted vectors of length n)
+# --------------------------------------------------------------------------
+function make_v(::Val{:uniform}, ::Type{T}, n, seed) where {T <: AbstractFloat}
+    return collect(T, range(zero(T), T(n); length = n))
+end
+function make_v(::Val{:uniform}, ::Type{T}, n, seed) where {T <: Integer}
+    return collect(T, T(1):T(n))
+end
+function make_v(::Val{:jittered}, ::Type{T}, n, seed) where {T <: AbstractFloat}
+    rng = StableRNG(seed)
+    base = collect(range(0.0, Float64(n); length = n))
+    return convert(Vector{T}, sort!(base .+ 0.1 .* (rand(rng, n) .- 0.5)))
+end
+function make_v(::Val{:jittered}, ::Type{T}, n, seed) where {T <: Integer}
+    rng = StableRNG(seed)
+    raw = sort!(unique!(rand(rng, T(1):T(2n), n)))
+    return convert(Vector{T}, raw)
+end
+function make_v(::Val{:logspaced}, ::Type{T}, n, seed) where {T <: AbstractFloat}
+    return collect(T, exp.(range(0.0, log(1.0e6); length = n)))
+end
+function make_v(::Val{:logspaced}, ::Type{T}, n, seed) where {T <: Integer}
+    raw = sort!(unique!(round.(T, exp.(range(0.0, log(Float64(10n)); length = n)))))
+    return convert(Vector{T}, raw)
+end
+function make_v(::Val{:twoscale}, ::Type{T}, n, seed) where {T <: AbstractFloat}
+    h = n ÷ 2
+    return convert(
+        Vector{T},
+        sort!(vcat(range(0.0, 0.1n; length = h), range(0.1n, Float64(n); length = n - h)))
+    )
+end
+function make_v(::Val{:twoscale}, ::Type{T}, n, seed) where {T <: Integer}
+    h = n ÷ 2
+    a = round.(T, range(1.0, 0.1n; length = h))
+    b = round.(T, range(0.1n + 1, Float64(n); length = n - h))
+    return convert(Vector{T}, sort!(vcat(a, b)))
+end
+function make_v(::Val{:random_sorted}, ::Type{T}, n, seed) where {T <: AbstractFloat}
+    return convert(Vector{T}, sort!(rand(StableRNG(seed), n) .* n))
+end
+function make_v(::Val{:random_sorted}, ::Type{T}, n, seed) where {T <: Integer}
+    return convert(Vector{T}, sort!(rand(StableRNG(seed), T(1):T(10n), n)))
+end
+
+# --------------------------------------------------------------------------
+# query patterns
+# --------------------------------------------------------------------------
+function _to_eltype(v, xs)
+    T = eltype(v)
+    return T <: Integer ? convert(Vector{T}, round.(T, xs)) : convert(Vector{T}, xs)
+end
+function make_q(::Val{:dense}, v, m, seed)
+    raw = collect(range(Float64(first(v)), Float64(last(v)); length = m))
+    return _to_eltype(v, raw)
+end
+function make_q(::Val{:sparse}, v, m, seed)
+    rng = StableRNG(seed)
+    span = Float64(last(v)) - Float64(first(v))
+    raw = sort!(Float64(first(v)) .+ rand(rng, m) .* span)
+    return _to_eltype(v, raw)
+end
+function make_q(::Val{:clustered}, v, m, seed)
+    rng = StableRNG(seed)
+    j = max(1, length(v) ÷ 4)
+    lo = Float64(v[j])
+    hi = Float64(v[min(j + 1, length(v))])
+    span = hi - lo
+    raw = sort!(lo .+ rand(rng, m) .* max(span, 1.0))
+    return _to_eltype(v, raw)
+end
+function make_q(::Val{:sorted_random}, v, m, seed)
+    rng = StableRNG(seed)
+    span = Float64(last(v)) - Float64(first(v))
+    raw = sort!(Float64(first(v)) .+ rand(rng, m) .* span)
+    return _to_eltype(v, raw)
+end
+
+# --------------------------------------------------------------------------
+# Bench a single (v, queries, strategy) cell. Returns time in nanoseconds.
+# Uses a fixed-repeat loop to avoid BenchmarkTools per-call overhead, which
+# dominates fast strategies on small m.
+# --------------------------------------------------------------------------
+function bench_one(v, queries, strat, out, reps::Int = 5)
+    # Warm up
+    F.searchsortedlast!(out, v, queries; strategy = strat)
+    # Measure
+    best = typemax(Float64)
+    for _ in 1:reps
+        t = @elapsed F.searchsortedlast!(out, v, queries; strategy = strat)
+        best = min(best, t)
+    end
+    return best * 1.0e9 / length(queries)   # ns per query
+end
+
+# --------------------------------------------------------------------------
+# Full sweep
+# --------------------------------------------------------------------------
+function run_sweep(;
+        v_kinds = (:uniform, :jittered, :logspaced, :twoscale, :random_sorted),
+        q_kinds = (:dense, :sparse, :clustered, :sorted_random),
+        ns = (256, 1024, 4096, 16_384, 65_536),
+        ms = (4, 16, 64, 256, 1024, 4096),
+        eltypes = (Int64, Float64),
+        seed = 2026,
+    )
+    rows = []
+    cell_idx = 0
+    n_cells = length(v_kinds) * length(q_kinds) * length(ns) * length(ms) * length(eltypes)
+
+    for T in eltypes, v_kind in v_kinds, q_kind in q_kinds, n in ns, m in ms
+        m > n && continue
+        cell_idx += 1
+        v = make_v(Val(v_kind), T, n, seed)
+        q = make_q(Val(q_kind), v, m, seed + 1)
+        out = Vector{Int}(undef, m)
+
+        # Build SearchProperties for the cached-Auto comparison
+        props = F.SearchProperties(v)
+
+        times = Dict{String, Float64}()
+        for (name, strat) in STRATS
+            t = bench_one(v, q, strat, out)
+            times[name] = t
+        end
+        # Auto (un-cached) and Auto (cached)
+        times["Auto"] = bench_one(v, q, F.Auto(), out)
+        times["Auto+props"] = bench_one(v, q, F.Auto(props), out)
+
+        push!(
+            rows, (
+                eltype = string(T),
+                v_kind = string(v_kind),
+                q_kind = string(q_kind),
+                n = n,
+                m = m,
+                times = times,
+            )
+        )
+
+        if cell_idx % 25 == 0
+            best_explicit = minimum(times[s] for s in first.(STRATS))
+            @printf(
+                "  [%4d/%4d] %s/%s/%s n=%d m=%d  best=%.0f ns/q  Auto=%.0f  Auto+p=%.0f\n",
+                cell_idx, n_cells, T, v_kind, q_kind, n, m,
+                best_explicit, times["Auto"], times["Auto+props"]
+            )
+        end
+    end
+    return rows
+end
+
+# --------------------------------------------------------------------------
+# Analyze: per-cell winner, Auto slack vs best
+# --------------------------------------------------------------------------
+function analyze(rows)
+    strat_names = first.(STRATS)
+    summary = Dict{String, Int}()
+    auto_slacks = Float64[]
+    auto_cached_slacks = Float64[]
+
+    for r in rows
+        # Find the per-cell winner among the explicit strategies.
+        per_cell = [(s, r.times[s]) for s in strat_names]
+        best_t = minimum(p[2] for p in per_cell)
+        best_name = first(per_cell[argmin([p[2] for p in per_cell])])
+        summary[best_name] = get(summary, best_name, 0) + 1
+        push!(auto_slacks, r.times["Auto"] / best_t)
+        push!(auto_cached_slacks, r.times["Auto+props"] / best_t)
+    end
+
+    println()
+    println("Per-cell winners (across $(length(rows)) cells):")
+    for name in strat_names
+        n = get(summary, name, 0)
+        @printf("  %-22s %5d cells (%5.1f%%)\n", name, n, 100 * n / length(rows))
+    end
+
+    println()
+    @printf("Auto slack (Auto-time / per-cell-best-time):\n")
+    @printf(
+        "  median %.2fx, mean %.2fx, p90 %.2fx, p95 %.2fx, max %.2fx\n",
+        median(auto_slacks), mean(auto_slacks),
+        quantile(auto_slacks, 0.9), quantile(auto_slacks, 0.95),
+        maximum(auto_slacks)
+    )
+    @printf("Auto(props) slack:\n")
+    @printf(
+        "  median %.2fx, mean %.2fx, p90 %.2fx, p95 %.2fx, max %.2fx\n",
+        median(auto_cached_slacks), mean(auto_cached_slacks),
+        quantile(auto_cached_slacks, 0.9), quantile(auto_cached_slacks, 0.95),
+        maximum(auto_cached_slacks)
+    )
+
+    return summary, auto_slacks, auto_cached_slacks
+end
+
+function print_worst_cells(rows, k = 20)
+    strat_names = first.(STRATS)
+    println()
+    println("Worst Auto-slack cells (current Auto picks strategy that's much slower than optimal):")
+    @printf(
+        "  %-7s %-15s %-15s %6s %6s   %-20s slack(Auto) slack(Auto+p)\n",
+        "eltype", "v_kind", "q_kind", "n", "m", "best (time)"
+    )
+    scored = [
+        (r, r.times["Auto"] / minimum(r.times[s] for s in strat_names))
+            for r in rows
+    ]
+    sort!(scored; by = x -> -x[2])
+    for (r, slack) in scored[1:min(k, length(scored))]
+        best_t = minimum(r.times[s] for s in strat_names)
+        best_name = strat_names[argmin([r.times[s] for s in strat_names])]
+        slack_cached = r.times["Auto+props"] / best_t
+        @printf(
+            "  %-7s %-15s %-15s %6d %6d   %-15s %.0fns  %5.2fx     %5.2fx\n",
+            r.eltype, r.v_kind, r.q_kind, r.n, r.m, best_name, best_t,
+            slack, slack_cached
+        )
+    end
+    return
+end
+
+println("FindFirstFunctions Auto algorithm benchmark sweep")
+println("="^70)
+println("Strategies:")
+for (name, _) in STRATS
+    println("  ", name)
+end
+println("Plus: Auto(), Auto(SearchProperties(v))")
+println()
+println("Starting sweep...")
+println()
+
+@time rows = run_sweep()
+
+println()
+println("Sweep complete.")
+println()
+
+analyze(rows)
+print_worst_cells(rows, 30)
+
+open(joinpath(@__DIR__, "results.csv"), "w") do io
+    header = ["eltype", "v_kind", "q_kind", "n", "m"]
+    append!(header, first.(STRATS))
+    push!(header, "Auto", "Auto+props")
+    println(io, join(header, ","))
+    for r in rows
+        cols = String[r.eltype, r.v_kind, r.q_kind, string(r.n), string(r.m)]
+        for s in first.(STRATS)
+            push!(cols, string(r.times[s]))
+        end
+        push!(cols, string(r.times["Auto"]))
+        push!(cols, string(r.times["Auto+props"]))
+        println(io, join(cols, ","))
+    end
+end
+println("\nRaw data: bench/results.csv")
diff --git a/bench/bitinterp_sweep.jl b/bench/bitinterp_sweep.jl
new file mode 100644
index 0000000..234bb0f
--- /dev/null
+++ b/bench/bitinterp_sweep.jl
@@ -0,0 +1,226 @@
+using FindFirstFunctions, StableRNGs, BenchmarkTools, Statistics, Printf
+
+const F = FindFirstFunctions
+
+# Strategies under test. BitInterp is the one we're vetting; the others are
+# the alternatives `Auto` might pick or that a user might pin.
+const STRATS = [
+    ("BitInterp", F.BitInterpolationSearch()),
+    ("Interp", F.InterpolationSearch()),
+    ("Bracket", F.BracketGallop()),
+    ("Exp", F.ExpFromLeft()),
+    ("SIMD", F.SIMDLinearScan()),
+    ("Auto", F.Auto()),
+]
+
+# Data shapes covering the regimes BitInterp could conceivably win.
+function build_v(::Val{:uniform}, n, seed)
+    return collect(range(1.0, Float64(n); length = n))
+end
+function build_v(::Val{:logspaced}, n, seed)
+    return collect(exp.(range(0.0, log(1.0e6); length = n)))
+end
+function build_v(::Val{:logspaced_wide}, n, seed)
+    # 10⁻³ to 10¹⁵ — ~18 decades, like a particle-physics interpolation table
+    return collect(exp.(range(log(1.0e-3), log(1.0e15); length = n)))
+end
+function build_v(::Val{:geometric_dense}, n, seed)
+    # Geometric with ratio chosen so the series spans 10⁶ — keeps Float64
+    # range usable up to n ≈ 10⁷ while keeping the spacing tight.
+    r = (1.0e6)^(1.0 / max(n - 1, 1))
+    return collect(r .^ (0:(n - 1)))
+end
+function build_v(::Val{:geometric_sparse}, n, seed)
+    # Geometric spanning 10¹² with sparser spacing.
+    r = (1.0e12)^(1.0 / max(n - 1, 1))
+    return collect(r .^ (0:(n - 1)))
+end
+function build_v(::Val{:power2}, n, seed)
+    return collect(Float64(i)^2 for i in 1:n)
+end
+function build_v(::Val{:sqrt}, n, seed)
+    return collect(sqrt(Float64(i)) for i in 1:n)
+end
+function build_v(::Val{:two_decade}, n, seed)
+    # 80% of values in [1, 2], 20% in [2, 1e6] — extreme density-then-sparse
+    h = (8n) ÷ 10
+    a = collect(range(1.0, 2.0; length = h))
+    b = collect(exp.(range(log(2.0), log(1.0e6); length = n - h)))
+    return sort!(vcat(a[1:(end - 1)], b))
+end
+function build_v(::Val{:jittered_log}, n, seed)
+    base = collect(exp.(range(0.0, log(1.0e6); length = n)))
+    rng = StableRNG(seed)
+    return sort!(base .* (1.0 .+ 0.01 .* (rand(rng, n) .- 0.5)))
+end
+
+function build_q(v, m, kind::Symbol, seed)
+    rng = StableRNG(seed)
+    if kind == :linear_uniform
+        # m queries uniformly random in v's full LINEAR range
+        return sort!(first(v) .+ rand(rng, m) .* (last(v) - first(v)))
+    elseif kind == :log_uniform
+        # m queries uniformly random in v's full LOG range — matches the
+        # distribution of log-spaced v itself
+        first(v) > 0 || error("log_uniform requires positive v")
+        return sort!(
+            exp.(log(first(v)) .+ rand(rng, m) .* (log(last(v)) - log(first(v))))
+        )
+    elseif kind == :dense_grid
+        # Evenly-spaced linear grid
+        return collect(range(first(v), last(v); length = m))
+    elseif kind == :log_grid
+        first(v) > 0 || error("log_grid requires positive v")
+        return collect(exp.(range(log(first(v)), log(last(v)); length = m)))
+    else
+        error("unknown q kind: $kind")
+    end
+end
+
+function bench_cell(v, q, strat, out, reps = 5)
+    F.searchsortedlast!(out, v, q; strategy = strat)
+    best = typemax(Float64)
+    for _ in 1:reps
+        t = @elapsed F.searchsortedlast!(out, v, q; strategy = strat)
+        best = min(best, t)
+    end
+    return best * 1.0e9 / length(q)
+end
+
+function correctness_check(v, q, strat)
+    out = Vector{Int}(undef, length(q))
+    expected = searchsortedlast.(Ref(v), q)
+    F.searchsortedlast!(out, v, q; strategy = strat)
+    return out == expected
+end
+
+println("BitInterpolationSearch comprehensive sweep")
+println("="^70)
+println("Testing where BitInterp might win across spacing × density × n × m")
+println()
+
+# Multiple seeds for noise reduction
+function run_sweep()
+    v_kinds = (
+        :uniform, :logspaced, :logspaced_wide,
+        :geometric_dense, :geometric_sparse,
+        :power2, :sqrt, :two_decade, :jittered_log,
+    )
+    q_kinds = (:linear_uniform, :log_uniform, :dense_grid, :log_grid)
+    ns = (1024, 4096, 16_384, 65_536, 262_144, 1_048_576)
+    ms = (4, 16, 64, 256, 1024, 4096, 16_384)
+
+    rows = []
+    bit_wins = 0
+    bit_within10 = 0  # within 10% of best
+    bit_within20 = 0  # within 20% of best
+    total = 0
+
+    for v_kind in v_kinds, q_kind in q_kinds, n in ns, m in ms
+        m > n && continue
+        v = build_v(Val(v_kind), n, 2026)
+        # Some q_kinds require positive v
+        if (q_kind == :log_uniform || q_kind == :log_grid) && first(v) <= 0
+            continue
+        end
+        q = build_q(v, m, q_kind, 2027)
+        out = Vector{Int}(undef, m)
+
+        # Correctness first — bail loudly if mismatch.
+        correctness_check(v, q, F.BitInterpolationSearch()) ||
+            error("BitInterp correctness fail on $v_kind/$q_kind n=$n m=$m")
+
+        times = Dict{String, Float64}()
+        for (name, strat) in STRATS
+            times[name] = bench_cell(v, q, strat, out)
+        end
+        push!(rows, (v_kind, q_kind, n, m, times))
+
+        # Score
+        explicit = [(n, t) for (n, t) in pairs(times) if n != "Auto"]
+        best_t = minimum(t for (_, t) in explicit)
+        bit_t = times["BitInterp"]
+        if bit_t <= best_t
+            bit_wins += 1
+        end
+        if bit_t <= 1.1 * best_t
+            bit_within10 += 1
+        end
+        if bit_t <= 1.2 * best_t
+            bit_within20 += 1
+        end
+        total += 1
+    end
+
+    println("Cells tested: $total")
+    @printf(
+        "BitInterp wins outright:   %d cells (%.1f%%)\n",
+        bit_wins, 100 * bit_wins / total
+    )
+    @printf(
+        "BitInterp within 10%% of best: %d cells (%.1f%%)\n",
+        bit_within10, 100 * bit_within10 / total
+    )
+    @printf(
+        "BitInterp within 20%% of best: %d cells (%.1f%%)\n",
+        bit_within20, 100 * bit_within20 / total
+    )
+    println()
+
+    # If BitInterp wins anywhere, show those cells.
+    win_rows = [
+        r for r in rows if begin
+                explicit = [(n, t) for (n, t) in pairs(r[5]) if n != "Auto"]
+                best_t = minimum(t for (_, t) in explicit)
+                r[5]["BitInterp"] <= best_t
+            end
+    ]
+    if !isempty(win_rows)
+        println("Cells where BitInterp wins:")
+        @printf(
+            "  %-16s %-14s %8s %6s   %-10s vs second-best\n",
+            "v_kind", "q_kind", "n", "m", "BitInterp"
+        )
+        for (v_kind, q_kind, n, m, times) in win_rows
+            explicit = sort(
+                [(name, t) for (name, t) in pairs(times) if name != "Auto"];
+                by = x -> x[2]
+            )
+            second = explicit[2]
+            @printf(
+                "  %-16s %-14s %8d %6d   %5.1f ns/q  vs %s=%5.1f (%.2fx)\n",
+                v_kind, q_kind, n, m, times["BitInterp"],
+                second[1], second[2], second[2] / times["BitInterp"]
+            )
+        end
+        println()
+    end
+
+    # Show the cells where BitInterp is closest to winning (top 10).
+    closest = sort(
+        rows; by = r -> begin
+            explicit = [(n, t) for (n, t) in pairs(r[5]) if n != "Auto"]
+            best_t = minimum(t for (_, t) in explicit)
+            r[5]["BitInterp"] / best_t
+        end
+    )
+    println("Closest 10 cells (BitInterp/best ratio):")
+    @printf(
+        "  %-16s %-14s %8s %6s   %-10s ratio winner\n",
+        "v_kind", "q_kind", "n", "m", "BitInterp"
+    )
+    for (v_kind, q_kind, n, m, times) in closest[1:min(10, end)]
+        explicit = [(name, t) for (name, t) in pairs(times) if name != "Auto"]
+        best_t = minimum(t for (_, t) in explicit)
+        winner = first(sort(explicit; by = x -> x[2]))
+        @printf(
+            "  %-16s %-14s %8d %6d   %5.1f ns/q  %.2fx  %s\n",
+            v_kind, q_kind, n, m, times["BitInterp"],
+            times["BitInterp"] / best_t, winner[1]
+        )
+    end
+
+    return rows
+end
+
+@time run_sweep()
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
new file mode 100644
index 0000000..8aba939
--- /dev/null
+++ b/benchmark/benchmarks.jl
@@ -0,0 +1,132 @@
+# AirspeedVelocity-driven benchmark suite. Each `SUITE[...]` entry is a
+# `BenchmarkGroup` (or `@benchmarkable`) that AirspeedVelocity will time on
+# both the PR branch and the base branch, then diff. Keep individual cells
+# fast (< 200 ms target) so the CI run finishes in a few minutes.
+
+using BenchmarkTools
+using FindFirstFunctions
+using StableRNGs
+
+const SUITE = BenchmarkGroup()
+
+# Helper: build a sorted Float64 vector with the given pattern.
+function _make_v(kind, n, seed = 2026)
+    rng = StableRNG(seed)
+    return if kind === :uniform
+        collect(range(0.0, Float64(n); length = n))
+    elseif kind === :logspaced
+        collect(exp.(range(0.0, log(1.0e6); length = n)))
+    elseif kind === :random_sorted
+        sort!(rand(rng, n) .* n)
+    else
+        error("unknown v kind $kind")
+    end
+end
+
+function _make_q(v, m, kind, seed = 2027)
+    rng = StableRNG(seed)
+    return if kind === :dense
+        collect(range(first(v), last(v); length = m))
+    elseif kind === :sparse
+        sort!(first(v) .+ rand(rng, m) .* (last(v) - first(v)))
+    elseif kind === :clustered
+        j = max(1, length(v) ÷ 4)
+        lo = v[j]
+        hi = v[min(j + 1, length(v))]
+        span = hi - lo
+        sort!(lo .+ rand(rng, m) .* max(span, 1.0))
+    else
+        error("unknown q kind $kind")
+    end
+end
+
+# ---------------------------------------------------------------------------
+# Per-strategy single-query micro-benchmarks (Int64, n=1024, hint near answer)
+# ---------------------------------------------------------------------------
+SUITE["per_query"] = BenchmarkGroup()
+let v = collect(Int64, 1:1024), x = Int64(500), hint = 480
+    for (name, strategy) in [
+            ("LinearScan", LinearScan()),
+            ("SIMDLinearScan", SIMDLinearScan()),
+            ("BracketGallop", BracketGallop()),
+            ("ExpFromLeft", ExpFromLeft()),
+            ("InterpolationSearch", InterpolationSearch()),
+            ("BinaryBracket", BinaryBracket()),
+            ("Auto", Auto()),
+        ]
+        SUITE["per_query"][name] = @benchmarkable(
+            searchsortedlast($strategy, $v, $x, $hint),
+            evals = 1, samples = 200,
+        )
+    end
+end
+
+# ---------------------------------------------------------------------------
+# Batched in-place benchmarks: a small grid of (v_kind, q_kind, n, m).
+# Auto vs hand-picked strategies, plus the queries_sorted = true fast path.
+# ---------------------------------------------------------------------------
+SUITE["batched"] = BenchmarkGroup()
+for v_kind in (:uniform, :logspaced, :random_sorted),
+        q_kind in (:dense, :sparse, :clustered),
+        (n, m) in ((1024, 64), (16_384, 256), (65_536, 4096))
+
+    v = _make_v(v_kind, n)
+    q = _make_q(v, m, q_kind)
+    out = Vector{Int}(undef, m)
+
+    key = "$(v_kind)/$(q_kind)/n=$n/m=$m"
+    grp = BenchmarkGroup()
+    grp["Auto"] = @benchmarkable(
+        FindFirstFunctions.searchsortedlast!($out, $v, $q; strategy = Auto()),
+        evals = 1, samples = 50,
+    )
+    grp["Auto+sorted"] = @benchmarkable(
+        FindFirstFunctions.searchsortedlast!(
+            $out, $v, $q;
+            strategy = Auto(), queries_sorted = true,
+        ),
+        evals = 1, samples = 50,
+    )
+    grp["Auto+props"] = @benchmarkable(
+        FindFirstFunctions.searchsortedlast!(
+            $out, $v, $q;
+            strategy = Auto(SearchProperties($v)),
+        ),
+        evals = 1, samples = 50,
+    )
+    SUITE["batched"][key] = grp
+end
+
+# ---------------------------------------------------------------------------
+# SearchProperties construction cost (probe + NaN scan on Float64)
+# ---------------------------------------------------------------------------
+SUITE["props_construct"] = BenchmarkGroup()
+for (name, v) in [
+        ("uniform_64k", collect(range(0.0, 100.0; length = 65_536))),
+        ("logspaced_64k", collect(exp.(range(0.0, log(1.0e6); length = 65_536)))),
+        ("int64_64k", collect(Int64, 1:65_536)),
+    ]
+    SUITE["props_construct"][name] = @benchmarkable(
+        SearchProperties($v),
+        evals = 1, samples = 50,
+    )
+end
+
+# ---------------------------------------------------------------------------
+# Equality search comparison
+# ---------------------------------------------------------------------------
+SUITE["equality"] = BenchmarkGroup()
+let v = collect(Int64, 1:65_536), x = Int64(50_000)
+    SUITE["equality"]["findequal_BinaryBracket"] = @benchmarkable(
+        findequal(BinaryBracket(), $v, $x),
+        evals = 1, samples = 200,
+    )
+    SUITE["equality"]["findequal_BisectThenSIMD"] = @benchmarkable(
+        findequal(BisectThenSIMD(), $v, $x),
+        evals = 1, samples = 200,
+    )
+    SUITE["equality"]["findfirstsortedequal"] = @benchmarkable(
+        findfirstsortedequal($x, $v),
+        evals = 1, samples = 200,
+    )
+end
diff --git a/docs/make.jl b/docs/make.jl
index f3845e9..be60ee1 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -15,7 +15,14 @@ makedocs(
         assets = ["assets/favicon.ico"],
         canonical = "https://docs.sciml.ai/FindFirstFunctions/stable/"
     ),
-    pages = ["index.md"]
+    pages = [
+        "Home" => "index.md",
+        "Interface and extension rules" => "interface.md",
+        "Search strategies" => "strategies.md",
+        "Guessers" => "guessers.md",
+        "Auto: heuristics and benchmarks" => "auto.md",
+        "Equality search" => "equality.md",
+    ]
 )
 
 deploydocs(repo = "github.com/SciML/FindFirstFunctions.jl"; push_preview = true)
diff --git a/docs/src/auto.md b/docs/src/auto.md
new file mode 100644
index 0000000..74fe7ff
--- /dev/null
+++ b/docs/src/auto.md
@@ -0,0 +1,331 @@
+# Auto: heuristics and benchmarks
+
+[`Auto`](@ref FindFirstFunctions.Auto) is the default strategy for the
+batched API. This page documents the decision tree it follows, the
+crossover constants embedded in it, and the benchmark sweep used to
+validate them. The numbers below are reproducible on any machine — the
+script at the end of the page generates the comparison grid.
+
+## What `Auto` decides
+
+The decision differs between per-query and batched callers.
+
+### Per-query: `searchsortedlast(Auto(), v, x[, hint])`
+
+```
+hint missing or out of axes(v)   →  BinaryBracket
+length(v) ≤ 16                   →  LinearScan        # _AUTO_LINEAR_THRESHOLD
+otherwise                        →  BracketGallop
+```
+
+`Auto` never picks `InterpolationSearch` or `ExpFromLeft` in the per-query
+path. Both pay setup costs (endpoint reads for `InterpolationSearch`, 5
+linear probes for `ExpFromLeft`) that the per-query path can't amortize.
+
+### Batched: `searchsortedlast!(out, v, queries; strategy = Auto())`
+
+The decision is driven by a single number: the expected average **gap** in
+`v`'s index space between consecutive query results. For sorted numeric
+queries this is `floor(n * span(queries) / span(v) / m)`; for non-numeric
+or pathologically-spaced data it falls back to `n ÷ m`.
+
+```
+m == 0                                       →  return out unchanged
+m == 1                                       →  one direct unhinted call
+queries unsorted                             →  unhinted Base.searchsortedlast loop
+gap ≤ 4                                      →  LinearScan
+4 < gap ≤ _auto_simd_gap_max(v)
+        AND SIMD-eligible (Int64/Float64)    →  SIMDLinearScan
+gap ≥ 8 AND n ≥ 1024 AND m ≥ 2
+        AND not skewed
+        AND linearity probe accepts          →  InterpolationSearch
+gap ≥ 16                                     →  BracketGallop
+otherwise                                    →  ExpFromLeft
+```
+
+`_auto_simd_gap_max(v)` is 64 for `DenseVector{Int64}` and `DenseVector{Float64}`,
+and 0 (never picked) for any other element type. For Float64 the SIMD path
+requires `v` to be NaN-free; if a populated `SearchProperties(v)` is attached
+to `Auto`, the cached `has_nan` flag is consulted, otherwise no-NaN is assumed
+(the same trust contract `Base.searchsortedlast` uses for sortedness of `v`).
+
+The "linearity probe accepts" check is tiered:
+
+  - For `_AUTO_INTERP_MIN_GAP ≤ gap < _AUTO_INTERP_LOOSE_GAP` (8 to 256):
+    strict 0.1% sampled tolerance — only truly uniform data passes.
+  - For `gap ≥ _AUTO_INTERP_LOOSE_GAP` (≥ 256): loose 5% tolerance — at
+    these gap sizes, `InterpolationSearch`'s cache benefit compensates for
+    a worse guess, and the bounded-binary-search refinement is still
+    O(log n). This unlocks `InterpolationSearch` on approximately linear
+    data like sorted random or mildly jittered vectors at large `n`.
+
+The "skewed" check guards `InterpolationSearch` from the opposite direction:
+if the queries are clustered into one region of their span (median query
+more than 20% off the midpoint of the query span), `Auto` falls through to
+`BracketGallop` / `ExpFromLeft`, because consecutive queries land in the
+same neighbourhood and the previous-result hint is worth more than the
+linear-extrapolation guess. Skew detection is gated on `m ≥ 10` — for
+smaller `m` the median sampling variance overwhelms the signal.
+
+The `BracketGallop` fallback at `gap ≥ _AUTO_GALLOP_GAP_MIN` (= 16) exists
+because, at large gaps, `ExpFromLeft`'s five up-front linear probes are
+guaranteed to miss — the answer is much more than 5 elements past the
+previous-result hint. `BracketGallop` skips that wasted preamble and starts
+doubling from one position past `hint`.
+
+## Crossover constants
+
+The constants are defined at the top of `src/FindFirstFunctions.jl` and
+reproduced here so they are easy to find from the docs:
+
+| Constant | Value | What it gates |
+|---|---|---|
+| `_AUTO_LINEAR_THRESHOLD` | 16 | Per-query `LinearScan` vs `BracketGallop` crossover on hinted calls. |
+| `_AUTO_BATCH_LINEAR_GAP` | 4 | Batched `LinearScan` ceiling. |
+| `_AUTO_SIMD_GAP_MAX` (Int64 / Float64) | 64 / 64 | Maximum gap for `SIMDLinearScan` on dense Int64 / Float64. |
+| `_AUTO_GALLOP_GAP_MIN` | 16 | Above this gap, prefer `BracketGallop` over `ExpFromLeft` when `InterpolationSearch` isn't picked. |
+| `_AUTO_INTERP_MIN_GAP` | 8 | Minimum gap below which `InterpolationSearch` is never picked. |
+| `_AUTO_INTERP_MIN_N` | 1024 | Minimum `length(v)` below which `InterpolationSearch` is never picked. |
+| `_AUTO_INTERP_MIN_M` | 2 | Minimum `length(queries)`; single-query batches skip the heuristic entirely. |
+| `_AUTO_INTERP_LOOSE_GAP` | 256 | At this gap, the linearity probe switches to the loose tolerance. |
+| `_AUTO_LINEAR_REL_TOLERANCE` | 1.0e-3 | Strict-tier tolerance of the sampled-linearity probe. |
+| `_AUTO_LINEAR_LOOSE_TOLERANCE` | 5.0e-2 | Loose-tier tolerance, used for `gap ≥ _AUTO_INTERP_LOOSE_GAP`. |
+
+These are not user-tunable from outside — they shipped with the version of
+the package documented here. Tightening or loosening them requires a PR
+with new benchmark numbers across the regime grid below.
+
+## Why each branch is there
+
+**`gap ≤ 4 → LinearScan`.** `ExpFromLeft` issues 5 unconditional linear
+probes at the start of every call. When the gap is 0 or 1, those probes
+are wasted. `LinearScan` walks the same handful of indices with no
+bracketing overhead and wins by ~30%. The crossover is empirically at gap
+≈ 4 on AVX2 hardware; below that, `LinearScan` is strictly faster.
+
+**`InterpolationSearch` only on linear `v`.** On uniformly-spaced numeric
+data, `InterpolationSearch`'s first guess is the answer — one comparison
+per query, independent of `n`. On irregular data the guess is bad and the
+binary-search fallback runs over the whole vector, costing ~14× more than
+`ExpFromLeft` on log-spaced data and ~4× more on two-scale (piecewise
+dense+sparse) data. The 0.1% tolerance on the linearity probe is what
+keeps it in the linear regime where it wins.
+
+**`ExpFromLeft` is the default sparse-batched choice.** Across every
+non-linear `v` pattern tested, `ExpFromLeft` from `prev_result` is within
+20% of the per-cell optimum. It handles `gap = 5` (3-5 linear probes,
+hit) just as well as `gap = 10⁴` (doubling search across 14 levels) with
+no setup cost worth speaking of.
+
+**Skew override.** Even on linear `v`, when consecutive queries land in
+the same region (clustered queries), `prev_result` is closer to the
+current answer than the linear-extrapolation guess from the endpoints.
+`ExpFromLeft` wins in that regime by ~2-3×.
+
+## Reproducing the benchmarks
+
+The full sweep lives at [`bench/auto_sweep.jl`](https://github.com/SciML/FindFirstFunctions.jl/blob/main/bench/auto_sweep.jl)
+with the regime grid pre-configured. Run with
+`julia --project=bench bench/auto_sweep.jl`. It evaluates every shipped
+strategy against every regime cell, computes the per-cell winner, and
+reports `Auto`'s slack distribution against that optimum. An analysis
+helper at [`bench/analyze.jl`](https://github.com/SciML/FindFirstFunctions.jl/blob/main/bench/analyze.jl)
+reads the resulting `bench/results.csv` and prints per-strategy
+win-by-regime tables.
+
+The inline script below is the minimum-viable version of the same sweep —
+copy into a file and execute if you want to validate the numbers without
+cloning the repository.
+
+```julia
+using BenchmarkTools, FindFirstFunctions, Random, Statistics
+
+const STRATEGIES = (
+    LinearScan(), BracketGallop(), ExpFromLeft(),
+    InterpolationSearch(), BinaryBracket(),
+)
+const STRAT_NAMES = ("LinearScan", "BracketGallop", "ExpFromLeft",
+                     "InterpolationSearch", "BinaryBracket")
+
+# ----- v patterns ------------------------------------------------------------
+make_v(:uniform,   n) = collect(range(0.0, 1.0; length = n))
+make_v(:jittered,  n) = begin
+    rng = Xoshiro(0)
+    base = collect(range(0.0, 1.0; length = n))
+    sort!(base .+ 0.01 .* (rand(rng, n) .- 0.5) ./ n)
+end
+make_v(:logspaced, n) = collect(exp.(range(log(1.0), log(1.0e6); length = n)))
+make_v(:twoscale,  n) = begin
+    h = n ÷ 2
+    sort!(vcat(range(0.0, 0.1; length = h),
+               range(0.1, 1.0; length = n - h)))
+end
+make_v(:random,    n) = sort!(rand(Xoshiro(0), n))
+
+# ----- query patterns --------------------------------------------------------
+make_q(:dense,     v, m) = collect(range(v[1], v[end]; length = m))
+make_q(:sparse,    v, m) = begin
+    rng = Xoshiro(1)
+    sort!(rand(rng, m) .* (v[end] - v[1]) .+ v[1])
+end
+make_q(:clustered, v, m) = begin
+    # all queries inside one segment of v
+    j = length(v) ÷ 4
+    sort!(rand(Xoshiro(2), m) .* (v[j + 1] - v[j]) .+ v[j])
+end
+make_q(:sorted_rand, v, m) = sort!(rand(Xoshiro(3), m) .* (v[end] - v[1]) .+ v[1])
+
+# ----- bench one cell --------------------------------------------------------
+function bench_cell(v_kind, q_kind, n, m)
+    v = make_v(Val(v_kind), n)
+    queries = make_q(Val(q_kind), v, m)
+    out = Vector{Int}(undef, m)
+    times = Float64[]
+    for s in STRATEGIES
+        t = @belapsed FindFirstFunctions.searchsortedlast!(
+            $out, $v, $queries; strategy = $s
+        )
+        push!(times, t)
+    end
+    t_auto = @belapsed FindFirstFunctions.searchsortedlast!(
+        $out, $v, $queries; strategy = Auto()
+    )
+    return times, t_auto
+end
+
+# wrap so we can dispatch on Val(:uniform), etc.
+make_v(::Val{K}, n) where {K} = make_v(K, n)
+make_q(::Val{K}, v, m) where {K} = make_q(K, v, m)
+
+# ----- sweep -----------------------------------------------------------------
+function run_sweep()
+    ns        = (64, 256, 1024, 4096, 16384, 65536)
+    ms        = (1, 4, 16, 64, 256, 1024, 4096)
+    v_kinds   = (:uniform, :jittered, :logspaced, :twoscale, :random)
+    q_kinds   = (:dense, :sparse, :clustered, :sorted_rand)
+
+    println("v_kind\tq_kind\tn\tm\tAuto/best\tbest_strat\tAuto_pick_strat")
+    autoslack = Float64[]
+    for v_kind in v_kinds, q_kind in q_kinds, n in ns, m in ms
+        m > n && continue
+        times, t_auto = bench_cell(v_kind, q_kind, n, m)
+        best, j = findmin(times)
+        ratio = t_auto / best
+        push!(autoslack, ratio)
+        println(string(v_kind, "\t", q_kind, "\t", n, "\t", m,
+                       "\t", round(ratio; digits = 2),
+                       "\t", STRAT_NAMES[j]))
+    end
+    println()
+    println("Auto-vs-best ratio summary:")
+    println("  median ", round(median(autoslack); digits = 3))
+    println("  mean   ", round(mean(autoslack);   digits = 3))
+    println("  p90    ", round(quantile(autoslack, 0.9); digits = 3))
+    println("  max    ", round(maximum(autoslack); digits = 3))
+end
+
+run_sweep()
+```
+
+### Headline results
+
+The 2.0 sweep covers 1080 cells across 5 `v` patterns × 4 query patterns ×
+5 `n` sizes × 6 `m` sizes × 2 element types (`Int64`, `Float64`), measured
+on AVX2 hardware.
+
+| Metric | `Auto()` | `Auto(SearchProperties(v))` |
+|---|---|---|
+| median slack | 1.04× | 1.03× |
+| mean slack | 1.09× | 1.08× |
+| p90 slack | 1.33× | 1.31× |
+| p95 slack | 1.38× | 1.38× |
+| max slack | 2.18× | 2.09× |
+
+Per-cell winner distribution across the sweep:
+
+  - LinearScan: 47% of cells (small-gap regime, all eltypes)
+  - SIMDLinearScan: 25% of cells (medium-gap regime, Int64/Float64 dense)
+  - InterpolationSearch: 13% of cells (large-gap regime, ~linear v)
+  - ExpFromLeft: 8% of cells (small-medium-gap regime, non-SIMD eltypes)
+  - BracketGallop: 8% of cells (large-gap regime, non-linear v)
+
+The cells where `Auto` slack is highest are boundary cells at `m = 4` where
+the per-cell winner is a measurement artefact (each measurement amortises
+over only 4 queries, so per-call setup noise dominates). The bulk of the
+distribution sits well below 1.5×.
+
+### Reading the comparison table
+
+For each cell the script prints `Auto/best` (a ratio ≥ 1) along with the
+per-cell winner and `Auto`'s actual pick. `Auto/best = 1.0` means `Auto`
+picked the winner; `Auto/best = 1.5` means whatever `Auto` picked was 50%
+slower than the per-cell winner. Investigate any row where the ratio
+exceeds 1.5 — those are candidate cells for tightening one of the
+constants in the table above.
+
+## Caching with `SearchProperties`
+
+Every call to `searchsortedlast!(out, v, queries; strategy = Auto())` against
+the same `v` re-runs the same probes — `_sampled_looks_linear(v)` reads 11
+elements (~25 ns), and the cost is per-call regardless of how many times
+you've already searched `v`. For callers issuing many short batches against
+a single sorted vector (interpolation segment lookups being the obvious
+case), caching the probes once and reusing the result is a real win.
+
+The cache is a small `isbits` struct, [`SearchProperties`](@ref
+FindFirstFunctions.SearchProperties), that `Auto` accepts via
+`Auto(props)`:
+
+```julia
+using FindFirstFunctions
+
+v = collect(0.0:0.001:100.0)
+props = SearchProperties(v)            # run probes once
+strat = Auto(props)                    # `Auto` holding the cached facts
+
+# Every subsequent searchsortedlast!/searchsortedfirst! call skips the
+# linearity probe inside Auto.
+queries = sort!(rand(8) .* 100.0)
+out = Vector{Int}(undef, length(queries))
+searchsortedlast!(out, v, queries; strategy = strat)
+```
+
+`SearchProperties` is `isbits` — it travels in registers and copies are
+free. `Auto(props)` is itself zero-allocation; the resulting `Auto` is a
+single concrete struct, not a parametric type, so call sites stay
+type-stable without specialization explosions.
+
+Currently consumed: `props.is_linear` (replaces Auto's
+`_sampled_looks_linear` probe in the batched path). The other fields
+(`has_props`, `has_nan`) are populated by `SearchProperties(v)` for forward
+compatibility but no strategy reads them yet. Construction cost is O(1) for
+integer eltypes (only the sampled-linearity probe runs) and O(n) for
+floating-point eltypes (additionally `any(isnan, v)`).
+
+Trust contract: the cache is not invalidated automatically. If `v` mutates
+after `SearchProperties(v)`, the caller must reconstruct the cache. Lying
+to `Auto` via a hand-constructed `SearchProperties(true, true, false)` on
+genuinely non-linear data is correctness-preserving (the chosen
+`InterpolationSearch` falls through to `BracketGallop` from a bad guess —
+slow but still O(log n)), so the worst case of a stale cache is a
+performance regression, not wrong answers.
+
+## When `Auto` is wrong for you
+
+If your workload sits in a corner that `Auto` doesn't read well, pin the
+strategy directly:
+
+```julia
+# Sorted batch over a known-linear range, large m and n: skip the probe.
+searchsortedlast!(out, v, queries; strategy = InterpolationSearch())
+
+# Sorted batch but queries are guaranteed adjacent: don't pay for the
+# 5 linear probes in ExpFromLeft.
+searchsortedlast!(out, v, queries; strategy = LinearScan())
+
+# No hint and the access pattern is random: skip Auto's probes entirely.
+searchsortedlast!(out, v, queries; strategy = BinaryBracket())
+```
+
+The strategy types are zero-allocation singletons, so pinning is free at
+runtime and just removes the heuristic from the hot path.
diff --git a/docs/src/equality.md b/docs/src/equality.md
new file mode 100644
index 0000000..1fdf3e9
--- /dev/null
+++ b/docs/src/equality.md
@@ -0,0 +1,77 @@
+# Equality search
+
+This page documents the package's equality-search routines —
+[`findfirstequal`](@ref FindFirstFunctions.findfirstequal) and
+[`findfirstsortedequal`](@ref FindFirstFunctions.findfirstsortedequal). They
+answer a *different* question from the [Search strategies](@ref) covered
+elsewhere:
+
+  - **Sorted-search** strategies answer "*where would `x` insert?*". The
+    return value is always an in-range index — the bracketing position.
+  - **Equality** search answers "*does `x` exist, and if so at what index?*".
+    The return value is `nothing` if `x` is not present.
+
+These two surfaces live side-by-side because the second cannot be expressed
+as a `SearchStrategy` — its return type differs (`Union{Int, Nothing}` vs.
+in-range `Int`), and the unsorted variant doesn't even require a sorted
+input.
+
+For a strategy-framework-compatible *sorted* equality search that returns
+`Int` with a sentinel (so it composes with the rest of the strategy
+dispatch), see [`findequal`](@ref FindFirstFunctions.findequal) on the
+[Search strategies](@ref) page. `findequal(BisectThenSIMD(), v, x)` is the
+strategy entry point that internally calls into the same algorithm as
+`findfirstsortedequal`.
+
+## API reference
+
+```@docs
+FindFirstFunctions.findfirstequal
+FindFirstFunctions.findfirstsortedequal
+```
+
+## When to use which
+
+| Question | Vector | Recommended |
+|---|---|---|
+| Does `x` occur in this *unsorted* vector? | any | [`findfirstequal`](@ref FindFirstFunctions.findfirstequal) |
+| Does `x` occur in this *sorted* vector? | `DenseVector{Int64}` + `Int64` | [`findfirstsortedequal`](@ref FindFirstFunctions.findfirstsortedequal) (or `findequal(BisectThenSIMD(), v, x)` for the sentinel-returning variant) |
+| Does `x` occur in this *sorted* vector? | other eltypes | [`findequal`](@ref FindFirstFunctions.findequal) with any strategy |
+| Where would `x` insert into this sorted vector? | any | `searchsortedfirst(strategy, v, x[, hint])` |
+
+## SIMD primitives
+
+Both equality functions are backed by the same SIMD-equality LLVM IR
+scaffolding used internally throughout the package (`load <8 x i64>`,
+`icmp eq`, `cttz` on the bitmask of the 8-wide compare). The IR template
+[`FindFirstFunctions._simd_scan_ir`](@ref) generates this for the equality
+predicate; the same template generates the `>` / `>=` variants for
+[`SIMDLinearScan`](@ref FindFirstFunctions.SIMDLinearScan).
+
+The SIMD path is `Int64`-only — the LLVM IR is keyed off `i64` element
+width and 8-byte stride. Every other element type and storage layout
+falls through to a scalar `findfirst(==(x), v)` path. Specifically:
+
+  - `findfirstequal(x::Int64, v::DenseVector{Int64})` — full SIMD scan.
+  - `findfirstequal(x, v)` (generic) — `findfirst(isequal(x), v)`.
+  - `findfirstsortedequal(x::Int64, v::DenseVector{Int64})` — branchless
+    binary bisection to a small basecase, then SIMD equality scan within
+    the basecase window. The bisection uses a strict `<` predicate so
+    that earlier duplicates are not skipped (a previous version of the
+    routine used `<=` and could return a later duplicate; fixed in 2.0).
+
+## Sentinel vs. `Nothing`
+
+The two equality APIs differ in how they report "not found":
+
+```julia
+findfirstequal(x, v)        # -> Union{Int, Nothing}, `nothing` on miss
+findfirstsortedequal(x, v)  # -> Union{Int, Nothing}, `nothing` on miss
+findequal(strategy, v, x)   # -> Int, firstindex(v) - 1 on miss
+```
+
+`findequal`'s sentinel is type-stable and composes with the rest of the
+strategy dispatch — that's the recommended choice for new sorted-equality
+code. The two `findfirst*equal` names continue to return
+`Union{Int, Nothing}` for backwards compatibility with callers and pattern
+matching against `nothing`.
diff --git a/docs/src/guessers.md b/docs/src/guessers.md
new file mode 100644
index 0000000..98f18fd
--- /dev/null
+++ b/docs/src/guessers.md
@@ -0,0 +1,119 @@
+# Guessers
+
+A [`Guesser`](@ref FindFirstFunctions.Guesser) is a small wrapper around a
+sorted vector that produces an integer index hint for a given query, using
+one of two strategies decided at construction time:
+
+  - **Linear-extrapolation lookup**, when `v` is approximately uniformly
+    spaced. The guess is `firstindex(v) + round((x - v[1]) / (v[end] - v[1]) * (length(v) - 1))`.
+    Cost is O(1) per call, independent of `length(v)`.
+  - **Cached previous result**, otherwise. The guesser hands back its
+    `idx_prev` field. Useful for callers that issue temporally-correlated
+    queries — the previous answer is a good seed for `BracketGallop`.
+
+The Guesser does **not** perform the actual search. To use it as a search
+strategy, wrap it in [`GuesserHint`](@ref FindFirstFunctions.GuesserHint).
+
+## Construction
+
+```@docs
+FindFirstFunctions.Guesser
+```
+
+The threshold passed via `looks_linear_threshold` is the same threshold used
+by [`looks_linear`](@ref FindFirstFunctions.looks_linear) (which the Guesser
+calls under the hood). The default `1e-2` accepts everything from exact
+linear ranges through evenly-spaced grids with mild jitter, but rejects
+log-spaced or piecewise-spaced data.
+
+```@docs
+FindFirstFunctions.looks_linear
+```
+
+## Calling a Guesser
+
+`guesser(x)` returns an integer index hint, but does **not** update any
+state. The returned hint may be outside `axes(v)` if `x` is past either end
+of `v`; callers should treat the hint as advisory and clamp.
+
+```julia
+v = collect(0.0:0.1:10.0)
+g = Guesser(v)
+g(3.14)   # → 32, an O(1) extrapolation guess
+```
+
+For irregular data the same call returns `g.idx_prev[]` — the last index
+written by a [`GuesserHint`](@ref FindFirstFunctions.GuesserHint) search (or
+`1` if no search has run yet).
+
+## Plugging into the strategy dispatch
+
+[`GuesserHint`](@ref FindFirstFunctions.GuesserHint) is the strategy adapter
+that turns a `Guesser` into a `SearchStrategy`. It:
+
+  1. Calls `guesser(x)` to obtain an integer guess.
+  2. Dispatches to [`BracketGallop`](@ref FindFirstFunctions.BracketGallop)
+     from that guess.
+  3. Writes the resulting index back into `guesser.idx_prev`.
+
+```@docs
+FindFirstFunctions.GuesserHint
+```
+
+Per-call cost is one `guesser(x)` evaluation (O(1)) plus one `BracketGallop`
+(O(1) when the guess is close) plus one `idx_prev[]` write.
+
+The vector passed to `searchsortedfirst` / `searchsortedlast` must be the
+same object the Guesser wraps — `GuesserHint` asserts `v === s.guesser.v` to
+catch the obvious misuse.
+
+```julia
+v = collect(0.0:0.1:10.0)
+g = Guesser(v)
+strat = GuesserHint(g)
+
+i = searchsortedlast(strat, v, 3.14)
+@assert g.idx_prev[] == i   # the guesser caches the last result
+```
+
+`GuesserHint` ignores any externally-supplied hint — the Guesser carries its
+own hint state, and accepting a foreign hint would defeat the cache.
+
+## Pattern: correlated lookups for interpolation
+
+The intended use is a wrapper struct that owns both the sorted vector and a
+matching `Guesser`. Every query against the wrapper feeds through
+`GuesserHint`:
+
+```julia
+struct Interp{V}
+    v::V
+    g::Guesser{V}
+end
+Interp(v) = Interp(v, Guesser(v))
+
+function find_segment(itp::Interp, x)
+    return searchsortedlast(GuesserHint(itp.g), itp.v, x)
+end
+```
+
+After warmup, repeated calls to `find_segment` with temporally correlated
+`x` cost O(1): the previous result is one slot away from the current
+answer, so the `BracketGallop` inside `GuesserHint` returns after a couple
+of comparisons.
+
+## When not to use a Guesser
+
+  - **One-shot queries.** Construct cost is O(n) for the `looks_linear`
+    probe — wasted if you only intend to search once. Use
+    [`BinaryBracket`](@ref FindFirstFunctions.BinaryBracket) or pass `Auto()`
+    with no hint.
+  - **Sorted batches.** The batched
+    [`searchsortedlast!`](@ref FindFirstFunctions.searchsortedlast!) /
+    [`searchsortedfirst!`](@ref FindFirstFunctions.searchsortedfirst!) APIs
+    already do `prev_result`-style hinting internally; they don't need a
+    Guesser and they pick a faster strategy when the sweep is dense.
+  - **Strictly random queries against irregular `v`.** `looks_linear` returns
+    false, so the Guesser degrades to returning `idx_prev[]`, which is
+    useless for random queries. `BracketGallop` from `idx_prev` is fine but
+    `Auto`-on-a-batch does the same thing without the Guesser overhead.
diff --git a/docs/src/index.md b/docs/src/index.md
index f516303..045b97c 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,29 +1,61 @@
 # FindFirstFunctions.jl
 
-FindFirstFunctions.jl is a library for accelerated `findfirst` type functions.
+FindFirstFunctions.jl is a library of accelerated `findfirst`-style and
+sorted-search routines. The package provides:
+
+  - A **strategy-dispatched** sorted-search API: a single pair of
+    `Base.searchsortedfirst` / `Base.searchsortedlast` overloads that take a
+    [`SearchStrategy`](@ref FindFirstFunctions.SearchStrategy) as the first
+    positional argument.
+  - **Batched** in-place lookups
+    [`searchsortedfirst!`](@ref FindFirstFunctions.searchsortedfirst!) /
+    [`searchsortedlast!`](@ref FindFirstFunctions.searchsortedlast!) that pick
+    a strategy automatically.
+  - **Guessers** that supply per-vector hints based on linear extrapolation
+    or a cached previous result.
+  - **Equality search** via [`findfirstequal`](@ref FindFirstFunctions.findfirstequal)
+    and [`findfirstsortedequal`](@ref FindFirstFunctions.findfirstsortedequal).
 
 ## Installation
 
-To install FindFirstFunctions.jl, use the Julia package manager:
-
 ```julia
 using Pkg
 Pkg.add("FindFirstFunctions")
 ```
 
-## Available Functions
-
-```@docs
-FindFirstFunctions.findfirstequal
-FindFirstFunctions.bracketstrictlymontonic
-FindFirstFunctions.looks_linear
-FindFirstFunctions.Guesser
-FindFirstFunctions.searchsortedfirstcorrelated
-FindFirstFunctions.searchsortedlastcorrelated
-FindFirstFunctions.searchsortedfirstexp
-FindFirstFunctions.searchsortedfirstvec
-FindFirstFunctions.searchsortedlastvec
-FindFirstFunctions.findfirstsortedequal
+## Guide
+
+  - [Interface and extension rules](@ref) — the public API surface, the
+    contract a `SearchStrategy` subtype must satisfy, and how to add a new
+    one.
+  - [Search strategies](@ref) — catalog of the built-in strategies, when each
+    one is fast, and when it falls back to plain binary search.
+  - [Guessers](@ref) — the [`Guesser`](@ref FindFirstFunctions.Guesser) type
+    and how to plug it into the strategy dispatch via
+    [`GuesserHint`](@ref FindFirstFunctions.GuesserHint).
+  - [Auto: heuristics and benchmarks](@ref) — what
+    [`Auto`](@ref FindFirstFunctions.Auto) picks in every regime, the
+    crossover constants, and the benchmark script that validates them.
+  - [Equality search](@ref) — the dedicated equality routines
+    [`findfirstequal`](@ref FindFirstFunctions.findfirstequal) and
+    [`findfirstsortedequal`](@ref FindFirstFunctions.findfirstsortedequal),
+    which live outside the strategy framework because their return type is
+    `Union{Int, Nothing}`.
+
+## Quick example
+
+```julia
+using FindFirstFunctions
+
+v = collect(0.0:0.1:10.0)
+queries = sort!(rand(100) .* 10)
+
+# Single query with a hint.
+i = searchsortedlast(BracketGallop(), v, 3.14, 30)
+
+# Batched, with strategy chosen by Auto.
+idx = Vector{Int}(undef, length(queries))
+searchsortedlast!(idx, v, queries)
 ```
 
 ## Contributing
diff --git a/docs/src/interface.md b/docs/src/interface.md
new file mode 100644
index 0000000..b07a98a
--- /dev/null
+++ b/docs/src/interface.md
@@ -0,0 +1,167 @@
+# Interface and extension rules
+
+This page documents the public sorted-search API surface and the contract a
+custom [`SearchStrategy`](@ref FindFirstFunctions.SearchStrategy) subtype
+must satisfy.
+
+## Public API surface
+
+In 2.x the sorted-search public API is a single pair of generic functions
+overloaded on a strategy as the first positional argument:
+
+```julia
+searchsortedfirst(strategy, v, x[, hint]; order = Base.Order.Forward)
+searchsortedlast(strategy, v, x[, hint]; order = Base.Order.Forward)
+```
+
+and the in-place batched variants:
+
+```julia
+searchsortedfirst!(idx_out, v, queries; strategy = Auto(), order = Base.Order.Forward)
+searchsortedlast!(idx_out, v, queries; strategy = Auto(), order = Base.Order.Forward)
+```
+
+The strategy types live in the `FindFirstFunctions` module; the
+`searchsortedfirst`/`searchsortedlast` names are extended from `Base` so they
+compose with existing `Base.Order` orderings.
+
+```@docs
+FindFirstFunctions.searchsortedfirst!
+FindFirstFunctions.searchsortedlast!
+```
+
+## Rules of the interface
+
+  1. **Hint is optional and is an `Integer`.** When supplied it must be a
+     valid index into `v` (`firstindex(v) ≤ hint ≤ lastindex(v)`). An
+     out-of-range hint is silently treated as "no hint" by every built-in
+     strategy. A strategy is allowed to ignore the hint entirely
+     ([`InterpolationSearch`](@ref FindFirstFunctions.InterpolationSearch),
+     [`BinaryBracket`](@ref FindFirstFunctions.BinaryBracket)).
+  2. **Strategies are singletons or wrappers.** `LinearScan`, `BracketGallop`,
+     `ExpFromLeft`, `InterpolationSearch`, `BinaryBracket`, and `Auto` are
+     zero-field singletons.
+     [`GuesserHint`](@ref FindFirstFunctions.GuesserHint) is a thin wrapper
+     around a [`Guesser`](@ref FindFirstFunctions.Guesser). New strategies
+     should follow the same pattern: parameters that change *behaviour*
+     belong on the type; parameters that change *cost only* should be tuned
+     internally.
+  3. **No mutation of `v` or `x`.** A strategy never writes to the searched
+     vector or to the query. The only state that may change across calls is
+     hint state carried by the strategy itself (e.g. `GuesserHint` updates
+     `guesser.idx_prev`).
+  4. **Order is honored.** Every strategy accepts an `order::Base.Order.Ordering`
+     keyword and returns indices consistent with `Base.searchsortedfirst` /
+     `Base.searchsortedlast` under that ordering. Strategies that are only
+     efficient under `Forward` ordering (e.g. `InterpolationSearch`,
+     `ExpFromLeft`) fall back to `BinaryBracket` for non-`Forward` orderings.
+  5. **`AbstractRange` is already O(1).** `Base.searchsortedfirst(r::AbstractRange, x)`
+     has a closed-form implementation. Strategies do not need range fast
+     paths — the `BinaryBracket` fallback already calls into Base's
+     range-aware method.
+
+## Anatomy of a strategy
+
+A built-in strategy provides up to four methods on each of
+`Base.searchsortedfirst` / `Base.searchsortedlast`:
+
+```julia
+# Required: the hinted form (the strategy's reason for existing).
+Base.searchsortedlast(::MyStrategy, v, x, hint::Integer; order) = ...
+Base.searchsortedfirst(::MyStrategy, v, x, hint::Integer; order) = ...
+
+# Required: the unhinted form. Most strategies just fall back to BinaryBracket.
+Base.searchsortedlast(::MyStrategy, v, x; order) = searchsortedlast(BinaryBracket(), v, x; order)
+Base.searchsortedfirst(::MyStrategy, v, x; order) = searchsortedfirst(BinaryBracket(), v, x; order)
+```
+
+If your strategy ignores the hint, define just the unhinted form and have the
+hinted form delegate to it (see `BinaryBracket` and `InterpolationSearch` in
+the source).
+
+## How to add a new strategy
+
+Two steps.
+
+### 1. Define the strategy type
+
+Pick a name that describes the *algorithm*, not the use case. Make it a
+subtype of [`SearchStrategy`](@ref FindFirstFunctions.SearchStrategy):
+
+```julia
+"""
+    MyStrategy <: FindFirstFunctions.SearchStrategy
+
+One sentence on what it does. One sentence on when it wins. One sentence on
+when it falls back.
+"""
+struct MyStrategy <: FindFirstFunctions.SearchStrategy end
+```
+
+If your strategy carries state (like `GuesserHint`), make it a parametric
+struct:
+
+```julia
+struct MyStrategy{S} <: FindFirstFunctions.SearchStrategy
+    state::S
+end
+```
+
+### 2. Implement the dispatch methods
+
+At minimum, the hinted forms:
+
+```julia
+function Base.searchsortedlast(
+        ::MyStrategy, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward
+    )
+    # Algorithm body. Must return the same index that
+    # `Base.searchsortedlast(v, x, order)` would.
+    ...
+end
+
+function Base.searchsortedfirst(
+        ::MyStrategy, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward
+    )
+    ...
+end
+```
+
+Plus unhinted forms (typically fallbacks):
+
+```julia
+Base.searchsortedlast(s::MyStrategy, v::AbstractVector, x; order = Base.Order.Forward) =
+    searchsortedlast(FindFirstFunctions.BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(s::MyStrategy, v::AbstractVector, x; order = Base.Order.Forward) =
+    searchsortedfirst(FindFirstFunctions.BinaryBracket(), v, x; order = order)
+```
+
+### Correctness check
+
+Every strategy must return the same answer as plain `Base.searchsortedlast` /
+`Base.searchsortedfirst` for every `(v, x[, hint])` triple. Test with random
+inputs against `Base`:
+
+```julia
+using Test, Random
+Random.seed!(0)
+for trial in 1:10_000
+    v = sort!(randn(rand(1:1000)))
+    x = randn()
+    hint = rand(1:length(v))
+    @test searchsortedlast(MyStrategy(), v, x, hint) == searchsortedlast(v, x)
+    @test searchsortedfirst(MyStrategy(), v, x, hint) == searchsortedfirst(v, x)
+end
+```
+
+### Hooking into `Auto`
+
+`Auto`'s decision tree lives in `_auto_pick` (per-query) and
+`_searchsortedlast_batched!(_, _, _, ::Auto, _)` /
+`_searchsortedfirst_batched!(_, _, _, ::Auto, _)` (batched). It is **not
+extensible from outside** — new strategies do not register themselves with
+`Auto` automatically. If you believe `Auto` should pick your strategy in
+some regime, open an issue with benchmark numbers across the regime grid in
+[Auto: heuristics and benchmarks](@ref).
diff --git a/docs/src/strategies.md b/docs/src/strategies.md
new file mode 100644
index 0000000..6836fb4
--- /dev/null
+++ b/docs/src/strategies.md
@@ -0,0 +1,244 @@
+# Search strategies
+
+The strategies form the parameter space of the sorted-search API. Each one
+subtypes [`SearchStrategy`](@ref FindFirstFunctions.SearchStrategy) and is
+selected as the first positional argument of `searchsortedfirst` /
+`searchsortedlast`.
+
+```@docs
+FindFirstFunctions.SearchStrategy
+```
+
+## When to pick which
+
+For most callers the answer is: pass [`Auto`](@ref FindFirstFunctions.Auto)
+(the default in the batched API) and let it choose. The table below is for
+callers who already know their access pattern and want to pin a strategy.
+
+| Strategy | Best when | Cost when hint hits | Cost worst case | Uses hint? |
+|---|---|---|---|---|
+| [`LinearScan`](@ref FindFirstFunctions.LinearScan) | answer is within a handful of slots of the hint | O(1) | O(n) | yes |
+| [`SIMDLinearScan`](@ref FindFirstFunctions.SIMDLinearScan) | `DenseVector{Int64}` or `DenseVector{Float64}`, forward walk past the hint | O(1) | O(n/8) | yes |
+| [`BracketGallop`](@ref FindFirstFunctions.BracketGallop) | answer may be either side of the hint, distance unknown | O(1) | ~2 log₂ n | yes |
+| [`ExpFromLeft`](@ref FindFirstFunctions.ExpFromLeft) | sorted batch — hint is `prev_result`, answer is monotonically ≥ hint | O(1) | O(log n) | yes (as lower bound) |
+| [`InterpolationSearch`](@ref FindFirstFunctions.InterpolationSearch) | `v` is uniformly spaced and numeric | O(1) | O(log n) | no |
+| [`BitInterpolationSearch`](@ref FindFirstFunctions.BitInterpolationSearch) | `DenseVector{Float64}` and log-spaced (geometric) — opt-in, `Auto` does not pick | O(1) | O(log n) | no |
+| [`BinaryBracket`](@ref FindFirstFunctions.BinaryBracket) | no hint available, or fallback | O(log n) | O(log n) | no |
+| [`GuesserHint`](@ref FindFirstFunctions.GuesserHint) | repeated correlated lookups against the same `v` | O(1) | ~2 log₂ n | self-provided |
+| [`Auto`](@ref FindFirstFunctions.Auto) | unknown access pattern | varies | varies | yes if supplied |
+
+All hint-consuming strategies fall back to `BinaryBracket` when no hint is
+supplied or when the hint is out of range. `InterpolationSearch` additionally
+falls back to `BinaryBracket` for non-numeric element types.
+
+## Reference
+
+```@docs
+FindFirstFunctions.LinearScan
+FindFirstFunctions.SIMDLinearScan
+FindFirstFunctions.BracketGallop
+FindFirstFunctions.ExpFromLeft
+FindFirstFunctions.InterpolationSearch
+FindFirstFunctions.BitInterpolationSearch
+FindFirstFunctions.BinaryBracket
+FindFirstFunctions.BisectThenSIMD
+FindFirstFunctions.Auto
+FindFirstFunctions.SearchProperties
+```
+
+## Equality search through the strategy framework
+
+Strategies answer positional questions ("where would `x` insert?"). Equality
+asks a different question ("is `x` at exactly which index?"). The
+[`findequal`](@ref FindFirstFunctions.findequal) wrapper builds the latter
+on top of the former: every strategy gets an equality variant for free.
+
+```@docs
+FindFirstFunctions.findequal
+```
+
+The sentinel for "not found" is `firstindex(v) - 1` (`= 0` for 1-based
+vectors). Type-stable `Int` return, no `Union` with `Nothing`. Callers can
+test for absence with `i < firstindex(v)`.
+
+`findequal` routes most strategies through `searchsortedfirst + post-check`
+generically, so `findequal(BracketGallop(), v, x, hint)`,
+`findequal(SIMDLinearScan(), v, x, hint)`,
+`findequal(GuesserHint(g), v, x)`, etc. all just work.
+
+The [`BisectThenSIMD`](@ref FindFirstFunctions.BisectThenSIMD) strategy
+short-circuits the post-check path on `DenseVector{Int64}` by dispatching
+into [`findfirstsortedequal`](@ref FindFirstFunctions.findfirstsortedequal)
+directly — same custom LLVM IR scan, exposed through the strategy framework.
+
+`GuesserHint` is documented on the [Guessers](@ref) page.
+
+## Notes on individual strategies
+
+### LinearScan
+
+Walks `±1` from the hint until the answer is bracketed. Cheapest possible
+search when the hint is right next to the answer — two comparisons. The only
+strategy whose worst case is O(n), so it should only be picked when the
+caller has strong evidence that the hint is close.
+
+For `length(v) ≤ 16`, `LinearScan` is faster than `BracketGallop` even from a
+bad hint because the bracket bookkeeping costs more than a worst-case walk
+across a vector that short. `Auto`'s per-query path picks `LinearScan` below
+that threshold.
+
+### SIMDLinearScan
+
+Same algorithm as `LinearScan`, with the **forward** walk past the hint
+lowered to 8-wide SIMD chunks via custom LLVM IR. The backward walk (when
+the hint is past the answer) uses the scalar `LinearScan` path — the SIMD
+primitive is only defined in the forward direction.
+
+Specialized for `DenseVector{Int64}` and `DenseVector{Float64}`. Any other
+element type falls back to the scalar `LinearScan` walk (this includes
+`Int32`, `UInt64`, `Float32`, `Date`, `String`, and user-defined numeric
+types). The dispatch is *static* — there's no runtime type test on a hot
+path — so the fallback costs nothing per-call when picked at compile time.
+
+Caveats:
+
+  - **Element types**: `Int64` and `Float64` only. Anything else uses
+    scalar `LinearScan`. This is a hard restriction of the LLVM IR: the
+    vector load uses `<8 x i64>` / `<8 x double>` with 8-byte stride, and
+    the broadcast and compare are typed accordingly.
+  - **NaN**: a `NaN` element in a `Float64` vector compares as `false`
+    under both `fcmp ogt` and `fcmp oge`, so a NaN in `v` is silently
+    skipped by the SIMD scan. Sorted `Float64` vectors containing `NaN`
+    aren't well-defined under any total order anyway — same caveat
+    applies to plain `Base.searchsortedlast` on such vectors.
+  - **Forward order only**: non-`Forward` orderings fall back to scalar
+    `LinearScan`. The IR is hard-coded to the `Forward` comparison
+    polarity.
+  - **No hint**: falls back to [`BinaryBracket`](@ref FindFirstFunctions.BinaryBracket).
+    Without a hint there's no direction information for the forward scan.
+  - **Auto does not pick this strategy.** `SIMDLinearScan` is opt-in. It
+    isn't part of the `Auto` decision tree because the regime where it
+    strictly beats `LinearScan` (long forward walks on `Int64`/`Float64`)
+    overlaps with the regime where `Auto` already prefers
+    [`BracketGallop`](@ref FindFirstFunctions.BracketGallop) or
+    [`ExpFromLeft`](@ref FindFirstFunctions.ExpFromLeft). Pin it
+    explicitly when you have a workload that wants a long linear forward
+    scan and you know the element type.
+
+### BitInterpolationSearch
+
+`InterpolationSearch` with the extrapolation guess computed on the IEEE
+bit pattern of `v` rather than the float values themselves. For positive
+Float64 values, the IEEE bit pattern is monotonically increasing with the
+float value and is approximately *linear* in array index for log-spaced
+(geometric) data. That makes the bit-domain linear extrapolation a far
+better guess than the float-domain linear extrapolation on geometric data
+— sometimes O(1) versus O(log n) refinement cost.
+
+**Opt-in only.** `Auto` does not pick `BitInterpolationSearch`. The bench
+sweep at `bench/bitinterp_sweep.jl` covers 1404 cells (9 v patterns × 4 q
+patterns × 6 n sizes up to 2²⁰ × 7 m sizes, exercising pure-geometric,
+log-spaced over 18 decades, power-of-2 spacing, two-decade clumps, and
+jittered-log alongside uniform/sqrt as negative controls). BitInterp
+wins outright in 59 cells (4.2%) and sits within 10% of the per-cell
+best in 75 cells (5.3%). The wins concentrate in:
+
+  - `logspaced` / `logspaced_wide` / `geometric_dense` / `geometric_sparse`
+    / `jittered_log` — i.e. genuinely geometric data.
+  - Small `m` (= 4, occasionally 16): the per-query bit-domain guess cost
+    amortizes poorly across larger batches.
+  - Large `n` (≥ 2¹⁴, peaking at 2²⁰): the saved bracket refinement scales
+    with `log₂ n`, while the per-query setup cost is constant.
+
+Sample wins (BitInterp vs second-best, ns/q):
+
+| Cell | BitInterp | Runner-up | Margin |
+|---|---|---|---|
+| `logspaced_wide log_grid n=2²⁰ m=4` | 52.5 | InterpolationSearch 75.0 | 1.43× |
+| `logspaced_wide log_grid n=2¹² m=4` | 35.0 | ExpFromLeft 47.5 | 1.36× |
+| `logspaced_wide log_grid n=2¹⁸ m=4` | 47.5 | ExpFromLeft 62.5 | 1.32× |
+| `logspaced_wide dense_grid n=2²⁰ m=4` | 50.0 | ExpFromLeft 65.0 | 1.30× |
+
+`Auto` doesn't pick it because:
+  - The wins are narrow (4% of cells in a bench specifically designed to
+    probe BitInterp's regime).
+  - Adding the eligibility check to `Auto`'s hot path (Float64 + positive
+    + log-linear sampled probe) would burn a few ns on every call, paying
+    back only in cells with `m ≤ 16` where Auto's overhead already
+    dominates the per-query cost.
+  - Users with a known log-spaced workload can pin
+    `searchsortedlast!(out, v, queries; strategy = BitInterpolationSearch())`
+    once and get the win without any heuristic cost.
+
+The strategy is retained as an opt-in for callers whose workload sits
+outside what `Auto` discovers cheaply: domain-specific tables (radiation
+transport, log-frequency, gravitational potentials) or hardware where
+Float64 division is unusually cheap.
+
+Falls back to plain `InterpolationSearch` on non-Float64 dense eltypes
+(where the bit pattern equals the value, making the strategies
+equivalent), and to `BinaryBracket` for non-positive or non-finite Float64
+data.
+
+### BracketGallop
+
+Galloping search around the hint: expand `[lo, hi]` outward by doubling steps
+until `x` is bracketed, then binary-search inside `[lo, hi]`. Direction is
+inferred from `v[hint]` vs. `x`, so the hint can be either above or below the
+answer. Worst case is ~2 log₂ n — about twice plain binary search — and it
+matches O(1) when the hint is close.
+
+This is the workhorse hinted strategy and the natural choice for
+`get_idx`-style callers where the hint is a cached previous result.
+
+### ExpFromLeft
+
+Exponential search forward from a hint interpreted as a *lower bound*. The
+algorithm probes `v[lo], v[lo+1], …, v[lo+4]` linearly, then `v[lo+8],
+v[lo+16], …` exponentially, then binary-searches inside the final bracket.
+
+Used by `Auto`'s batched dispatch when the queries are sorted: each call
+passes `hint = previous_result`, which by sortedness satisfies the "answer ≥
+hint" precondition. When the precondition is violated (the caller passes a
+hint past the answer), `ExpFromLeft` falls back to a full
+`searchsortedfirst` / `searchsortedlast` — slow but correct.
+
+### InterpolationSearch
+
+Computes a guess via linear extrapolation between `v[lo]` and `v[hi]`, then
+refines with a bounded binary search around that guess. On uniformly-spaced
+numeric data the first guess is the right answer — O(1) per query
+independent of `n`. On irregular data the guess is bad and the binary search
+inside the (full) bracket falls back to O(log n).
+
+Two restrictions:
+
+  - **Numeric eltype**: requires `x - v[i]` to be well-defined and produce a
+    number whose ratio with `v[hi] - v[lo]` makes sense. Non-numeric eltypes
+    fall back to `BinaryBracket`.
+  - **Forward ordering only**: the linear-extrapolation formula assumes
+    `v[lo] ≤ v[hi]`. Non-`Forward` orderings fall back to `BinaryBracket`.
+
+The hint is ignored — the guess is computed fresh from the endpoints.
+
+### BinaryBracket
+
+Plain `Base.searchsortedlast` / `Base.searchsortedfirst`. Provided as a
+strategy so that callers can opt out of hint-based behaviour explicitly, and
+so that other strategies have a well-defined name to fall back to. Ignores
+any hint that is supplied.
+
+### Auto
+
+See [Auto: heuristics and benchmarks](@ref) for the full decision tree and
+the benchmark sweep that produced its crossover constants.
+
+## Equality search
+
+The package exposes two `Union{Int, Nothing}`-returning equality routines —
+[`findfirstequal`](@ref FindFirstFunctions.findfirstequal) (unsorted SIMD
+scan) and [`findfirstsortedequal`](@ref FindFirstFunctions.findfirstsortedequal)
+(sorted bisect-then-SIMD scan). They live outside the strategy framework
+because their return semantics differ (`nothing` on miss, vs. in-range
+index for the positional API). See the [Equality search](@ref) page for the
+full documentation.
diff --git a/src/FindFirstFunctions.jl b/src/FindFirstFunctions.jl
index bb6f4da..2c962c6 100644
--- a/src/FindFirstFunctions.jl
+++ b/src/FindFirstFunctions.jl
@@ -1,457 +1,39 @@
 module FindFirstFunctions
 
-# https://github.com/JuliaLang/julia/pull/53687
+# Public API surface for `using FindFirstFunctions`. The strategy types are
+# zero-field singletons (except `GuesserHint` and `Auto`, which carry small
+# isbits payloads), so exporting them only adds names to the caller's
+# namespace — no runtime cost. `searchsortedfirst!` / `searchsortedlast!`
+# are FFF-defined names (the non-bang `searchsortedfirst` /
+# `searchsortedlast` are extensions of `Base` and are reachable without
+# qualification once `Base` is in scope).
+export
+    SearchStrategy,
+    LinearScan, SIMDLinearScan, BracketGallop, ExpFromLeft,
+    InterpolationSearch, BitInterpolationSearch,
+    BinaryBracket, BisectThenSIMD,
+    GuesserHint, Auto,
+    SearchProperties,
+    Guesser, looks_linear,
+    searchsortedfirst!, searchsortedlast!, searchsortedrange,
+    findequal, findfirstequal, findfirstsortedequal
+
+# Julia 1.12 changed how `Ptr{T}` arguments to `Base.llvmcall` are passed:
+# they're now real pointers rather than i64s. https://github.com/JuliaLang/julia/pull/53687
 const USE_PTR = VERSION >= v"1.12.0-DEV.255"
-const FFE_IR = """
-declare i8 @llvm.cttz.i8(i8, i1);
-define i64 @entry(i64 %0, $(USE_PTR ? "ptr" : "i64") %1, i64 %2) #0 {
-top:
-  $(USE_PTR ? "" : "%ivars = inttoptr i64 %1 to i64*")
-  %btmp = insertelement <8 x i64> undef, i64 %0, i64 0
-  %var = shufflevector <8 x i64> %btmp, <8 x i64> undef, <8 x i32> zeroinitializer
-  %lenm7 = add nsw i64 %2, -7
-  %dosimditer = icmp ugt i64 %2, 7
-  br i1 %dosimditer, label %L9.lr.ph, label %L32
 
-L9.lr.ph:
-  %len8 = and i64 %2, 9223372036854775800
-  br label %L9
-
-L9:
-  %i = phi i64 [ 0, %L9.lr.ph ], [ %vinc, %L30 ]
-  %ivarsi = getelementptr inbounds i64, $(USE_PTR ? "ptr %1" : "i64* %ivars"), i64 %i
-  $(USE_PTR ? "" : "%vpvi = bitcast i64* %ivarsi to <8 x i64>*")
-  %v = load <8 x i64>, $(USE_PTR ? "ptr %ivarsi" : "<8 x i64> * %vpvi"), align 8
-  %m = icmp eq <8 x i64> %v, %var
-  %mu = bitcast <8 x i1> %m to i8
-  %matchnotfound = icmp eq i8 %mu, 0
-  br i1 %matchnotfound, label %L30, label %L17
-
-L17:
-  %tz8 = call i8 @llvm.cttz.i8(i8 %mu, i1 true)
-  %tz64 = zext i8 %tz8 to i64
-  %vis = add nuw i64 %i, %tz64
-  br label %common.ret
-
-common.ret:
-  %retval = phi i64 [ %vis, %L17 ], [ -1, %L32 ], [ %si, %L51 ], [ -1, %L67 ]
-  ret i64 %retval
-
-L30:
-  %vinc = add nuw nsw i64 %i, 8
-  %continue = icmp slt i64 %vinc, %lenm7
-  br i1 %continue, label %L9, label %L32
-
-L32:
-  %cumi = phi i64 [ 0, %top ], [ %len8, %L30 ]
-  %done = icmp eq i64 %cumi, %2
-  br i1 %done, label %common.ret, label %L51
-
-L51:
-  %si = phi i64 [ %inc, %L67 ], [ %cumi, %L32 ]
-  %spi = getelementptr inbounds i64, $(USE_PTR ? "ptr %1" : "i64* %ivars"), i64 %si
-  %svi = load i64, $(USE_PTR ? "ptr" : "i64*") %spi, align 8
-  %match = icmp eq i64 %svi, %0
-  br i1 %match, label %common.ret, label %L67
-
-L67:
-  %inc = add i64 %si, 1
-  %dobreak = icmp eq i64 %inc, %2
-  br i1 %dobreak, label %common.ret, label %L51
-
-}
-attributes #0 = { alwaysinline }
-"""
-
-function _findfirstequal(vpivot::Int64, ptr::Ptr{Int64}, len::Int64)
-    return Base.llvmcall(
-        (FFE_IR, "entry"),
-        Int64,
-        Tuple{Int64, Ptr{Int64}, Int64},
-        vpivot,
-        ptr,
-        len
-    )
-end
-
-"""
-    findfirstequal(x::Int64,A::DenseVector{Int64})
-
-Finds the first index in `A` where the value equals `x`.
-"""
-findfirstequal(vpivot, ivars) = findfirst(isequal(vpivot), ivars)
-function findfirstequal(vpivot::Int64, ivars::DenseVector{Int64})
-    GC.@preserve ivars begin
-        ret = _findfirstequal(vpivot, pointer(ivars), length(ivars))
-    end
-    return ret < 0 ? nothing : ret + 1
-end
-
-"""
-findfirstsortedequal(vars::DenseVector{Int64}, var::Int64)::Union{Int64,Nothing}
-
-Note that this differs from `searchsortedfirst` by returning `nothing` when absent.
-"""
-function findfirstsortedequal(
-        var::Int64,
-        vars::DenseVector{Int64},
-        ::Val{basecase} = Base.libllvm_version >= v"17" ? Val(8) : Val(128)
-    ) where {basecase}
-    len = length(vars)
-    offset = 0
-    @inbounds while len > basecase
-        half = len >>> 1 # half on left, len - half on right
-        if Base.libllvm_version >= v"17"
-            # TODO: check if this works
-            # I'm worried the `!unpredictable` metadata will be stripped
-            offset = Base.llvmcall(
-                (
-                    """
-                                         define i64 @entry(i8 %0, i64 %1, i64 %2) #0 {
-                                         top:
-                                             %b = trunc i8 %0 to i1
-                                             %s = select i1 %b, i64 %1, i64 %2, !unpredictable !0
-                                             ret i64 %s
-                                         }
-                                         attributes #0 = { alwaysinline }
-                                         !0 = !{}
-                    """,
-                    "entry",
-                ),
-                Int64,
-                Tuple{Bool, Int64, Int64},
-                vars[offset + half + 1] <= var,
-                half + offset,
-                offset
-            )
-        else
-            offset = ifelse(vars[offset + half + 1] <= var, half + offset, offset)
-        end
-        len = len - half
-    end
-    # maybe occurs in vars[offset+1:offset+len]
-    GC.@preserve vars begin
-        ret = _findfirstequal(var, pointer(vars) + 8offset, len)
-    end
-    # return ret
-    return ret < 0 ? nothing : ret + offset + 1
-end
-
-"""
-    bracketstrictlymontonic(v, x, guess; lt=<comparison>, by=<transform>, rev=false)
-
-Starting from an initial `guess` index, find indices `(lo, hi)` such that `v[lo] ≤ x ≤ v[hi]` according to the specified order, assuming that `x` is actually within the range of
-values found in `v`.  If `x` is outside that range, either `lo` will be `firstindex(v)` or
-`hi` will be `lastindex(v)`.
-
-Note that the results will not typically satisfy `lo ≤ guess ≤ hi`.  If `x` is precisely
-equal to a value that is not unique in the input `v`, there is no guarantee that `(lo, hi)`
-will encompass *all* indices corresponding to that value.
-
-This algorithm is essentially an expanding binary search, which can be used as a precursor
-to `searchsorted` and related functions, which can take `lo` and `hi` as arguments.  The
-purpose of using this function first would be to accelerate convergence in those functions
-by using correlated `guess`es for repeated calls.  The best `guess` for the next call of
-this function would be the index returned by the previous call to `searchsorted`.
-
-See `Base.sort!` for an explanation of the keyword arguments `by`, `lt` and `rev`.
-"""
-function bracketstrictlymontonic(
-        v::AbstractVector,
-        x,
-        guess::T,
-        o::Base.Order.Ordering
-    )::NTuple{2, keytype(v)} where {T <: Integer}
-    bottom = firstindex(v)
-    top = lastindex(v)
-    if guess < bottom || guess > top
-        return bottom, top
-        # # NOTE: for cache efficiency in repeated calls, we avoid accessing the first and last elements of `v`
-        # # on each call to this function.  This should only result in significant slow downs for calls with
-        # # out-of-bounds values of `x` *and* bad `guess`es.
-        # elseif lt(o, x, v[bottom])
-        #     return bottom, bottom
-        # elseif lt(o, v[top], x)
-        #     return top, top
-    else
-        u = T(1)
-        lo, hi = guess, min(guess + u, top)
-        @inbounds if Base.Order.lt(o, x, v[lo])
-            while lo > bottom && Base.Order.lt(o, x, v[lo])
-                lo, hi = max(bottom, lo - u), lo
-                u += u
-            end
-        else
-            while hi < top && !Base.Order.lt(o, x, v[hi])
-                lo, hi = hi, min(top, hi + u)
-                u += u
-            end
-        end
-    end
-    return lo, hi
-end
-
-"""
-    looks_linear(v; threshold = 1e-2)
-
-Determine if the abscissae `v` are regularly distributed, taking the standard deviation of
-the difference between the array of abscissae with respect to the straight line linking
-its first and last elements, normalized by the range of `v`. If this standard deviation is
-below the given `threshold`, the vector looks linear (return true). Internal function -
-interface may change.
-"""
-function looks_linear(v; threshold = 1.0e-2)
-    length(v) <= 2 && return true
-    x_0, x_f = first(v), last(v)
-    N = length(v)
-    x_span = x_f - x_0
-    mean_x_dist = x_span / (N - 1)
-    norm_var = sum((x_i - x_0 - (i - 1) * mean_x_dist)^2 for (i, x_i) in enumerate(v)) /
-        (N * x_span^2)
-    return norm_var < threshold^2
-end
-
-"""
-    Guesser(v::AbstractVector; looks_linear_threshold = 1e-2)
-
-Wrapper of the searched vector `v` which makes an informed guess
-for `searchsorted*correlated` by either
-
-  - Exploiting that `v` is sufficiently evenly spaced
-  - Using the previous outcome of `searchsorted*correlated`
-"""
-struct Guesser{T <: AbstractVector}
-    v::T
-    idx_prev::Base.RefValue{Int}
-    linear_lookup::Bool
-end
-
-function Guesser(v::AbstractVector; looks_linear_threshold = 1.0e-2)
-    return Guesser(v, Ref(1), looks_linear(v; threshold = looks_linear_threshold))
-end
-
-function (g::Guesser)(x)
-    (; v, idx_prev, linear_lookup) = g
-    return if linear_lookup
-        δx = x - first(v)
-        iszero(δx) && return firstindex(v)
-        f = δx / (last(v) - first(v))
-        if isinf(f)
-            f > 0 ? lastindex(v) : firstindex(v)
-        else
-            i_0, i_f = firstindex(v), lastindex(v)
-            i_approx = f * (i_f - i_0) + i_0
-            target_type = typeof(firstindex(v))
-            if i_approx >= typemax(target_type)
-                lastindex(v) + 1
-            elseif i_approx <= typemin(target_type)
-                firstindex(v) - 1
-            else
-                round(target_type, i_approx)
-            end
-        end
-    else
-        idx_prev[]
-    end
-end
-
-"""
-    searchsortedfirstcorrelated(v::AbstractVector, x, guess; order=Base.Order.Forward)
-
-An accelerated `findfirst` on sorted vectors using a bracketed search. Requires a `guess::Union{<:Integer, Guesser}`
-to start the search from.
-
-The `order` keyword argument specifies the ordering of the vector `v`, defaulting to `Base.Order.Forward`.
-"""
-function searchsortedfirstcorrelated(
-        v::AbstractVector,
-        x,
-        guess::T;
-        order::Base.Order.Ordering = Base.Order.Forward
-    ) where {T <: Integer}
-    lo, hi = bracketstrictlymontonic(v, x, guess, order)
-    return searchsortedfirst(v, x, lo, hi, order)
-end
-
-"""
-    searchsortedlastcorrelated(v::AbstractVector{T}, x, guess; order=Base.Order.Forward)
-
-An accelerated `findlast` on sorted vectors using a bracketed search. Requires a `guess::Union{<:Integer, Guesser}`
-to start the search from.
-
-The `order` keyword argument specifies the ordering of the vector `v`, defaulting to `Base.Order.Forward`.
-"""
-function searchsortedlastcorrelated(
-        v::AbstractVector,
-        x,
-        guess::T;
-        order::Base.Order.Ordering = Base.Order.Forward
-    ) where {T <: Integer}
-    lo, hi = bracketstrictlymontonic(v, x, guess, order)
-    return searchsortedlast(v, x, lo, hi, order)
-end
-
-searchsortedfirstcorrelated(r::AbstractRange, x, ::Integer) = searchsortedfirst(r, x)
-searchsortedlastcorrelated(r::AbstractRange, x, ::Integer) = searchsortedlast(r, x)
-
-function searchsortedfirstcorrelated(
-        v::AbstractVector,
-        x,
-        guess::Guesser{T};
-        order::Base.Order.Ordering = Base.Order.Forward
-    ) where {T <: AbstractVector}
-    @assert v === guess.v
-    out = searchsortedfirstcorrelated(v, x, guess(x); order = order)
-    guess.idx_prev[] = out
-    return out
-end
-
-function searchsortedlastcorrelated(
-        v::T,
-        x,
-        guess::Guesser{T};
-        order::Base.Order.Ordering = Base.Order.Forward
-    ) where {T <: AbstractVector}
-    @assert v === guess.v
-    out = searchsortedlastcorrelated(v, x, guess(x); order = order)
-    guess.idx_prev[] = out
-    return out
-end
-
-"""
-    searchsortedfirstexp(v, x, lo=firstindex(v), hi=lastindex(v))
-
-Find the first index `i` in sorted vector `v` such that `v[i] >= x`, starting from `lo`.
-This uses an exponential search followed by binary search, which is efficient when
-the target is expected to be near `lo` (e.g., for correlated sequential lookups).
-
-Inspired by Interpolations.jl's `searchsortedfirst_exp_left`.
-"""
-Base.@propagate_inbounds function searchsortedfirstexp(
-        v::AbstractVector,
-        x,
-        lo::Integer = firstindex(v),
-        hi::Integer = lastindex(v)
-    )
-    # Linear search for first few elements
-    for i in 0:4
-        ind = lo + i
-        ind > hi && return ind
-        x <= v[ind] && return ind
-    end
-    # Exponential search with doubling steps
-    n = 3
-    tn2 = 2^n
-    tn2m1 = 2^(n - 1)
-    ind = lo + tn2
-    while ind <= hi
-        x <= v[ind] &&
-            return searchsortedfirst(v, x, lo + tn2 - tn2m1, ind, Base.Order.Forward)
-        tn2 *= 2
-        tn2m1 *= 2
-        ind = lo + tn2
-    end
-    return searchsortedfirst(v, x, lo + tn2 - tn2m1, hi, Base.Order.Forward)
-end
-
-"""
-    searchsortedlastvec(v::AbstractVector, x::AbstractVector)
-
-Find the indices for multiple sorted values `x` in sorted vector `v` efficiently.
-If `x` is sorted, this leverages monotonicity to avoid redundant searching.
-Returns indices such that `v[out[i]] <= x[i]` (like `searchsortedlast`).
-
-If `x` is not sorted, falls back to element-wise `searchsortedlast`.
-
-Inspired by Interpolations.jl's `searchsortedfirst_vec`.
-"""
-function searchsortedlastvec(v::AbstractVector, x::AbstractVector)
-    issorted(x) || return searchsortedlast.(Ref(v), x)
-    out = Vector{Int}(undef, length(x))
-    lo = firstindex(v)
-    hi = lastindex(v)
-    @inbounds for i in eachindex(x)
-        xx = x[i]
-        y = searchsortedfirstexp(v, xx, lo, hi)
-        # searchsortedfirstexp returns index of first element >= x
-        # searchsortedlast returns index of last element <= x
-        # If y > hi, x is larger than all elements, return hi
-        # If v[y] == x, return y (first >= is also <=)
-        # If v[y] > x, return y - 1
-        if y > hi
-            out[i] = hi
-        elseif v[y] == xx
-            out[i] = y
-        else
-            out[i] = y - 1
-        end
-        lo = max(y, lo)
-    end
-    return out
-end
-
-"""
-    searchsortedfirstvec(v::AbstractVector, x::AbstractVector)
-
-Find the indices for multiple sorted values `x` in sorted vector `v` efficiently.
-If `x` is sorted, this leverages monotonicity to avoid redundant searching.
-Returns indices such that `v[out[i]] >= x[i]` (like `searchsortedfirst`).
-
-If `x` is not sorted, falls back to element-wise `searchsortedfirst`.
-
-Inspired by Interpolations.jl's `searchsortedfirst_vec`.
-"""
-function searchsortedfirstvec(v::AbstractVector, x::AbstractVector)
-    issorted(x) || return searchsortedfirst.(Ref(v), x)
-    out = Vector{Int}(undef, length(x))
-    lo = firstindex(v)
-    hi = lastindex(v)
-    @inbounds for i in eachindex(x)
-        xx = x[i]
-        y = searchsortedfirstexp(v, xx, lo, hi)
-        out[i] = y
-        lo = min(y, hi)
-    end
-    return out
-end
-
-using PrecompileTools: @compile_workload, @setup_workload
-
-@setup_workload begin
-    # Minimal setup for precompilation workload
-    vec_int64 = Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-    linear_vec = collect(1.0:0.5:10.0)
-
-    @compile_workload begin
-        # Precompile the most commonly used functions with typical types
-
-        # findfirstequal: fast SIMD-based search in Int64 vectors
-        findfirstequal(Int64(5), vec_int64)
-        findfirstequal(Int64(100), vec_int64)  # not found case
-
-        # findfirstsortedequal: binary search in sorted Int64 vectors
-        findfirstsortedequal(Int64(8), vec_int64)
-        findfirstsortedequal(Int64(100), vec_int64)  # not found case
-
-        # bracketstrictlymontonic: bracketing for sorted vectors
-        bracketstrictlymontonic(vec_int64, Int64(8), Int64(1), Base.Order.Forward)
-
-        # looks_linear: check if vector is evenly spaced
-        looks_linear(linear_vec)
-
-        # Guesser: wrapper for efficient repeated searches
-        guesser = Guesser(linear_vec)
-        guesser(5.0)
-
-        # searchsortedfirstcorrelated and searchsortedlastcorrelated
-        searchsortedfirstcorrelated(vec_int64, Int64(8), Int64(1))
-        searchsortedlastcorrelated(vec_int64, Int64(8), Int64(1))
-
-        # Also precompile with Guesser
-        guesser_int = Guesser(vec_int64)
-        searchsortedfirstcorrelated(vec_int64, Int64(8), guesser_int)
-        searchsortedlastcorrelated(vec_int64, Int64(8), guesser_int)
-    end
-end
+# Source layout. Include order matters — each file may depend on names
+# defined in earlier files. See the comment block at the top of each file
+# for what lives where.
+include("simd_ir.jl")             # IR template + per-eltype IR constants + SIMD primitives
+include("equality.jl")            # findfirstequal + findfirstsortedequal
+include("strategies.jl")          # SearchStrategy + concrete strategy types + SearchProperties + Auto
+include("search_properties.jl")   # Linearity / NaN probes + populated SearchProperties constructor
+include("dispatch.jl")            # Per-strategy searchsortedfirst/last methods + their internal helpers
+include("auto.jl")                # Auto crossover constants + per-query Auto + Auto's batched helpers
+include("batched.jl")             # Batched API + searchsortedrange + _batched! (incl Auto specialization)
+include("guesser.jl")             # looks_linear + Guesser + GuesserHint dispatch
+include("findequal.jl")           # findequal + BisectThenSIMD shortcut
+include("precompile.jl")          # PrecompileTools workload
 
 end # module FindFirstFunctions
diff --git a/src/auto.jl b/src/auto.jl
new file mode 100644
index 0000000..aebb1c5
--- /dev/null
+++ b/src/auto.jl
@@ -0,0 +1,168 @@
+# Auto strategy — heuristic dispatch tree + crossover constants + the
+# helpers (`_estimate_avg_gap`, `_auto_simd_*`, `_auto_interp_eligible`) it
+# consults. Per-query Auto dispatch lives here; batched Auto dispatch lives
+# in `batched.jl` (next to the generic `_batched!` it specializes).
+
+# Per-query Auto threshold: under this length, the bracket-search bookkeeping
+# costs more than a worst-case linear walk.
+const _AUTO_LINEAR_THRESHOLD = 16
+
+# Batched-Auto crossover: at gap ≤ 4 LinearScan beats ExpFromLeft (its 5
+# initial linear probes are wasted when the gap is already 0 or 1). Above 4,
+# the SIMD path picks up (where eligible) up to gap = `_auto_simd_gap_max`.
+const _AUTO_BATCH_LINEAR_GAP = 4
+
+# For sparse queries (gap large) on long vectors, InterpolationSearch can
+# beat ExpFromLeft by ~2× on uniformly-spaced data. The sampled-linearity
+# check is O(1) — 9 probes — so it's cheap enough to run inside Auto when
+# there's a real chance of unlocking InterpolationSearch.
+const _AUTO_INTERP_MIN_GAP = 8
+const _AUTO_INTERP_MIN_N = 1024
+const _AUTO_INTERP_MIN_M = 2
+
+# Very-sparse override: when the gap is large enough that ExpFromLeft's
+# log₂(gap) doubling levels approach InterpolationSearch's log₂(n) worst-case
+# binary refinement, InterpolationSearch's better cache behaviour (one
+# extrapolation jump + local refine vs. many doubling probes across the
+# array) wins even on non-strictly-linear data.
+const _AUTO_INTERP_LOOSE_GAP = 256
+const _AUTO_LINEAR_LOOSE_TOLERANCE = 5.0e-2
+
+# SIMDLinearScan eligibility window. The threshold is eltype-parameterized
+# via the `_auto_simd_gap_max` function below.
+@inline _auto_simd_gap_max(::DenseVector{Int64}) = 64
+@inline _auto_simd_gap_max(::DenseVector{Float64}) = 64
+@inline _auto_simd_gap_max(::AbstractVector) = 0   # not SIMD-eligible
+
+# When InterpolationSearch isn't eligible and the gap is large, BracketGallop
+# beats ExpFromLeft because the 5 linear probes ExpFromLeft does upfront are
+# wasted (no chance the answer is within 5 of `hint = prev_result` when the
+# gap is hundreds or thousands). BracketGallop just starts doubling
+# immediately. The bench sweep shows this crossover at gap ≈ 16.
+const _AUTO_GALLOP_GAP_MIN = 16
+
+# Per-query Auto: pick based on hint validity and length(v).
+@inline function _auto_pick(v::AbstractVector, hint::Integer)
+    return if hint < firstindex(v) || hint > lastindex(v)
+        BinaryBracket()
+    elseif length(v) <= _AUTO_LINEAR_THRESHOLD
+        LinearScan()
+    else
+        BracketGallop()
+    end
+end
+
+# Returns `(gap, skewed)`: the estimated average step in `v`'s index space
+# between consecutive query results, plus a flag that's true when the
+# queries' distribution is non-uniform within their span.
+#
+#   - `gap` is the per-query cost driver. We always use the span-based
+#     estimate `n * span(queries) / span(v) / m` so that tightly-clustered
+#     queries (span_q ≈ 0) report gap ≈ 0 regardless of `n/m`. The earlier
+#     `n / m` fallback for skewed queries caused `SIMDLinearScan` to be
+#     picked for clustered queries where LinearScan's tiny scalar walk is
+#     5× faster.
+#   - `skewed` is an InterpolationSearch-suitability flag. When the median
+#     query sits well off the midpoint of `queries[1]..queries[end]`, the
+#     queries are clustered within their span and the per-call linear
+#     extrapolation guess is worse than the previous-result hint that
+#     `ExpFromLeft` would use.
+@inline function _estimate_avg_gap(
+        v::AbstractVector{<:Number}, queries::AbstractVector{<:Number}, m::Integer,
+    )
+    n = length(v)
+    n <= 1 && return (0, false)
+    @inbounds span_v = v[end] - v[1]
+    if iszero(span_v) || !isfinite(span_v)
+        return (n ÷ max(1, m), false)
+    end
+    @inbounds span_q = queries[end] - queries[1]
+    # Skew detection on small `m` is too noisy — for `m ≈ 4` random uniform
+    # samples, the median routinely sits 30%+ off the linear midpoint by
+    # chance. Gate on `m ≥ 10` where the statistical variance is well below
+    # the 20% threshold.
+    skewed = false
+    if m >= 10
+        @inbounds mid_q = queries[firstindex(queries) + m ÷ 2]
+        @inbounds expected_mid = (
+            queries[firstindex(queries)] +
+                queries[lastindex(queries)]
+        ) / 2
+        if !iszero(span_q) &&
+                abs(mid_q - expected_mid) > 0.2 * abs(span_q)
+            skewed = true
+        end
+    end
+    ratio = span_q / span_v
+    # Clamp ratio: queries may extend outside v's range (extrapolation).
+    ratio = clamp(ratio, zero(ratio), one(ratio))
+    return (floor(Int, n * ratio / max(1, m)), skewed)
+end
+
+# Non-numeric eltypes: no span subtraction possible, fall back to length
+# ratio and assume queries are roughly uniform (no skew detection possible).
+@inline _estimate_avg_gap(
+    v::AbstractVector, ::AbstractVector, m::Integer,
+) = (length(v) ÷ max(1, m), false)
+
+# SIMD eligibility check used by Auto's batched dispatch. The static type
+# test on `v` discriminates the `DenseVector{Int64}` / `DenseVector{Float64}`
+# cases that SIMDLinearScan supports. For Float64, NaN presence is taken from
+# cached `SearchProperties.has_nan` when available; otherwise we assume no
+# NaN — Base's positional search doesn't check sortedness either, and the
+# burden of supplying populated props is on the caller for pathological
+# inputs.
+@inline _auto_simd_eligible(v::DenseVector{Int64}, ::SearchProperties) = true
+@inline function _auto_simd_eligible(v::DenseVector{Float64}, p::SearchProperties)
+    return p.has_props ? !p.has_nan : true
+end
+@inline _auto_simd_eligible(::AbstractVector, ::SearchProperties) = false
+
+# InterpolationSearch eligibility: two-tier linearity check. For
+# `_AUTO_INTERP_MIN_GAP ≤ gap < _AUTO_INTERP_LOOSE_GAP` we require strict
+# linearity (`_AUTO_LINEAR_REL_TOLERANCE`, default 0.1%) — InterpolationSearch
+# is only worth the per-call cost on truly uniform data when ExpFromLeft is
+# also competitive. For `gap ≥ _AUTO_INTERP_LOOSE_GAP` we accept a looser
+# tolerance (`_AUTO_LINEAR_LOOSE_TOLERANCE`, default 5%) because the cache
+# benefit of one extrapolation jump + local refine compensates for a worse
+# guess, but we still reject genuinely nonlinear data (log-spaced,
+# two-scale) where InterpolationSearch loses 2–3× to ExpFromLeft.
+@inline function _auto_interp_eligible(v, props::SearchProperties, gap::Integer)
+    if gap >= _AUTO_INTERP_LOOSE_GAP
+        # Loose probe — even on cached props, the strict `is_linear` bit may
+        # already reflect a tighter threshold than we need here, so run the
+        # sampled probe at the loose tolerance regardless of cache state.
+        return _sampled_looks_linear(v, _AUTO_LINEAR_LOOSE_TOLERANCE)
+    end
+    return props.has_props ? props.is_linear : _sampled_looks_linear(v)
+end
+
+# Per-query Auto dispatch.
+function Base.searchsortedlast(
+        ::Auto, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    s = _auto_pick(v, hint)
+    return s isa BinaryBracket ?
+        searchsortedlast(s, v, x; order = order) :
+        searchsortedlast(s, v, x, hint; order = order)
+end
+
+function Base.searchsortedfirst(
+        ::Auto, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    s = _auto_pick(v, hint)
+    return s isa BinaryBracket ?
+        searchsortedfirst(s, v, x; order = order) :
+        searchsortedfirst(s, v, x, hint; order = order)
+end
+
+Base.searchsortedlast(
+    ::Auto, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(
+    ::Auto, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(BinaryBracket(), v, x; order = order)
diff --git a/src/batched.jl b/src/batched.jl
new file mode 100644
index 0000000..9dbc82f
--- /dev/null
+++ b/src/batched.jl
@@ -0,0 +1,321 @@
+# In-place batched sorted-search API: `searchsortedfirst!` / `searchsortedlast!`
+# and `searchsortedrange`, plus the internal `_batched!` dispatchers and the
+# Auto-specific specialization that turns Auto's decision tree into a single
+# branchless dispatch on a concrete strategy.
+
+"""
+    searchsortedlast!(idx_out, v, queries; strategy = Auto(), order = Base.Order.Forward,
+                       queries_sorted = nothing)
+
+In-place batched [`searchsortedlast`](@ref Base.searchsortedlast). Writes one
+index per element of `queries` into `idx_out` (which must be the same length).
+
+If `queries` is sorted under `order`, the previous result is used as a hint for
+the next query, so the total cost is O(length(v) + length(queries)) under
+`strategy = BracketGallop()` (the default `Auto` choice for non-tiny `v`).
+
+If `queries` is not sorted, falls back to per-element `searchsortedlast` with
+no hint regardless of `strategy`.
+
+The `queries_sorted` kwarg controls the runtime `issorted(queries)` check:
+
+  - `nothing` (default): run `issorted(queries; order = order)` on every call.
+    O(m) bookkeeping, roughly 1 ns/q on long batches.
+  - `true`: trust the caller — skip the check and take the sorted-loop path
+    unconditionally. Use this when you already know your queries are sorted
+    (you computed them as a range, sorted them yourself, etc.). Wrong-answer
+    risk: a non-sorted `queries` passed with `queries_sorted = true` will
+    produce incorrect results, since the inner loop uses the previous result
+    as a hint and that hint becomes invalid when queries jump backward.
+  - `false`: skip the check and take the unsorted-loop path unconditionally
+    (per-query unhinted `Base.searchsortedlast`). Use when you know queries
+    are not sorted and want to avoid the O(m) probe.
+
+Returns `idx_out`.
+"""
+function searchsortedlast!(
+        idx_out::AbstractVector{<:Integer},
+        v::AbstractVector,
+        queries::AbstractVector;
+        strategy::SearchStrategy = Auto(),
+        order::Base.Order.Ordering = Base.Order.Forward,
+        queries_sorted::Union{Nothing, Bool} = nothing,
+    )
+    if length(idx_out) != length(queries)
+        throw(
+            DimensionMismatch(
+                "idx_out and queries must have the same length"
+            )
+        )
+    end
+    return _searchsortedlast_batched!(
+        idx_out, v, queries, strategy, order, queries_sorted
+    )
+end
+
+"""
+    searchsortedfirst!(idx_out, v, queries; strategy = Auto(), order = Base.Order.Forward,
+                        queries_sorted = nothing)
+
+In-place batched [`searchsortedfirst`](@ref Base.searchsortedfirst). See
+[`searchsortedlast!`](@ref) for behavior and for the `queries_sorted` kwarg.
+"""
+function searchsortedfirst!(
+        idx_out::AbstractVector{<:Integer},
+        v::AbstractVector,
+        queries::AbstractVector;
+        strategy::SearchStrategy = Auto(),
+        order::Base.Order.Ordering = Base.Order.Forward,
+        queries_sorted::Union{Nothing, Bool} = nothing,
+    )
+    if length(idx_out) != length(queries)
+        throw(
+            DimensionMismatch(
+                "idx_out and queries must have the same length"
+            )
+        )
+    end
+    return _searchsortedfirst_batched!(
+        idx_out, v, queries, strategy, order, queries_sorted
+    )
+end
+
+# Sorted inner loop, parameterized on strategy. Used by both the generic and
+# Auto batched entry points so each batch performs at most one issorted check.
+function _searchsortedlast_sorted_loop!(
+        idx_out, v::AbstractVector, queries::AbstractVector,
+        strategy::SearchStrategy, order::Base.Order.Ordering,
+    )
+    hint = firstindex(v) - 1
+    @inbounds for k in eachindex(queries)
+        q = queries[k]
+        hint = if hint < firstindex(v)
+            searchsortedlast(strategy, v, q; order = order)
+        else
+            searchsortedlast(strategy, v, q, hint; order = order)
+        end
+        idx_out[k] = hint
+    end
+    return idx_out
+end
+
+function _searchsortedfirst_sorted_loop!(
+        idx_out, v::AbstractVector, queries::AbstractVector,
+        strategy::SearchStrategy, order::Base.Order.Ordering,
+    )
+    hint = firstindex(v) - 1
+    @inbounds for k in eachindex(queries)
+        q = queries[k]
+        hint = if hint < firstindex(v)
+            searchsortedfirst(strategy, v, q; order = order)
+        else
+            searchsortedfirst(strategy, v, q, hint; order = order)
+        end
+        idx_out[k] = hint
+    end
+    return idx_out
+end
+
+function _searchsortedlast_unsorted_loop!(
+        idx_out, v::AbstractVector, queries::AbstractVector,
+        order::Base.Order.Ordering,
+    )
+    @inbounds for k in eachindex(queries)
+        idx_out[k] = searchsortedlast(v, queries[k], order)
+    end
+    return idx_out
+end
+
+function _searchsortedfirst_unsorted_loop!(
+        idx_out, v::AbstractVector, queries::AbstractVector,
+        order::Base.Order.Ordering,
+    )
+    @inbounds for k in eachindex(queries)
+        idx_out[k] = searchsortedfirst(v, queries[k], order)
+    end
+    return idx_out
+end
+
+# Decide whether to take the sorted-queries fast path. `queries_sorted` is
+# the caller-supplied override: `nothing` means "check at runtime", `true`
+# means "trust the caller, skip the O(m) issorted probe", `false` means
+# "force the unsorted path".
+@inline function _take_sorted_path(
+        queries, order::Base.Order.Ordering, queries_sorted::Union{Nothing, Bool},
+    )
+    return queries_sorted === nothing ?
+        issorted(queries; order = order) : queries_sorted
+end
+
+function _searchsortedlast_batched!(
+        idx_out, v::AbstractVector, queries::AbstractVector,
+        strategy::SearchStrategy, order::Base.Order.Ordering,
+        queries_sorted::Union{Nothing, Bool},
+    )
+    return if _take_sorted_path(queries, order, queries_sorted)
+        _searchsortedlast_sorted_loop!(idx_out, v, queries, strategy, order)
+    else
+        _searchsortedlast_unsorted_loop!(idx_out, v, queries, order)
+    end
+end
+
+# Specialized batched-Auto: pick an inner strategy from the n/m ratio, then
+# call the sorted loop directly (no duplicate `issorted` check, and each
+# branch is type-stable so the loop specializes on the concrete strategy).
+function _searchsortedlast_batched!(
+        idx_out, v::AbstractVector, queries::AbstractVector,
+        s::Auto, order::Base.Order.Ordering,
+        queries_sorted::Union{Nothing, Bool},
+    )
+    m = length(queries)
+    m == 0 && return idx_out
+    # m == 1: skip the issorted + span heuristic — no batched hint is
+    # available for a single-element batch, so just dispatch straight to
+    # the unhinted backing call. Saves ~20 ns of bookkeeping per call.
+    if m == 1
+        @inbounds idx_out[firstindex(idx_out)] =
+            searchsortedlast(v, queries[firstindex(queries)], order)
+        return idx_out
+    end
+    if !_take_sorted_path(queries, order, queries_sorted)
+        return _searchsortedlast_unsorted_loop!(idx_out, v, queries, order)
+    end
+    gap, skewed = _estimate_avg_gap(v, queries, m)
+    # Manually dispatch on the picked strategy so each branch is concrete.
+    if gap <= _AUTO_BATCH_LINEAR_GAP
+        return _searchsortedlast_sorted_loop!(
+            idx_out, v, queries, LinearScan(), order
+        )
+    end
+    # Medium-gap regime: SIMDLinearScan wins on `DenseVector{Int64}` and
+    # `DenseVector{Float64}` (without NaN).
+    if gap <= _auto_simd_gap_max(v) && _auto_simd_eligible(v, s.props)
+        return _searchsortedlast_sorted_loop!(
+            idx_out, v, queries, SIMDLinearScan(), order
+        )
+    end
+    # Sparse-on-large-linear: InterpolationSearch wins ~2× over ExpFromLeft
+    # on uniformly-spaced data — but only when queries are *also* spread
+    # roughly uniformly within their span.
+    if !skewed &&
+            gap >= _AUTO_INTERP_MIN_GAP &&
+            length(v) >= _AUTO_INTERP_MIN_N &&
+            m >= _AUTO_INTERP_MIN_M &&
+            _auto_interp_eligible(v, s.props, gap)
+        return _searchsortedlast_sorted_loop!(
+            idx_out, v, queries, InterpolationSearch(), order
+        )
+    end
+    # Sparse fallback: BracketGallop beats ExpFromLeft when the gap is large
+    # enough that ExpFromLeft's initial 5 linear probes are guaranteed to
+    # miss. BracketGallop starts doubling from one position past `hint`, so
+    # it skips the wasted linear preamble.
+    if gap >= _AUTO_GALLOP_GAP_MIN
+        return _searchsortedlast_sorted_loop!(
+            idx_out, v, queries, BracketGallop(), order
+        )
+    end
+    return _searchsortedlast_sorted_loop!(
+        idx_out, v, queries, ExpFromLeft(), order
+    )
+end
+
+function _searchsortedfirst_batched!(
+        idx_out, v::AbstractVector, queries::AbstractVector,
+        strategy::SearchStrategy, order::Base.Order.Ordering,
+        queries_sorted::Union{Nothing, Bool},
+    )
+    return if _take_sorted_path(queries, order, queries_sorted)
+        _searchsortedfirst_sorted_loop!(idx_out, v, queries, strategy, order)
+    else
+        _searchsortedfirst_unsorted_loop!(idx_out, v, queries, order)
+    end
+end
+
+function _searchsortedfirst_batched!(
+        idx_out, v::AbstractVector, queries::AbstractVector,
+        s::Auto, order::Base.Order.Ordering,
+        queries_sorted::Union{Nothing, Bool},
+    )
+    m = length(queries)
+    m == 0 && return idx_out
+    if m == 1
+        @inbounds idx_out[firstindex(idx_out)] =
+            searchsortedfirst(v, queries[firstindex(queries)], order)
+        return idx_out
+    end
+    if !_take_sorted_path(queries, order, queries_sorted)
+        return _searchsortedfirst_unsorted_loop!(idx_out, v, queries, order)
+    end
+    gap, skewed = _estimate_avg_gap(v, queries, m)
+    if gap <= _AUTO_BATCH_LINEAR_GAP
+        return _searchsortedfirst_sorted_loop!(
+            idx_out, v, queries, LinearScan(), order
+        )
+    end
+    if gap <= _auto_simd_gap_max(v) && _auto_simd_eligible(v, s.props)
+        return _searchsortedfirst_sorted_loop!(
+            idx_out, v, queries, SIMDLinearScan(), order
+        )
+    end
+    if !skewed &&
+            gap >= _AUTO_INTERP_MIN_GAP &&
+            length(v) >= _AUTO_INTERP_MIN_N &&
+            m >= _AUTO_INTERP_MIN_M &&
+            _auto_interp_eligible(v, s.props, gap)
+        return _searchsortedfirst_sorted_loop!(
+            idx_out, v, queries, InterpolationSearch(), order
+        )
+    end
+    if gap >= _AUTO_GALLOP_GAP_MIN
+        return _searchsortedfirst_sorted_loop!(
+            idx_out, v, queries, BracketGallop(), order
+        )
+    end
+    return _searchsortedfirst_sorted_loop!(
+        idx_out, v, queries, ExpFromLeft(), order
+    )
+end
+
+# ---------------------------------------------------------------------------
+# Range search through the strategy dispatch
+# ---------------------------------------------------------------------------
+
+"""
+    searchsortedrange(strategy, v, lo, hi[, hint]; order = Base.Order.Forward)
+        -> UnitRange{Int}
+
+Return the index range of all entries `v[i]` satisfying `lo ≤ v[i] ≤ hi`
+under `order`. Equivalent to
+`searchsortedfirst(strategy, v, lo[, hint]; order) :
+ searchsortedlast(strategy, v, hi[, hint]; order)` but expressed as a
+single call that may share bracket-discovery work between the two
+endpoints when the underlying strategy allows it.
+
+The empty range case (no `v[i]` lies in `[lo, hi]`) returns
+`searchsortedfirst(strategy, v, lo) : (searchsortedfirst(strategy, v, lo) - 1)`,
+matching `Base.searchsorted(v, lo)` for an absent value.
+
+When a `hint` is supplied it is used for both endpoint searches. Strategies
+that ignore the hint (`BinaryBracket`, `InterpolationSearch`) treat the
+hinted form as a pass-through.
+"""
+@inline function searchsortedrange(
+        strategy::SearchStrategy, v::AbstractVector, lo, hi;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    first_idx = searchsortedfirst(strategy, v, lo; order = order)
+    last_idx = searchsortedlast(strategy, v, hi; order = order)
+    return first_idx:last_idx
+end
+
+@inline function searchsortedrange(
+        strategy::SearchStrategy, v::AbstractVector, lo, hi, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    first_idx = searchsortedfirst(strategy, v, lo, hint; order = order)
+    last_idx = searchsortedlast(
+        strategy, v, hi, max(first_idx, hint); order = order
+    )
+    return first_idx:last_idx
+end
diff --git a/src/dispatch.jl b/src/dispatch.jl
new file mode 100644
index 0000000..f2a5575
--- /dev/null
+++ b/src/dispatch.jl
@@ -0,0 +1,583 @@
+# Per-strategy `Base.searchsortedlast` / `Base.searchsortedfirst` dispatch.
+# Each strategy gets one dispatch block plus any internal helpers it needs
+# (`bracketstrictlymonotonic*` for `BracketGallop`, `searchsortedfirstexp`
+# for `ExpFromLeft`, `_interp_guess` for `InterpolationSearch`, etc.).
+#
+# `Auto`'s per-query and batched dispatch lives in `auto.jl` / `batched.jl`;
+# `GuesserHint`'s lives in `guesser.jl`.
+
+# ===========================================================================
+# Bracket helpers — backing `BracketGallop`
+# ===========================================================================
+
+# Expanding-binary-search bracket around a guess. The `searchsortedlast`
+# polarity: when `x == v[guess]`, the answer is `>= guess` (gallop right).
+function bracketstrictlymonotonic(
+        v::AbstractVector,
+        x,
+        guess::T,
+        o::Base.Order.Ordering,
+    )::NTuple{2, keytype(v)} where {T <: Integer}
+    bottom = firstindex(v)
+    top = lastindex(v)
+    if guess < bottom || guess > top
+        return bottom, top
+    else
+        u = T(1)
+        lo, hi = guess, min(guess + u, top)
+        @inbounds if Base.Order.lt(o, x, v[lo])
+            while lo > bottom && Base.Order.lt(o, x, v[lo])
+                lo, hi = max(bottom, lo - u), lo
+                u += u
+            end
+        else
+            while hi < top && !Base.Order.lt(o, x, v[hi])
+                lo, hi = hi, min(top, hi + u)
+                u += u
+            end
+        end
+    end
+    return lo, hi
+end
+
+# Companion to `bracketstrictlymonotonic` for the `searchsortedfirst`
+# polarity. Original uses `lt(o, x, v[lo])` (i.e., `x < v[lo]`), which is
+# right for `searchsortedlast`: when `x == v[lo]`, the answer is `>= lo` so
+# we gallop right. For `searchsortedfirst`, when `x == v[lo]` the answer is
+# `<= lo` (look for earlier duplicates) — so we need the inverted polarity
+# `lt(o, v[lo], x)`. Without this, BracketGallop.searchsortedfirst returns
+# the wrong index when the hint lands on a run of duplicates.
+function bracketstrictlymonotonic_first(
+        v::AbstractVector,
+        x,
+        guess::T,
+        o::Base.Order.Ordering,
+    )::NTuple{2, keytype(v)} where {T <: Integer}
+    bottom = firstindex(v)
+    top = lastindex(v)
+    if guess < bottom || guess > top
+        return bottom, top
+    else
+        u = T(1)
+        lo, hi = guess, min(guess + u, top)
+        @inbounds if !Base.Order.lt(o, v[lo], x)
+            # v[lo] >= x → answer is <= lo, gallop left.
+            while lo > bottom && !Base.Order.lt(o, v[lo], x)
+                lo, hi = max(bottom, lo - u), lo
+                u += u
+            end
+        else
+            # v[lo] < x → answer is > lo, gallop right.
+            while hi < top && Base.Order.lt(o, v[hi], x)
+                lo, hi = hi, min(top, hi + u)
+                u += u
+            end
+        end
+    end
+    return lo, hi
+end
+
+# ===========================================================================
+# Exponential-search helper — backing `ExpFromLeft`
+# ===========================================================================
+
+# Exponential search forward from `lo`, then bounded binary search inside the
+# final bracket. Used internally by `ExpFromLeft`.
+Base.@propagate_inbounds function searchsortedfirstexp(
+        v::AbstractVector,
+        x,
+        lo::Integer = firstindex(v),
+        hi::Integer = lastindex(v),
+    )
+    # Linear search for first few elements
+    for i in 0:4
+        ind = lo + i
+        ind > hi && return ind
+        x <= v[ind] && return ind
+    end
+    # Exponential search with doubling steps
+    n = 3
+    tn2 = 2^n
+    tn2m1 = 2^(n - 1)
+    ind = lo + tn2
+    while ind <= hi
+        x <= v[ind] &&
+            return searchsortedfirst(v, x, lo + tn2 - tn2m1, ind, Base.Order.Forward)
+        tn2 *= 2
+        tn2m1 *= 2
+        ind = lo + tn2
+    end
+    return searchsortedfirst(v, x, lo + tn2 - tn2m1, hi, Base.Order.Forward)
+end
+
+# ===========================================================================
+# Strategy: BinaryBracket — ignore any hint, delegate to `Base`
+# ===========================================================================
+
+Base.searchsortedlast(
+    ::BinaryBracket, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(v, x, order)
+Base.searchsortedfirst(
+    ::BinaryBracket, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(v, x, order)
+Base.searchsortedlast(
+    s::BinaryBracket, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(s, v, x; order = order)
+Base.searchsortedfirst(
+    s::BinaryBracket, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(s, v, x; order = order)
+
+# ===========================================================================
+# Strategy: LinearScan — walk ±1 from the hint
+# ===========================================================================
+
+function Base.searchsortedlast(
+        ::LinearScan, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    lo, hi = firstindex(v), lastindex(v)
+    if hi < lo
+        return lo - 1   # empty vector
+    end
+    i = clamp(hint, lo, hi)
+    @inbounds if Base.Order.lt(order, x, v[i])
+        # v[i] > x → retreat
+        while i > lo
+            i -= 1
+            !Base.Order.lt(order, x, v[i]) && return i
+        end
+        return lo - 1   # x precedes all of v
+    else
+        # v[i] ≤ x → try to advance
+        while i < hi
+            Base.Order.lt(order, x, v[i + 1]) && return i
+            i += 1
+        end
+        return hi
+    end
+end
+
+function Base.searchsortedfirst(
+        ::LinearScan, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    lo, hi = firstindex(v), lastindex(v)
+    if hi < lo
+        return lo
+    end
+    i = clamp(hint, lo, hi)
+    @inbounds if Base.Order.lt(order, v[i], x)
+        # v[i] < x → advance
+        while i < hi
+            i += 1
+            !Base.Order.lt(order, v[i], x) && return i
+        end
+        return hi + 1   # x exceeds all of v
+    else
+        # v[i] ≥ x → try to retreat
+        while i > lo
+            !Base.Order.lt(order, v[i - 1], x) && (i -= 1; continue)
+            return i
+        end
+        return lo
+    end
+end
+
+# LinearScan without a hint falls back to BinaryBracket.
+Base.searchsortedlast(
+    s::LinearScan, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(
+    s::LinearScan, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(BinaryBracket(), v, x; order = order)
+
+# ===========================================================================
+# Strategy: SIMDLinearScan — specialized forward walk for DenseVector{Int64}
+# and DenseVector{Float64}. Backward walks reuse the scalar LinearScan path
+# (rare from a good hint, and the SIMD primitive only exists for the
+# forward-scan direction).
+# ===========================================================================
+
+@inline function _simdscan_last_specialized(
+        v::Union{DenseVector{Int64}, DenseVector{Float64}}, x, hint::Integer,
+    )
+    lo = firstindex(v)
+    hi = lastindex(v)
+    hi < lo && return lo - 1
+    i = clamp(hint, lo, hi)
+    @inbounds vi = v[i]
+    if vi > x
+        # Backward walk (scalar).
+        while i > lo
+            i -= 1
+            @inbounds v[i] <= x && return i
+        end
+        return lo - 1
+    end
+    i == hi && return hi
+    start = i + 1
+    len = hi - start + 1
+    offset = GC.@preserve v _simd_first_gt(x, pointer(v, start), Int64(len))
+    return offset < 0 ? hi : (start + offset) - 1
+end
+
+@inline function _simdscan_first_specialized(
+        v::Union{DenseVector{Int64}, DenseVector{Float64}}, x, hint::Integer,
+    )
+    lo = firstindex(v)
+    hi = lastindex(v)
+    hi < lo && return lo
+    i = clamp(hint, lo, hi)
+    @inbounds vi = v[i]
+    if vi < x
+        i == hi && return hi + 1
+        start = i + 1
+        len = hi - start + 1
+        offset = GC.@preserve v _simd_first_ge(x, pointer(v, start), Int64(len))
+        return offset < 0 ? hi + 1 : start + offset
+    end
+    while i > lo
+        @inbounds v[i - 1] >= x && (i -= 1; continue)
+        return i
+    end
+    return lo
+end
+
+function Base.searchsortedlast(
+        ::SIMDLinearScan, v::DenseVector{Int64}, x::Int64, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    order === Base.Order.Forward ||
+        return searchsortedlast(LinearScan(), v, x, hint; order = order)
+    return _simdscan_last_specialized(v, x, hint)
+end
+function Base.searchsortedlast(
+        ::SIMDLinearScan, v::DenseVector{Float64}, x::Float64, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    order === Base.Order.Forward ||
+        return searchsortedlast(LinearScan(), v, x, hint; order = order)
+    return _simdscan_last_specialized(v, x, hint)
+end
+function Base.searchsortedfirst(
+        ::SIMDLinearScan, v::DenseVector{Int64}, x::Int64, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    order === Base.Order.Forward ||
+        return searchsortedfirst(LinearScan(), v, x, hint; order = order)
+    return _simdscan_first_specialized(v, x, hint)
+end
+function Base.searchsortedfirst(
+        ::SIMDLinearScan, v::DenseVector{Float64}, x::Float64, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    order === Base.Order.Forward ||
+        return searchsortedfirst(LinearScan(), v, x, hint; order = order)
+    return _simdscan_first_specialized(v, x, hint)
+end
+
+# Other eltypes fall back to the scalar LinearScan walk.
+Base.searchsortedlast(
+    ::SIMDLinearScan, v::AbstractVector, x, hint::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(LinearScan(), v, x, hint; order = order)
+Base.searchsortedfirst(
+    ::SIMDLinearScan, v::AbstractVector, x, hint::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(LinearScan(), v, x, hint; order = order)
+
+# No hint → BinaryBracket.
+Base.searchsortedlast(
+    ::SIMDLinearScan, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(
+    ::SIMDLinearScan, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(BinaryBracket(), v, x; order = order)
+
+# ===========================================================================
+# Strategy: BracketGallop — bracketstrictlymonotonic + bounded binary search
+# ===========================================================================
+
+function Base.searchsortedlast(
+        ::BracketGallop, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    lo, hi = bracketstrictlymonotonic(v, x, hint, order)
+    return searchsortedlast(v, x, lo, hi, order)
+end
+
+function Base.searchsortedfirst(
+        ::BracketGallop, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    lo, hi = bracketstrictlymonotonic_first(v, x, hint, order)
+    return searchsortedfirst(v, x, lo, hi, order)
+end
+
+# BracketGallop without a hint falls back to BinaryBracket.
+Base.searchsortedlast(
+    ::BracketGallop, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(
+    ::BracketGallop, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(BinaryBracket(), v, x; order = order)
+
+# ===========================================================================
+# Strategy: ExpFromLeft — galloping forward from a left-bound hint.
+#
+# Contract: callers pass `hint` such that the answer is ≥ `hint`. When that
+# isn't true (hint is past the answer), we fall back to a full
+# `searchsortedlast`/`searchsortedfirst` — the batched-sorted loop sets
+# `hint = prev_result`, which always satisfies this for sorted queries, so
+# the fallback is only exercised by arbitrary single-query callers.
+# ===========================================================================
+
+function Base.searchsortedfirst(
+        ::ExpFromLeft, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    lo = firstindex(v)
+    hi = lastindex(v)
+    if isempty(v)
+        return lo
+    end
+    h = clamp(hint, lo, hi)
+    # `searchsortedfirst` semantics: smallest i with v[i] >= x. We can only
+    # gallop forward from `h` when v[h] < x (then the answer is strictly
+    # > h). When v[h] >= x, the first occurrence of `x` may be at index
+    # ≤ h (duplicates to the left), so fall back to a full search rather
+    # than risk skipping past earlier duplicates.
+    @inbounds if !Base.Order.lt(order, v[h], x)
+        return searchsortedfirst(v, x, order)
+    end
+    return order === Base.Order.Forward ?
+        searchsortedfirstexp(v, x, h, hi) :
+        searchsortedfirst(v, x, h, hi, order)
+end
+
+function Base.searchsortedlast(
+        ::ExpFromLeft, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    lo = firstindex(v)
+    hi = lastindex(v)
+    if isempty(v)
+        return lo - 1
+    end
+    h = clamp(hint, lo, hi)
+    @inbounds if Base.Order.lt(order, x, v[h])
+        return searchsortedlast(v, x, order)
+    end
+    if order === Base.Order.Forward
+        y = searchsortedfirstexp(v, x, h, hi)
+        return if y > hi
+            hi
+        else
+            @inbounds v[y] == x ? y : y - 1
+        end
+    else
+        return searchsortedlast(v, x, h, hi, order)
+    end
+end
+
+# ExpFromLeft without a hint falls back to BinaryBracket.
+Base.searchsortedlast(
+    ::ExpFromLeft, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(
+    ::ExpFromLeft, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(BinaryBracket(), v, x; order = order)
+
+# ===========================================================================
+# Strategy: InterpolationSearch — extrapolate a guess, then bounded binary
+# search around it.
+# ===========================================================================
+
+@inline function _interp_guess(v::AbstractVector, x, lo::Integer, hi::Integer)
+    @inbounds vlo = v[lo]
+    @inbounds vhi = v[hi]
+    span = vhi - vlo
+    iszero(span) && return lo
+    # Linear extrapolation: how far is x along [vlo, vhi]?
+    f = (x - vlo) / span
+    if !isfinite(f)
+        return f > 0 ? hi : lo
+    end
+    g = lo + round(Int, f * (hi - lo))
+    return clamp(g, lo, hi)
+end
+
+function Base.searchsortedlast(
+        ::InterpolationSearch, v::AbstractVector{<:Number}, x::Number;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    if order !== Base.Order.Forward
+        # Linear interpolation doesn't carry over to reverse order; fall back
+        return searchsortedlast(v, x, order)
+    end
+    lo, hi = firstindex(v), lastindex(v)
+    hi < lo && return lo - 1
+    g = _interp_guess(v, x, lo, hi)
+    return searchsortedlast(BracketGallop(), v, x, g; order = order)
+end
+
+function Base.searchsortedfirst(
+        ::InterpolationSearch, v::AbstractVector{<:Number}, x::Number;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    if order !== Base.Order.Forward
+        return searchsortedfirst(v, x, order)
+    end
+    lo, hi = firstindex(v), lastindex(v)
+    hi < lo && return lo
+    g = _interp_guess(v, x, lo, hi)
+    return searchsortedfirst(BracketGallop(), v, x, g; order = order)
+end
+
+# InterpolationSearch ignores any hint; pass-through.
+Base.searchsortedlast(
+    s::InterpolationSearch, v::AbstractVector{<:Number}, x::Number, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(s, v, x; order = order)
+Base.searchsortedfirst(
+    s::InterpolationSearch, v::AbstractVector{<:Number}, x::Number, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(s, v, x; order = order)
+
+# InterpolationSearch on non-numeric data falls back to BinaryBracket.
+Base.searchsortedlast(
+    ::InterpolationSearch, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(
+    ::InterpolationSearch, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(BinaryBracket(), v, x; order = order)
+Base.searchsortedlast(
+    s::InterpolationSearch, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(
+    s::InterpolationSearch, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(BinaryBracket(), v, x; order = order)
+
+# ===========================================================================
+# Strategy: BitInterpolationSearch — InterpolationSearch on the IEEE bit
+# pattern of positive Float64. Cheaper than reinterpret-as-array because we
+# only need two endpoint reads and one query bitcast per call.
+# ===========================================================================
+
+@inline function _bit_interp_guess_f64(
+        v::DenseVector{Float64}, x::Float64, lo::Integer, hi::Integer,
+    )
+    @inbounds vlo_bits = reinterpret(UInt64, v[lo])
+    @inbounds vhi_bits = reinterpret(UInt64, v[hi])
+    xu = reinterpret(UInt64, x)
+    span = vhi_bits - vlo_bits
+    iszero(span) && return lo
+    if xu <= vlo_bits
+        return lo
+    elseif xu >= vhi_bits
+        return hi
+    end
+    num = xu - vlo_bits
+    f = Float64(num) / Float64(span)
+    g = lo + round(Int, f * (hi - lo))
+    return clamp(g, lo, hi)
+end
+
+function Base.searchsortedlast(
+        ::BitInterpolationSearch, v::DenseVector{Float64}, x::Float64;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    if order !== Base.Order.Forward
+        return searchsortedlast(v, x, order)
+    end
+    lo, hi = firstindex(v), lastindex(v)
+    hi < lo && return lo - 1
+    @inbounds if v[lo] <= 0.0 || !isfinite(v[lo]) || x <= 0.0 || !isfinite(x)
+        return searchsortedlast(BinaryBracket(), v, x; order = order)
+    end
+    g = _bit_interp_guess_f64(v, x, lo, hi)
+    return searchsortedlast(BracketGallop(), v, x, g; order = order)
+end
+
+function Base.searchsortedfirst(
+        ::BitInterpolationSearch, v::DenseVector{Float64}, x::Float64;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    if order !== Base.Order.Forward
+        return searchsortedfirst(v, x, order)
+    end
+    lo, hi = firstindex(v), lastindex(v)
+    hi < lo && return lo
+    @inbounds if v[lo] <= 0.0 || !isfinite(v[lo]) || x <= 0.0 || !isfinite(x)
+        return searchsortedfirst(BinaryBracket(), v, x; order = order)
+    end
+    g = _bit_interp_guess_f64(v, x, lo, hi)
+    return searchsortedfirst(BracketGallop(), v, x, g; order = order)
+end
+
+# Hint pass-through (bit-interp ignores externally-supplied hints).
+Base.searchsortedlast(
+    s::BitInterpolationSearch, v::DenseVector{Float64}, x::Float64, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(s, v, x; order = order)
+Base.searchsortedfirst(
+    s::BitInterpolationSearch, v::DenseVector{Float64}, x::Float64, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(s, v, x; order = order)
+
+# Non-Float64 / non-dense eltypes: fall back to plain InterpolationSearch.
+Base.searchsortedlast(
+    ::BitInterpolationSearch, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(InterpolationSearch(), v, x; order = order)
+Base.searchsortedfirst(
+    ::BitInterpolationSearch, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(InterpolationSearch(), v, x; order = order)
+Base.searchsortedlast(
+    ::BitInterpolationSearch, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(InterpolationSearch(), v, x; order = order)
+Base.searchsortedfirst(
+    ::BitInterpolationSearch, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(InterpolationSearch(), v, x; order = order)
+
+# ===========================================================================
+# Strategy: BisectThenSIMD — equality-search; positional dispatch falls back
+# to BinaryBracket. (The `findequal(BisectThenSIMD(), v, x)` shortcut lives
+# in `findequal.jl`.)
+# ===========================================================================
+
+Base.searchsortedlast(
+    ::BisectThenSIMD, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(
+    ::BisectThenSIMD, v::AbstractVector, x;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(BinaryBracket(), v, x; order = order)
+Base.searchsortedlast(
+    s::BisectThenSIMD, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(BinaryBracket(), v, x; order = order)
+Base.searchsortedfirst(
+    s::BisectThenSIMD, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(BinaryBracket(), v, x; order = order)
diff --git a/src/equality.jl b/src/equality.jl
new file mode 100644
index 0000000..14feb03
--- /dev/null
+++ b/src/equality.jl
@@ -0,0 +1,73 @@
+# Equality-search public surface. `findfirstequal` handles unsorted vectors
+# (with a SIMD specialization for `DenseVector{Int64}`); `findfirstsortedequal`
+# handles sorted vectors via bisect-then-SIMD. The strategy-framework
+# wrapper `findequal(strategy, v, x)` lives in `findequal.jl`.
+
+"""
+    findfirstequal(x, A) -> Union{Int, Nothing}
+
+Find the first index in `A` where the value equals `x`. Returns `nothing`
+if `x` does not occur in `A`.
+
+This function does **not** assume `A` is sorted. For sorted vectors, see
+[`findfirstsortedequal`](@ref) (a bisect-then-SIMD specialization on
+`DenseVector{Int64}`) or [`findequal`](@ref) (the strategy-framework
+equality wrapper that returns an `Int` with a sentinel).
+
+The `(x::Int64, A::DenseVector{Int64})` method uses a custom LLVM IR SIMD
+scan (load 8 lanes, `icmp eq`, `cttz` on the mask) — about 8× faster than
+the scalar `findfirst(==(x), v)` on modern x86-64. Every other element-type
+and array-storage combination falls back to `findfirst(isequal(x), A)`.
+"""
+findfirstequal(vpivot, ivars) = findfirst(isequal(vpivot), ivars)
+function findfirstequal(vpivot::Int64, ivars::DenseVector{Int64})
+    GC.@preserve ivars begin
+        ret = _findfirstequal(vpivot, pointer(ivars), length(ivars))
+    end
+    return ret < 0 ? nothing : ret + 1
+end
+
+"""
+    findfirstsortedequal(var::Int64, vars::DenseVector{Int64}) -> Union{Int64, Nothing}
+
+Find the index of the first occurrence of `var` in the sorted vector
+`vars`. Returns `nothing` if `var` does not occur. Specialized for
+`DenseVector{Int64}` via a branchless binary bisection down to a small
+basecase, followed by the same SIMD equality scan that backs
+[`findfirstequal`](@ref) — faster than plain `findfirst(==(var), vars)`
+or `searchsortedfirst` + post-check for typical Int64 vectors.
+
+The strategy-framework equivalent is
+[`findequal(BisectThenSIMD(), vars, var)`](@ref findequal); that wrapper
+returns an `Int` with a sentinel (`firstindex(v) - 1`) for "not found",
+which is type-stable and composes with the rest of the strategy
+dispatch. Prefer `findequal` for new code; `findfirstsortedequal` remains
+as the dedicated `Union{Int64, Nothing}`-returning name.
+"""
+function findfirstsortedequal(
+        var::Int64,
+        vars::DenseVector{Int64},
+        ::Val{basecase} = Base.libllvm_version >= v"17" ? Val(8) : Val(128),
+    ) where {basecase}
+    len = length(vars)
+    offset = 0
+    @inbounds while len > basecase
+        # Bisect with the predicate `vars[mid] < var` (strict). When true,
+        # `var` is past the midpoint — drop the left half *and* the
+        # midpoint itself. When false (`vars[mid] >= var`), `var` may be
+        # at the midpoint, so keep `offset` and shrink the window to
+        # `vars[offset+1 .. offset+half+1]` (inclusive of the midpoint).
+        # The earlier `<=` predicate would have advanced past matching
+        # midpoints, masking earlier duplicates of `var`.
+        half = len >>> 1
+        mid = offset + half + 1
+        is_left_strictly_less = vars[mid] < var
+        offset = ifelse(is_left_strictly_less, offset + half + 1, offset)
+        len = ifelse(is_left_strictly_less, len - half - 1, half + 1)
+    end
+    # maybe occurs in vars[offset+1:offset+len]
+    GC.@preserve vars begin
+        ret = _findfirstequal(var, pointer(vars) + 8offset, len)
+    end
+    return ret < 0 ? nothing : ret + offset + 1
+end
diff --git a/src/findequal.jl b/src/findequal.jl
new file mode 100644
index 0000000..c7a9807
--- /dev/null
+++ b/src/findequal.jl
@@ -0,0 +1,91 @@
+# Strategy-framework equality search: `findequal(strategy, v, x[, hint])`.
+# Returns an `Int` with the sentinel `firstindex(v) - 1` for "not found"
+# (matching `Base.searchsortedlast`'s convention for "x precedes all of v").
+# The `BisectThenSIMD` shortcut for `DenseVector{Int64}` dispatches into
+# `findfirstsortedequal` directly; every other strategy composes via
+# `searchsortedfirst` + a post-check.
+
+"""
+    findequal(strategy, v, x[, hint]; order = Base.Order.Forward) -> Int
+
+Return the index of `x` in sorted `v` if present, or the sentinel
+`firstindex(v) - 1` if `x` is absent. Type-stable `Int` return — the
+sentinel matches the convention `Base.searchsortedlast` already uses for
+"x precedes all of v", so callers can test for "not found" with
+`i < firstindex(v)`.
+
+For vectors with 1-based indexing (the Julia default), the sentinel is
+exactly `0`, which is also `searchsortedlast`'s "x precedes all" return.
+For [OffsetArrays](https://github.com/JuliaArrays/OffsetArrays.jl) and any
+other vector whose `firstindex` is not `1`, the sentinel adjusts
+accordingly — e.g. for a vector with `firstindex == -3`, the sentinel is
+`-4`. Always test against `firstindex(v) - 1` (or equivalently
+`i < firstindex(v)`), not against the literal `0`.
+
+Most strategies are handled generically: run
+`searchsortedfirst(strategy, v, x[, hint])` to find the candidate insertion
+point, then check whether `v[i]` actually equals `x`. The shortcut method
+on [`BisectThenSIMD`](@ref) for `DenseVector{Int64}` skips the
+`searchsortedfirst` path entirely and uses the dedicated bisect-then-SIMD
+equality scan that backs [`findfirstsortedequal`](@ref).
+
+For unsorted vectors, use [`findfirstequal`](@ref) — it does not require
+a sorted input and falls outside the strategy framework.
+"""
+@inline function findequal(
+        strategy::SearchStrategy, v::AbstractVector, x;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    return _findequal_generic(strategy, v, x, order)
+end
+
+@inline function findequal(
+        strategy::SearchStrategy, v::AbstractVector, x, hint::Integer;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    i = searchsortedfirst(strategy, v, x, hint; order = order)
+    return _findequal_postcheck(v, x, i)
+end
+
+@inline function _findequal_generic(strategy, v, x, order)
+    i = searchsortedfirst(strategy, v, x; order = order)
+    return _findequal_postcheck(v, x, i)
+end
+
+@inline function _findequal_postcheck(v::AbstractVector, x, i::Integer)
+    if i > lastindex(v)
+        return firstindex(v) - 1
+    end
+    @inbounds return isequal(v[i], x) ? Int(i) : (firstindex(v) - 1)
+end
+
+# Shortcut: BisectThenSIMD on DenseVector{Int64} uses the dedicated bisect-
+# then-SIMD equality scan (same algorithm as `findfirstsortedequal`).
+function findequal(
+        ::BisectThenSIMD, v::DenseVector{Int64}, x::Int64;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    if order !== Base.Order.Forward
+        return _findequal_generic(BinaryBracket(), v, x, order)
+    end
+    r = findfirstsortedequal(x, v)
+    return r === nothing ? (firstindex(v) - 1) : r
+end
+# Hinted form ignores the hint — the bisect-then-SIMD algorithm does not
+# benefit from a hint, and probing it would only waste cycles.
+findequal(
+    s::BisectThenSIMD, v::DenseVector{Int64}, x::Int64, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = findequal(s, v, x; order = order)
+
+# Non-Int64 fallback for BisectThenSIMD: use BinaryBracket + post-check.
+function findequal(
+        ::BisectThenSIMD, v::AbstractVector, x;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    return _findequal_generic(BinaryBracket(), v, x, order)
+end
+findequal(
+    s::BisectThenSIMD, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = findequal(s, v, x; order = order)
diff --git a/src/guesser.jl b/src/guesser.jl
new file mode 100644
index 0000000..e3e4b61
--- /dev/null
+++ b/src/guesser.jl
@@ -0,0 +1,109 @@
+# `Guesser` correlated-lookup helper + the public `looks_linear` probe it
+# uses + the `GuesserHint` strategy dispatch that plugs a `Guesser` into the
+# `searchsortedfirst`/`searchsortedlast` API.
+
+"""
+    looks_linear(v; threshold = 1e-2)
+
+Determine if the abscissae `v` are regularly distributed, taking the standard deviation of
+the difference between the array of abscissae with respect to the straight line linking
+its first and last elements, normalized by the range of `v`. If this standard deviation is
+below the given `threshold`, the vector looks linear (return true). Internal function -
+interface may change.
+"""
+function looks_linear(v; threshold = 1.0e-2)
+    length(v) <= 2 && return true
+    x_0, x_f = first(v), last(v)
+    N = length(v)
+    x_span = x_f - x_0
+    mean_x_dist = x_span / (N - 1)
+    norm_var = sum((x_i - x_0 - (i - 1) * mean_x_dist)^2 for (i, x_i) in enumerate(v)) /
+        (N * x_span^2)
+    return norm_var < threshold^2
+end
+
+"""
+    Guesser(v::AbstractVector; looks_linear_threshold = 1e-2)
+
+Wrapper of the searched vector `v` which makes an informed guess for the next
+correlated lookup by either
+
+  - exploiting that `v` is sufficiently evenly spaced (linear-extrapolation guess), or
+  - using the previous outcome (the cached `idx_prev`).
+
+Pass a `Guesser` to [`GuesserHint`](@ref) to use it as a search strategy with
+the dispatched [`searchsortedlast`](@ref Base.searchsortedlast) /
+[`searchsortedfirst`](@ref Base.searchsortedfirst) API.
+"""
+struct Guesser{T <: AbstractVector}
+    v::T
+    idx_prev::Base.RefValue{Int}
+    linear_lookup::Bool
+end
+
+function Guesser(v::AbstractVector; looks_linear_threshold = 1.0e-2)
+    return Guesser(v, Ref(1), looks_linear(v; threshold = looks_linear_threshold))
+end
+
+function (g::Guesser)(x)
+    (; v, idx_prev, linear_lookup) = g
+    return if linear_lookup
+        δx = x - first(v)
+        iszero(δx) && return firstindex(v)
+        f = δx / (last(v) - first(v))
+        if isinf(f)
+            f > 0 ? lastindex(v) : firstindex(v)
+        else
+            i_0, i_f = firstindex(v), lastindex(v)
+            i_approx = f * (i_f - i_0) + i_0
+            target_type = typeof(firstindex(v))
+            if i_approx >= typemax(target_type)
+                lastindex(v) + 1
+            elseif i_approx <= typemin(target_type)
+                firstindex(v) - 1
+            else
+                round(target_type, i_approx)
+            end
+        end
+    else
+        idx_prev[]
+    end
+end
+
+# Note on ranges: `Base.searchsortedlast(r::AbstractRange, x, order)` is
+# already O(1) (closed-form), so the strategies' fallback path through
+# `BinaryBracket` (which delegates to that Base method) is already optimal
+# for ranges. No special-case overlays needed.
+
+# GuesserHint methods — strategy dispatch wrapper for the `Guesser`-based
+# correlated search. Per-call cost: one `guesser(x)` evaluation + one
+# `BracketGallop` call + one `idx_prev[]` write.
+function Base.searchsortedlast(
+        s::GuesserHint, v::AbstractVector, x;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    @assert v === s.guesser.v
+    out = searchsortedlast(BracketGallop(), v, x, s.guesser(x); order = order)
+    s.guesser.idx_prev[] = out
+    return out
+end
+
+function Base.searchsortedfirst(
+        s::GuesserHint, v::AbstractVector, x;
+        order::Base.Order.Ordering = Base.Order.Forward,
+    )
+    @assert v === s.guesser.v
+    out = searchsortedfirst(BracketGallop(), v, x, s.guesser(x); order = order)
+    s.guesser.idx_prev[] = out
+    return out
+end
+
+# GuesserHint ignores any externally-supplied hint (the Guesser carries its own).
+Base.searchsortedlast(
+    s::GuesserHint, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedlast(s, v, x; order = order)
+Base.searchsortedfirst(
+    s::GuesserHint, v::AbstractVector, x, ::Integer;
+    order::Base.Order.Ordering = Base.Order.Forward,
+) = searchsortedfirst(s, v, x; order = order)
diff --git a/src/precompile.jl b/src/precompile.jl
new file mode 100644
index 0000000..93c5ec1
--- /dev/null
+++ b/src/precompile.jl
@@ -0,0 +1,58 @@
+# PrecompileTools workload — exercises the most commonly used public
+# entry points across typical eltypes so `using FindFirstFunctions` is hot
+# without a startup compile spike on the first call.
+
+using PrecompileTools: @compile_workload, @setup_workload
+
+@setup_workload begin
+    vec_int64 = Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+    linear_vec = collect(1.0:0.5:10.0)
+
+    @compile_workload begin
+        # findfirstequal: fast SIMD-based search in Int64 vectors.
+        findfirstequal(Int64(5), vec_int64)
+        findfirstequal(Int64(100), vec_int64)
+
+        # findfirstsortedequal: binary search in sorted Int64 vectors.
+        findfirstsortedequal(Int64(8), vec_int64)
+        findfirstsortedequal(Int64(100), vec_int64)
+
+        # looks_linear: check if vector is evenly spaced.
+        looks_linear(linear_vec)
+
+        # Guesser: hint provider for correlated repeated searches.
+        guesser = Guesser(linear_vec)
+        guesser(5.0)
+
+        # Strategy dispatch — single-query forms across the standard strategies.
+        for strategy in (
+                LinearScan(), SIMDLinearScan(), BracketGallop(), ExpFromLeft(),
+                InterpolationSearch(), BinaryBracket(), Auto(),
+                Auto(SearchProperties(linear_vec)),
+            )
+            searchsortedfirst(strategy, vec_int64, Int64(8), Int64(1))
+            searchsortedlast(strategy, vec_int64, Int64(8), Int64(1))
+        end
+        # findequal: generic + BisectThenSIMD shortcut for Int64 dense vectors.
+        for strategy in (
+                BinaryBracket(), BracketGallop(), SIMDLinearScan(),
+                BisectThenSIMD(), Auto(),
+            )
+            findequal(strategy, vec_int64, Int64(8))
+            findequal(strategy, vec_int64, Int64(8), Int64(1))
+        end
+        # SIMDLinearScan's Float64 specialization.
+        let vec_f64 = collect(1.0:1.0:16.0)
+            searchsortedfirst(SIMDLinearScan(), vec_f64, 8.0, 1)
+            searchsortedlast(SIMDLinearScan(), vec_f64, 8.0, 1)
+        end
+        searchsortedfirst(GuesserHint(Guesser(vec_int64)), vec_int64, Int64(8))
+        searchsortedlast(GuesserHint(Guesser(vec_int64)), vec_int64, Int64(8))
+
+        # Strategy dispatch — batched in-place forms.
+        idx_out = Vector{Int}(undef, 4)
+        queries = Int64[2, 5, 8, 12]
+        searchsortedfirst!(idx_out, vec_int64, queries)
+        searchsortedlast!(idx_out, vec_int64, queries)
+    end
+end
diff --git a/src/search_properties.jl b/src/search_properties.jl
new file mode 100644
index 0000000..f0507bf
--- /dev/null
+++ b/src/search_properties.jl
@@ -0,0 +1,120 @@
+# `SearchProperties` populated constructor + the runtime probes it runs at
+# construction time. The `SearchProperties` struct itself, the default
+# constructor (`SearchProperties()` sentinel), and the `Auto` type that
+# carries one as a field all live in `strategies.jl`.
+
+# `has_nan` is only meaningful for floating-point eltypes; for other numeric
+# types and non-numeric eltypes there is no NaN concept.
+@inline _has_nan(v::AbstractVector{<:AbstractFloat}) = any(isnan, v)
+@inline _has_nan(::AbstractVector) = false
+
+# Auto's strict linearity tolerance. `SearchProperties(v)` uses the same
+# default. Defined here rather than in `auto.jl` because `_sampled_looks_*`
+# need it as a default arg; `auto.jl` re-uses the same constant.
+const _AUTO_LINEAR_REL_TOLERANCE = 1.0e-3
+
+# Sampled linearity check: probes v[1], v[k*n/10] for k = 1..9, v[n] and
+# tests whether all interior points sit within `_AUTO_LINEAR_REL_TOLERANCE`
+# (default 0.1%) of the straight line between v[1] and v[n]. The tight
+# tolerance reliably distinguishes truly-uniform data from random/sorted
+# data even at large n (where the order-statistic variance ≈ 1/sqrt(n)
+# would fool a looser check).
+#
+# Cost is ~25 ns regardless of n — 11 reads + 9 comparisons. Used by Auto
+# to decide whether to gamble on InterpolationSearch.
+#
+# InterpolationSearch's downside on non-linear data is large (4-14× slower
+# than ExpFromLeft on log/plateau/two-scale spacings), so we err on the
+# side of rejecting borderline cases. Truly uniform data — exact ranges,
+# evenly-spaced grids, and small-amplitude jittered data — passes; sorted
+# random data is rejected at all `n` tested up to ~10⁶.
+@inline function _sampled_looks_linear(
+        v::AbstractVector{<:Number},
+        tol::Float64 = _AUTO_LINEAR_REL_TOLERANCE,
+    )
+    n = length(v)
+    n < 11 && return false
+    @inbounds begin
+        v1, vn = v[1], v[n]
+        span = vn - v1
+        (iszero(span) || !isfinite(span)) && return false
+        abs_span = abs(span)
+        nm1 = n - 1
+        for k in 1:9
+            kk = 1 + (k * nm1) ÷ 10
+            expected = v1 + (kk - 1) / nm1 * span
+            rel_err = abs(v[kk] - expected) / abs_span
+            rel_err > tol && return false
+        end
+    end
+    return true
+end
+
+# Non-numeric eltype: can't sample, never picks InterpolationSearch.
+@inline _sampled_looks_linear(::AbstractVector, ::Float64 = _AUTO_LINEAR_REL_TOLERANCE) = false
+
+# AbstractRange is definitionally uniform — accept without sampling.
+@inline _sampled_looks_linear(::AbstractRange, ::Float64 = _AUTO_LINEAR_REL_TOLERANCE) = true
+
+# Sampled "log-linear" probe: same 9-point probe as `_sampled_looks_linear`
+# but tests whether `log(v)` is linear in array index. Used to detect
+# geometric / log-spaced data where `BitInterpolationSearch` is a win.
+# Requires all sampled points to be strictly positive and finite; otherwise
+# returns false. The probe runs in ~30 ns (the `log` calls are not cheap
+# but only 11 of them, all at fixed index positions).
+@inline function _sampled_looks_log_linear(
+        v::AbstractVector{<:Real},
+        tol::Float64 = _AUTO_LINEAR_REL_TOLERANCE,
+    )
+    n = length(v)
+    n < 11 && return false
+    @inbounds begin
+        v1, vn = v[1], v[n]
+        (v1 <= 0 || vn <= 0 || !isfinite(v1) || !isfinite(vn)) && return false
+        log_v1 = log(Float64(v1))
+        log_vn = log(Float64(vn))
+        span = log_vn - log_v1
+        (iszero(span) || !isfinite(span)) && return false
+        abs_span = abs(span)
+        nm1 = n - 1
+        for k in 1:9
+            kk = 1 + (k * nm1) ÷ 10
+            vk = v[kk]
+            (vk <= 0 || !isfinite(vk)) && return false
+            expected = log_v1 + (kk - 1) / nm1 * span
+            rel_err = abs(log(Float64(vk)) - expected) / abs_span
+            rel_err > tol && return false
+        end
+    end
+    return true
+end
+
+@inline _sampled_looks_log_linear(::AbstractVector, ::Float64 = _AUTO_LINEAR_REL_TOLERANCE) = false
+
+"""
+    SearchProperties(v::AbstractVector; linear_tolerance = 1.0e-3)
+
+Run the linearity probe and (for floating-point eltypes) the NaN scan on `v`,
+returning the populated [`SearchProperties`](@ref). Cost is O(n) on
+floating-point vectors because of the NaN scan; for integer and non-numeric
+eltypes the cost is O(1) — only the sampled-linearity probe runs.
+
+`linear_tolerance` controls the maximum relative deviation accepted by the
+sampled-linearity probe. The default `1e-3` (0.1%) matches `Auto`'s
+un-cached probe behaviour. Loosen it (e.g. to `1e-2`) to accept noisier
+"approximately linear" data — this widens the regime where `Auto` will pick
+`InterpolationSearch` over `ExpFromLeft`. Tighten it (e.g. to `1e-4`) to be
+more conservative.
+"""
+function SearchProperties(
+        v::AbstractVector;
+        linear_tolerance::Real = 1.0e-3,
+    )
+    tol = Float64(linear_tolerance)
+    return SearchProperties(
+        true,
+        _sampled_looks_linear(v, tol),
+        _has_nan(v),
+        _sampled_looks_log_linear(v, tol),
+    )
+end
diff --git a/src/simd_ir.jl b/src/simd_ir.jl
new file mode 100644
index 0000000..1c543da
--- /dev/null
+++ b/src/simd_ir.jl
@@ -0,0 +1,135 @@
+# SIMD LLVM IR scaffolding shared by every "find first lane matching
+# predicate" routine in the package:
+#
+#   - `FFE_IR` / `_findfirstequal`     — exact equality scan, Int64
+#                                         (predicate `icmp eq`)
+#   - `_SIMD_GT_I64_IR` / `_simd_first_gt(::Int64, …)`     — `icmp sgt`
+#   - `_SIMD_GE_I64_IR` / `_simd_first_ge(::Int64, …)`     — `icmp sge`
+#   - `_SIMD_GT_F64_IR` / `_simd_first_gt(::Float64, …)`   — `fcmp ogt`
+#   - `_SIMD_GE_F64_IR` / `_simd_first_ge(::Float64, …)`   — `fcmp oge`
+#
+# All five IR strings are generated from the same template, keyed on scalar
+# LLVM type (`i64`, `double`) and compare predicate. Adding a new predicate
+# is a single `_simd_scan_ir(t, pred)` call plus a corresponding
+# `Base.llvmcall` wrapper.
+
+# Generate the SIMD "find first lane matching predicate" IR for an arbitrary
+# scalar type and LLVM compare predicate. Load 8 lanes at a time, compare
+# against a broadcast of the search value, bitcast the i1×8 mask to i8,
+# `cttz` to find the first set bit. The tail past the last full chunk is
+# handled scalar-wise.
+function _simd_scan_ir(t, pred)
+    cmp = pred[1] in ('o', 'u') ? "fcmp" : "icmp"
+    return """
+    declare i8 @llvm.cttz.i8(i8, i1);
+    define i64 @entry($t %0, $(USE_PTR ? "ptr" : "i64") %1, i64 %2) #0 {
+    top:
+      $(USE_PTR ? "" : "%ivars = inttoptr i64 %1 to $t*")
+      %btmp = insertelement <8 x $t> undef, $t %0, i64 0
+      %var = shufflevector <8 x $t> %btmp, <8 x $t> undef, <8 x i32> zeroinitializer
+      %lenm7 = add nsw i64 %2, -7
+      %dosimditer = icmp ugt i64 %2, 7
+      br i1 %dosimditer, label %L9.lr.ph, label %L32
+
+    L9.lr.ph:
+      %len8 = and i64 %2, 9223372036854775800
+      br label %L9
+
+    L9:
+      %i = phi i64 [ 0, %L9.lr.ph ], [ %vinc, %L30 ]
+      %ivarsi = getelementptr inbounds $t, $(USE_PTR ? "ptr %1" : "$t* %ivars"), i64 %i
+      $(USE_PTR ? "" : "%vpvi = bitcast $t* %ivarsi to <8 x $t>*")
+      %v = load <8 x $t>, $(USE_PTR ? "ptr %ivarsi" : "<8 x $t> * %vpvi"), align 8
+      %m = $cmp $pred <8 x $t> %v, %var
+      %mu = bitcast <8 x i1> %m to i8
+      %matchnotfound = icmp eq i8 %mu, 0
+      br i1 %matchnotfound, label %L30, label %L17
+
+    L17:
+      %tz8 = call i8 @llvm.cttz.i8(i8 %mu, i1 true)
+      %tz64 = zext i8 %tz8 to i64
+      %vis = add nuw i64 %i, %tz64
+      br label %common.ret
+
+    common.ret:
+      %retval = phi i64 [ %vis, %L17 ], [ -1, %L32 ], [ %si, %L51 ], [ -1, %L67 ]
+      ret i64 %retval
+
+    L30:
+      %vinc = add nuw nsw i64 %i, 8
+      %continue = icmp slt i64 %vinc, %lenm7
+      br i1 %continue, label %L9, label %L32
+
+    L32:
+      %cumi = phi i64 [ 0, %top ], [ %len8, %L30 ]
+      %done = icmp eq i64 %cumi, %2
+      br i1 %done, label %common.ret, label %L51
+
+    L51:
+      %si = phi i64 [ %inc, %L67 ], [ %cumi, %L32 ]
+      %spi = getelementptr inbounds $t, $(USE_PTR ? "ptr %1" : "$t* %ivars"), i64 %si
+      %svi = load $t, $(USE_PTR ? "ptr" : "$t*") %spi, align 8
+      %match = $cmp $pred $t %svi, %0
+      br i1 %match, label %common.ret, label %L67
+
+    L67:
+      %inc = add i64 %si, 1
+      %dobreak = icmp eq i64 %inc, %2
+      br i1 %dobreak, label %common.ret, label %L51
+
+    }
+    attributes #0 = { alwaysinline }
+    """
+end
+
+const FFE_IR = _simd_scan_ir("i64", "eq")
+
+function _findfirstequal(vpivot::Int64, ptr::Ptr{Int64}, len::Int64)
+    return Base.llvmcall(
+        (FFE_IR, "entry"),
+        Int64,
+        Tuple{Int64, Ptr{Int64}, Int64},
+        vpivot,
+        ptr,
+        len
+    )
+end
+
+const _SIMD_GT_I64_IR = _simd_scan_ir("i64", "sgt")
+const _SIMD_GE_I64_IR = _simd_scan_ir("i64", "sge")
+const _SIMD_GT_F64_IR = _simd_scan_ir("double", "ogt")
+const _SIMD_GE_F64_IR = _simd_scan_ir("double", "oge")
+
+# Backing primitives for SIMDLinearScan. Each returns the 0-based offset of
+# the first lane satisfying the predicate, or -1 if none. Caveat: NaN inputs
+# always compare false under the ordered `o*` float predicates, so NaN in `v`
+# or `x` produces "no match" rather than an exception — consistent with the
+# undefined-input contract for sorted Float64 vectors containing NaN.
+function _simd_first_gt(x::Int64, ptr::Ptr{Int64}, len::Int64)
+    return Base.llvmcall(
+        (_SIMD_GT_I64_IR, "entry"),
+        Int64, Tuple{Int64, Ptr{Int64}, Int64},
+        x, ptr, len
+    )
+end
+function _simd_first_ge(x::Int64, ptr::Ptr{Int64}, len::Int64)
+    return Base.llvmcall(
+        (_SIMD_GE_I64_IR, "entry"),
+        Int64, Tuple{Int64, Ptr{Int64}, Int64},
+        x, ptr, len
+    )
+end
+function _simd_first_gt(x::Float64, ptr::Ptr{Float64}, len::Int64)
+    return Base.llvmcall(
+        (_SIMD_GT_F64_IR, "entry"),
+        Int64, Tuple{Float64, Ptr{Float64}, Int64},
+        x, ptr, len
+    )
+end
+function _simd_first_ge(x::Float64, ptr::Ptr{Float64}, len::Int64)
+    return Base.llvmcall(
+        (_SIMD_GE_F64_IR, "entry"),
+        Int64, Tuple{Float64, Ptr{Float64}, Int64},
+        x, ptr, len
+    )
+end
diff --git a/src/strategies.jl b/src/strategies.jl
new file mode 100644
index 0000000..e70263c
--- /dev/null
+++ b/src/strategies.jl
@@ -0,0 +1,268 @@
+# Sorted-search strategy type hierarchy. All concrete strategies live here;
+# their `Base.searchsortedfirst` / `Base.searchsortedlast` method definitions
+# live in `dispatch.jl` and `auto.jl`. The `SearchProperties` cache type
+# is defined here too because `Auto` carries one as a field; its populated
+# constructor lives in `search_properties.jl`.
+
+"""
+    SearchStrategy
+
+Abstract supertype for sorted-search strategies. Concrete subtypes select how
+[`searchsortedlast`](@ref Base.searchsortedlast) and
+[`searchsortedfirst`](@ref Base.searchsortedfirst) should be performed when
+called with a strategy as the first positional argument:
+
+  - [`LinearScan`](@ref) walks ±1 from the hint. Cheapest when the target is
+    within a few positions of the hint; degrades linearly otherwise.
+  - [`SIMDLinearScan`](@ref) is `LinearScan` with the forward walk lowered to
+    8-wide SIMD chunks for `DenseVector{Int64}` and `DenseVector{Float64}`.
+    Falls back to plain [`LinearScan`](@ref) for any other element type.
+  - [`BracketGallop`](@ref) expands an exponential bracket bidirectionally
+    from the hint, then binary-searches inside it. Effectively O(1) when the
+    target is near the hint; never worse than ~2 log₂ n comparisons.
+  - [`ExpFromLeft`](@ref) expands rightward from a left-bound hint by
+    doubling, then binary-searches inside the final bracket. Best for batched
+    sorted queries where each next query's hint is the previous result.
+  - [`InterpolationSearch`](@ref) guesses the answer by linearly extrapolating
+    between `v[lo]` and `v[hi]`, then refines with a bounded binary search.
+    O(1) per query on uniformly-spaced data; falls back to O(log n) on
+    irregular data.
+  - [`BinaryBracket`](@ref) is the standard `Base.searchsortedlast` /
+    `Base.searchsortedfirst` with no hint. Use it when no useful hint exists.
+  - [`BisectThenSIMD`](@ref) is an equality-search strategy: binary-bisects
+    `v` to a small basecase, then SIMD-scans for exact equality. Specialised
+    for `DenseVector{Int64}`; only meaningful when used with
+    [`findequal`](@ref).
+  - [`Auto`](@ref) heuristically picks one of the above based on the size of
+    `v`, the spacing of `v`, and whether a hint was supplied. Accepts an
+    optional [`SearchProperties`](@ref) cache to skip per-call probes.
+
+Strategies can also be passed to the batched
+[`searchsortedlast!`](@ref) / [`searchsortedfirst!`](@ref) APIs.
+"""
+abstract type SearchStrategy end
+
+"""
+    LinearScan <: SearchStrategy
+
+Walk ±1 from the hint. Best when the target is within a few positions of the
+hint. Falls back to [`BinaryBracket`](@ref) when no hint is supplied.
+"""
+struct LinearScan <: SearchStrategy end
+
+"""
+    SIMDLinearScan <: SearchStrategy
+
+Variant of [`LinearScan`](@ref) whose forward walk is lowered to 8-wide
+SIMD chunks via custom LLVM IR. Specialized for `DenseVector{Int64}` and
+`DenseVector{Float64}`; for any other element type, falls back to plain
+[`LinearScan`](@ref). The backward walk (when the hint is past the
+answer) uses the scalar `LinearScan` path regardless of element type.
+
+Wins on long forward walks (≥ 8 elements past the hint). For walks of
+1–3 elements `LinearScan` is comparable — the SIMD chunk has constant
+setup overhead. Worst case is O(n / 8) which is still linear, so
+`SIMDLinearScan` is only `Auto`-relevant for small `n` or small-gap
+batches where plain `LinearScan` would have been picked anyway.
+
+Caveats:
+  - Element type must be exactly `Int64` or `Float64`. `Int32`,
+    `UInt64`, `Float32`, and user-defined numeric types all fall back to
+    scalar.
+  - Sorted-Float64 vectors containing `NaN` produce undefined results,
+    same as for any positional search on a vector that isn't totally
+    ordered.
+  - Falls back to [`BinaryBracket`](@ref) when no hint is supplied.
+  - Falls back to [`LinearScan`](@ref) for non-`Forward` orderings.
+"""
+struct SIMDLinearScan <: SearchStrategy end
+
+"""
+    BracketGallop <: SearchStrategy
+
+Expand an exponential bracket bidirectionally from the hint, then
+binary-search inside the bracket. Effectively O(1) when the target is near
+the hint; never worse than ~2 log₂ n comparisons.
+
+Falls back to [`BinaryBracket`](@ref) when no hint is supplied.
+"""
+struct BracketGallop <: SearchStrategy end
+
+"""
+    ExpFromLeft <: SearchStrategy
+
+Exponential search forward from the hint (interpreted as a left bound), then
+binary search in the final bracket. The hint is a *lower* bound rather than a
+center guess, which is what batched sorted-search loops typically want:
+`hint = previous_result`.
+
+Specifically: starting at `lo = hint`, check `v[lo], v[lo+1], ..., v[lo+4]`
+linearly, then `v[lo+8], v[lo+16], …` exponentially, until `x` is bracketed,
+then binary-search inside the bracket.
+
+Falls back to [`BinaryBracket`](@ref) when no hint is supplied.
+"""
+struct ExpFromLeft <: SearchStrategy end
+
+"""
+    InterpolationSearch <: SearchStrategy
+
+Guesses an index by linearly extrapolating `x` between `v[lo]` and `v[hi]`,
+then refines with a bounded binary search. O(1) per query on uniformly-spaced
+data (e.g. `collect(0:0.1:10)`); falls back to O(log n) otherwise. Requires
+`x` to be subtractable with elements of `v` (i.e., a numeric ordering).
+
+Ignores any hint that is supplied — the guess is computed fresh from the
+endpoints. Falls back to [`BinaryBracket`](@ref) for non-numeric element
+types where subtraction isn't defined.
+"""
+struct InterpolationSearch <: SearchStrategy end
+
+"""
+    BitInterpolationSearch <: SearchStrategy
+
+Variant of [`InterpolationSearch`](@ref) that reinterprets `DenseVector{Float64}`
+as `DenseVector{UInt64}` before computing the extrapolation guess. For
+positive IEEE Float64 values, the bit pattern is monotonically increasing
+with the float value and is approximately linear in array index when the
+underlying data is *log-spaced* (geometric). On such data the bit-domain
+guess is far better than the float-domain guess that `InterpolationSearch`
+would compute — sometimes O(1) versus O(log n) refinement.
+
+After computing the bit-domain guess, the bracket and binary refine step
+uses the original float values for comparison, so the answer is identical
+to `Base.searchsortedlast` / `Base.searchsortedfirst`.
+
+Constraints:
+  - `DenseVector{Float64}` only (the IEEE bit-pattern trick is Float64-specific).
+  - Requires `v[1] > 0` and the query `x > 0` (negative, zero, subnormal,
+    and non-finite Float64 bit patterns are not monotonic with float value
+    under raw reinterpret).
+  - Forward ordering only.
+
+**This strategy is opt-in only** — `Auto` does not pick it. The bench sweep
+shows the per-query division and UInt64↔Float64 conversion overhead
+(~60–90 ns/q) costs more than the bracket refinement that the guess
+saves at every gap tested; pinning `BitInterpolationSearch` is slower than
+letting `Auto` pick `SIMDLinearScan` / `BracketGallop` / `ExpFromLeft`.
+The strategy is retained for users with workloads not covered by the
+sweep — for instance, very-large `n` (≥ 2²⁰), pathologically log-spaced
+data, or hardware where Float64 division is unusually cheap relative to
+the scalar walk.
+
+Falls back to [`InterpolationSearch`](@ref) for non-Float64 dense eltypes
+(where the bit pattern equals the value and the two strategies are
+equivalent), and to [`BinaryBracket`](@ref) for non-positive or
+non-finite Float64 data.
+"""
+struct BitInterpolationSearch <: SearchStrategy end
+
+"""
+    BinaryBracket <: SearchStrategy
+
+Plain `Base.searchsortedlast` / `Base.searchsortedfirst`. Ignores any hint
+that is supplied.
+"""
+struct BinaryBracket <: SearchStrategy end
+
+"""
+    BisectThenSIMD <: SearchStrategy
+
+Equality-search strategy. Binary-bisects `v` down to a small basecase, then
+SIMD-scans the basecase for exact equality with `x`. Specialised for
+`DenseVector{Int64}` + `Int64` queries via the same custom LLVM IR that
+backs [`findfirstsortedequal`](@ref FindFirstFunctions.findfirstsortedequal);
+for other element types, falls back to [`BinaryBracket`](@ref) plus an
+equality check.
+
+This strategy is meant for use with [`findequal`](@ref FindFirstFunctions.findequal),
+not with `searchsortedfirst` / `searchsortedlast` — its purpose is to answer
+"is `x` present at exactly which index, or not at all?", which is a
+different question from positional search. In the
+`searchsortedfirst`/`searchsortedlast` dispatch it falls back to
+[`BinaryBracket`](@ref).
+
+Ignores any hint that is supplied. Falls back to [`BinaryBracket`](@ref) for
+non-`Forward` orderings.
+"""
+struct BisectThenSIMD <: SearchStrategy end
+
+"""
+    GuesserHint(guesser::Guesser) <: SearchStrategy
+
+Uses a [`Guesser`](@ref) to produce an integer guess for `x`, then dispatches
+to [`BracketGallop`](@ref) from that guess. The `Guesser` already decides
+between linear-extrapolation lookup (when `v` looks linear) and using the
+previous result as a guess; this strategy plugs that logic into the strategy
+dispatch hierarchy, and updates `guesser.idx_prev` on each call.
+
+Use this strategy with the per-query and batched APIs whenever you have a
+`Guesser` attached to a vector. The cost is one `guesser(x)` evaluation
+plus one `BracketGallop` call plus one `idx_prev[]` write per call.
+"""
+struct GuesserHint{G} <: SearchStrategy
+    guesser::G
+end
+
+"""
+    SearchProperties
+
+Cached, non-allocating facts about a sorted vector. Pass to [`Auto`](@ref)
+via `Auto(props)` to skip the per-call probes that the default `Auto()` runs
+on every batched call. Stored fields are kept to plain `Bool`s so the struct
+stays `isbits` and travels in registers.
+
+Default-constructed (`SearchProperties()`) is the "no information" sentinel:
+`has_props` is `false`, the other fields are unspecified and ignored by
+`Auto`. Construct via `SearchProperties(v::AbstractVector)` to populate the
+fields by running the probes once.
+
+Currently consumed: `is_linear` and `has_nan` (the latter only on Float64,
+to gate `SIMDLinearScan` eligibility in `Auto`). `is_log_linear` is
+populated for callers that want to manually pin
+[`BitInterpolationSearch`](@ref) based on data shape; `Auto` does not
+consume it. The fields are otherwise populated for forward compatibility.
+"""
+struct SearchProperties
+    has_props::Bool
+    is_linear::Bool
+    has_nan::Bool
+    is_log_linear::Bool
+end
+
+SearchProperties() = SearchProperties(false, false, false, false)
+
+"""
+    Auto <: SearchStrategy
+    Auto()
+    Auto(props::SearchProperties)
+
+Heuristically picks among [`LinearScan`](@ref), [`SIMDLinearScan`](@ref),
+[`ExpFromLeft`](@ref), [`InterpolationSearch`](@ref),
+[`BracketGallop`](@ref), and [`BinaryBracket`](@ref). The choice depends on
+the calling context:
+
+**Per-query** (`searchsortedlast(Auto(), v, x[, hint])`):
+  - No hint, or hint outside `axes(v)` → [`BinaryBracket`](@ref).
+  - Hint in range, `length(v) ≤ 16` → [`LinearScan`](@ref).
+  - Hint in range, `length(v) > 16` → [`BracketGallop`](@ref).
+
+**Batched sorted** (`searchsortedlast!(out, v, queries; strategy = Auto())`)
+chooses by the expected average gap in `v`'s index space between
+consecutive query results. See the package's `auto.md` documentation for
+the full decision tree and the crossover constants the bench sweep
+determined.
+
+**Batched unsorted**: falls back to per-element `Base.searchsortedlast` /
+`Base.searchsortedfirst` with no hint regardless of strategy.
+
+**Cached properties.** Passing a populated [`SearchProperties`](@ref) via
+`Auto(props)` short-circuits the per-call probes. The cached path is
+behaviour-equivalent to `Auto()` when `props` is up to date for `v`; the
+caller is responsible for re-computing `props` if `v` mutates.
+"""
+struct Auto <: SearchStrategy
+    props::SearchProperties
+end
+
+Auto() = Auto(SearchProperties())
diff --git a/test/runtests.jl b/test/runtests.jl
index d89d198..45e04a7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -37,166 +37,713 @@ end
         end
 
         @safetestset "Guesser" begin
-            using FindFirstFunctions:
-                Guesser, searchsortedfirstcorrelated,
-                searchsortedlastcorrelated
+            using FindFirstFunctions: Guesser, GuesserHint
             v = collect(LinRange(0, 10, 4))
             guesser_linear = Guesser(v)
             guesser_prev = Guesser(v, Ref(1), false)
             @test guesser_linear.linear_lookup
-            @test searchsortedfirstcorrelated(v, 4.0, guesser_linear) == 3
-            @test searchsortedfirstcorrelated(v, 1.4234326478e24, guesser_linear) == 5
-            @test searchsortedlastcorrelated(v, 4.0, guesser_prev) == 2
+
+            # Guesser feeds the dispatched API via GuesserHint.
+            @test searchsortedfirst(GuesserHint(guesser_linear), v, 4.0) == 3
+            @test searchsortedfirst(GuesserHint(guesser_linear), v, 1.4234326478e24) == 5
+            @test searchsortedlast(GuesserHint(guesser_prev), v, 4.0) == 2
             @test guesser_prev.idx_prev[] == 2
 
-            # Edge case
+            # Edge case: single-element v.
             v1 = [42.0]
             guesser = Guesser(v1)
             @test guesser_linear.linear_lookup
             @test guesser(100) == 1
             @test guesser(42.0) == 1
             @test guesser(0) == 1
-            @test searchsortedfirstcorrelated(v1, 0, guesser) == 1
-            @test searchsortedfirstcorrelated(v1, 100, guesser) == 1 + 1  # see searchsortedfirst
-            @test searchsortedfirstcorrelated(v1, 42.0, guesser) == 1
-            @test searchsortedlastcorrelated(v1, 0, guesser) == 1 - 1 # see searchsortedlast
-            @test searchsortedlastcorrelated(v1, 100, guesser) == 1
-            @test searchsortedlastcorrelated(v1, 42.0, guesser) == 1
+            @test searchsortedfirst(GuesserHint(guesser), v1, 0) == 1
+            @test searchsortedfirst(GuesserHint(guesser), v1, 100) == 2  # see Base.searchsortedfirst
+            @test searchsortedfirst(GuesserHint(guesser), v1, 42.0) == 1
+            @test searchsortedlast(GuesserHint(guesser), v1, 0) == 0  # see Base.searchsortedlast
+            @test searchsortedlast(GuesserHint(guesser), v1, 100) == 1
+            @test searchsortedlast(GuesserHint(guesser), v1, 42.0) == 1
         end
 
-        @safetestset "Custom ordering in searchsorted*correlated" begin
+        @safetestset "Custom ordering for strategy dispatch" begin
             using FindFirstFunctions:
-                Guesser, searchsortedfirstcorrelated,
-                searchsortedlastcorrelated
+                Guesser, GuesserHint, BracketGallop, LinearScan,
+                ExpFromLeft, BinaryBracket, Auto
 
-            # Test with reverse-sorted vector
-            v_rev = collect(10.0:-1.0:1.0)  # [10.0, 9.0, ..., 1.0]
+            v_rev = collect(10.0:-1.0:1.0)
+            for x in (5.0, 10.0, 1.0, 0.0, 11.0),
+                    hint in (1, 5, 10),
+                    strategy in (
+                        BracketGallop(), LinearScan(), ExpFromLeft(), Auto(),
+                    )
 
-            # Test searchsortedfirstcorrelated with Reverse order
-            @test searchsortedfirstcorrelated(v_rev, 5.0, 1; order = Base.Order.Reverse) ==
-                searchsortedfirst(v_rev, 5.0, Base.Order.Reverse)
-            @test searchsortedfirstcorrelated(v_rev, 10.0, 1; order = Base.Order.Reverse) ==
-                searchsortedfirst(v_rev, 10.0, Base.Order.Reverse)
-            @test searchsortedfirstcorrelated(v_rev, 1.0, 1; order = Base.Order.Reverse) ==
-                searchsortedfirst(v_rev, 1.0, Base.Order.Reverse)
-            @test searchsortedfirstcorrelated(v_rev, 0.0, 1; order = Base.Order.Reverse) ==
-                searchsortedfirst(v_rev, 0.0, Base.Order.Reverse)
-            @test searchsortedfirstcorrelated(v_rev, 11.0, 1; order = Base.Order.Reverse) ==
-                searchsortedfirst(v_rev, 11.0, Base.Order.Reverse)
-
-            # Test searchsortedlastcorrelated with Reverse order
-            @test searchsortedlastcorrelated(v_rev, 5.0, 1; order = Base.Order.Reverse) ==
-                searchsortedlast(v_rev, 5.0, Base.Order.Reverse)
-            @test searchsortedlastcorrelated(v_rev, 10.0, 1; order = Base.Order.Reverse) ==
-                searchsortedlast(v_rev, 10.0, Base.Order.Reverse)
-            @test searchsortedlastcorrelated(v_rev, 1.0, 1; order = Base.Order.Reverse) ==
-                searchsortedlast(v_rev, 1.0, Base.Order.Reverse)
-            @test searchsortedlastcorrelated(v_rev, 0.0, 1; order = Base.Order.Reverse) ==
-                searchsortedlast(v_rev, 0.0, Base.Order.Reverse)
-            @test searchsortedlastcorrelated(v_rev, 11.0, 1; order = Base.Order.Reverse) ==
-                searchsortedlast(v_rev, 11.0, Base.Order.Reverse)
-
-            # Test with Guesser and reverse order
+                @test searchsortedfirst(strategy, v_rev, x, hint; order = Base.Order.Reverse) ==
+                    searchsortedfirst(v_rev, x, Base.Order.Reverse)
+                @test searchsortedlast(strategy, v_rev, x, hint; order = Base.Order.Reverse) ==
+                    searchsortedlast(v_rev, x, Base.Order.Reverse)
+            end
+            # BinaryBracket ignores any hint.
+            for x in (5.0, 10.0, 1.0, 0.0, 11.0)
+                @test searchsortedfirst(BinaryBracket(), v_rev, x; order = Base.Order.Reverse) ==
+                    searchsortedfirst(v_rev, x, Base.Order.Reverse)
+                @test searchsortedlast(BinaryBracket(), v_rev, x; order = Base.Order.Reverse) ==
+                    searchsortedlast(v_rev, x, Base.Order.Reverse)
+            end
+
+            # GuesserHint with reverse order.
             guesser_rev = Guesser(v_rev)
-            @test searchsortedfirstcorrelated(v_rev, 5.0, guesser_rev; order = Base.Order.Reverse) ==
+            @test searchsortedfirst(GuesserHint(guesser_rev), v_rev, 5.0; order = Base.Order.Reverse) ==
                 searchsortedfirst(v_rev, 5.0, Base.Order.Reverse)
-            @test searchsortedlastcorrelated(v_rev, 5.0, guesser_rev; order = Base.Order.Reverse) ==
+            @test searchsortedlast(GuesserHint(guesser_rev), v_rev, 5.0; order = Base.Order.Reverse) ==
                 searchsortedlast(v_rev, 5.0, Base.Order.Reverse)
 
-            # Test that default order (Forward) still works correctly
-            v_fwd = collect(1.0:1.0:10.0)  # [1.0, 2.0, ..., 10.0]
-            @test searchsortedfirstcorrelated(v_fwd, 5.0, 1) ==
-                searchsortedfirst(v_fwd, 5.0)
-            @test searchsortedlastcorrelated(v_fwd, 5.0, 1) ==
-                searchsortedlast(v_fwd, 5.0)
+            # Default (Forward) order still resolves correctly.
+            v_fwd = collect(1.0:1.0:10.0)
+            for strategy in (BracketGallop(), LinearScan(), ExpFromLeft(), Auto())
+                @test searchsortedfirst(strategy, v_fwd, 5.0, 1) == searchsortedfirst(v_fwd, 5.0)
+                @test searchsortedlast(strategy, v_fwd, 5.0, 1) == searchsortedlast(v_fwd, 5.0)
+            end
         end
 
-        @safetestset "Exponential Search (searchsortedfirstexp)" begin
-            using FindFirstFunctions: searchsortedfirstexp
+        @safetestset "SearchStrategy dispatch (single query)" begin
+            using FindFirstFunctions:
+                SearchStrategy, LinearScan, BracketGallop, BinaryBracket, Auto
+
+            for n in (0, 1, 2, 8, 33, 257)
+                v = collect(1:n)
+                isempty(v) && continue
 
-            # Basic functionality - should match searchsortedfirst
-            v = collect(1:100)
-            for x in [1, 5, 10, 50, 99, 100]
-                @test searchsortedfirstexp(v, x) == searchsortedfirst(v, x)
+                # Probe targets that include hits, misses, boundaries
+                xs = unique!(sort!([0, 1, n ÷ 2, n, n + 1, -3, 2 * n + 1]))
+                for x in xs
+                    expected_last = searchsortedlast(v, x)
+                    expected_first = searchsortedfirst(v, x)
+
+                    # BinaryBracket — ignores any hint
+                    @test searchsortedlast(BinaryBracket(), v, x) == expected_last
+                    @test searchsortedfirst(BinaryBracket(), v, x) == expected_first
+                    @test searchsortedlast(BinaryBracket(), v, x, 1) == expected_last
+                    @test searchsortedfirst(BinaryBracket(), v, x, 1) == expected_first
+
+                    # Strategy with hint anywhere in 1..n agrees with Base
+                    for h in unique!([1, max(1, n ÷ 4), n ÷ 2, max(1, 3n ÷ 4), n])
+                        @test searchsortedlast(LinearScan(), v, x, h) == expected_last
+                        @test searchsortedfirst(LinearScan(), v, x, h) == expected_first
+                        @test searchsortedlast(BracketGallop(), v, x, h) == expected_last
+                        @test searchsortedfirst(BracketGallop(), v, x, h) == expected_first
+                        @test searchsortedlast(Auto(), v, x, h) == expected_last
+                        @test searchsortedfirst(Auto(), v, x, h) == expected_first
+                    end
+
+                    # No-hint forms fall back to BinaryBracket
+                    @test searchsortedlast(LinearScan(), v, x) == expected_last
+                    @test searchsortedfirst(LinearScan(), v, x) == expected_first
+                    @test searchsortedlast(BracketGallop(), v, x) == expected_last
+                    @test searchsortedfirst(BracketGallop(), v, x) == expected_first
+                    @test searchsortedlast(Auto(), v, x) == expected_last
+                    @test searchsortedfirst(Auto(), v, x) == expected_first
+
+                    # Out-of-range hint → Auto falls back to BinaryBracket
+                    @test searchsortedlast(Auto(), v, x, 0) == expected_last
+                    @test searchsortedfirst(Auto(), v, x, 0) == expected_first
+                    @test searchsortedlast(Auto(), v, x, n + 1) == expected_last
+                    @test searchsortedfirst(Auto(), v, x, n + 1) == expected_first
+                end
             end
 
-            # Edge cases - value not in array
-            @test searchsortedfirstexp(v, 0) == searchsortedfirst(v, 0)
-            @test searchsortedfirstexp(v, 101) == searchsortedfirst(v, 101)
-            @test searchsortedfirstexp(v, 50.5) == searchsortedfirst(v, 50.5)
+            # Reverse order
+            v_rev = collect(10.0:-1.0:1.0)
+            for x in (0.5, 1.0, 5.0, 10.0, 11.0), h in (1, 5, 10)
+                @test searchsortedlast(BracketGallop(), v_rev, x, h; order = Base.Order.Reverse) ==
+                    searchsortedlast(v_rev, x, Base.Order.Reverse)
+                @test searchsortedfirst(BracketGallop(), v_rev, x, h; order = Base.Order.Reverse) ==
+                    searchsortedfirst(v_rev, x, Base.Order.Reverse)
+                @test searchsortedlast(LinearScan(), v_rev, x, h; order = Base.Order.Reverse) ==
+                    searchsortedlast(v_rev, x, Base.Order.Reverse)
+                @test searchsortedfirst(LinearScan(), v_rev, x, h; order = Base.Order.Reverse) ==
+                    searchsortedfirst(v_rev, x, Base.Order.Reverse)
+                @test searchsortedlast(Auto(), v_rev, x, h; order = Base.Order.Reverse) ==
+                    searchsortedlast(v_rev, x, Base.Order.Reverse)
+                @test searchsortedfirst(Auto(), v_rev, x, h; order = Base.Order.Reverse) ==
+                    searchsortedfirst(v_rev, x, Base.Order.Reverse)
+            end
 
-            # With custom bounds
-            @test searchsortedfirstexp(v, 50, 40, 60) == searchsortedfirst(v, 50, 40, 60, Base.Order.Forward)
-            @test searchsortedfirstexp(v, 45, 40, 60) == searchsortedfirst(v, 45, 40, 60, Base.Order.Forward)
+            # Strategy abstract type hierarchy
+            @test LinearScan <: SearchStrategy
+            @test BracketGallop <: SearchStrategy
+            @test BinaryBracket <: SearchStrategy
+            @test Auto <: SearchStrategy
+        end
 
-            # Float vectors
-            vf = collect(0.0:0.1:10.0)
-            for x in [0.0, 0.5, 1.0, 5.0, 9.9, 10.0]
-                @test searchsortedfirstexp(vf, x) == searchsortedfirst(vf, x)
+        @safetestset "ExpFromLeft and InterpolationSearch" begin
+            using FindFirstFunctions:
+                ExpFromLeft, InterpolationSearch, BinaryBracket
+
+            # ExpFromLeft on uniform Int range
+            v = collect(1:1000)
+            for x in (0, 1, 50, 250, 500, 999, 1000, 1001), h in (1, 50, 500, 1000)
+                @test searchsortedlast(ExpFromLeft(), v, x, h) ==
+                    searchsortedlast(v, x)
+                @test searchsortedfirst(ExpFromLeft(), v, x, h) ==
+                    searchsortedfirst(v, x)
             end
+            # ExpFromLeft without hint falls back to BinaryBracket
+            @test searchsortedlast(ExpFromLeft(), v, 500) == searchsortedlast(v, 500)
+            @test searchsortedfirst(ExpFromLeft(), v, 500) == searchsortedfirst(v, 500)
 
-            # Empty and small vectors
-            @test searchsortedfirstexp(Int[], 5) == 1
-            @test searchsortedfirstexp([1], 0) == 1
-            @test searchsortedfirstexp([1], 1) == 1
-            @test searchsortedfirstexp([1], 2) == 2
+            # InterpolationSearch on uniform Float64 range
+            vf = collect(0.0:0.1:10.0)
+            for x in (-1.0, 0.0, 0.05, 1.0, 5.5, 9.95, 10.0, 11.0)
+                @test searchsortedlast(InterpolationSearch(), vf, x) ==
+                    searchsortedlast(vf, x)
+                @test searchsortedfirst(InterpolationSearch(), vf, x) ==
+                    searchsortedfirst(vf, x)
+            end
 
-            # Vector with repeated elements
-            vr = [1, 2, 2, 2, 3, 4, 5]
-            @test searchsortedfirstexp(vr, 2) == searchsortedfirst(vr, 2)
+            # InterpolationSearch on log-spaced (non-uniform) — must still be correct
+            vlog = exp.(range(log(0.1), log(100.0); length = 256))
+            for x in (0.05, 0.1, 1.0, 50.0, 100.0, 150.0)
+                @test searchsortedlast(InterpolationSearch(), vlog, x) ==
+                    searchsortedlast(vlog, x)
+                @test searchsortedfirst(InterpolationSearch(), vlog, x) ==
+                    searchsortedfirst(vlog, x)
+            end
 
-            # Large vector
-            big_v = collect(1:10000)
-            for x in [1, 100, 1000, 5000, 9999, 10000]
-                @test searchsortedfirstexp(big_v, x) == searchsortedfirst(big_v, x)
+            # InterpolationSearch ignores hint (computes its own guess)
+            for h in (1, 100, 256)
+                @test searchsortedlast(InterpolationSearch(), vlog, 50.0, h) ==
+                    searchsortedlast(vlog, 50.0)
             end
+
+            # InterpolationSearch falls back to BinaryBracket on non-Number eltypes
+            vs = ["a", "b", "c", "d"]
+            @test searchsortedlast(InterpolationSearch(), vs, "c") ==
+                searchsortedlast(vs, "c")
+            @test searchsortedfirst(InterpolationSearch(), vs, "c", 2) ==
+                searchsortedfirst(vs, "c")
+
+            # InterpolationSearch on a constant vector (span=0) shouldn't divide
+            # by zero; should fall through to a bounded search and return a
+            # correct result.
+            vc = fill(3.0, 16)
+            @test searchsortedlast(InterpolationSearch(), vc, 3.0) ==
+                searchsortedlast(vc, 3.0)
+            @test searchsortedlast(InterpolationSearch(), vc, 2.0) ==
+                searchsortedlast(vc, 2.0)
+            @test searchsortedlast(InterpolationSearch(), vc, 4.0) ==
+                searchsortedlast(vc, 4.0)
+
+            # Edge: 1-element and 2-element vectors
+            @test searchsortedlast(ExpFromLeft(), [5], 4, 1) == 0
+            @test searchsortedlast(ExpFromLeft(), [5], 5, 1) == 1
+            @test searchsortedlast(ExpFromLeft(), [5], 6, 1) == 1
+            @test searchsortedfirst(ExpFromLeft(), [5, 10], 7, 1) == 2
+            @test searchsortedlast(InterpolationSearch(), [5], 4) == 0
+            @test searchsortedlast(InterpolationSearch(), [5, 10], 7) == 1
         end
 
-        @safetestset "Vectorized Search (searchsortedfirstvec and searchsortedlastvec)" begin
-            using FindFirstFunctions: searchsortedfirstvec, searchsortedlastvec
+        @safetestset "Batched Auto heuristic" begin
+            using FindFirstFunctions:
+                Auto, LinearScan, ExpFromLeft, BracketGallop, BinaryBracket,
+                searchsortedlast!
+            using StableRNGs
 
-            # Basic functionality
+            # Dense queries: Auto's avg-gap heuristic should land on LinearScan
+            # (verified by output correctness, not by introspecting the picked
+            # strategy — that's an implementation detail).
+            v = collect(0.0:0.01:10.0)   # n=1001
+            tt_dense = sort!(rand(StableRNG(1), 4096) .* 10.0)
+            out_auto = Vector{Int}(undef, length(tt_dense))
+            out_linear = Vector{Int}(undef, length(tt_dense))
+            searchsortedlast!(out_auto, v, tt_dense; strategy = Auto())
+            searchsortedlast!(out_linear, v, tt_dense; strategy = LinearScan())
+            @test out_auto == out_linear
+
+            # Sparse queries on a long vector: same correctness check.
+            v_long = collect(0.0:0.001:100.0)  # n=100001
+            tt_sparse = sort!(rand(StableRNG(2), 10) .* 100.0)
+            out_a = Vector{Int}(undef, length(tt_sparse))
+            out_e = Vector{Int}(undef, length(tt_sparse))
+            searchsortedlast!(out_a, v_long, tt_sparse; strategy = Auto())
+            searchsortedlast!(out_e, v_long, tt_sparse; strategy = ExpFromLeft())
+            @test out_a == out_e
+
+            # Dense burst — queries clustered inside one segment of v.
+            # The span-based gap detection should send Auto to LinearScan.
+            seg_lo = v[length(v) ÷ 2]
+            seg_hi = v[length(v) ÷ 2 + 1]
+            tt_burst = sort!(seg_lo .+ (seg_hi - seg_lo) .* rand(StableRNG(3), 2048))
+            out_b = Vector{Int}(undef, length(tt_burst))
+            out_l = Vector{Int}(undef, length(tt_burst))
+            searchsortedlast!(out_b, v, tt_burst; strategy = Auto())
+            searchsortedlast!(out_l, v, tt_burst; strategy = LinearScan())
+            @test out_b == out_l
+
+            # m=1 fast path — bypass span heuristic
+            out1 = Vector{Int}(undef, 1)
+            searchsortedlast!(out1, v, [5.123]; strategy = Auto())
+            @test out1[1] == searchsortedlast(v, 5.123)
+
+            # m=0 returns the output untouched (empty vector)
+            empty_out = Int[]
+            @test searchsortedlast!(empty_out, v, Float64[]; strategy = Auto()) === empty_out
+            @test isempty(empty_out)
+
+            # Non-numeric eltype: span heuristic falls back to length-ratio.
+            vs = ["a", "b", "c", "d", "e", "f", "g", "h"]
+            qs = ["b", "d", "f"]
+            outs = Vector{Int}(undef, length(qs))
+            searchsortedlast!(outs, vs, qs; strategy = Auto())
+            @test outs == searchsortedlast.(Ref(vs), qs)
+        end
+
+        @safetestset "SearchProperties cache" begin
+            using FindFirstFunctions:
+                Auto, SearchProperties, searchsortedlast!, searchsortedfirst!
+            using StableRNGs
+
+            # The sentinel struct.
+            @test !SearchProperties().has_props
+
+            # A populated SearchProperties from a linear, NaN-free vector.
+            v = collect(0.0:0.001:100.0)
+            props = SearchProperties(v)
+            @test props.has_props
+            @test props.is_linear
+            @test !props.has_nan
+
+            # Output equivalence: Auto(props) returns the same answers as Auto()
+            # on a sparse-on-long-linear regime (where InterpolationSearch is
+            # the picked strategy via the cached `is_linear`).
+            tt = sort!(rand(StableRNG(10), 16) .* 100.0)
+            out_cached = Vector{Int}(undef, length(tt))
+            out_baseline = Vector{Int}(undef, length(tt))
+            out_truth = searchsortedlast.(Ref(v), tt)
+            searchsortedlast!(out_cached, v, tt; strategy = Auto(props))
+            searchsortedlast!(out_baseline, v, tt; strategy = Auto())
+            @test out_cached == out_truth
+            @test out_baseline == out_truth
+
+            # searchsortedfirst path takes the same branch.
+            searchsortedfirst!(out_cached, v, tt; strategy = Auto(props))
+            searchsortedfirst!(out_baseline, v, tt; strategy = Auto())
+            @test out_cached == searchsortedfirst.(Ref(v), tt)
+            @test out_baseline == searchsortedfirst.(Ref(v), tt)
+
+            # Float vector with a NaN: props.has_nan is true. The cache
+            # currently isn't consumed for has_nan in Auto's decision tree,
+            # but the field is populated correctly.
+            vnan = [1.0, 2.0, NaN, 4.0, 5.0]
+            propsnan = SearchProperties(vnan)
+            @test propsnan.has_nan
+
+            # Non-float eltype: has_nan is always false.
+            vi = collect(Int64, 1:100)
+            @test !SearchProperties(vi).has_nan
+
+            # Lying SearchProperties (claims is_linear=true on non-linear data)
+            # is still correctness-preserving — Auto's "InterpolationSearch on
+            # linear data" branch handles the false positive gracefully
+            # because InterpolationSearch's bad guess just makes BracketGallop
+            # wider, never incorrect.
+            v_log = exp.(range(0.0, 10.0; length = 4096))
+            lying = SearchProperties(true, true, false, false)
+            tt_log = sort!(rand(StableRNG(11), 8) .* (v_log[end] - v_log[1]) .+ v_log[1])
+            out_lying = Vector{Int}(undef, length(tt_log))
+            searchsortedlast!(out_lying, v_log, tt_log; strategy = Auto(lying))
+            @test out_lying == searchsortedlast.(Ref(v_log), tt_log)
+
+            # Bits-ness: SearchProperties must be isbits so it doesn't allocate.
+            @test isbitstype(SearchProperties)
+
+            # is_log_linear field: populated by SearchProperties(v) on
+            # geometric data, rejected on linear / two-scale data.
+            v_log = collect(exp.(range(0.0, log(1.0e6); length = 65536)))
+            p_log = SearchProperties(v_log)
+            @test p_log.is_log_linear
+
+            v_lin = collect(0.0:0.001:65.0)
+            p_lin = SearchProperties(v_lin)
+            @test !p_lin.is_log_linear
+
+            # Two-scale data should fail both linearity probes.
+            v_2s = sort!(vcat(range(0.0, 1.0; length = 32768), range(1.0, 100.0; length = 32768)))
+            p_2s = SearchProperties(v_2s)
+            @test !p_2s.is_log_linear
+
+            # is_log_linear requires strictly positive v; mixed-sign rejects.
+            v_signed = collect(-100.0:0.001:65.0)
+            p_signed = SearchProperties(v_signed)
+            @test !p_signed.is_log_linear
+        end
+
+        @safetestset "Batched in-place searchsorted!" begin
+            using FindFirstFunctions:
+                LinearScan, BracketGallop, BinaryBracket, Auto,
+                searchsortedlast!, searchsortedfirst!
+
+            # Sorted queries
             v = collect(1:100)
             x_sorted = [5, 10, 20, 50, 90]
+            out_last = Vector{Int}(undef, length(x_sorted))
+            out_first = Vector{Int}(undef, length(x_sorted))
 
-            # searchsortedfirstvec should match element-wise searchsortedfirst
-            result_first = searchsortedfirstvec(v, x_sorted)
-            expected_first = searchsortedfirst.(Ref(v), x_sorted)
-            @test result_first == expected_first
+            @test searchsortedlast!(out_last, v, x_sorted) ==
+                searchsortedlast.(Ref(v), x_sorted)
+            @test searchsortedfirst!(out_first, v, x_sorted) ==
+                searchsortedfirst.(Ref(v), x_sorted)
+            @test out_last === searchsortedlast!(out_last, v, x_sorted)  # in-place
 
-            # searchsortedlastvec should match element-wise searchsortedlast
-            result_last = searchsortedlastvec(v, x_sorted)
-            expected_last = searchsortedlast.(Ref(v), x_sorted)
-            @test result_last == expected_last
+            # Each strategy gives the same result on sorted input
+            for strategy in
+                (LinearScan(), BracketGallop(), BinaryBracket(), Auto())
+                fill!(out_last, 0)
+                fill!(out_first, 0)
+                searchsortedlast!(out_last, v, x_sorted; strategy = strategy)
+                searchsortedfirst!(out_first, v, x_sorted; strategy = strategy)
+                @test out_last == searchsortedlast.(Ref(v), x_sorted)
+                @test out_first == searchsortedfirst.(Ref(v), x_sorted)
+            end
 
-            # Unsorted input falls back to element-wise
+            # Unsorted falls back to per-element regardless of strategy
             x_unsorted = [50, 10, 90, 5, 20]
-            @test searchsortedfirstvec(v, x_unsorted) == searchsortedfirst.(Ref(v), x_unsorted)
-            @test searchsortedlastvec(v, x_unsorted) == searchsortedlast.(Ref(v), x_unsorted)
+            out = Vector{Int}(undef, length(x_unsorted))
+            for strategy in (LinearScan(), BracketGallop(), Auto())
+                searchsortedlast!(out, v, x_unsorted; strategy = strategy)
+                @test out == searchsortedlast.(Ref(v), x_unsorted)
+                searchsortedfirst!(out, v, x_unsorted; strategy = strategy)
+                @test out == searchsortedfirst.(Ref(v), x_unsorted)
+            end
+
+            # Reverse-order vector + reverse-sorted queries
+            v_rev = collect(10.0:-1.0:1.0)
+            x_rev_sorted = [9.5, 7.5, 5.0, 2.5, 0.5]   # sorted descending
+            out_r = Vector{Int}(undef, length(x_rev_sorted))
+            searchsortedlast!(
+                out_r, v_rev, x_rev_sorted; order = Base.Order.Reverse
+            )
+            @test out_r ==
+                [searchsortedlast(v_rev, x, Base.Order.Reverse) for x in x_rev_sorted]
 
-            # Float vectors
+            # Floats + values between grid points
             vf = collect(0.0:0.1:10.0)
-            xf_sorted = [0.5, 1.0, 2.5, 5.0, 9.5]
-            @test searchsortedfirstvec(vf, xf_sorted) == searchsortedfirst.(Ref(vf), xf_sorted)
-            @test searchsortedlastvec(vf, xf_sorted) == searchsortedlast.(Ref(vf), xf_sorted)
-
-            # Edge cases - values outside range
-            x_edges = [-5, 0, 1, 100, 150]
-            @test searchsortedfirstvec(v, x_edges) == searchsortedfirst.(Ref(v), x_edges)
-            @test searchsortedlastvec(v, x_edges) == searchsortedlast.(Ref(v), x_edges)
-
-            # Empty input vector
-            @test searchsortedfirstvec(v, Int[]) == Int[]
-            @test searchsortedlastvec(v, Int[]) == Int[]
-
-            # Single element search
-            @test searchsortedfirstvec(v, [50]) == [50]
-            @test searchsortedlastvec(v, [50]) == [50]
-
-            # Values between grid points
-            x_between = [1.5, 10.5, 50.5, 99.5]
-            @test searchsortedfirstvec(v, x_between) == searchsortedfirst.(Ref(v), x_between)
-            @test searchsortedlastvec(v, x_between) == searchsortedlast.(Ref(v), x_between)
+            xf = [0.5, 1.0, 2.5, 5.0, 9.5]
+            outf = Vector{Int}(undef, length(xf))
+            searchsortedlast!(outf, vf, xf)
+            @test outf == searchsortedlast.(Ref(vf), xf)
+
+            # Edge cases: out-of-range queries on each side
+            x_edges = [-5.0, 0.0, 5.0, 10.0, 15.0]
+            sort!(x_edges)
+            oute = Vector{Int}(undef, length(x_edges))
+            searchsortedlast!(oute, vf, x_edges)
+            @test oute == searchsortedlast.(Ref(vf), x_edges)
+            searchsortedfirst!(oute, vf, x_edges)
+            @test oute == searchsortedfirst.(Ref(vf), x_edges)
+
+            # Empty queries
+            @test searchsortedlast!(Int[], v, Int[]) == Int[]
+            @test searchsortedfirst!(Int[], v, Int[]) == Int[]
+
+            # DimensionMismatch
+            @test_throws DimensionMismatch searchsortedlast!(zeros(Int, 2), v, [1, 2, 3])
+            @test_throws DimensionMismatch searchsortedfirst!(zeros(Int, 2), v, [1, 2, 3])
+
+            # Sparse queries on a long vector — exercises the BracketGallop hint path
+            v_big = collect(1:100_000)
+            x_sparse = [100, 50_000, 99_900]
+            outb = Vector{Int}(undef, length(x_sparse))
+            searchsortedlast!(outb, v_big, x_sparse)
+            @test outb == searchsortedlast.(Ref(v_big), x_sparse)
+
+            # Range fast paths route through the strategy dispatch.
+            r = 1:100
+            outr = Vector{Int}(undef, length(x_sorted))
+            for strategy in (LinearScan(), BracketGallop(), Auto())
+                searchsortedlast!(outr, r, x_sorted; strategy = strategy)
+                @test outr == searchsortedlast.(Ref(r), x_sorted)
+            end
+        end
+
+        @safetestset "searchsortedrange" begin
+            using FindFirstFunctions, StableRNGs
+            v = collect(0.0:0.5:50.0)
+            # Compare against Base composition for several (lo, hi) pairs.
+            for (lo, hi) in [
+                    (5.0, 7.0), (0.0, 100.0), (-1.0, 5.5), (10.0, 10.0),
+                    (45.0, 60.0), (51.0, 100.0),
+                ]
+                expected = searchsortedfirst(v, lo):searchsortedlast(v, hi)
+                for strategy in (
+                        Auto(), BinaryBracket(), BracketGallop(), LinearScan(),
+                        InterpolationSearch(), ExpFromLeft(), SIMDLinearScan(),
+                    )
+                    @test searchsortedrange(strategy, v, lo, hi) == expected
+                    # Hinted form.
+                    h = clamp(searchsortedfirst(v, lo), 1, length(v))
+                    @test searchsortedrange(strategy, v, lo, hi, h) == expected
+                end
+            end
+            # Random fuzz on Int64.
+            rng = StableRNG(2026)
+            vi = sort!(rand(rng, Int64(-100):Int64(100), 200))
+            for _ in 1:200
+                lo, hi = sort([rand(rng, Int64(-110):Int64(110)) for _ in 1:2])
+                want = searchsortedfirst(vi, lo):searchsortedlast(vi, hi)
+                @test searchsortedrange(Auto(), vi, lo, hi) == want
+                @test searchsortedrange(BracketGallop(), vi, lo, hi, 100) == want
+            end
+            # Type stability: result is UnitRange{Int}.
+            @test typeof(searchsortedrange(Auto(), v, 5.0, 7.0)) === UnitRange{Int}
+        end
+
+        @safetestset "queries_sorted kwarg" begin
+            using FindFirstFunctions, StableRNGs
+            v = collect(0.0:0.5:100.0)
+            rng = StableRNG(2026)
+            sorted_q = sort!(rand(rng, 64) .* 100.0)
+            unsorted_q = rand(rng, 64) .* 100.0
+            out = Vector{Int}(undef, 64)
+            expected_sorted = searchsortedlast.(Ref(v), sorted_q)
+            expected_unsorted = searchsortedlast.(Ref(v), unsorted_q)
+
+            # Default behaviour (nothing) — runtime issorted check.
+            searchsortedlast!(out, v, sorted_q; strategy = Auto())
+            @test out == expected_sorted
+            searchsortedlast!(out, v, unsorted_q; strategy = Auto())
+            @test out == expected_unsorted
+
+            # Explicit queries_sorted = true: trust the caller's sortedness.
+            searchsortedlast!(out, v, sorted_q; strategy = Auto(), queries_sorted = true)
+            @test out == expected_sorted
+            # Across every shipped strategy.
+            for strategy in (
+                    LinearScan(), SIMDLinearScan(), BracketGallop(),
+                    ExpFromLeft(), InterpolationSearch(), BinaryBracket(),
+                )
+                searchsortedlast!(
+                    out, v, sorted_q;
+                    strategy = strategy, queries_sorted = true
+                )
+                @test out == expected_sorted
+                searchsortedfirst!(
+                    out, v, sorted_q;
+                    strategy = strategy, queries_sorted = true
+                )
+                @test out == searchsortedfirst.(Ref(v), sorted_q)
+            end
+
+            # Explicit queries_sorted = false: take the unsorted-loop path
+            # unconditionally, even on sorted input — answers must still be
+            # correct (the unsorted loop is per-query unhinted Base call).
+            searchsortedlast!(out, v, sorted_q; strategy = Auto(), queries_sorted = false)
+            @test out == expected_sorted
+            searchsortedlast!(out, v, unsorted_q; strategy = Auto(), queries_sorted = false)
+            @test out == expected_unsorted
+        end
+
+        @safetestset "SIMDLinearScan correctness" begin
+            using FindFirstFunctions, StableRNGs
+            F = FindFirstFunctions
+
+            @testset "Int64 fuzz vs Base" begin
+                rng = StableRNG(2026)
+                for _ in 1:5_000
+                    n = rand(rng, 1:512)
+                    v = sort!(rand(rng, Int64(-1000):Int64(1000), n))
+                    x = rand(rng, Int64(-1100):Int64(1100))
+                    hint = rand(rng, 1:n)
+                    @test searchsortedlast(F.SIMDLinearScan(), v, x, hint) ==
+                        searchsortedlast(v, x)
+                    @test searchsortedfirst(F.SIMDLinearScan(), v, x, hint) ==
+                        searchsortedfirst(v, x)
+                end
+            end
+
+            @testset "Float64 fuzz vs Base" begin
+                rng = StableRNG(2027)
+                for _ in 1:5_000
+                    n = rand(rng, 1:512)
+                    v = sort!(randn(rng, n))
+                    x = (rand(rng) - 0.5) * 6
+                    hint = rand(rng, 1:n)
+                    @test searchsortedlast(F.SIMDLinearScan(), v, x, hint) ==
+                        searchsortedlast(v, x)
+                    @test searchsortedfirst(F.SIMDLinearScan(), v, x, hint) ==
+                        searchsortedfirst(v, x)
+                end
+            end
+
+            @testset "Edge cases (Int64)" begin
+                v = collect(Int64, 1:100)
+                # Out-of-range hint is clamped.
+                @test searchsortedlast(F.SIMDLinearScan(), v, Int64(50), -5) == 50
+                @test searchsortedlast(F.SIMDLinearScan(), v, Int64(50), 1_000) == 50
+                # x below/above the range.
+                @test searchsortedlast(F.SIMDLinearScan(), v, Int64(-10), 50) == 0
+                @test searchsortedlast(F.SIMDLinearScan(), v, Int64(1_000), 50) == 100
+                @test searchsortedfirst(F.SIMDLinearScan(), v, Int64(-10), 50) == 1
+                @test searchsortedfirst(F.SIMDLinearScan(), v, Int64(1_000), 50) == 101
+                # Empty and single-element vectors.
+                vempty = Int64[]
+                @test searchsortedlast(F.SIMDLinearScan(), vempty, Int64(5), 1) == 0
+                @test searchsortedfirst(F.SIMDLinearScan(), vempty, Int64(5), 1) == 1
+                v1 = Int64[42]
+                @test searchsortedlast(F.SIMDLinearScan(), v1, Int64(42), 1) == 1
+                @test searchsortedfirst(F.SIMDLinearScan(), v1, Int64(42), 1) == 1
+                # Duplicates.
+                vd = Int64[1, 2, 2, 2, 5]
+                @test searchsortedlast(F.SIMDLinearScan(), vd, Int64(2), 1) == 4
+                @test searchsortedfirst(F.SIMDLinearScan(), vd, Int64(2), 5) == 2
+            end
+
+            @testset "Fallback: non-Int64/Float64 eltypes" begin
+                # Int32 vectors must hit the generic LinearScan fallback,
+                # not the Int64 SIMD primitive.
+                v32 = Int32[1, 5, 10, 20, 50, 100, 200]
+                for x in (Int32(0), Int32(7), Int32(20), Int32(300))
+                    for hint in 1:length(v32)
+                        @test searchsortedlast(F.SIMDLinearScan(), v32, x, hint) ==
+                            searchsortedlast(v32, x)
+                        @test searchsortedfirst(F.SIMDLinearScan(), v32, x, hint) ==
+                            searchsortedfirst(v32, x)
+                    end
+                end
+                # Float32 same.
+                v32f = Float32[1.0, 5.0, 10.0, 20.0, 50.0]
+                for x in (Float32(0.0), Float32(7.0), Float32(20.0), Float32(100.0))
+                    @test searchsortedlast(F.SIMDLinearScan(), v32f, x, 2) ==
+                        searchsortedlast(v32f, x)
+                end
+                # Non-numeric.
+                vs = sort!(["alpha", "beta", "gamma", "delta", "epsilon"])
+                @test searchsortedlast(F.SIMDLinearScan(), vs, "gamma", 2) ==
+                    searchsortedlast(vs, "gamma")
+            end
+
+            @testset "Fallback: no hint, reverse order" begin
+                v = collect(Int64, 1:100)
+                # No hint → BinaryBracket.
+                @test searchsortedlast(F.SIMDLinearScan(), v, Int64(50)) ==
+                    searchsortedlast(v, Int64(50))
+                # Reverse order → scalar LinearScan.
+                v_rev = collect(Int64, 100:-1:1)
+                @test searchsortedlast(
+                    F.SIMDLinearScan(), v_rev, Int64(50), 1; order = Base.Order.Reverse
+                ) == searchsortedlast(v_rev, Int64(50), Base.Order.Reverse)
+            end
+        end
+
+        @safetestset "findequal + BisectThenSIMD" begin
+            using FindFirstFunctions, StableRNGs
+            F = FindFirstFunctions
+
+            # Reference: an Int sentinel-returning equality search built from
+            # Base.searchsortedfirst.
+            function ref_findequal(v, x)
+                i = searchsortedfirst(v, x)
+                return (i > lastindex(v) || !isequal(v[i], x)) ?
+                    (firstindex(v) - 1) : i
+            end
+
+            @testset "Strategy parity on Int64 (1-based)" begin
+                rng = StableRNG(3001)
+                for _ in 1:2_000
+                    n = rand(rng, 1:256)
+                    v = sort!(rand(rng, Int64(-50):Int64(50), n))
+                    x = rand(rng, Int64(-60):Int64(60))
+                    hint = rand(rng, 1:n)
+                    want = ref_findequal(v, x)
+                    for strategy in (
+                            F.BinaryBracket(), F.BracketGallop(),
+                            F.SIMDLinearScan(), F.LinearScan(),
+                            F.ExpFromLeft(), F.InterpolationSearch(),
+                            F.Auto(), F.BisectThenSIMD(),
+                        )
+                        @test F.findequal(strategy, v, x) == want
+                        @test F.findequal(strategy, v, x, hint) == want
+                    end
+                end
+            end
+
+            @testset "Strategy parity on Float64" begin
+                rng = StableRNG(3002)
+                for _ in 1:500
+                    n = rand(rng, 1:256)
+                    v = sort!(randn(rng, n))
+                    # Mix queries that hit elements with ones that don't.
+                    x = rand(rng) < 0.4 ? v[rand(rng, 1:n)] :
+                        (rand(rng) - 0.5) * 6
+                    hint = rand(rng, 1:n)
+                    want = ref_findequal(v, x)
+                    for strategy in (
+                            F.BinaryBracket(), F.BracketGallop(),
+                            F.SIMDLinearScan(), F.Auto(), F.BisectThenSIMD(),
+                        )
+                        @test F.findequal(strategy, v, x) == want
+                        @test F.findequal(strategy, v, x, hint) == want
+                    end
+                end
+            end
+
+            @testset "BisectThenSIMD shortcut uses SIMD on DenseVector{Int64}" begin
+                # Compare against findfirstsortedequal directly.
+                v = collect(Int64, 1:10_000)
+                for x in (
+                        Int64(1), Int64(5_000), Int64(10_000),
+                        Int64(0), Int64(10_001), Int64(-100), Int64(20_000),
+                    )
+                    a = F.findequal(F.BisectThenSIMD(), v, x)
+                    b = F.findfirstsortedequal(x, v)
+                    @test (a == 0 ? nothing : a) == b
+                end
+            end
+
+            @testset "Sentinel for OffsetArray-style indexing" begin
+                # Manually shift the index base by using a UnitRange directly.
+                v = collect(Int64, 10:20)
+                @test F.findequal(F.Auto(), v, Int64(15)) == 6
+                @test F.findequal(F.Auto(), v, Int64(100)) == 0
+                @test F.findequal(F.Auto(), v, Int64(-5)) == 0
+            end
+
+            @testset "Reverse ordering" begin
+                v_rev = collect(Int64, 10:-1:1)
+                # Forward findequal on a reverse-sorted vector with the
+                # Reverse ordering should still find the element if present.
+                @test F.findequal(
+                    F.BinaryBracket(), v_rev, Int64(5);
+                    order = Base.Order.Reverse,
+                ) == 6
+                @test F.findequal(
+                    F.BinaryBracket(), v_rev, Int64(99);
+                    order = Base.Order.Reverse,
+                ) == 0
+                # BisectThenSIMD on reverse order falls back to generic path.
+                @test F.findequal(
+                    F.BisectThenSIMD(), v_rev, Int64(5);
+                    order = Base.Order.Reverse,
+                ) == 6
+            end
+
+            @testset "Empty and single-element" begin
+                vempty = Int64[]
+                @test F.findequal(F.Auto(), vempty, Int64(0)) == 0
+                @test F.findequal(F.BisectThenSIMD(), vempty, Int64(0)) == 0
+                v1 = Int64[42]
+                @test F.findequal(F.Auto(), v1, Int64(42)) == 1
+                @test F.findequal(F.Auto(), v1, Int64(7)) == 0
+                @test F.findequal(F.BisectThenSIMD(), v1, Int64(42)) == 1
+            end
+
+            @testset "BisectThenSIMD in positional dispatch falls back" begin
+                # When used with searchsortedfirst/last, BisectThenSIMD just
+                # delegates to BinaryBracket — its purpose is findequal.
+                v = collect(Int64, 1:100)
+                @test searchsortedfirst(F.BisectThenSIMD(), v, Int64(50)) ==
+                    searchsortedfirst(v, Int64(50))
+                @test searchsortedlast(F.BisectThenSIMD(), v, Int64(50)) ==
+                    searchsortedlast(v, Int64(50))
+            end
         end
     end