Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
028cfce
Add SearchStrategy dispatch types and batched in-place searchsorted!
ChrisRackauckas May 16, 2026
163465e
Add ExpFromLeft + InterpolationSearch strategies, refine Auto via ben…
ChrisRackauckas May 17, 2026
e26ac82
Add GuesserHint + sampled-linearity check in Auto, broaden bench
ChrisRackauckas May 17, 2026
65ea6c0
Auto: skew detection + tightened linearity probe, more bench coverage
ChrisRackauckas May 17, 2026
e6afad5
Fix CI: rename strat -> strategy, add StableRNGs compat, docs entries
ChrisRackauckas May 17, 2026
cebc780
FFF 2.0: drop legacy single-purpose dispatches, keep only strategy API
ChrisRackauckas May 20, 2026
48c71cc
Add SIMDLinearScan strategy and restructure docs into topical pages
ChrisRackauckas May 20, 2026
dff0888
Add SearchProperties cache for Auto
ChrisRackauckas May 20, 2026
ed39347
Add NEWS.md documenting the 2.0 release
ChrisRackauckas May 20, 2026
9db2fe6
Add findequal + BisectThenSIMD; fix duplicates bug in 4 search paths
ChrisRackauckas May 20, 2026
c1c7520
Cleanup: typo, FFE_IR unification, docstring refresh, exports, equali…
ChrisRackauckas May 20, 2026
9c067bd
Retune Auto with SIMDLinearScan integration; 1080-cell bench sweep
ChrisRackauckas May 20, 2026
9be8387
Document BitInterpolationSearch exploration as negative result
ChrisRackauckas May 20, 2026
23fa058
Restore BitInterpolationSearch as opt-in strategy
ChrisRackauckas May 20, 2026
aaaf5a5
Add queries_sorted kwarg, searchsortedrange, is_log_linear, AirspeedV…
ChrisRackauckas May 20, 2026
d5b976e
Split src/FindFirstFunctions.jl into a 10-file hierarchy
ChrisRackauckas May 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions .github/workflows/Benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
name: Benchmarks

on:
  pull_request:
    paths:
      - "src/**"
      - "bench/**"
      - "Project.toml"
      - ".github/workflows/Benchmark.yml"
  workflow_dispatch:

# Only one benchmark run per PR at a time; cancel stale ones.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  pull-requests: write

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0 # we need both PR head and base branch
      - uses: julia-actions/setup-julia@v3
        with:
          version: "1.11"
      - uses: julia-actions/cache@v2
      - name: Install AirspeedVelocity
        run: |
          julia -e 'using Pkg; Pkg.add(PackageSpec(name="AirspeedVelocity", version="0.6"))'
      # Resolve commit shas explicitly so AirspeedVelocity sees stable refs.
      - name: Resolve refs
        id: resolve
        env:
          # `github.base_ref` is only set for pull_request events; fall back to the
          # default branch so manual (workflow_dispatch) runs still resolve a base.
          # Passing the ref through the environment also keeps it out of the
          # script text itself.
          BASE_REF: ${{ github.base_ref || github.event.repository.default_branch }}
        run: |
          PR_SHA=$(git rev-parse HEAD)
          BASE_SHA=$(git rev-parse "origin/$BASE_REF")
          echo "pr_sha=$PR_SHA" >> "$GITHUB_OUTPUT"
          echo "base_sha=$BASE_SHA" >> "$GITHUB_OUTPUT"
      - name: Run benchmark comparison
        run: |
          export JULIA_NUM_THREADS=1
          ~/.julia/bin/benchpkg FindFirstFunctions \
            --rev=${{ steps.resolve.outputs.base_sha }},${{ steps.resolve.outputs.pr_sha }} \
            --bench-on=${{ steps.resolve.outputs.pr_sha }} \
            --output-dir=results/ \
            --tune
      - name: Render comparison table
        run: |
          ~/.julia/bin/benchpkgtable FindFirstFunctions \
            --rev=${{ steps.resolve.outputs.base_sha }},${{ steps.resolve.outputs.pr_sha }} \
            --input-dir=results/ \
            > benchmark_table.md
          cat benchmark_table.md
      # Pull requests from forks run with a read-only token, so commenting would
      # fail there; only comment when the PR branch lives in this repository.
      # NOTE(review): no `comment-id` is passed, so this likely creates a new
      # comment on every run instead of replacing the previous one -- confirm
      # against the action's documentation.
      - name: Post benchmark comment
        if: >-
          github.event_name == 'pull_request' &&
          github.event.pull_request.head.repo.full_name == github.repository
        uses: peter-evans/create-or-update-comment@v4
        with:
          issue-number: ${{ github.event.pull_request.number }}
          body-path: benchmark_table.md
          edit-mode: replace
      - name: Upload raw results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: results/
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/Manifest.toml
bench/Manifest.toml
docs/build
docs/Manifest.toml
docs/src/assets/Manifest.toml
Expand Down
464 changes: 464 additions & 0 deletions NEWS.md

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "FindFirstFunctions"
uuid = "64ca27bc-2ba2-4a57-88aa-44e436879224"
version = "1.8.0"
version = "2.0.0"
authors = ["Chris Elrod <elrodc@gmail.com> and contributors"]

[deps]
Expand All @@ -10,13 +10,15 @@ PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Pkg = "1.10"
PrecompileTools = "1"
SafeTestsets = "0.1"
StableRNGs = "1"
Test = "1.10"
julia = "1.10"

[extras]
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Pkg", "SafeTestsets", "Test"]
test = ["Pkg", "SafeTestsets", "StableRNGs", "Test"]
2 changes: 2 additions & 0 deletions bench/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
results.csv
Manifest.toml
6 changes: 6 additions & 0 deletions bench/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
FindFirstFunctions = "64ca27bc-2ba2-4a57-88aa-44e436879224"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
146 changes: 146 additions & 0 deletions bench/analyze.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
using DelimitedFiles, Statistics, Printf

# Path of the results table analysed below; it is looked up next to this script.
const CSV = joinpath(@__DIR__, "results.csv")

# Load the comma-separated table. `raw` holds the data rows and `header` the
# column names, flattened to a plain vector of strings.
raw, header = readdlm(CSV, ','; header = true)
header = map(string, vec(header))
"""
    col(name, names = header)

Return the index of the first entry of `names` equal to `name`.

`names` defaults to the global `header` read from the CSV, so existing
one-argument calls behave exactly as before. Throws an `ErrorException`
("no column <name>") when no entry matches.
"""
function col(name, names = header)
    idx = findfirst(==(name), names)
    idx === nothing && error("no column $name")
    return idx
end

# Names of the strategy columns that are compared against each other.
const STRATS = ["LinearScan", "SIMDLinearScan", "BracketGallop", "ExpFromLeft", "InterpolationSearch"]

# Parse the value stored in each `STRATS` column of `row` as a Float64, in
# `STRATS` order. Shared by `row_winner` and `ratio_to`, which previously each
# carried their own copy of this comprehension.
strategy_times(row) = Float64[parse(Float64, string(row[col(s)])) for s in STRATS]

"""
    row_winner(row)

Return `(name, time)` for the strategy in `STRATS` with the smallest parsed
value in `row`.
"""
function row_winner(row)
    times = strategy_times(row)
    j = argmin(times)
    return STRATS[j], times[j]
end

"""
    ratio_to(row, strat_name)

Return the value in column `strat_name` of `row` divided by the smallest value
among the `STRATS` columns of that row.
"""
function ratio_to(row, strat_name)
    best = minimum(strategy_times(row))
    return parse(Float64, string(row[col(strat_name)])) / best
end

println("==> Where does SIMDLinearScan win?")
# One record per CSV row in which SIMDLinearScan has the smallest value among
# the STRATS columns. The vector is concretely typed rather than `[]`
# (a `Vector{Any}`), so later field accesses are not dynamically dispatched.
simd_wins = Vector{@NamedTuple{eltype::String, v::String, q::String, n::Int, m::Int}}()
for i in axes(raw, 1)
    local row = raw[i, :]
    local winner, _ = row_winner(row)
    winner == "SIMDLinearScan" || continue
    push!(
        simd_wins, (
            eltype = string(row[col("eltype")]),
            v = string(row[col("v_kind")]),
            q = string(row[col("q_kind")]),
            n = parse(Int, string(row[col("n")])),
            m = parse(Int, string(row[col("m")])),
        )
    )
end
n_simd_wins = length(simd_wins)
println(" SIMDLinearScan wins in $n_simd_wins cells")
println()

# Count the SIMD-winning cells per value of `m` (used here as a gap proxy) and
# print the counts in increasing order of `m`.
println("==> SIMDLinearScan wins by m (proxy for batched-gap regime):")
by_m = Dict{Int, Int}()
for cell in simd_wins
    by_m[cell.m] = 1 + get(by_m, cell.m, 0)
end
for (m_value, n_cells) in sort!(collect(by_m); by = first)
    @printf(" m=%5d: %d cells\n", m_value, n_cells)
end
println()

# Count the SIMD-winning cells per element-type label and print the counts in
# sorted label order.
println("==> SIMDLinearScan wins by eltype:")
by_eltype = Dict{String, Int}()
for cell in simd_wins
    by_eltype[cell.eltype] = 1 + get(by_eltype, cell.eltype, 0)
end
for (type_name, n_cells) in sort!(collect(by_eltype); by = first)
    println(" $(type_name): $(n_cells) cells")
end
println()

# For every cell that SIMDLinearScan wins, record how its time compares with
# LinearScan and ExpFromLeft (their time divided by the SIMD time). This shows
# the magnitude of the improvement.
println("==> SIMDLinearScan win margin over LinearScan and ExpFromLeft:")
println(" (Higher = bigger SIMD speedup over the second-best Auto candidate)")
margins = Vector{@NamedTuple{ratio_lin::Float64, ratio_exp::Float64, n::Int, m::Int}}()
for i in axes(raw, 1)
    local row = raw[i, :]
    local winner, t_simd = row_winner(row)
    winner == "SIMDLinearScan" || continue
    local t_lin = parse(Float64, string(row[col("LinearScan")]))
    local t_exp = parse(Float64, string(row[col("ExpFromLeft")]))
    push!(
        margins, (
            ratio_lin = t_lin / t_simd,
            ratio_exp = t_exp / t_simd,
            n = parse(Int, string(row[col("n")])),
            m = parse(Int, string(row[col("m")])),
        )
    )
end
if isempty(margins)
    # `median` and `maximum` throw on an empty collection, so report the
    # situation instead of aborting the rest of the analysis.
    println(" SIMDLinearScan wins no cells; no margins to summarize")
else
    lin_ratios = [c.ratio_lin for c in margins]
    exp_ratios = [c.ratio_exp for c in margins]
    println(
        " SIMD vs LinearScan: median $(median(lin_ratios))x, ",
        "max $(maximum(lin_ratios))x",
    )
    println(
        " SIMD vs ExpFromLeft: median $(median(exp_ratios))x, ",
        "max $(maximum(exp_ratios))x",
    )
end
println()

# Keep only the winning cells where SIMDLinearScan is more than 20% faster than
# both LinearScan and ExpFromLeft, then count them per (n, m) combination.
println("==> Best regime for SIMDLinearScan (cells where SIMD wins by >20%):")
significant_simd_wins = filter(margins) do cell
    cell.ratio_lin > 1.2 && cell.ratio_exp > 1.2
end
println(" $(length(significant_simd_wins)) cells where SIMD beats both LinearScan and ExpFromLeft by >20%")
n_by_m_n = Dict{Tuple{Int, Int}, Int}()
for cell in significant_simd_wins
    local size_pair = (cell.n, cell.m)
    n_by_m_n[size_pair] = 1 + get(n_by_m_n, size_pair, 0)
end
for size_pair in sort!(collect(keys(n_by_m_n)))
    @printf(" n=%5d m=%5d: %d cells\n", size_pair[1], size_pair[2], n_by_m_n[size_pair])
end
println()

# Compute: in each cell, the GAP. gap = n * span(queries)/span(v) / m
# (the same heuristic Auto uses). Use n/m as a rough proxy since exact gap
# depends on the query distribution which isn't recoverable from CSV alone.
println("==> n/m ratio for SIMD-winning cells (rough gap proxy):")
nm_buckets = Dict{Int, Int}()
for c in simd_wins
    local ratio = c.n ÷ c.m
    # Power-of-2 bucket: floor(log2(ratio)), computed with exact integer
    # arithmetic. Cells with n < m (ratio 0) get their own bucket (-1) instead of
    # being reported as part of [2^0, 2^1).
    local log_bucket = ratio < 1 ? -1 : ndigits(ratio; base = 2) - 1
    nm_buckets[log_bucket] = get(nm_buckets, log_bucket, 0) + 1
end
for b in sort(collect(keys(nm_buckets)))
    if b < 0
        @printf(" n/m < 1: %d cells\n", nm_buckets[b])
    else
        @printf(" n/m in [2^%d, 2^%d): %d cells\n", b, b + 1, nm_buckets[b])
    end
end
println()

println("==> What strategy does Auto pick in cells where SIMD would win?")
auto_picks_when_simd = Dict{String, Int}()
for row_index in axes(raw, 1)
    local cells = raw[row_index, :]
    first(row_winner(cells)) == "SIMDLinearScan" || continue
    # The CSV records Auto's time but not which strategy it chose, so the pick
    # is inferred as the strategy whose time is nearest to Auto's. No tolerance
    # is applied: the nearest strategy is always counted.
    local auto_time = parse(Float64, string(cells[col("Auto")]))
    local timed = [(name, parse(Float64, string(cells[col(name)]))) for name in STRATS]
    local nearest = argmin(entry -> abs(entry[2] - auto_time), timed)
    local pick = nearest[1]
    auto_picks_when_simd[pick] = 1 + get(auto_picks_when_simd, pick, 0)
end
# Report the inferred picks, most frequent first.
for (s, n) in sort(collect(pairs(auto_picks_when_simd)); by = entry -> -entry[2])
    println(" Auto -> $s: $n cells (out of $(length(simd_wins)))")
end
Loading
Loading