From 0abdab9c202ae1ca0c54b5dcc96306ea28bd1ad5 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Thu, 25 Jan 2024 15:39:52 -0500 Subject: [PATCH] Add improved searchsortedfirstcorrelated and bracketstrictlymontonic For benchmarks see: * https://github.com/SciML/DataInterpolations.jl/pull/198 * https://github.com/SciML/DataInterpolations.jl/pull/147 > While the cost of constructing the interpolator does not change, tracking the last index results in a best-case speedup of ~2.7x for CubicSpline, when successive values are close together (and a little higher for simpler interpolators). In the worst case (where successive values are always on opposite ends of the vectors), it can result in a ~15% slowdown due to the unhelpful expanding binary search at the beginning. However, the original approach of not tracking the index at all is also still available; it now involves essentially one extra if statement, which seems to be lost in the timing noise. --- .github/workflows/Downstream.yml | 54 ++++++++++++++++++++++++++ README.md | 40 +++++++++++++++++++- src/FindFirstFunctions.jl | 65 ++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/Downstream.yml diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml new file mode 100644 index 0000000..c168fba --- /dev/null +++ b/.github/workflows/Downstream.yml @@ -0,0 +1,54 @@ +name: IntegrationTest +on: + push: + branches: [master] + tags: [v*] + pull_request: + +jobs: + test: + name: ${{ matrix.package.repo }}/${{ matrix.package.group }}/${{ matrix.julia-version }} + runs-on: ${{ matrix.os }} + env: + GROUP: ${{ matrix.package.group }} + strategy: + fail-fast: false + matrix: + julia-version: [1] + os: [ubuntu-latest] + package: + - {user: SciML, repo: DataInterpolations.jl, group: Core} + - {user: SciML, repo: ModelingToolkit.jl, group: InterfaceI} + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + 
with: + version: ${{ matrix.julia-version }} + arch: x64 + - uses: julia-actions/julia-buildpkg@latest + - name: Clone Downstream + uses: actions/checkout@v4 + with: + repository: ${{ matrix.package.user }}/${{ matrix.package.repo }} + path: downstream + - name: Load this and run the downstream tests + shell: julia --color=yes --project=downstream {0} + run: | + using Pkg + try + # force it to use this PR's version of the package + Pkg.develop(PackageSpec(path=".")) # resolver may fail with main deps + Pkg.update() + Pkg.test(coverage=true) # resolver may fail with test time deps + catch err + err isa Pkg.Resolve.ResolverError || rethrow() + # If we can't resolve that means this is incompatible by SemVer and this is fine + # It means we marked this as a breaking change, so we don't need to worry about + # Mistakenly introducing a breaking change, as we have intentionally made one + @info "Not compatible with this release. No problem." exception=err + exit(0) # Exit immediately, as a success + end + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v3 + with: + file: lcov.info diff --git a/README.md b/README.md index eec6dab..fb5f89b 100644 --- a/README.md +++ b/README.md @@ -7,4 +7,42 @@ over more generic implementations. ## Functions -* `findfirstequal(x::Int64,A::DenseVector{Int64})`: finds the first value in `A` equal to `x` +### `findfirstequal` + +```julia +findfirstequal(x::Int64,A::DenseVector{Int64}) +``` + +Finds the first value in `A` equal to `x` + +### `bracketstrictlymontonic` + +```julia +bracketstrictlymontonic(v, x, guess, o::Base.Order.Ordering) +``` + +Starting from an initial `guess` index, find indices `(lo, hi)` such that `v[lo] ≤ x ≤ +v[hi]` according to the specified order, assuming that `x` is actually within the range of +values found in `v`. If `x` is outside that range, either `lo` will be `firstindex(v)` or +`hi` will be `lastindex(v)`. + +Note that the results will not typically satisfy `lo ≤ guess ≤ hi`. 
If `x` is precisely +equal to a value that is not unique in the input `v`, there is no guarantee that `(lo, hi)` +will encompass *all* indices corresponding to that value. + +This algorithm is essentially an expanding binary search, which can be used as a precursor +to `searchsorted` and related functions, which can take `lo` and `hi` as arguments. The +purpose of using this function first would be to accelerate convergence in those functions +by using correlated `guess`es for repeated calls. The best `guess` for the next call of +this function would be the index returned by the previous call to `searchsorted`. + +See `sort!` for an explanation of the keyword arguments `by`, `lt` and `rev`. + +### `searchsortedfirstcorrelated(v::AbstractVector, x, guess)` + +```julia +searchsortedfirstcorrelated(v::AbstractVector, x, guess) +``` + +An accelerated `findfirst` on sorted vectors using a bracketed search. Requires a `guess` +to start the search from. diff --git a/src/FindFirstFunctions.jl b/src/FindFirstFunctions.jl index 16bec72..b020f7f 100644 --- a/src/FindFirstFunctions.jl +++ b/src/FindFirstFunctions.jl @@ -68,5 +68,70 @@ function findfirstequal(vpivot::Int64, ivars::DenseVector{Int64}) ret < 0 ? nothing : ret + 1 end +""" + bracketstrictlymontonic(v, x, guess, o::Base.Order.Ordering) + +Starting from an initial `guess` index, find indices `(lo, hi)` such that `v[lo] ≤ x ≤ +v[hi]` according to the specified order, assuming that `x` is actually within the range of +values found in `v`. If `x` is outside that range, either `lo` will be `firstindex(v)` or +`hi` will be `lastindex(v)`. + +Note that the results will not typically satisfy `lo ≤ guess ≤ hi`. If `x` is precisely +equal to a value that is not unique in the input `v`, there is no guarantee that `(lo, hi)` +will encompass *all* indices corresponding to that value. 
+
+This algorithm is essentially an expanding binary search, which can be used as a precursor
+to `searchsorted` and related functions, which can take `lo` and `hi` as arguments. The
+purpose of using this function first would be to accelerate convergence in those functions
+by using correlated `guess`es for repeated calls. The best `guess` for the next call of
+this function would be the index returned by the previous call to `searchsorted`.
+
+See [`sort!`](@ref) for an explanation of the keyword arguments `by`, `lt` and `rev`.
+
+The ordering is supplied as the positional argument `o`, a `Base.Order.Ordering` such as
+`Base.Order.Forward`; `by`, `lt` and `rev` are not accepted as keywords by this method.
+A `guess` outside `firstindex(v):lastindex(v)` returns the whole index range.
+"""
+function bracketstrictlymontonic(v::AbstractVector,
+        x,
+        guess::Integer,
+        o::Base.Order.Ordering)::NTuple{2, keytype(v)}
+    bottom = firstindex(v)
+    top = lastindex(v)
+    # A guess outside the vector carries no information: fall back to the whole range.
+    (guess < bottom || guess > top) && return bottom, top
+    # NOTE: for cache efficiency in repeated calls, `v[bottom]` and `v[top]` are
+    # deliberately not inspected up front. This should only result in significant slow
+    # downs for calls with out-of-bounds values of `x` *and* bad `guess`es.
+    #
+    # Do the stride arithmetic in the index type of `v`, not in the type of `guess`: with
+    # a narrow guess type (e.g. `Int8`) `guess + u` and `u += u` would wrap around, and the
+    # `@inbounds` reads below could then leave the array or the loop could stall.
+    lo = convert(typeof(top), guess)
+    u = one(lo)
+    hi = min(lo + u, top)
+    @inbounds if Base.Order.lt(o, x, v[lo])
+        # `x` sorts before `v[guess]`: gallop towards `bottom`, doubling the stride.
+        while lo > bottom && Base.Order.lt(o, x, v[lo])
+            lo, hi = max(bottom, lo - u), lo
+            u += u
+        end
+    else
+        # Otherwise gallop towards `top` until `x` sorts before `v[hi]` or `hi == top`.
+        while hi < top && !Base.Order.lt(o, x, v[hi])
+            lo, hi = hi, min(top, hi + u)
+            u += u
+        end
+    end
+    return lo, hi
+end
+
+"""
+    searchsortedfirstcorrelated(v::AbstractVector, x, guess)
+
+Bracket `x` in `v` with [`bracketstrictlymontonic`](@ref), starting from the index `guess`
+and using `Base.Order.Forward`, then run `searchsortedfirst` restricted to that bracket.
+
+If `v` contains repeated values the bracket may not cover every equal entry, so the result
+can differ from an unrestricted `searchsortedfirst(v, x)`.
+"""
+function searchsortedfirstcorrelated(v::AbstractVector, x, guess)
+    lo, hi = bracketstrictlymontonic(v, x, guess, Base.Order.Forward)
+    return searchsortedfirst(v, x, lo, hi, Base.Order.Forward)
+end
+
+"""
+    searchsortedlastcorrelated(v::AbstractVector, x, guess)
+
+Bracket `x` in `v` with [`bracketstrictlymontonic`](@ref), starting from the index `guess`
+and using `Base.Order.Forward`, then run `searchsortedlast` restricted to that bracket.
+
+If `v` contains repeated values the bracket may not cover every equal entry, so the result
+can differ from an unrestricted `searchsortedlast(v, x)`.
+"""
+function searchsortedlastcorrelated(v::AbstractVector, x, guess)
+    lo, hi = bracketstrictlymontonic(v, x, guess, Base.Order.Forward)
+    return searchsortedlast(v, x, lo, hi, Base.Order.Forward)
+end
+
+# Ranges ignore the guess and call the plain `searchsorted*` function directly.
+searchsortedfirstcorrelated(r::AbstractRange, x, _) = searchsortedfirst(r, x)
+searchsortedlastcorrelated(r::AbstractRange, x, _) = searchsortedlast(r, x)
 end