From 67d379a500e1f7319fe7026ff9b129cda515edcd Mon Sep 17 00:00:00 2001 From: Stephen Vavasis Date: Thu, 20 Jan 2022 16:27:19 -0500 Subject: [PATCH 1/8] Major update to sorted containers. Address issue 239: Dict constructors on typed data but without explicit type declarations should no longer return {Any,Any} items because collect is now used when explicit types are omitted Address issue 479: Deprecate insert!, pop!(s), introduce popfirst!, poplast!, ss_push!, sd_push!, smd_push! Address issue 528: push! documentation improved Address issue 667: Docs extensively cleaned up and much greater use of documenter.jl Address issue 669: Expanded use of tokens including a:b notation for tokens Address issue 671: Add reverse iteration Address issue 742: New union, merge, symdiff, setdiff, intersect algorithms for sorted containers to improve performance Address issue 726: Clarify docs on insert! (now deprecated) Address issue 767: Implement firstindex, lastindex, reverse for sorted container iteration Address issue 781: Use collect on an intermediate step to infer types in untyped construction of sorted containers Also, this commit cleans up the constructors and adds two new iteration-helper functions inclusive_key and exclusive_key that were useful in my own work. 
--- docs/Project.toml | 5 +- docs/src/sorted_containers.md | 669 +++++------------ src/DataStructures.jl | 19 +- src/balanced_tree.jl | 119 ++- src/container_loops.jl | 323 -------- src/sorted_container_iteration.jl | 1167 +++++++++++++++++++++++++++++ src/sorted_dict.jl | 672 +++++++---------- src/sorted_multi_dict.jl | 552 ++++++-------- src/sorted_set.jl | 848 ++++++++++++--------- src/tokens2.jl | 181 ----- test/test_deprecations.jl | 11 + test/test_sorted_containers.jl | 445 +++++++++-- 12 files changed, 2929 insertions(+), 2082 deletions(-) delete mode 100644 src/container_loops.jl create mode 100644 src/sorted_container_iteration.jl delete mode 100644 src/tokens2.jl diff --git a/docs/Project.toml b/docs/Project.toml index d7906fcfe..dc779ba28 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,4 @@ [deps] +Coverage = "a2441757-f6aa-5fb2-8edb-039e3f45d037" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" - -[compat] -Documenter = "0.23" diff --git a/docs/src/sorted_containers.md b/docs/src/sorted_containers.md index 5fda05cb1..fdca9fb2f 100644 --- a/docs/src/sorted_containers.md +++ b/docs/src/sorted_containers.md @@ -10,18 +10,19 @@ with the additional feature that the keys are stored in sorted order and can be efficiently iterated in this order. SortedDict is a subtype of AbstractDict. It is generally slower than `Dict` because looking up a key requires an O(log _n_) tree search rather than an expected O(1) -hash-table lookup time as with Dict. SortedDict is a parametrized type +hash-table lookup time of `Dict`. SortedDict is a parameterized type with three parameters, the key type `K`, the value type `V`, and the ordering type `O`. SortedSet has only keys; it is an alternative to the -built-in `Set` container. Internally, SortedSet is implemented as a -SortedDict in which the value type is `Void`. Finally, SortedMultiDict +built-in `Set` container and is a subtype of AbstractSet. 
+Internally, SortedSet is implemented as a +SortedDict in which the value type is `Nothing`. Finally, SortedMultiDict is similar to SortedDict except that each key can be associated with multiple values. The key=>value pairs in a SortedMultiDict are stored according to the sorted order for keys, and key=>value pairs with the same key are stored in order of insertion. The containers internally use a 2-3 tree, which is a kind of balanced -tree and is described in many elementary data structure textbooks. +tree and is described in data structure textbooks. The containers require two functions to compare keys: a _less-than_ and _equals_ function. With the default ordering argument, the comparison @@ -31,52 +32,27 @@ are keys. More details are provided below. ## Tokens for Sorted Containers -The sorted container objects use a special type for indexing called a -_token_ defined as a two-entry tuple and aliased as `SDToken`, -`SMDToken`, and `SetToken` for SortedDict, SortedMultiDict and SortedSet -respectively. A token is the address of a single data item in the +The sorted containers support an object for indexing called a +_token_ defined as a two-entry tuple and aliased as `SortedDictToken`, +`SortedMultiDictToken`, or `SortedSetToken`. +A token is the address of a single data item in the container and can be dereferenced in time O(1). -The first entry of a Token tuple is the container as a whole, and the +The first entry of a token tuple is the container as a whole, and the second refers to the particular item. The second part is called a -_semitoken_. The types for a semitoken are `SDSemiToken`, -`SMDSemiToken`, and `SetSemiToken` for the three types of containers -SortedDict, SortedMultiDict and SortedSet. These types are all aliases -of `IntSemiToken`. +_semitoken_. The type of the semitoken is `IntSemiToken`. -A restriction for the sorted containers is that `IntSemiToken` or its -aliases cannot used as the key-type. 
This is because ambiguity would +A restriction for the sorted containers is that `IntSemiToken` +cannot be used as the key-type. This is because ambiguity would result between the two subscripting calls `sc[k]` and `sc[st]` described below. In the rare scenario that a sorted container whose key-type is `IntSemiToken` is required, a workaround is to wrap the key inside another immutable structure. -In the current version of Julia, it is costly to operate on tuples whose -entries are not bits-types because such tuples are allocated on the -heap. For example, the first entry of a token is a pointer to a -container (a non-bits type), so a new token is allocated on the heap -rather than the stack. In order to avoid performance loss, the package -uses tokens less frequently than semitokens. For a function taking a -token as an argument like `deref` described below, if it is invoked by -explicitly naming the token like this: - -```julia -tok = (sc,st) # sc is a sorted container, st is a semitoken -k,v = deref(tok) -``` - -then there may be a loss of performance compared to: - -```julia -k,v = deref((sc,st)) -``` - -because the former may need an extra heap allocation step for `tok`. - The notion of token is similar to the concept of iterators used by C++ standard containers. Tokens can be explicitly advanced or regressed through the data in the sorted order; they are implicitly advanced or -regressed via iteration loops defined below. +regressed via iteration defined below. A token may take two special values: the _before-start_ value and the _past-end_ value. These values act as lower and upper bounds on the @@ -85,25 +61,40 @@ token can be regressed. A dereferencing operation on either leads to an error. In the current implementation, semitokens are internally stored as -integers. However, for the purpose of future compatibility, the user -should not extract this internal representation; these integers do not -have a documented interpretation in terms of the container. 
+integers. Users should regard these integers as opaque +since future versions of the package may change the internal indexing +scheme. +In certain situations it may be more costly to operate on tokens than +semitokens because the first entry of a token (i.e., the container) +is not a bits-type. +If code profiling indicates that statements using tokens are allocating memory, +then it may be advisable to rewrite the application code using semitokens +more than tokens. + +## Complexity of Sorted Containers + +In the list of functions below, the running time of the various +operations is provided. In these running times, _n_ denotes the +size (number of items) in the container, +and _c_ denotes the time needed to compare two keys. + + ## Constructors for Sorted Containers ### `SortedDict` constructors ```@docs -SortedDict(o::Ord) where {Ord <: Ordering} +SortedDict{K,V,Ord}(o::Ord=Forward) where {K, V, Ord <: Ordering} ``` - SortedDict{K,V}(o=Forward) -Construct an empty `SortedDict` with key type `K` and value type -`V` with `o` ordering (default to forward ordering). +```@docs +SortedDict(o::Ord=Forward) where {Ord <: Ordering} +``` ```@docs -SortedDict{K,D,Ord}(o::Ord) where {K, D, Ord <: Ordering} +SortedDict(iter, o::Ord=Forward) where {Ord <: Ordering} ``` ```@docs @@ -111,29 +102,19 @@ SortedDict(ps::Pair...) ``` ```@docs -SortedDict{K,D}(o::Ord, ps::Pair...) where {K,D,Ord<:Ordering} +SortedDict{K,V}(::Val{true}, iterable) where {K,V} ``` -### `SortedMultiDict` constructors - - SortedMultiDict(ks, vs, o) -Construct a SortedMultiDict using keys given by `ks`, values given -by `vs` and ordering object `o`. The ordering object defaults to -`Forward` if not specified. The two arguments `ks` and `vs` are -1-dimensional arrays of the same length in which `ks` holds keys and -`vs` holds the corresponding values. 
+### `SortedMultiDict` constructors ```@docs -SortedMultiDict{K,D,Ord}(o::Ord) where {K,D,Ord} +SortedMultiDict{K,V,Ord}(o::Ord=Forward) where {K, V, Ord <: Ordering} ``` ```@docs -SortedMultiDict() -``` +SortedMultiDict(o::Ord=Forward) where {Ord <: Ordering} -```@docs -SortedMultiDict(o::O) where {O<:Ordering} ``` ```@docs @@ -141,560 +122,317 @@ SortedMultiDict(ps::Pair...) ``` ```@docs -SortedMultiDict(o::Ordering, ps::Pair...) +SortedMultiDict(iter, o::Ord=Forward) where {Ord <: Ordering} ``` ```@docs -SortedMultiDict{K,D}(kv) where {K,D} +SortedMultiDict{K,V}(::Val{true}, iterable) where {K,V} ``` +### `SortedSet` constructors ```@docs -SortedMultiDict{K,D}(o::Ord, kv) where {K,D,Ord<:Ordering} +SortedSet{K,Ord}(o::Ord=Forward) where {K, Ord<:Ordering} ``` -### `SortedSets` constructors - ```@docs -SortedSet{K, Ord <: Ordering} +SortedSet(o::Ord=Forward) where {Ord <: Ordering} ``` ```@docs -SortedSet() +SortedSet(o::Ordering, iter) ``` - ```@docs -SortedSet(o::O) where {O<:Ordering} +SortedSet{K}(::Val{true}, iterable) where {K} ``` + +## Navigating the Containers + ```@docs -SortedSet{K}() where {K} +Base.getindex(sd::SortedDict, k) ``` ```@docs -SortedSet{K}(o::O) where {K,O<:Ordering} +Base.getindex(m::SortedDict, st::IntSemiToken) ``` -## Complexity of Sorted Containers - -In the list of functions below, the running time of the various -operations is provided. In these running times, _n_ denotes the current -size (number of items) in the container at the time of the function -call, and _c_ denotes the time needed to compare two keys. - -### Navigating the Containers - ```@docs -getindex(m::SortedDict, k_) +Base.setindex!(m::SortedDict, newvalue, st::IntSemiToken) ``` - deref((sc, st)) - -Argument `(sc,st)` is a token (i.e., `sc` is a container and `st` is -a semitoken). Note the double-parentheses in the calling syntax: the -argument of `deref` is a token, which is defined to be a 2-tuple. -This returns a key=>value pair. 
pointed to by the token for -SortedDict and SortedMultiDict. Note that the syntax -`k,v=deref((sc,st))` is valid because Julia automatically iterates -over the two entries of the Pair in order to assign `k` and `v`. For -SortedSet this returns a key. Time: O(1) - - deref_key((sc, st)) - -Argument `(sc,st)` is a token for SortedMultiDict or SortedDict. -This returns the key (i.e., the first half of a key=>value pair) -pointed to by the token. This functionality is available as plain -`deref` for SortedSet. Time: O(1) - - deref_value((sc, st)) - -Argument `(sc,st)` is a token for SortedMultiDict or SortedDict. -This returns the value (i.e., the second half of a key=>value -pair) pointed to by the token. Time: O(1) - - startof(sc) +```@docs +Base.setindex!(sd::SortedDict, newvalue, k) -Argument `sc` is SortedDict, SortedMultiDict or SortedSet. This -function returns the semitoken of the first item according to the -sorted order in the container. If the container is empty, it returns -the past-end semitoken. Time: O(log _n_) +``` - endof(sc) +```@docs +deref(token::Token) +``` -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. This -function returns the semitoken of the last item according to the -sorted order in the container. If the container is empty, it returns -the before-start semitoken. Time: O(log _n_) ```@docs -first(sc::SortedDict) +deref_key(token::Token) ``` - ```@docs -first(sc::SortedMultiDict) +deref_value(token::Token) ``` ```@docs -first(sc::SortedSet) +Base.firstindex(m::SortedContainer) ``` ```@docs -last(sc::SortedDict) +Base.lastindex(m::SortedContainer) ``` ```@docs -last(sc::SortedMultiDict) +token_firstindex(m::SortedContainer) ``` ```@docs -last(sc::SortedSet) +token_lastindex(m::SortedContainer) ``` - pastendsemitoken(sc) - -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. This -function returns the past-end semitoken. Time: O(1) - - beforestartsemitoken(sc) - -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. 
This -function returns the before-start semitoken. Time: O(1) - - advance((sc,st)) - -Argument `(sc,st)` is a token. This function returns the semitoken -of the next entry in the container according to the sort order of -the keys. After the last item, this routine returns the past-end -semitoken. It is an error to invoke this function if `(sc,st)` is -the past-end token. If `(sc,st)` is the before-start token, then -this routine returns the semitoken of the first item in the sort -order (i.e., the same semitoken returned by the `startof` function). -Time: O(log _n_) - - regress((sc,st)) - -Argument `(sc,st)` is a token. This function returns the semitoken -of the previous entry in the container according to the sort order -of the keys. If `(sc,st)` indexes the first item, this routine -returns the before-start semitoken. It is an error to invoke this -function if `(sc,st)` is the before-start token. If `(sc,st)` is the -past-end token, then this routine returns the smitoken of the last -item in the sort order (i.e., the same semitoken returned by the -`endof` function). Time: O(log _n_) - - searchsortedfirst(sc,k) +```@docs +Base.first(sc::SortedContainer) +``` -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet and `k` -is a key. This routine returns the semitoken of the first item in -the container whose key is greater than or equal to `k`. If there is -no such key, then the past-end semitoken is returned. Time: O(_c_ -log _n_) +```@docs +Base.last(sc::SortedContainer) +``` - searchsortedlast(sc,k) +```@docs +pastendsemitoken(sc::SortedContainer) +``` +```@docs +beforestartsemitoken(sc::SortedContainer) +``` -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet and `k` -is a key. This routine returns the semitoken of the last item in the -container whose key is less than or equal to `k`. If there is no -such key, then the before-start semitoken is returned. 
Time: O(_c_ -log _n_) +```@docs +pastendtoken(sc::SortedContainer) +``` - searchsortedafter(sc,k) -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet and `k` -is an element of the key type. This routine returns the semitoken of -the first item in the container whose key is greater than `k`. If -there is no such key, then the past-end semitoken is returned. Time: -O(_c_ log _n_) +```@docs +beforestarttoken(sc::SortedContainer) +``` - searchequalrange(sc,k) +```@docs +advance(token::Token) +``` -Argument `sc` is a SortedMultiDict and `k` is an element of the key -type. This routine returns a pair of semitokens; the first of the -pair is the semitoken addressing the first item in the container -with key `k` and the second is the semitoken addressing the last -item in the container with key `k`. If no item matches the given -key, then the pair (past-end-semitoken, before-start-semitoken) is -returned. Time: O(_c_ log _n_) +```@docs +regress(token::Token) -## Inserting & Deleting in Sorted Containers +``` - empty!(sc) +```@docs ++(t::Token, k::Integer) -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. This -empties the container. Time: O(1). +``` ```@docs -insert!(sc::SortedDict, k, v) +Base.searchsortedfirst(m::SortedContainer, k) ``` ```@docs -insert!(sc::SortedMultiDict, k, v) +Base.searchsortedlast(m::SortedContainer, k) ``` ```@docs -insert!(sc::SortedSet, k) +searchsortedafter(m::SortedContainer, k) ``` ```@docs -push!(sc::SortedSet, k) +searchequalrange(smd::SortedMultiDict, k) ``` ```@docs -push!(sc::SortedDict, pr::Pair) +findkey(m::SortedSet, k) ``` ```@docs -push!(sc::SortedMultiDict, pr::Pair) +findkey(sd::SortedDict, k) ``` - delete!((sc, st)) +## Inserting & Deleting in Sorted Containers -Argument `(sc,st)` is a token for a SortedDict, SortedMultiDict or -SortedSet. This operation deletes the item addressed by `(sc,st)`. -It is an error to call this on an entry that has already been -deleted or on the before-start or past-end tokens. 
After this -operation is complete, `(sc,st)` is an invalid token and cannot be -used in any further operations. Time: O(log _n_) ```@docs -pop!(sc::SortedDict, k) +ss_push!(ss::SortedSet, k) ``` ```@docs -pop!(ss::SortedSet, k) +sd_push!(sd::SortedDict, p::Pair) ``` ```@docs -pop!(ss::SortedSet) +smd_push!(smd::SortedMultiDict, p::Pair) ``` ```@docs -setindex!(m::SortedDict, d_, k_) +Base.push!(ss::SortedSet, k) ``` -### Token Manipulation - - compare(sc, st1, st2) - -Here, `st1` and `st2` are semitokens for the same container `sc`; -this function determines the relative positions of the data items -indexed by `(sc,st1)` and `(sc,st2)` in the sorted order. The return -value is -1 if `(sc,st1)` precedes `(sc,st2)`, 0 if they are equal, -and 1 if `(sc,st1)` succeeds `(sc,st2)`. This function compares the -tokens by determining their relative position within the tree -without dereferencing them. For SortedDict it is mostly equivalent -to comparing `deref_key((sc,st1))` to `deref_key((sc,st2))` using -the ordering of the SortedDict except in the case that either -`(sc,st1)` or `(sc,st2)` is the before-start or past-end token, in -which case the `deref` operation will fail. Which one is more -efficient depends on the time-complexity of comparing two keys. -Similarly, for SortedSet it is mostly equivalent to comparing -`deref((sc,st1))` to `deref((sc,st2))`. For SortedMultiDict, this -function is not equivalent to a key comparison since two items in a -SortedMultiDict with the same key are not necessarily the same item. -Time: O(log _n_) - - status((sc, st)) - -This function returns 0 if the token `(sc,st)` is invalid (e.g., -refers to a deleted item), 1 if the token is valid and points to -data, 2 if the token is the before-start token and 3 if it is the -past-end token. Time: O(1) - -## Iteration Over Sorted Containers - -As is standard in Julia, iteration over the containers is implemented -via calls to the `iterate` function. 
It is usual -practice, however, to call this function implicitly with a for-loop -rather than explicitly, so they are presented here in for-loop notation. -Internally, all of these iterations are implemented with semitokens that -are advanced via the `advance` operation. Each iteration of these loops -requires O(log _n_) operations to advance the semitoken. If one loops -over an entire container, then the amortized cost of advancing the -semitoken drops to O(1). - -The following snippet loops over the entire container `sc`, where `sc` -is a SortedDict or SortedMultiDict: - -```julia -for (k,v) in sc - < body > -end +```@docs +Base.push!(sd::SortedDict, p::Pair) ``` -In this loop, `(k,v)` takes on successive (key,value) pairs according to -the sort order of the key. If one uses: - -```julia -for p in sc - < body > -end +```@docs +Base.push!(smd::SortedMultiDict, p::Pair) ``` -where `sc` is a SortedDict or SortedMultiDict, then `p` is a `k=>v` -pair. - -For SortedSet one uses: - -```julia -for k in ss - < body > -end +```@docs +Base.delete!(token::Token) ``` -There are two ways to iterate over a subrange of a container. The first -is the inclusive iteration for SortedDict and SortedMultiDict: - -```julia -for (k,v) in inclusive(sc,st1,st2) - < body > -end +```@docs +Base.delete!(ss::SortedSet, k) ``` -Here, `st1` and `st2` are semitokens that refer to the container `sc`. -Token `(sc,st1)` may not be the before-start token and -token `(sc,st2)` may not be the past-end token. -It is acceptable for `(sc,st1)` to be the past-end token or `(sc,st2)` -to be the before-start token or both (in these cases, the body is not executed). -If `compare(sc,st1,st2)==1` then the body is not executed. A second -calling format for `inclusive` is `inclusive(sc,(st1,st2))`. With the -second format, the return value of `searchequalrange` may -be used directly as the second argument to `inclusive`. 
- -One can also define a loop that excludes the final item: - -```julia -for (k,v) in exclusive(sc,st1,st2) - < body > -end +```@docs +Base.delete!(sc::SortedDict, k) ``` - -In this case, all the data addressed by tokens from `(sc,st1)` up to but -excluding `(sc,st2)` are executed. The body is not executed at all if -`compare(sc,st1,st2)>=0`. In this setting, either or both can be the -past-end token, and `(sc,st2)` can be the before-start token. For the -sake of consistency, `exclusive` also supports the calling format -`exclusive(sc,(st1,st2))`. In the previous few snippets, if the loop -object is `p` instead of `(k,v)`, then `p` is a `k=>v` pair. - -Both the `inclusive` and `exclusive` functions return objects that can -be saved and used later for iteration. The validity of the tokens is not -checked until the loop initiates. - -For SortedSet the usage is: - -```julia -for k in inclusive(ss,st1,st2) - < body > -end - -for k in exclusive(ss,st1,st2) - < body > -end +```@docs +Base.popfirst!(ss::SortedSet) ``` - -If `sc` is a SortedDict or SortedMultiDict, one can iterate over just -keys or just values: - -```julia -for k in keys(sc) - < body > -end - -for v in values(sc) - < body > -end +```@docs +poplast!(ss::SortedSet) ``` -Finally, one can retrieve semitokens during any of these iterations. In -the case of SortedDict and SortedMultiDict, one uses: - -```julia -for (st,k,v) in semitokens(sc) - < body > -end - -for (st,k) in semitokens(keys(sc)) - < body > -end - -for (st,v) in semitokens(values(sc)) - < body > -end +```@docs +Base.pop!(ss::SortedSet, k) ``` -In each of the above three iterations, `st` is a semitoken referring to -the current `(k,v)` pair. 
In the case of SortedSet, the following -iteration may be used: - -```julia -for (st,k) in semitokens(ss) - < body > -end +```@docs +Base.pop!(sd::SortedDict, k) ``` -If one wishes to retrieve only semitokens, the following may be used: +## Token Manipulation -```julia -for st in onlysemitokens(sc) - < body > -end +```@docs +compare(m::SortedContainer, s::IntSemiToken, t::IntSemiToken) ``` -In this case, `sc` is a SortedDict, SortedMultiDict, or SortedSet. To be -compatible with standard containers, the package also offers `eachindex` -iteration: - -```julia -for ind in eachindex(sc) - < body > -end +```@docs +status(token::Token) ``` -This iteration function `eachindex` is equivalent to `keys` in the case -of SortedDict. It is equivalent to `onlysemitokens` in the case of -SortedMultiDict and SortedSet. - -In place of `sc` in the above `keys`, `values` and `semitokens`, -snippets, one could also use `inclusive(sc,st1,st2)` or -`exclusive(sc,st1,st2)`. Similarly, for SortedSet, one can iterate over -`semitokens(inclusive(ss,st1,st2))` or -`semitokens(exclusive(ss,st1,st2))` -Note that it is acceptable for the loop body in the above `semitokens` -code snippets to invoke `delete!((sc,st))` or `delete!((ss,st))`. This -is because the for-loop internal state variable is already advanced to -the next token at the beginning of the body, so `st` is not necessarily -referred to in the loop body (unless the user refers to it). - -### Other Functions +## Iteration Over Sorted Containers - isempty(sc) +```@docs + Base.iterate(sci::SortedContainerIterable) +``` -Returns `true` if the container is empty (no items). Time: O(1) +## `in` function - length(sc) +```@docs +Base.in(k,m::SortedSet) +``` -Returns the length, i.e., number of items, in the container. 
Time: -O(1) +```@docs +Base.in(p::Pair, sd::SortedDict) +``` -```docs -in(pr::Pair, m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} +```@docs +Base.in(p::Pair, smd::SortedMultiDict) ``` - in(x, iter) -Returns true if `x` is in `iter`, where `iter` refers to any of the -iterable objects described above in the discussion of container -loops and `x` is of the appropriate type. For all of the iterables -except the five listed below, the algorithm used is a linear-time -search. For example, the call: +```@docs +Base.in(x, iter::SortedContainerIterable) +``` - (k=>v) in exclusive(sd, st1, st2) +## Misc. Functions -where `sd` is a SortedDict, `st1` and `st2` are semitokens, `k` is a -key, and `v` is a value, will loop over all entries in the -dictionary between the two tokens and a compare for equality using -`isequal` between the indexed item and `k=>v`. -The five exceptions are: + Base.isempty(m::SortedContainer) + Base.empty!(m::SortedContainer) + Base.empty(m::SortedContainer) + Base.length(m::SortedContainer) + Base.eltype(m::SortedContainer) + Base.keytype(m::SortedContainer) + Base.valtype(m::SortedContainer) + Base.eltype(m::SortedContainerIteration) + Base.keytype(m::SortedContainerIteration) + Base.valtype(m::SortedContainerIteration) + + +These functions from `Base` are all applicable to sorted containers +with the obvious meaning. The `eltype`, `keytype`, and `valtype` functions +may be applied either to the object `m` or its type. +Note that `keytype` and `valtype` are +applicable only to SortedDict and SortedMultiDict, or to +pairs iterations over SortedDict or SortedMultiDict. +Time: O(1) -```julia -(k=>v) in sd -(k=>v) in smd -k in ss -k in keys(sd) -k in keys(smd) -``` - -Here, `sd` is a SortedDict, `smd` is a SortedMultiDict, and `ss` is -a SortedSet. - -These five invocations of `in` use the index structure of the sorted -container and test equality based on the order object of the keys -rather than `isequal`. 
Therefore, these five are all faster than -linear-time looping. The first three were already discussed in the -previous entry. The last two are equivalent to `haskey(sd,k)` and -`haskey(smd,k)` respectively. To force the use of `isequal` test on -the keys rather than the order object (thus slowing the execution -from logarithmic to linear time), replace the above five constructs -with these: -```julia -(k=>v) in collect(sd) -(k=>v) in collect(smd) -k in collect(ss) -k in collect(keys(sd)) -k in collect(keys(smd)) +```@docs +ordtype(sc::SortedContainer) ``` ```@docs -eltype(sc::SortedDict) +orderobject(sc::SortedContainer) ``` ```@docs -keytype(sc::SortedDict) +Base.haskey(sc::SortedContainer, k) ``` ```@docs -valtype(sc::SortedDict) +Base.get(sd::SortedDict,k,v) ``` ```@docs -ordtype(sc::SortedDict) +Base.get!(sd::SortedDict,k,v) ``` ```@docs -orderobject(sc::SortedDict) +Base.getkey(sd::SortedDict,k,defaultk) ``` ```@docs -haskey(sc::SortedDict,k) +Base.isequal(ss1::SortedSet{K,Ord}, ss2::SortedSet{K,Ord}) where {K, Ord <: Ordering} ``` ```@docs -get(sd::SortedDict,k,v) +Base.isequal(sc1::SortedDict{K,V,Ord}, sc2::SortedDict{K,V,Ord}) where {K, V, Ord <: Ordering} ``` ```@docs -get!(sd::SortedDict,k,v) +Base.isequal(smd1::SortedMultiDict{K,V,Ord}, smd2::SortedMultiDict{K,V,Ord}) where {K, V, Ord <: Ordering} + ``` ```@docs -getkey(sd::SortedDict,k,defaultk) +packcopy(sc::SortedSet) ``` ```@docs -isequal(sc1::SortedDict,sc2::SortedDict) +packdeepcopy(sc::SortedSet) ``` ```@docs -packcopy(sc::SortedDict) +Base.merge(sd::SortedDict{K,V,Ord}, d1::AbstractDict{K,V}...) where {K,V,Ord <: Ordering} ``` - deepcopy(sc) - -This returns a copy of `sc` in which the data is deep-copied, i.e., -the keys and values are replicated if they are mutable types. A -semitoken for the original `sc` is a valid semitoken for the copy -because this operation preserves the relative positions of the data -in memory. 
Time O(_maxn_), where _maxn_ denotes the maximum size -that `sc` has attained in the past. - ```@docs -packdeepcopy(sc) +Base.merge!(sd::SortedDict{K,V,Ord}, d1::AbstractDict{K,V}...) where {K,V,Ord <: Ordering} ``` ```@docs -merge(m::SortedDict{K,D,Ord}, - others::AbstractDict{K,D}...) where {K,D,Ord <: Ordering} +Base.merge(smd::SortedMultiDict, iter...) ``` ```@docs -merge!(m::SortedDict{K,D,Ord}, - others::AbstractDict{K,D}...) where {K,D,Ord <: Ordering} +Base.merge!(smd::SortedMultiDict, iter...) ``` -### Set operations + + + +## Set operations The SortedSet container supports the following set operations. Note that in the case of intersect, symdiff and setdiff, the two SortedSets should @@ -704,34 +442,35 @@ default versions of these functions (that can be applied to `Any` iterables and that return arrays) are invoked. ```@docs -union!(m1::SortedSet, iterable_item) +Base.union!(ss::SortedSet, iterable...) ``` ```@docs -union(m1::SortedSet, others...) +Base.union(ss::SortedSet, iterable...) ``` ```@docs -intersect(m1::SortedSet{K,Ord}, others::SortedSet{K,Ord}...) where {K, Ord <: Ordering} +Base.intersect(ss::SortedSet, others...) ``` ```@docs -symdiff(m1::SortedSet{K,Ord}, m2::SortedSet{K,Ord}) where {K, Ord <: Ordering} +Base.symdiff(ss1::SortedSet, iterable) + ``` ```@docs -setdiff(m1::SortedSet{K,Ord}, m2::SortedSet{K,Ord}) where {K, Ord <: Ordering} +Base.setdiff(ss1::SortedSet{K,Ord}, ss2::SortedSet{K,Ord}) where {K, Ord<:Ordering} ``` ```@docs -setdiff!(m1::SortedSet, iterable) +Base.setdiff!(m1::SortedSet, iterable) ``` ```@docs -issubset(iterable, m2::SortedSet) +Base.issubset(iterable, ss::SortedSet) ``` -### Ordering of keys +## Ordering of keys As mentioned earlier, the default ordering of keys uses `isless` and `isequal` functions. 
If the default ordering is used, it is a @@ -792,6 +531,7 @@ container also needs an equal-to function; the default is: eq(o::Ordering, a, b) = !lt(o, a, b) && !lt(o, b, a) ``` + The user can also customize this function with a more efficient implementation. In the above example, an appropriate customization would be: @@ -800,6 +540,11 @@ appropriate customization would be: eq(::CaseInsensitive, a, b) = isequal(lowercase(a), lowercase(b)) ``` +Note: the user-defined `eq` and `lt` functions must be compatible in the sense +that `!lt(o, a, b) && !lt(o, b, a)` if and only if +`eq(o, a, b)`. + + Finally, the user specifies the unique element of `CaseInsensitive`, namely the object `CaseInsensitive()`, as the ordering object to the `SortedDict`, `SortedMultiDict` or `SortedSet` constructor. @@ -820,10 +565,9 @@ Dicts, keys for the sorted containers can be either mutable or immutable. In the case of mutable keys, it is important that the keys not be mutated once they are in the container else the indexing structure will be corrupted. (The same restriction applies to Dict.) For -example, suppose a SortedDict `sd` is defined in which the keys are of -type `Array{Int,1}.` (For this to be possible, the user must provide an -`isless` function or order object for `Array{Int,1}` since none is built -into Julia.) Suppose the values of `sd` are of type `Int`. Then the +example, suppose `sd` has type `SortedDict{Vector{Int},Int,ForwardOrdering}`, in +other words, keys of type `Vector{Int}`, values of type `Int`, and +lexicographic ordering (default ordering of vectors). Then the following sequence of statements leaves `sd` in a corrupted state: ```julia @@ -835,11 +579,4 @@ k[1] = 7 ## Performance of Sorted Containers The sorted containers are currently not optimized for cache performance. -This will be addressed in the future. 
- -There is a minor performance issue as follows: the container may hold -onto a small number of keys and values even after the data records -containing those keys and values have been deleted. This may cause a -memory drain in the case of large keys and values. It may also lead to a -delay in the invocation of finalizers. All keys and values are released -completely by the `empty!` function. + diff --git a/src/DataStructures.jl b/src/DataStructures.jl index 11c0804ad..0c0594544 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -5,6 +5,9 @@ module DataStructures isbitsunion, isiterable, dict_with_eltype, KeySet, Callable, _tablesz, findnextnot, unsafe_getindex, unsafe_setindex!, peek + + + import Base.insert! # Exports for old version of julia where Base doesn't export this export peek export popat! @@ -41,10 +44,12 @@ module DataStructures export SetToken, SetSemiToken export startof export pastendsemitoken, beforestartsemitoken + export pastendtoken, beforestarttoken export searchsortedafter, searchequalrange export packcopy, packdeepcopy - export exclusive, inclusive, semitokens + export exclusive, inclusive, semitokens, inclusive_key, exclusive_key export orderobject, ordtype, Lt, compare, onlysemitokens + export tokens, onlytokens export MultiDict, enumerateall export RobinDict @@ -60,6 +65,14 @@ module DataStructures export findkey + import Base.== + import Base.(:) + import Base.+ + import Base.- + import Base.isequal + export sd_push!, ss_push!, smd_push!, poplast! 
+ export token_firstindex, token_lastindex + include("delegate.jl") include("deque.jl") @@ -89,8 +102,7 @@ module DataStructures include("sorted_dict.jl") include("sorted_multi_dict.jl") include("sorted_set.jl") - include("tokens2.jl") - include("container_loops.jl") + include("sorted_container_iteration.jl") include("robin_dict.jl") include("ordered_robin_dict.jl") include("swiss_dict.jl") @@ -115,4 +127,5 @@ module DataStructures include("splay_tree.jl") include("deprecations.jl") + end diff --git a/src/balanced_tree.jl b/src/balanced_tree.jl index 81cf7c83d..62ef254c1 100644 --- a/src/balanced_tree.jl +++ b/src/balanced_tree.jl @@ -72,13 +72,13 @@ end ## marker whose index is 1 and the after-end marker whose index is 2. ## These two markers live in dummy data nodes. -function initializeTree!(tree::Array{TreeNode{K},1}) where K +function initializeTree!(tree::Vector{TreeNode{K}}) where K resize!(tree,1) tree[1] = TreeNode{K}(K, 1, 2, 0, 0) return nothing end -function initializeData!(data::Array{KDRec{K,D},1}) where {K,D} +function initializeData!(data::Vector{KDRec{K,D}}) where {K,D} resize!(data, 2) data[1] = KDRec{K,D}(1) data[2] = KDRec{K,D}(1) @@ -120,17 +120,17 @@ end mutable struct BalancedTree23{K, D, Ord <: Ordering} ord::Ord - data::Array{KDRec{K,D}, 1} - tree::Array{TreeNode{K}, 1} + data::Vector{KDRec{K,D}} + tree::Vector{TreeNode{K}} rootloc::Int depth::Int - freetreeinds::Array{Int,1} - freedatainds::Array{Int,1} + freetreeinds::Vector{Int} + freedatainds::Vector{Int} useddatacells::BitSet # The next two arrays are used as a workspace by the delete! # function. - deletionchild::Array{Int,1} - deletionleftkey::Array{K,1} + deletionchild::Vector{Int} + deletionleftkey::Vector{K} function BalancedTree23{K,D,Ord}(ord1::Ord) where {K,D,Ord<:Ordering} tree1 = Vector{TreeNode{K}}(undef, 1) initializeTree!(tree1) @@ -313,12 +313,12 @@ end ## They replace the 'parent' field of either an internal tree node or ## a data node at the bottom tree level. 
-function replaceparent!(data::Array{KDRec{K,D},1}, whichind::Int, newparent::Int) where {K,D} +function replaceparent!(data::Vector{KDRec{K,D}}, whichind::Int, newparent::Int) where {K,D} data[whichind] = KDRec{K,D}(newparent, data[whichind].k, data[whichind].d) return nothing end -function replaceparent!(tree::Array{TreeNode{K},1}, whichind::Int, newparent::Int) where K +function replaceparent!(tree::Vector{TreeNode{K}}, whichind::Int, newparent::Int) where K tree[whichind] = TreeNode{K}(tree[whichind].child1, tree[whichind].child2, tree[whichind].child3, newparent, tree[whichind].splitkey1, @@ -332,7 +332,7 @@ end ## location and marking it as used. The return value is the ## index of the data just inserted into the vector. -function push_or_reuse!(a::Vector, freelocs::Array{Int,1}, item) +function push_or_reuse!(a::Vector, freelocs::Vector{Int}, item) if isempty(freelocs) push!(a, item) return length(a) @@ -982,3 +982,100 @@ function Base.delete!(t::BalancedTree23{K,D,Ord}, it::Int) where {K,D,Ord<:Order end return nothing end + +# Build a balanced tree from an iterable in which the data is already +# sorted + +function BalancedTree23{K,D,Ord}(::Val{true}, + iterable, + ord::Ord, + allowdups::Bool) where {K, D, Ord <: Ordering} + m = BalancedTree23{K,D,Ord}(ord) + lengthdata = length(m.data) + @assert lengthdata == 2 + firsttrip = true + for (k,d) in iterable + # Must initialize the before-start and past-end markers + # with live data to prevent references to undefined fields + # later + if firsttrip + m.data[1] = KDRec{K,D}(m.data[1].parent, k, d) + m.data[2] = KDRec{K,D}(m.data[2].parent, k, d) + end + + + if !firsttrip + lt(ord, k, m.data[lengthdata].k) && throw(ArgumentError("Keys out of order")) + if !allowdups + !lt(ord, m.data[lengthdata].k, k) && throw(ArgumentError("Repeated key")) + end + end + push!(m.data, KDRec{K,D}(0, convert(K,k), convert(D,d))) + lengthdata += 1 + push!(m.useddatacells, lengthdata) + firsttrip = false + end + resize!(m.tree, 0) 
+ height = 0 + belowlevlength = lengthdata + levbelowbaseind = 0 + child_belowaddress = Vector{Int}(undef, 3) + child_keyaddress = Vector{Int}(undef, 3) + keysbelow = Int[] + newkeysbelow = Int[] + + while true # loop on tree levels + newlevbelowbaseind = length(m.tree) + resize!(newkeysbelow, 0) + # Loop over the nodes of the level below, stepping by 2's + # to form the tree nodes on the new level. One tree node (the + # last one) may need to have three children if the + # number of nodes on the level below is odd. + for i = 1 : div(belowlevlength, 2) + cbase = i * 2 - 2 + numchildren = (cbase == belowlevlength - 3) ? 3 : 2 + for whichc = 1 : numchildren + i1 = cbase + whichc + if height == 0 + child_belowaddress[whichc] = (i1 == 1) ? 1 : + ((i1 == length(m.data)) ? 2 : i1 + 1) + child_keyaddress[whichc] = child_belowaddress[whichc] + else + child_belowaddress[whichc] = levbelowbaseind + i1 + child_keyaddress[whichc] = keysbelow[i1] + end + end + if numchildren == 2 + child_belowaddress[3] = 0 + child_keyaddress[3] = child_keyaddress[2] + end + push!(newkeysbelow, child_keyaddress[1]) + push!(m.tree, + TreeNode{K}(child_belowaddress[1], child_belowaddress[2], + child_belowaddress[3], 0, + m.data[child_keyaddress[2]].k, + m.data[child_keyaddress[3]].k)) + myaddress = length(m.tree) + if height == 0 + replaceparent!(m.data, child_belowaddress[1], myaddress) + replaceparent!(m.data, child_belowaddress[2], myaddress) + child_belowaddress[3] > 0 && + replaceparent!(m.data, child_belowaddress[3], myaddress) + else + replaceparent!(m.tree, child_belowaddress[1], myaddress) + replaceparent!(m.tree, child_belowaddress[2], myaddress) + child_belowaddress[3] > 0 && + replaceparent!(m.tree, child_belowaddress[3], myaddress) + end + end + #update for the next level + belowlevlength = length(m.tree) - newlevbelowbaseind + keysbelow, newkeysbelow = newkeysbelow, keysbelow + height += 1 + levbelowbaseind = newlevbelowbaseind + belowlevlength == 1 && break #root has been reached + 
end + m.rootloc = levbelowbaseind + 1 + m.depth = height + m +end diff --git a/src/container_loops.jl b/src/container_loops.jl deleted file mode 100644 index 7177862fa..000000000 --- a/src/container_loops.jl +++ /dev/null @@ -1,323 +0,0 @@ -## These functions define the possible iterations for the -## sorted containers. -## The prefix SDM is for SortedDict and SortedMultiDict -## The prefix SS is for SortedSet. The prefix SA -## is for all sorted containers. -## The following two definitions now appear in tokens2.jl - -# const SDMContainer = Union{SortedDict, SortedMultiDict} -# const SAContainer = Union{SDMContainer, SortedSet} - -extractcontainer(s::SAContainer) = s -getrangeobj(s::SAContainer) = s - - -## This holds an object describing an exclude-last -## iteration. - - -abstract type AbstractExcludeLast{ContainerType <: SAContainer} end - -struct SDMExcludeLast{ContainerType <: SDMContainer} <: - AbstractExcludeLast{ContainerType} - m::ContainerType - first::Int - pastlast::Int -end - -Base.keytype(::SDMExcludeLast{T}) where {T <: SAContainer} = keytype(T) -Base.keytype(::Type{SDMExcludeLast{T}}) where {T <: SAContainer} = keytype(T) -Base.valtype(::SDMExcludeLast{T}) where {T <: SAContainer} = valtype(T) -Base.valtype(::Type{SDMExcludeLast{T}}) where {T <: SAContainer} = valtype(T) -Base.eltype(::SDMExcludeLast{T}) where {T <: SAContainer} = eltype(T) -Base.eltype(::Type{SDMExcludeLast{T}}) where {T <: SAContainer} = eltype(T) - - -struct SSExcludeLast{ContainerType <: SortedSet} <: - AbstractExcludeLast{ContainerType} - m::ContainerType - first::Int - pastlast::Int -end - -Base.eltype(::SSExcludeLast{T}) where {T <: SortedSet} = eltype(T) -Base.eltype(::Type{SSExcludeLast{T}}) where {T <: SortedSet} = eltype(T) - - -extractcontainer(s::AbstractExcludeLast) = s.m -getrangeobj(s::AbstractExcludeLast) = s - -## This holds an object describing an include-last -## iteration. 
- -abstract type AbstractIncludeLast{ContainerType <: SAContainer} end - - -struct SDMIncludeLast{ContainerType <: SDMContainer} <: - AbstractIncludeLast{ContainerType} - m::ContainerType - first::Int - last::Int -end - -Base.keytype(::SDMIncludeLast{T}) where {T <: SAContainer} = keytype(T) -Base.keytype(::Type{SDMIncludeLast{T}}) where {T <: SAContainer} = keytype(T) -Base.valtype(::SDMIncludeLast{T}) where {T <: SAContainer} = valtype(T) -Base.valtype(::Type{SDMIncludeLast{T}}) where {T <: SAContainer} = valtype(T) -Base.eltype(::SDMIncludeLast{T}) where {T <: SAContainer} = eltype(T) -Base.eltype(::Type{SDMIncludeLast{T}}) where {T <: SAContainer} = eltype(T) - - -struct SSIncludeLast{ContainerType <: SortedSet} <: - AbstractIncludeLast{ContainerType} - m::ContainerType - first::Int - last::Int -end - -Base.eltype(::SSIncludeLast{T}) where {T <: SortedSet} = eltype(T) -Base.eltype(::Type{SSIncludeLast{T}}) where {T <: SortedSet} = eltype(T) - - -extractcontainer(s::AbstractIncludeLast) = s.m -getrangeobj(s::AbstractIncludeLast) = s - - -Base.IteratorSize(::Type{T} where {T <: SAContainer}) = HasLength() -Base.IteratorSize(::Type{T} where {T <: AbstractExcludeLast}) = SizeUnknown() -Base.IteratorSize(::Type{T} where {T <: AbstractIncludeLast}) = SizeUnknown() - - -## The basic iterations are either over the whole sorted container, an -## exclude-last object or include-last object. - -const SDMIterableTypesBase = Union{SDMContainer, - SDMExcludeLast, - SDMIncludeLast} - -const SSIterableTypesBase = Union{SortedSet, - SSExcludeLast, - SSIncludeLast} - - -const SAIterableTypesBase = Union{SAContainer, - AbstractExcludeLast, - AbstractIncludeLast} - - -## The compound iterations are obtained by applying keys(..) or values(..) -## to the basic iterations of the SDM.. type. -## Furthermore, semitokens(..) can be applied -## to either a basic iteration or a keys/values iteration. 
- -struct SDMKeyIteration{T <: SDMIterableTypesBase} - base::T -end - -Base.eltype(::Type{SDMKeyIteration{T}}) where {T} = keytype(T) -Base.eltype(s::SDMKeyIteration) = keytype(extractcontainer(s.base)) -Base.length(s::SDMKeyIteration{T} where T <: SDMContainer) = length(extractcontainer(s.base)) - - -struct SDMValIteration{T <: SDMIterableTypesBase} - base::T -end - -Base.eltype(::Type{SDMValIteration{T}}) where {T} = valtype(T) -Base.eltype(s::SDMValIteration) = valtype(extractcontainer(s.base)) -Base.length(s::SDMValIteration{T} where T <: SDMContainer) = length(extractcontainer(s.base)) - - -struct SDMSemiTokenIteration{T <: SDMIterableTypesBase} - base::T -end - -Base.eltype(::Type{SDMSemiTokenIteration{T}}) where {T} = - Tuple{IntSemiToken, keytype(T), valtype(T)} -Base.eltype(s::SDMSemiTokenIteration) = Tuple{IntSemiToken, - keytype(extractcontainer(s.base)), - valtype(extractcontainer(s.base))} -Base.length(s::SDMSemiTokenIteration{T} where T <: SDMContainer) = length(s.base) - - -struct SSSemiTokenIteration{T <: SSIterableTypesBase} - base::T -end - -Base.eltype(::Type{SSSemiTokenIteration{T}}) where {T} = - Tuple{IntSemiToken, eltype(T)} -Base.eltype(s::SSSemiTokenIteration) = Tuple{IntSemiToken, - eltype(extractcontainer(s.base))} -Base.length(s::SSSemiTokenIteration{T} where T <: SortedSet) = length(s.base) - - -struct SDMSemiTokenKeyIteration{T <: SDMIterableTypesBase} - base::T -end - -Base.eltype(::Type{SDMSemiTokenKeyIteration{T}}) where {T} = - Tuple{IntSemiToken, - keytype(T)} -Base.eltype(s::SDMSemiTokenKeyIteration) = Tuple{IntSemiToken, - keytype(extractcontainer(s.base))} -Base.length(s::SDMSemiTokenKeyIteration{T} where T <: SDMContainer) = length(s.base) - -struct SAOnlySemiTokensIteration{T <: SAIterableTypesBase} - base::T -end - -Base.eltype(::Type{SAOnlySemiTokensIteration{T}} where {T}) = IntSemiToken -Base.eltype(::SAOnlySemiTokensIteration) = IntSemiToken -Base.length(s::SAOnlySemiTokensIteration{T} where T <: SAContainer) = 
length(s.base) - -struct SDMSemiTokenValIteration{T <: SDMIterableTypesBase} - base::T -end - -Base.eltype(::Type{SDMSemiTokenValIteration{T}}) where {T} = - Tuple{IntSemiToken, valtype(T)} -Base.eltype(s::SDMSemiTokenValIteration) = Tuple{IntSemiToken, - valtype(extractcontainer(s.base))} -Base.length(s::SDMSemiTokenValIteration{T} where T <: SDMContainer) = length(s.base) - -const SACompoundIterable = Union{SDMKeyIteration, - SDMValIteration, - SDMSemiTokenIteration, - SSSemiTokenIteration, - SDMSemiTokenKeyIteration, - SDMSemiTokenValIteration, - SAOnlySemiTokensIteration} - -extractcontainer(s::SACompoundIterable) = extractcontainer(s.base) -getrangeobj(s::SACompoundIterable) = getrangeobj(s.base) - -const SAIterable = Union{SAIterableTypesBase, SACompoundIterable} - - -Base.IteratorEltype(::Type{T} where {T <: SAIterable}) = HasEltype() -Base.IteratorSize(::Type{SDMKeyIteration{T}}) where {T} = IteratorSize(T) -Base.IteratorSize(::Type{SDMValIteration{T}}) where {T} = IteratorSize(T) -Base.IteratorSize(::Type{SDMSemiTokenIteration{T}}) where {T} = IteratorSize(T) -Base.IteratorSize(::Type{SSSemiTokenIteration{T}}) where {T} = IteratorSize(T) -Base.IteratorSize(::Type{SDMSemiTokenKeyIteration{T}}) where {T} = IteratorSize(T) -Base.IteratorSize(::Type{SDMSemiTokenValIteration{T}}) where {T} = IteratorSize(T) -Base.IteratorSize(::Type{SAOnlySemiTokensIteration{T}}) where {T} = IteratorSize(T) - - -## All the loops maintain a state which is an object of the -## following type. 
- -struct SAIterationState - next::Int - final::Int -end - - -exclusive(m::SDMContainer, ii::Tuple{IntSemiToken,IntSemiToken}) = - SDMExcludeLast(m, ii[1].address, ii[2].address) -exclusive(m::SortedSet, ii::Tuple{IntSemiToken,IntSemiToken}) = - SSExcludeLast(m, ii[1].address, ii[2].address) -exclusive(m::SAContainer, i1::IntSemiToken, i2::IntSemiToken) = - exclusive(m, (i1, i2)) - -inclusive(m::SDMContainer, ii::Tuple{IntSemiToken,IntSemiToken}) = - SDMIncludeLast(m, ii[1].address, ii[2].address) -inclusive(m::SortedSet, ii::Tuple{IntSemiToken,IntSemiToken}) = - SSIncludeLast(m, ii[1].address, ii[2].address) -inclusive(m::SAContainer, i1::IntSemiToken, i2::IntSemiToken) = - inclusive(m, (i1, i2)) - - -# Next definition needed to break ambiguity with keys(AbstractDict) from Dict.jl - -Base.keys(ba::SortedDict) = SDMKeyIteration(ba) -Base.keys(ba::SDMIterableTypesBase) = SDMKeyIteration(ba) - - -Base.in(k, keyit::SDMKeyIteration{SortedDict{K,D,Ord}} where {K,D,Ord}) = - haskey(extractcontainer(keyit.base), k) - -Base.in(k, keyit::SDMKeyIteration{SortedMultiDict{K,D,Ord}} where {K,D,Ord}) = - haskey(extractcontainer(keyit.base), k) - - -# Next definition needed to break ambiguity with values(AbstractDict) from Dict.jl -Base.values(ba::SortedDict) = SDMValIteration(ba) -Base.values(ba::SDMIterableTypesBase) = SDMValIteration(ba) -semitokens(ba::SDMIterableTypesBase) = SDMSemiTokenIteration(ba) -semitokens(ba::SSIterableTypesBase) = SSSemiTokenIteration(ba) -semitokens(ki::SDMKeyIteration) = SDMSemiTokenKeyIteration(ki.base) -semitokens(vi::SDMValIteration) = SDMSemiTokenValIteration(vi.base) -onlysemitokens(ba::SAIterableTypesBase) = SAOnlySemiTokensIteration(ba) - - -function nexthelper(c::SAContainer, state::SAIterationState) - sn = state.next - (sn < 3 || !(sn in c.bt.useddatacells)) && throw(BoundsError()) - SAIterationState(nextloc0(c.bt, sn), state.final) -end - - -getitem(::SDMIterableTypesBase, dt, sn) = dt.k => dt.d -getitem(::SSIterableTypesBase, dt, sn) = 
dt.k -getitem(::SDMKeyIteration, dt, sn) = dt.k -getitem(::SDMValIteration, dt, sn) = dt.d -getitem(::SDMSemiTokenIteration, dt, sn) = (IntSemiToken(sn), dt.k, dt.d) -getitem(::SSSemiTokenIteration, dt, sn) = (IntSemiToken(sn), dt.k) -getitem(::SDMSemiTokenKeyIteration, dt, sn) = (IntSemiToken(sn), dt.k) -getitem(::SDMSemiTokenValIteration, dt, sn) = (IntSemiToken(sn), dt.d) -getitem(::SAOnlySemiTokensIteration, dt, sn) = IntSemiToken(sn) - - -function get_init_state(e::AbstractExcludeLast) - (!(e.first in e.m.bt.useddatacells) || e.first == 1 || - !(e.pastlast in e.m.bt.useddatacells)) && - throw(BoundsError()) - if compareInd(e.m.bt, e.first, e.pastlast) < 0 - return SAIterationState(e.first, e.pastlast) - else - return SAIterationState(2, 2) - end -end - -function get_init_state(e::AbstractIncludeLast) - (!(e.first in e.m.bt.useddatacells) || e.first == 1 || - !(e.last in e.m.bt.useddatacells) || e.last == 2) && - throw(BoundsError()) - if compareInd(e.m.bt, e.first, e.last) <= 0 - return SAIterationState(e.first, nextloc0(e.m.bt, e.last)) - else - return SAIterationState(2, 2) - end -end - -get_init_state(m::SAContainer) = SAIterationState(beginloc(m.bt), 2) - -function Base.iterate(s::SAIterable, state = get_init_state(getrangeobj(s))) - if state.next == state.final - return nothing - else - c = extractcontainer(s) - dt = isa(s, SAOnlySemiTokensIteration) ? 
nothing : c.bt.data[state.next] - return (getitem(s, dt, state.next), - nexthelper(c, state)) - end -end - - -Base.eachindex(sd::SortedDict) = keys(sd) -Base.eachindex(sdm::SortedMultiDict) = onlysemitokens(sdm) -Base.eachindex(ss::SortedSet) = onlysemitokens(ss) -Base.eachindex(sd::SDMExcludeLast{SortedDict{K,D,Ord}} where {K,D,Ord <: Ordering}) = keys(sd) -Base.eachindex(smd::SDMExcludeLast{SortedMultiDict{K,D,Ord}} where {K,D,Ord <: Ordering}) = - onlysemitokens(smd) -Base.eachindex(ss::SSExcludeLast) = onlysemitokens(ss) -Base.eachindex(sd::SDMIncludeLast{SortedDict{K,D,Ord}} where {K,D,Ord <: Ordering}) = keys(sd) -Base.eachindex(smd::SDMIncludeLast{SortedMultiDict{K,D,Ord}} where {K,D,Ord <: Ordering}) = - onlysemitokens(smd) -Base.eachindex(ss::SSIncludeLast) = onlysemitokens(ss) - - -Base.empty!(m::SAContainer) = (empty!(m.bt); m) -Base.length(m::SAContainer) = length(m.bt.data) - length(m.bt.freedatainds) - 2 -Base.isempty(m::SAContainer) = length(m) == 0 diff --git a/src/sorted_container_iteration.jl b/src/sorted_container_iteration.jl new file mode 100644 index 000000000..90a617755 --- /dev/null +++ b/src/sorted_container_iteration.jl @@ -0,0 +1,1167 @@ +## Functions in this file implement many kinds of iterations over +## SortedDict, SortedMultiDict, and SortedSet. +## The iteration state is a "semitoken". +## A "token" is a (container,index_into_container) 2-tuple, +## while semitokens are the second part, index_into_container. 
 +
+# From tokens.jl:
+# abstract type AbstractSemiToken end
+
+# struct IntSemiToken <: AbstractSemiToken
+#     address::Int
+# end
+
+# Container unions and the corresponding token types. A token is a
+# (container, semitoken) 2-tuple.
+const SDMContainer = Union{SortedDict, SortedMultiDict}
+const SortedContainer = Union{SDMContainer, SortedSet}
+const Token = Tuple{SortedContainer, IntSemiToken}
+const SortedDictToken = Tuple{SortedDict, IntSemiToken}
+const SortedMultiDictToken = Tuple{SortedMultiDict, IntSemiToken}
+const SDMToken = Tuple{SDMContainer, IntSemiToken}
+const SortedSetToken = Tuple{SortedSet, IntSemiToken}
+
+# Two tokens are equal when they refer to the identical container object
+# (`===`) and their semitokens compare equal.
+(==)(t1::Token, t2::Token) = (t1[1] === t2[1] && t1[2] == t2[2])
+
+
+"""
+    Base.firstindex(m::SortedContainer)
+    startof(m::SortedContainer)
+
+Return the semitoken of
+the first entry of the sorted container `m`, or the past-end semitoken
+if the container is empty. Time: O(log *n*)
+"""
+Base.firstindex(m::SortedContainer) = startof(m)
+startof(m::SortedContainer) = IntSemiToken(beginloc(m.bt))
+
+
+"""
+    token_firstindex(m::SortedContainer)
+
+
+Return the token of
+the first entry of the sorted container `m`, or the past-end token
+if the container is empty. Time: O(log *n*)
+"""
+token_firstindex(m::SortedContainer) = (m, firstindex(m))
+
+
+
+"""
+    Base.lastindex(m::SortedContainer)
+    endof(m::SortedContainer)
+
+Return the semitoken of the
+last entry of the sorted container `m`, or the before-start semitoken
+if the container is empty. Time: O(log *n*)
+"""
+Base.lastindex(m::SortedContainer) = endof(m)
+endof(m::SortedContainer) = IntSemiToken(endloc(m.bt))
+
+
+"""
+    token_lastindex(m::SortedContainer)
+
+Return the token of the
+last entry of the sorted container `m`, or the before-start token
+if the container is empty. Time: O(log *n*)
+"""
+token_lastindex(m::SortedContainer) = (m, lastindex(m))
+
+
+"""
+    pastendsemitoken(m::SortedContainer)
+
+Return the semitoken
+of the entry that is one past the end of the sorted container `m`.
 +Time: O(1)
+"""
+pastendsemitoken(::SortedContainer) = IntSemiToken(2)  # address 2 is the past-end marker
+
+
+"""
+    pastendtoken(m::SortedContainer)
+
+Return the token
+of the entry that is one past the end of the sorted container `m`.
+Time: O(1)
+"""
+pastendtoken(m::SortedContainer) = (m, pastendsemitoken(m))
+
+
+
+"""
+    beforestartsemitoken(m::SortedContainer)
+
+Return the semitoken
+of the entry that is one before the beginning of the
+sorted container `m`. Time: O(1)
+"""
+beforestartsemitoken(::SortedContainer) = IntSemiToken(1)  # address 1 is the before-start marker
+
+
+"""
+    beforestarttoken(m::SortedContainer)
+
+Return the token
+of the entry that is one before the beginning of the
+sorted container `m`. Time: O(1)
+"""
+beforestarttoken(m::SortedContainer) = (m, beforestartsemitoken(m))
+
+
+# Unchecked deletion: no validation of the token is performed here.
+delete_nocheck!(ii::Token) = delete!(ii[1].bt, ii[2].address)
+
+
+"""
+    Base.delete!(token::Token)
+    Base.delete!((m,st))
+
+Delete the item indexed by the token from a sorted container. The
+token must point to live data; otherwise a `BoundsError` is thrown.
+The second form creates the token
+in-place as a tuple of a container `m` and a semitoken `st`.
+Time: O(log *n*).
+"""
+function Base.delete!(ii::Token)
+    has_data(ii)
+    delete_nocheck!(ii)
+end
+
+
+# Unchecked advance: no validation of the token is performed here.
+advance_nocheck(ii::Token) = IntSemiToken(nextloc0(ii[1].bt, ii[2].address))
+
+"""
+    advance(token::Token)
+    advance((m,st))
+
+Return the semitoken of the item in a sorted container
+one after the given token. A `BoundsError` is thrown if the token is
+invalid or is the past-end token. The second form creates the token
+in-place as a tuple of a container `m` and a semitoken `st`.
+Time: O(log *n*)
+"""
+function advance(ii::Token)
+    not_pastend(ii)
+    advance_nocheck(ii)
+end
+
+# Unchecked regress: no validation of the token is performed here.
+regress_nocheck(ii::Token) = IntSemiToken(prevloc0(ii[1].bt, ii[2].address))
+
+"""
+    regress(token::Token)
+    regress((m,st))
+
+Return the semitoken of the item in a sorted container
+one before the given token. A `BoundsError` is thrown if the token is
+invalid or is the before-start token.
 The second form creates the token
+in-place as a tuple of a container `m` and a semitoken `st`.
+Time: O(log *n*)
+"""
+function regress(ii::Token)
+    not_beforestart(ii)
+    regress_nocheck(ii)
+end
+
+
+"""
+    status(token::Token)
+    status((m, st))
+
+Determine the status of a token. Return values are:
+- 0 = invalid token
+- 1 = valid and points to live data
+- 2 = before-start token
+- 3 = past-end token
+
+The second form creates the token
+in-place as a tuple of a sorted container `m` and a semitoken `st`.
+Time: O(1)
+"""
+status(ii::Token) =
+    !(ii[2].address in ii[1].bt.useddatacells) ? 0 :
+    ii[2].address == 1 ? 2 :
+    ii[2].address == 2 ? 3 : 1
+
+"""
+    compare(m::SortedContainer, s::IntSemiToken, t::IntSemiToken)
+
+Determine the relative position according to the
+sort order of the data items indexed
+by tokens `(m,s)` and `(m,t)`. Return:
+- `-1` if `(m,s)` precedes `(m,t)`,
+- `0` if `s == t`,
+- `1` if `(m,s)` follows `(m,t)`.
+
+The relative positions are determined
+from the tree topology without any key
+comparisons. Time: O(log *n*)
+"""
+compare(m::SortedContainer, s::IntSemiToken, t::IntSemiToken) =
+    compareInd(m.bt, s.address, t.address)
+
+
+"""
+    ordtype(sc::SortedSet)
+    ordtype(sc::SortedDict)
+    ordtype(sc::SortedMultiDict)
+
+Return the order type for a sorted container.
+This function may also be applied to the type itself.
+Time: O(1)
+"""
+@inline ordtype(::SortedSet{K,Ord}) where {K, Ord} = Ord
+@inline ordtype(::Type{SortedSet{K,Ord}}) where {K, Ord} = Ord
+@inline ordtype(::SortedDict{K,D,Ord}) where {K, D, Ord} = Ord
+@inline ordtype(::Type{SortedDict{K,D,Ord}}) where {K, D, Ord} = Ord
+@inline ordtype(::SortedMultiDict{K,D, Ord}) where {K, D, Ord} = Ord
+@inline ordtype(::Type{SortedMultiDict{K,D, Ord}}) where {K, D, Ord} = Ord
+
+"""
+    orderobject(sc::SortedContainer)
+
+Return the order object used to construct the container.
 Time: O(1)
+"""
+@inline orderobject(m::SortedContainer) = m.bt.ord
+
+
+"""
+    deref(token::Token)
+    deref((m,st))
+
+Return the data item indexed by the token. If
+the container is a `SortedSet`, then this is a key in the set.
+If the container is a `SortedDict` or `SortedMultiDict`, then
+this is a key=>value pair. A `BoundsError` is thrown if the token
+is invalid or is the before-start or past-end token. The
+second form creates the token in-place as a tuple of a
+sorted container `m`
+and a semitoken `st`. Time: O(1)
+"""
+function deref(ii::Token)
+    error("This is not reachable because the specialized methods below will always be selected but is here to make the doc work")
+end
+
+# Unchecked dereference for SortedDict tokens: returns key => value without
+# validating the token (the array access is marked @inbounds).
+@inline function deref_nocheck(ii::SortedDictToken)
+    @inbounds kdrec = ii[1].bt.data[ii[2].address]
+    return Pair(kdrec.k, kdrec.d)
+end
+
+# Checked dereference: has_data throws BoundsError unless the token points
+# to live data.
+function deref(ii::SortedDictToken)
+    has_data(ii)
+    deref_nocheck(ii)
+end
+
+# Unchecked dereference for SortedMultiDict tokens: returns key => value.
+@inline function deref_nocheck(ii::SortedMultiDictToken)
+    @inbounds kdrec = ii[1].bt.data[ii[2].address]
+    return kdrec.k => kdrec.d
+end
+
+function deref(ii::SortedMultiDictToken)
+    has_data(ii)
+    deref_nocheck(ii)
+end
+
+# Unchecked dereference for SortedSet tokens: returns the key only.
+@inline function deref_nocheck(ii::SortedSetToken)
+    @inbounds k = ii[1].bt.data[ii[2].address].k
+    return k
+end
+
+function deref(ii::SortedSetToken)
+    has_data(ii)
+    deref_nocheck(ii)
+end
+
+
+"""
+    deref_key(token::Token)
+    deref_key((m,st))
+
+Return the key portion of a data item (a key=>value pair) in a
+`SortedDict` or `SortedMultiDict` indexed by the token.
+A `BoundsError` is thrown if the token
+is invalid or is the before-start or past-end token. The
+second form creates the token in-place as a tuple of a container `m`
+and a semitoken `st`.
Time: O(1) +""" +function deref_key(ii::Token) + error("Cannot invoke deref_key on a SortedSet") +end + + +@inline function deref_key_nocheck(ii::SDMToken) + @inbounds k = ii[1].bt.data[ii[2].address].k + return k +end + +function deref_key(ii::SDMToken) + has_data(ii) + deref_key_nocheck(ii) +end + + + +""" + deref_value(token::Token) + deref_value((m,st)) + +Returns the value portion of a data item (a key=>value pair) +in a `SortedDict` or `SortedMultiDict` +indexed by the token. +It is a BoundsError() if the token +is invalid or is the before-start or past-end token. The +second form creates the token in-place as a tuple of a container `m` +and a semitoken `st`. Time: O(1) +""" +function deref_value(ii::Token) + error("Cannot invoke deref_key on a SortedSet") +end + +@inline function deref_value_nocheck(ii::SDMToken) + @inbounds d = ii[1].bt.data[ii[2].address].d + return d +end + +function deref_value(ii::SDMToken) + has_data(ii) + deref_value_nocheck(ii) +end + +""" + Base.first(sc::SortedContainer) + +Return the +first item (a `k=>v` pair for SortedDict and +SortedMultiDict or an element for SortedSet) in `sc` according to the sorted +order in the container. It is a `BoundsError` to call this function on +an empty container. Equivalent to `deref(token_startindex(sc))`. Time: O(log *n*) +""" +Base.first(m::SortedContainer) = deref(token_firstindex(m)) + + +""" + Base.last(sc::SortedContainer) + +Return the last item (a `k=>v` pair for SortedDict and +SortedMultiDict or a key for SortedSet) in `sc` according to the sorted +order in the container. It is a `BoundsError` to call this function on an +empty container. Equivalent to `deref(token_lastindex(sc))`. Time: O(log *n*) +""" +Base.last(m::SortedContainer) = deref(token_lastindex(m)) + + + +""" + Base.getindex(m::SortedDict, st::IntSemiToken) + Base.getindex(m::SortedMultiDict, st::IntSemiToken) + +Retrieve value portion of item from SortedDict or SortedMultiDict +`m` indexed by `st`, a semitoken. 
 Notation `m[st]` appearing in
+an expression
+is equivalent to [`deref_value(token::Token)`](@ref) where `token=(m,st)`. Time: O(1)
+"""
+function Base.getindex(m::SortedDict,
+                       i::IntSemiToken)
+    @boundscheck has_data((m,i))  # throws BoundsError unless (m,i) points to live data
+    @inbounds d = m.bt.data[i.address].d
+    return d
+end
+# Must repeat this to break ambiguity; cannot use SDMContainer.
+function Base.getindex(m::SortedMultiDict,
+                       i::IntSemiToken)
+    @boundscheck has_data((m,i))  # throws BoundsError unless (m,i) points to live data
+    @inbounds d = m.bt.data[i.address].d
+    return d
+end
+
+
+
+"""
+    Base.setindex!(m::SortedDict, newvalue, st::IntSemiToken)
+    Base.setindex!(m::SortedMultiDict, newvalue, st::IntSemiToken)
+
+Set to `newvalue` the value portion of the item in SortedDict or
+SortedMultiDict `m` indexed by the semitoken `st`. Time: O(1)
+"""
+function Base.setindex!(m::SortedDict,
+                        d_,
+                        i::IntSemiToken)
+    @boundscheck has_data((m,i))
+    # The record is rebuilt with the same parent and key; only the value
+    # (converted to valtype(m)) changes.
+    @inbounds m.bt.data[i.address] =
+        KDRec{keytype(m),valtype(m)}(m.bt.data[i.address].parent,
+                                     m.bt.data[i.address].k,
+                                     convert(valtype(m),d_))
+    return m
+end
+## Must repeat this to break ambiguity; cannot use SDMContainer
+function Base.setindex!(m::SortedMultiDict,
+                        d_,
+                        i::IntSemiToken)
+    @boundscheck has_data((m,i))
+    # The record is rebuilt with the same parent and key; only the value
+    # (converted to valtype(m)) changes.
+    @inbounds m.bt.data[i.address] =
+        KDRec{keytype(m),valtype(m)}(m.bt.data[i.address].parent,
+                                     m.bt.data[i.address].k,
+                                     convert(valtype(m),d_))
+    return m
+end
+
+
+"""
+    Base.searchsortedfirst(m::SortedContainer, k)
+
+Return the semitoken of the first item in the
+sorted container `m` that is greater than or equal to
+`k` in the sort order.
+If there is no
+such item, then the past-end semitoken is returned. Time: O(*c* log *n*)
+"""
+function Base.searchsortedfirst(m::SortedContainer, k_)
+    # The key is converted to keytype(m) before the tree search.
+    i = findkeyless(m.bt, convert(keytype(m), k_))
+    IntSemiToken(nextloc0(m.bt, i))
+end
+
+
+"""
+    searchsortedafter(m::SortedContainer, k)
+
+Return the semitoken of the first item in the container that is greater than
+`k` in the sort order.
 If there is no
+such item, then the past-end semitoken is returned. Time: O(*c* log *n*)
+"""
+function searchsortedafter(m::SortedContainer, k_)
+    i, exactfound = findkey(m.bt, convert(keytype(m), k_))  # exactfound is not used here
+    IntSemiToken(nextloc0(m.bt, i))
+end
+
+
+"""
+    Base.searchsortedlast(m::SortedContainer, k)
+
+Return the semitoken of the last item in the container that is less than or equal
+to `k` in sort order. If there is no
+such item, then the before-start semitoken is returned. Time: O(*c* log *n*)
+"""
+function Base.searchsortedlast(m::SortedContainer, k_)
+    i, exactfound = findkey(m.bt, convert(keytype(m),k_))  # exactfound is not used here
+    IntSemiToken(i)
+end
+
+
+## The next three are correctness-checking routines. They are
+## not exported. Each throws a BoundsError when its check fails.
+
+
+# Fails if the address is not a used data cell or is 1 (the before-start marker).
+not_beforestart(i::Token) =
+    (!(i[2].address in i[1].bt.useddatacells) ||
+     i[2].address == 1) && throw(BoundsError())
+
+# Fails if the address is not a used data cell or is 2 (the past-end marker).
+not_pastend(i::Token) =
+    (!(i[2].address in i[1].bt.useddatacells) ||
+     i[2].address == 2) && throw(BoundsError())
+
+
+# Fails if the address is not a used data cell or is one of the two markers
+# (addresses 1 and 2).
+has_data(i::Token) =
+    (!(i[2].address in i[1].bt.useddatacells) ||
+     i[2].address < 3) && throw(BoundsError())
+
+
+
+
+# Container iterables IterableObject{C, R, KV, T, D}
+# have five independent parameters as follows
+# C is the type of container, i.e., SortedSet{K,Ord}, SortedDict{K,D,Ord}
+#    or SortedMultiDict{K,D,Ord}
+# R indicates whether the iteration is over the entire container,
+#    an exclusive range (i.e., a:b where b is omitted) or
+#    over an inclusive range (i.e., a:b where b is included)
+# KV indicates whether the iteration is supposed to return keys, values, or both
+#    If T is set to onlysemitokens or onlytokens, then this parameter
+#    has no effect
+# T indicates whether to return tokens or semitokens in addition to
+#    keys and values
+# D indicates whether the iteration is forward or reverse
+#
+
+
+
+# RangeTypes is the supertype of the range selectors. EntireContainer
+# indicates iteration over the entire container is specified
+
+abstract type RangeTypes end
+
+struct EntireContainer <: RangeTypes
+end
+
+# This struct stores an exclusive range
 +struct ExclusiveRange <: RangeTypes
+    first::Int     # address (semitoken .address) of the first item
+    pastlast::Int  # address of the item one past the last item iterated
+end
+
+# This struct stores an inclusive range
+struct InclusiveRange <: RangeTypes
+    first::Int  # address (semitoken .address) of the first item
+    last::Int   # address of the last item iterated
+end
+
+
+# Supertype of the markers selecting keys, values, or both
+abstract type KVIterTypes end
+
+
+# This struct indicates 'keys' iteration
+struct KeysIter <: KVIterTypes
+end
+
+# This struct indicates 'vals' iteration
+struct ValsIter <: KVIterTypes
+end
+
+# This struct indicates keys+vals iteration
+struct KeysValsIter <: KVIterTypes
+end
+
+# Default KV marker type (the type itself, not an instance) per container kind
+default_KVIterType(::SDMContainer) = KeysValsIter
+default_KVIterType(::SortedSet) = KeysIter
+
+# Supertype of the markers selecting token/semitoken output
+abstract type TokenIterTypes end
+
+# This struct indicates 'semitokens' iteration
+struct SemiTokenIter <: TokenIterTypes
+end
+
+# This struct indicates 'tokens' iteration
+struct TokenIter <: TokenIterTypes
+end
+
+# This struct indicates 'onlysemitokens' iteration
+struct OnlySemiTokenIter <: TokenIterTypes
+end
+
+# This struct indicates 'onlytokens' iteration
+struct OnlyTokenIter <: TokenIterTypes
+end
+
+# This struct indicates neither tokens nor semitokens iteration
+struct NoTokens <: TokenIterTypes
+end
+
+
+# IterDirection is the supertype of the direction markers; ForwardIter
+# indicates forward iteration
+
+abstract type IterDirection end
+
+struct ForwardIter <: IterDirection
+end
+
+# This struct indicates reverse iteration
+struct ReverseIter <: IterDirection
+end
+
+
+# Iteration descriptor. Only the container and the range are stored as
+# fields; the Kv, T and D choices are carried purely as type parameters.
+struct IterableObject{C <: SortedContainer,
+                      R <: RangeTypes,
+                      Kv <: KVIterTypes,
+                      T <: TokenIterTypes,
+                      D <: IterDirection}
+    m::C  # the container being iterated
+    r::R  # the range selector (EntireContainer, ExclusiveRange or InclusiveRange)
+end
+
+const SortedContainerIterable = Union{IterableObject, SortedContainer}
+
+
+# Default iterable for a container: entire container, default keys/values
+# selection, no tokens, forward direction.
+base_iterable_object(m::SortedContainer) =
+    IterableObject{typeof(m),
+                   EntireContainer,
+                   default_KVIterType(m),
+                   NoTokens,
+                   ForwardIter}(m, EntireContainer())
+
+# Forward iterable over the range from semitoken ii1 up to but not including ii2.
+exclusive(m::SortedContainer, (ii1, ii2)::Tuple{IntSemiToken, IntSemiToken}) =
+    IterableObject{typeof(m),
+                   ExclusiveRange,
+                   default_KVIterType(m),
+                   NoTokens,
+                   ForwardIter}(m, ExclusiveRange(ii1.address, ii2.address))
+
+
+exclusive(m::SortedContainer, ii1::IntSemiToken, ii2::IntSemiToken) =
exclusive(m, (ii1, ii2)) + +exclusive_key(m::SortedContainer, key1, key2) = + exclusive(m, (searchsortedfirst(m, key1), searchsortedfirst(m, key2))) + + +inclusive(m::SortedContainer, (ii1, ii2)::Tuple{IntSemiToken, IntSemiToken}) = + IterableObject{typeof(m), + InclusiveRange, + default_KVIterType(m), + NoTokens, + ForwardIter}(m, InclusiveRange(ii1.address, ii2.address)) + +inclusive(m::SortedContainer,ii1::IntSemiToken, ii2::IntSemiToken) = + inclusive(m, (ii1, ii2)) + +inclusive_key(m::SortedContainer, key1, key2) = + inclusive(m, (searchsortedfirst(m, key1), searchsortedlast(m, key2))) + + +Base.keys(ito::IterableObject{C, R, KeysValsIter, T, D}) where +{C <: SDMContainer, R, T, D} = + IterableObject{C, R, KeysIter, T, D}(ito.m, ito.r) + +Base.keys(m::SDMContainer) = keys(base_iterable_object(m)) + +Base.pairs(ito::IterableObject{<: SDMContainer, R, KV, T, D}) where {R, KV, T, D} = ito +Base.pairs(m::SDMContainer) = base_iterable_object(m) + + +Base.values(ito::IterableObject{C, R, KeysValsIter, T, D}) where +{C <: SDMContainer, R, T, D} = + IterableObject{C, R, ValsIter, T, D}(ito.m, ito.r) + +Base.values(m::SDMContainer) = values(base_iterable_object(m)) + +semitokens(ito::IterableObject{C, R, KV, NoTokens, D}) where {C, R, KV, D} = + IterableObject{C, R, KV, SemiTokenIter, D}(ito.m, ito.r) + +semitokens(m::SortedContainer) = semitokens(base_iterable_object(m)) + +tokens(ito::IterableObject{C, R, KV, NoTokens, D}) where {C, R, KV, D} = + IterableObject{C, R, KV, TokenIter, D}(ito.m, ito.r) + +tokens(m::SortedContainer) = tokens(base_iterable_object(m)) + +onlysemitokens(ito::IterableObject{C, R, KV, NoTokens, D}) where {C, R, KV, D} = + IterableObject{C, R, KV, OnlySemiTokenIter, D}(ito.m, ito.r) + +onlysemitokens(m::SortedContainer) = onlysemitokens(base_iterable_object(m)) + +onlytokens(ito::IterableObject{C, R, KV, NoTokens, D}) where {C, R, KV, D} = + IterableObject{C, R, KV, OnlyTokenIter, D}(ito.m, ito.r) + +onlytokens(m::SortedContainer) = 
onlytokens(base_iterable_object(m)) + + +Base.Iterators.reverse(ito::IterableObject{C, R, KV, T, ForwardIter}) where {C, R, KV, T} = + IterableObject{C, R, KV, T, ReverseIter}(ito.m, ito.r) + +Base.Iterators.reverse(ito::IterableObject{C, R, KV, T, ReverseIter}) where {C, R, KV, T} = + IterableObject{C, R, KV, T, ForwardIter}(ito.m, ito.r) + +Base.Iterators.reverse(m::SortedContainer) = Iterators.reverse(base_iterable_object(m)) + +struct SAIterationState + next::Int + final::Int +end + + +# The iterate function is decomposed into three pieces: +# The iteration_init function initializes the iteration state and +# also stores the final state. It +# does different things depending on the parameter R (range) and D (direction). +# The get_item function retrieves the requested data from the +# the container and depends on the KV and D parameters. +# The next function updates the iteration state to the next item +# and depends on the D (direction) parameter. + + +iteration_init(ito::IterableObject{C, EntireContainer, KV, T, ForwardIter}) where +{C, KV, T} = SAIterationState(beginloc(ito.m.bt), 2) + +iteration_init(ito::IterableObject{C, EntireContainer, KV, T, ReverseIter}) where +{C, KV, T} = SAIterationState(endloc(ito.m.bt), 1) + +function iteration_init(ito::IterableObject{C, ExclusiveRange, KV, T, ForwardIter}) where +{C, KV, T} + (!(ito.r.first in ito.m.bt.useddatacells) || ito.r.first == 1 || + !(ito.r.pastlast in ito.m.bt.useddatacells)) && + throw(BoundsError()) + if compareInd(ito.m.bt, ito.r.first, ito.r.pastlast) < 0 + return SAIterationState(ito.r.first, ito.r.pastlast) + else + return SAIterationState(2, 2) + end +end + +function iteration_init(ito::IterableObject{C, ExclusiveRange, KV, T, ReverseIter}) where +{C, KV, T} + (!(ito.r.first in ito.m.bt.useddatacells) || ito.r.first == 2 || + !(ito.r.pastlast in ito.m.bt.useddatacells)) && + throw(BoundsError()) + if compareInd(ito.m.bt, ito.r.first, ito.r.pastlast) < 0 + return 
SAIterationState(prevloc0(ito.m.bt, ito.r.pastlast), + prevloc0(ito.m.bt, ito.r.first)) + else + return SAIterationState(2, 2) + end +end + +function iteration_init(ito::IterableObject{C, InclusiveRange, KV, T, ForwardIter}) where +{C, KV, T} + (!(ito.r.first in ito.m.bt.useddatacells) || ito.r.first == 1 || + !(ito.r.last in ito.m.bt.useddatacells) || ito.r.last == 2) && + throw(BoundsError()) + if compareInd(ito.m.bt, ito.r.first, ito.r.last) <= 0 + return SAIterationState(ito.r.first, nextloc0(ito.m.bt, ito.r.last)) + else + return SAIterationState(2, 2) + end +end + + +function iteration_init(ito::IterableObject{C, InclusiveRange, KV, T, ReverseIter}) where +{C, KV, T} + (!(ito.r.first in ito.m.bt.useddatacells) || ito.r.first == 2 || + !(ito.r.last in ito.m.bt.useddatacells) || ito.r.last == 1) && + throw(BoundsError()) + if compareInd(ito.m.bt, ito.r.first, ito.r.last) <= 0 + return SAIterationState(ito.r.last, prevloc0(ito.m.bt, ito.r.first)) + else + return SAIterationState(2, 2) + end +end + +iteration_init(m::SortedContainer) = iteration_init(base_iterable_object(m)) + +@inline function get_item0(ito::IterableObject{C, R, KeysIter, T, D}, + state::SAIterationState) where {C, R, T, D} + @inbounds k = ito.m.bt.data[state.next].k + return k +end + + +@inline function get_item0(ito::IterableObject{C, R, ValsIter, T, D}, + state::SAIterationState) where {C, R, T, D} + @inbounds v = ito.m.bt.data[state.next].d + return v +end + + +@inline function get_item0(ito::IterableObject{C, R, KeysValsIter, T, D}, + state::SAIterationState) where {C, R, T, D} + @inbounds dt = ito.m.bt.data[state.next] + return (dt.k => dt.d) +end + +get_item(ito::IterableObject{C, R, KeysIter, TokenIter, D}, + state::SAIterationState) where {C, R, KV, D} = + ((ito.m, IntSemiToken(state.next)), get_item0(ito, state)) + +Base.eltype(::Type{IterableObject{C, R, KeysIter, TokenIter, D}}) where {C, R, D} = + Tuple{Tuple{C,IntSemiToken}, keytype(C)} + + +get_item(ito::IterableObject{C, R, 
ValsIter, TokenIter, D}, + state::SAIterationState) where {C, R, KV, D} = + ((ito.m, IntSemiToken(state.next)), get_item0(ito, state)) + +Base.eltype(::Type{IterableObject{C, R, ValsIter, TokenIter, D}}) where {C, R, D} = + Tuple{Tuple{C,IntSemiToken}, valtype(C)} + + +function get_item(ito::IterableObject{C, R, KeysValsIter, TokenIter, D}, + state::SAIterationState) where {C, R, KV, D} + i = get_item0(ito, state) + ((ito.m, IntSemiToken(state.next)), i.first, i.second) +end + +Base.eltype(::Type{IterableObject{C, R, KeysValsIter, TokenIter, D}}) where {C, R, D} = + Tuple{Tuple{C,IntSemiToken}, keytype(C), valtype(C)} + +get_item(ito::IterableObject{C, R, KeysIter, SemiTokenIter, D}, + state::SAIterationState) where {C, R, KV, D} = + (IntSemiToken(state.next), get_item0(ito, state)) + +Base.eltype(::Type{IterableObject{C, R, KeysIter, SemiTokenIter, D}}) where {C, R, D} = + Tuple{IntSemiToken, keytype(C)} + +get_item(ito::IterableObject{C, R, ValsIter, SemiTokenIter, D}, + state::SAIterationState) where {C, R, KV, D} = + (IntSemiToken(state.next), get_item0(ito, state)) + +Base.eltype(::Type{IterableObject{C, R, ValsIter, SemiTokenIter, D}}) where {C, R, D} = + Tuple{IntSemiToken, valtype(C)} + +function get_item(ito::IterableObject{C, R, KeysValsIter, SemiTokenIter, D}, + state::SAIterationState) where {C, R, KV, D} + i = get_item0(ito, state) + (IntSemiToken(state.next), i.first, i.second) +end + +Base.eltype(::Type{IterableObject{C, R, KeysValsIter, SemiTokenIter, D}}) where {C, R, D} = + Tuple{IntSemiToken, keytype(C), valtype(C)} + +get_item(ito::IterableObject{C, R, KV, OnlyTokenIter, D}, + state::SAIterationState) where {C, R, KV, D} = + (ito.m, IntSemiToken(state.next)) + +Base.eltype(::Type{IterableObject{C, R, KV, OnlyTokenIter, D}}) where {C, KV, R, D} = + Tuple{C, IntSemiToken} + + +get_item(ito::IterableObject{C, R, KV, OnlySemiTokenIter, D}, + state::SAIterationState) where {C, R, KV, D} = + IntSemiToken(state.next) + + 
+Base.eltype(::Type{IterableObject{C, R, KV, OnlySemiTokenIter, D}}) where {C, R, KV, D} = IntSemiToken + +get_item(ito::IterableObject{C, R, KV, NoTokens, D}, + state::SAIterationState) where {C, R, KV, D} = + get_item0(ito, state) + +Base.eltype(::Type{IterableObject{C, R, KeysIter, NoTokens, D}}) where {C, R, D} = + keytype(C) +Base.eltype(::Type{IterableObject{C, R, ValsIter, NoTokens, D}}) where {C, R, D} = + valtype(C) +Base.eltype(::Type{IterableObject{C, R, KeysValsIter, NoTokens, D}}) where {C, R, D} = + eltype(C) + +Base.eltype(::ItObj) where {ItObj <: IterableObject} = eltype(ItObj) + +get_item(m::SortedContainer, state::SAIterationState) = + get_item(base_iterable_object(m), state) + + +function next(ito::IterableObject{C, R, KV, T, ForwardIter}, + state::SAIterationState) where {C, R, KV, T} + sn = state.next + (sn < 3 || !(sn in ito.m.bt.useddatacells)) && throw(BoundsError()) + SAIterationState(nextloc0(ito.m.bt, sn), state.final) +end + +function next(ito::IterableObject{C, R, KV, T, ReverseIter}, + state::SAIterationState) where {C, R, KV, T} + sn = state.next + (sn < 3 || !(sn in ito.m.bt.useddatacells)) && throw(BoundsError()) + SAIterationState(prevloc0(ito.m.bt, sn), state.final) +end + +next(m::SortedContainer, state::SAIterationState) = + next(base_iterable_object(m), state) + +""" + Base.iterate(iter::SortedContainerIterable) + +with the following helper functions to construct a `SortedContainerIterable`: + + inclusive(m::SortedContainer, st1, st2) + inclusive(m::SortedContainer, (st1, st2)) + inclusive_key(m::SortedContainer, key1, key2) + inclusive_key(m::SortedContainer, (key1, key2)) + exclusive(m::SortedContainer, st1, st2) + exclusive(m::SortedContainer, (st1, st2)) + exclusive_key(m::SortedContainer, key1, key2) + exclusive_key(m::SortedContainer, (key1, key2)) + Base.keys(b) + Base.values(b) + Base.pairs(b) + Base.eachindex(b) + tokens(kv) + semitokens(kv) + onlytokens(kv) + onlysemitokens(kv) + Base.Iterators.reverse(m) + (:)(a,b) + 
+ + +Iterate over a sorted container, typically +within a for-loop, comprehension, or generator. +Here, `iter` is an iterable object constructed from a sorted +container. The possible iterable objects are constructed from +the helper functions as follows: + +A *basic* iterable object is either +- an entire sorted container `m`, +- `inclusive(m, (st1, st2))` or equivalently `inclusive(m, st1, st2)`, +- `inclusive_key(m, (k1, k2))` or equivalently `inclusive_key(m, k1, k2)` +- `a:b`, where `a` and `b` are tokens addressing the same container +- `exclusive(m, (st1, st2))` or equivalently `exclusive(m, st1, st2)` +- `exclusive_key(m, (k1, k2))` or equivalently `exclusive_key(m, k1, k2)` + +These extract ranges of consecutive items in the containers. In the +`inclusive` and `exclusive` constructions, +`m` is a container, and `st1` and `st2` are semitokens. The +`inclusive` range includes both endpoints `st1` and `st2`. +The inclusive +iteration is empty if `compare(m,st1,st2)>0`. The `exclusive` range includes +endpoint `st1` but not `st2`. The exclusive iteration is empty if +`compare(m,st1,st2)>=0`. In the exclusive iteration, it is acceptable +if `st2` is the past-end semitoken. + + +The range `exclusive_key` means all data items with keys between `k1` up to but +excluding items with key `k2`. For this range to be nonempty, +`k1 < k2` must hold (in the sort order). The range `inclusive_key` means all data items with +keys between `k1` and `k2` inclusive. For this range to be nonempty, `k1 <= k2` must hold. + +A *kv* iterable object has the form +- `b`, a basic iterable object +- `keys(b)` where `b` is a basic object. Extract keys only (not applicable to SortedSet). +- `values(b)` where `b` is a basic object. Extract values only (not applicable to SortedSet). +- `pairs(b)` where `b` is a basic object. Extract key=>value pairs + (not applicable to SortedSet). + This is the same as just specifying `b` and is provided only for compatibility + with `Base.pairs`. + +A *tkv* object has the form +- `kv`, a kv iterable object +- `tokens(kv)` where `kv` is a kv iterable object. + Return 2-tuples of the form `(t,w)`, where `t` is the + token of the item and `w` is a key or value if `kv` is a keys or values + iteration, or `(t,k,v)` if `kv` is a pairs iteration. +- `semitokens(kv)` where `kv` is a kv iterable object.
+ Return 2-tuples of the form `(st,w)`, where `st` is the + semitoken of the item and `w` is a key or value if `kv` is a keys or values + iteration, or `(st,k,v)` if `kv` is a pairs iteration. +- `onlytokens(kv)` where `kv` is a kv iterable object. Return only tokens + of the data items but not the items themselves. + The `keys`, `values`, or `pairs` modifiers described above + are ignored. +- `onlysemitokens(kv)` where `kv` is a kv iterable object. Return only semitokens + of the data items but not the items themselves. + The `keys`, `values`, or `pairs` modifiers described above + are ignored. + +Finally, a tkv iteration can be reversed by the `Iterators.reverse` function. The +`Iterators.reverse` function +may be nested in an arbitrary position with respect to the other operations described +above. Two reverse operations cancel each other out. For example, +`Iterators.reverse(keys(Iterators.reverse(m)))` is the same iteration as `keys(m)`. + +For compatibility with `Base`, there is also an `eachindex` function: +`eachindex(b)` where the base object `b` is a SortedDict is +the same as `keys(b)` (to be compatible with Dict). +On the other hand, `eachindex(b)` where the +base object `b` is a SortedSet or SortedMultiDict is the +same as `onlysemitokens(b)`. + +Colon notation `a:b` is equivalent +to `onlytokens(inclusive(a[1], a[2], b[2]))`, in other words, it yields +an iteration that provides all the tokens of items in the sort order ranging +from token `a` up to token `b`. It is required that `a[1]===b[1]` (i.e., +`a` and `b` are tokens for the same container). Exclusive iteration using +colon notation is obtained via `a : b-1`. + +# Examples: + +```julia + for (k,v) in sd + + end +``` +Here, `sd` is a `SortedDict` or `SortedMultiDict`. The variables `(k,v)` +are set to consecutive key-value pairs. All items in the container are +produced in order.
+ + +```julia + for k in inclusive(ss, st1, st2) + + end +``` +Here, `ss` is a `SortedSet`, and `st1`, and `st2` are semitokens indexing `ss`. +The elements of the set between `st1` and `st2` inclusive are returned. + + +```julia + for (t,k) in tokens(keys(exclusive_key(sd, key1, key2))) + + end +``` +Here, `sd` is a `SortedDict` or `SortedMultiDict`, `key1` and `key2` are keys +indexing `sd`. In this case, `t` will be tokens of consecutive items, +while `k` will be the corresponding keys. The returned keys lie between `key1` and +`key2` excluding `key2`. + +```julia + for (t,k) in Iterators.reverse(tokens(keys(exclusive_key(sd, key1, key2)))) + + end +``` +Same as above, except the iteration is in the reverse order. + +Running time for all iterations: O(*c*(*s* + log *n*)), where +*s* is the number of steps from start to end of the iteration. +""" +function Base.iterate(s::SortedContainerIterable, + state = iteration_init(s)) + if state.next == state.final + return nothing + else + return (get_item(s, state), next(s, state)) + end +end + +Base.keytype(::IterableObject{C, R, KeysValsIter, NoTokens, D}) where +{C <: SDMContainer, R, D} = keytype(C) + +Base.keytype(::Type{IterableObject{C, R, KeysValsIter, NoTokens, D}}) where +{C <: SDMContainer, R, D} = keytype(C) + +Base.valtype(::IterableObject{C, R, KeysValsIter, NoTokens, D}) where +{C <: SDMContainer, R, D} = valtype(C) + +Base.valtype(::Type{IterableObject{C, R, KeysValsIter, NoTokens, D}}) where +{C <: SDMContainer, R, D} = valtype(C) + +Base.IteratorSize(::Type{T} where {T <: SortedContainer}) = HasLength() +Base.IteratorSize(::Type{IterableObject{C, EntireContainer, KV, T, D}}) where +{C, KV, T, D} = HasLength() + +Base.IteratorSize(::Type{IterableObject{C, ExclusiveRange, KV, T, D}}) where +{C, KV, T, D} = SizeUnknown() + +Base.IteratorSize(::Type{IterableObject{C, InclusiveRange, KV, T, D}}) where +{C, KV, T, D} = SizeUnknown() + +Base.length(ito::IterableObject{C, EntireContainer, KV, T, D}) where +{C, 
KV, T, D} = length(ito.m) + + +""" + Base.in(x, iter::SortedContainerIterable) + +Returns true if `x` is in `iter`, where `iter` refers to any of the +iterable objects described under [`Base.iterate(iter::SortedContainerIterable)`](@ref), +and `x` is of the appropriate type. For all of the iterables +except the five listed below, the algorithm used is a linear-time +search. For example, the call: + + (k=>v) in exclusive(sd, st1, st2) + +where `sd` is a SortedDict, `st1` and `st2` are semitokens, `k` is a +key, and `v` is a value, will loop over all entries in the +dictionary between the two tokens and compare for equality using +`isequal` between the indexed item and `k=>v`. + +The five exceptions are: + +```julia +(k=>v) in sd +(k=>v) in smd +k in ss +k in keys(sd) +k in keys(smd) +``` + +Here, `sd` is a SortedDict, `smd` is a SortedMultiDict, and `ss` is +a SortedSet. + +These five invocations of `in` use the index structure of the sorted +container and test equality based on the order object of the keys +rather than `isequal`. Therefore, these five are all faster than +linear-time looping. To force the use of the `isequal` test on +the keys rather than the order object (thus slowing the execution +from logarithmic to linear time), replace the above five constructs +with these: + +```julia +(k=>v) in collect(sd) +(k=>v) in collect(smd) +k in collect(ss) +k in collect(keys(sd)) +k in collect(keys(smd)) +``` +""" +Base.in(x, m::SortedContainerIterable) = + invoke(in, Tuple{Any,Any}, x, m) + +Base.in(k, ito::IterableObject{C, EntireContainer, KeysIter, NoTokens, D}) where +{C, D} = haskey(ito.m, k) + + +""" + haskey(sc::SortedContainer, k) + +Return `true` iff key `k` is present in `sc`. Equivalent +to +`in(k,sc)` for a SortedSet, or to `in(k,keys(sc))` for +a SortedDict or SortedMultiDict.
Time: O(*c* log *n*) +""" +@inline function Base.haskey(m::SortedContainer, k_) + i, exactfound = findkey(m.bt, convert(keytype(m),k_)) + return exactfound +end + + +Base.eachindex(sd::SortedDict) = keys(sd) +Base.eachindex(sdm::SortedMultiDict) = onlysemitokens(sdm) +Base.eachindex(ss::SortedSet) = onlysemitokens(ss) + + +Base.eachindex(ito::IterableObject{C, R, KeysValsIter, NoTokens, D}) where +{C <: SortedDict, R, KV, D} = keys(ito) + +Base.eachindex(ito::IterableObject{C, R, KV, NoTokens, D}) where +{C <: SortedMultiDict, R, KV, T, D} = onlysemitokens(ito) + +Base.eachindex(ito::IterableObject{C, R, KV, NoTokens, D}) where +{C <: SortedSet, R, KV, T, D} = onlysemitokens(ito) + + +""" + empty!(m) + +Empty a sorted container +""" + +Base.empty!(m::SortedContainer) = (empty!(m.bt); m) +Base.length(m::SortedContainer) = length(m.bt.data) - length(m.bt.freedatainds) - 2 +Base.isempty(m::SortedContainer) = length(m) == 0 + + +(:)(t1::Token, t2::Token) = _colon(t1,t2) + +function _colon(t1::Token, t2::Token) + t1[1] !== t2[1] && + throw(ArgumentError("First and second arguments of colon operator on sorted container tokens must refer to the same container")) + IterableObject{typeof(t1[1]), InclusiveRange, default_KVIterType(t1[1]), OnlyTokenIter, + ForwardIter}(t1[1], InclusiveRange(t1[2].address, t2[2].address)) +end + + +""" + +(t::Token, j::Integer) + -(t::Token, j::Integer) +Return the token that is `j` positions ahead (if `+`) or behind (if `-`) of `t`. +Here, `t` is a token for a sorted container and `j` is an integer. +If `j` is negative, then `+` regresses while `-` advances. +If the operation `t+j` or `t-j` reaches the before-start +or past-end positions in the container, +then the before-start/past-end tokens are returned (and there is no error). +Time: O(*j*+log *n*), so this function is not optimized for long jumps. +""" ++(t1::Token, numstep::Integer) = + numstep >= 0 ? 
stepforward(t1, numstep) : stepback(t1, -numstep) + +-(t1::Token, numstep::Integer) = + numstep >= 0 ? stepback(t1, numstep) : stepforward(t1, -numstep) + + +function stepforward(t1::Token, numstep::Integer) + m = t1[1] + j = t1[2].address + !(j in m.bt.useddatacells) && throw(BoundsError()) + for i = 1 : numstep + j == 2 && break + j = nextloc0(m.bt, j) + end + (m, IntSemiToken(j)) +end + +function stepback(t1::Token, numstep::Integer) + m = t1[1] + j = t1[2].address + !(j in m.bt.useddatacells) && throw(BoundsError()) + for i = 1 : numstep + j == 1 && break + j = prevloc0(m.bt, j) + end + (m, IntSemiToken(j)) +end + diff --git a/src/sorted_dict.jl b/src/sorted_dict.jl index db2afe34a..6b6fd2105 100644 --- a/src/sorted_dict.jl +++ b/src/sorted_dict.jl @@ -3,411 +3,294 @@ mutable struct SortedDict{K, D, Ord <: Ordering} <: AbstractDict{K,D} bt::BalancedTree23{K,D,Ord} +end - ## Base constructors - """ - SortedDict{K,V}(o=Forward) - Construct an empty `SortedDict` with key type `K` and value type - `V` with `o` ordering (default to forward ordering). - """ - SortedDict{K,D,Ord}(o::Ord) where {K, D, Ord <: Ordering} = - new{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) +""" + SortedDict{K,V,Ord}(o::Ord=Forward) where {K, V, Ord <: Ordering} + SortedDict{K,V,Ord}(o::Ord, kv) where {K, V, Ord <: Ordering} - function SortedDict{K,D,Ord}(o::Ord, kv) where {K, D, Ord <: Ordering} - s = new{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) +Construct a `SortedDict` with key type `K` and value type +`V` with `o` ordering from an iterable `kv`. The iterable should +generate either `Pair{K,V}` or `Tuple{K,V}`. If omitted, then +the SortedDict is initially empty. Time: O(*cn* log *n*) where +*n* is the length of the iterable. +""" +SortedDict{K,D,Ord}(o::Ord=Forward) where {K, D, Ord <: Ordering} = + SortedDict{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) - if eltype(kv) <: Pair - # It's (possibly?) 
more efficient to access the first and second - # elements of Pairs directly, rather than destructure - for p in kv - s[p.first] = p.second - end - else - for (k, v) in kv - s[k] = v - end - end - return s +function SortedDict{K,D,Ord}(o::Ord, kv) where {K, D, Ord <: Ordering} + s = SortedDict{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) + for (k, v) in kv + s[k] = v end - + return s end -# Any-Any constructors -""" - SortedDict() -Construct an empty `SortedDict` with key type `Any` and value type -`Any`. Ordering defaults to `Forward` ordering. - -**Note that a key type of `Any` or any other abstract type will lead -to slow performance, as the values are stored boxed (i.e., as -pointers), and insertion will require a run-time lookup of the -appropriate comparison function. It is recommended to always specify -a concrete key type, or to use one of the constructors below in -which the key type is inferred.** -""" -SortedDict() = SortedDict{Any,Any,ForwardOrdering}(Forward) """ - SortedDict(o=Forward) + SortedDict(o::Ord=Forward) where {Ord <: Ordering} + SortedDict{K,V}(o::Ord=Forward) where {K,V,Ord<:Ordering} Construct an empty `SortedDict` with key type `K` and value type -`V`. If `K` and `V` are not specified, the dictionary defaults to a -`SortedDict{Any,Any}`. Keys and values are converted to the given -type upon insertion. Ordering `o` defaults to `Forward` ordering. +`V` with `o` ordering (default to forward ordering). If +`K` and `V` are not specified as in the +first form, then they are assumed to both be `Any`. +Time: O(1) **Note that a key type of `Any` or any other abstract type will lead to slow performance, as the values are stored boxed (i.e., as pointers), and insertion will require a run-time lookup of the appropriate comparison function. 
It is recommended to always specify -a concrete key type, or to use one of the constructors below in +a concrete key type, or to use one of the constructors in which the key type is inferred.** """ -SortedDict(o::Ord) where {Ord <: Ordering} = SortedDict{Any,Any,Ord}(o) - -# Construction from Pairs -# TODO: fix SortedDict(1=>1, 2=>2.0) -""" - SortedDict(k1=>v1, k2=>v2, ...) -and `SortedDict{K,V}(k1=>v1, k2=>v2, ...)` +SortedDict(o::Ord=Forward) where {Ord <: Ordering} = SortedDict{Any,Any,Ord}(o) +SortedDict{K,D}(o::Ord=Forward) where {K,D,Ord<:Ordering} = + SortedDict{K,D,Ord}(o) -Construct a `SortedDict` from the given key-value pairs. If `K` and -`V` are not specified, key type and value type are inferred from the -given key-value pairs, and ordering is assumed to be `Forward` -ordering. -""" -SortedDict(ps::Pair...) = SortedDict(Forward, ps) -SortedDict{K,D}(ps::Pair...) where {K,D} = SortedDict{K,D,ForwardOrdering}(Forward, ps) -""" - SortedDict(o, k1=>v1, k2=>v2, ...) -Construct a `SortedDict` from the given pairs with the specified -ordering `o`. The key type and value type are inferred from the -given pairs. """ -SortedDict(o::Ordering, ps::Pair...) = SortedDict(o, ps) + SortedDict(iter, o::Ord=Forward) where {Ord <: Ordering} + SortedDict(o::Ordering, iter) + SortedDict{K,V}(iter, o::Ordering=Forward) where {K,V} + SortedDict{K,V}(o::Ordering, iter) where {K,V} -""" - SortedDict{K,V}(o, k1=>v1, k2=>v2, ...) +Construct a `SortedDict` from an arbitrary iterable object of +`key=>value` pairs or `(key,value)` tuples with order object `o`. The key type +and value type are inferred from the given iterable in the +first two forms. The first two forms copy the +data three times, so +it is more efficient to explicitly specify `K` and `V` as in the +second two forms. Time: O(*cn* log *n*) -Construct a `SortedDict` from the given pairs with the specified -ordering `o`. If `K` and `V` are not specified, the key type and -value type are inferred from the given pairs. 
See below for more -information about ordering. """ -SortedDict{K,D}(o::Ord, ps::Pair...) where {K,D,Ord<:Ordering} = SortedDict{K,D,Ord}(o, ps) - - -# Construction from AbstractDicts -SortedDict(o::Ord, d::AbstractDict{K,D}) where {K,D,Ord<:Ordering} = SortedDict{K,D,Ord}(o, d) +SortedDict(iter, o::Ord=Forward) where {Ord <: Ordering} = + SortedDict(o, iter) -## Construction from iteratables of Pairs/Tuples +# TODO: figure out how to infer type without three copies -# Construction specifying Key/Value types -# e.g., SortedDict{Int,Float64}([1=>1, 2=>2.0]) -""" - SortedDict(iter, o=Forward) -and `SortedDict{K,V}(iter, o=Forward)` - -Construct a `SortedDict` from an arbitrary iterable object of -`key=>value` pairs. If `K` and `V` are not specified, the key type -and value type are inferred from the given iterable. The ordering -object `o` defaults to `Forward`. -""" -SortedDict{K,D}(kv) where {K,D} = SortedDict{K,D}(Forward, kv) -function SortedDict{K,D}(o::Ord, kv) where {K,D,Ord<:Ordering} - try - SortedDict{K,D,Ord}(o, kv) - catch e - if not_iterator_of_pairs(kv) - throw(ArgumentError("SortedDict(kv): kv needs to be an iterator of tuples or pairs")) - else - rethrow(e) - end +function SortedDict(o::Ordering, kv) + c = collect(kv) + if eltype(c) <: Pair + c2 = collect((t.first, t.second) for t in c) + elseif eltype(c) <: Tuple + c2 = collect((t[1], t[2]) for t in c) + else + throw(ArgumentError("In SortedDict(o,kv), kv should contain either pairs or 2-tuples")) end + SortedDict{eltype(c2).parameters[1], eltype(c2).parameters[2], typeof(o)}(o, c2) end +SortedDict{K,D}(iter, o::Ordering=Forward) where {K, D} = + SortedDict{K,D,typeof(o)}(o, iter) +SortedDict{K,D}(o::Ordering, iter) where {K, D} = + SortedDict{K,D,typeof(o)}(o, iter) -# Construction inferring Key/Value types from input -# e.g. 
SortedDict{} - -SortedDict(o1::Ordering, o2::Ordering) = throw(ArgumentError("SortedDict with two parameters must be called with an Ordering and an interable of pairs")) +""" + SortedDict(ps::Pair...) + SortedDict(o::Ordering, ps::Pair...) + SortedDict{K,V}(ps::Pair...) + SortedDict{K,V}(o::Ordering, ps::Pair...) where {K,V} +Construct a `SortedDict` from the given key-value pairs. +The key type and value type are inferred from the +given key-value pairs in the first two forms. +The ordering is assumed to be `Forward` +ordering in the first and third form. +The first two forms (where `K` and `V` are not specified +but inferred) involve copying the data three times +and so are less efficient than the second two forms. +Time: O(*cn* log *n*) """ - SortedDict(d, o=Forward) -and `SortedDict{K,V}(d, o=Forward)` +SortedDict(ps::Pair...) = SortedDict(Forward, ps) +SortedDict{K,D}(ps::Pair...) where {K,D} = SortedDict{K,D,ForwardOrdering}(Forward, ps) +SortedDict(o::Ordering, ps::Pair...) = SortedDict(o, ps) +SortedDict{K,D}(o::Ord, ps::Pair...) where {K,D,Ord<:Ordering} = + SortedDict{K,D,Ord}(o, ps) -Construct a `SortedDict` from an ordinary Julia dict `d` (or any -associative type), e.g.: -```julia -d = Dict("New York" => 1788, "Illinois" => 1818) -c = SortedDict(d) -``` -In this example the key-type is deduced to be `String`, while the -value-type is `Int`. +""" + SortedDict{K,V}(::Val{true}, iterable) where {K, V} + SortedDict{K,V}(::Val{true}, iterable, ord::Ordering) where {K,V} -If `K` and `V` are not specified, the key type and value type are -inferred from the given dictionary. The ordering object `o` defaults -to `Forward`. +Construct a `SortedDict` from an iterable whose eltype +is Tuple{K,V} or Pair{K,V} and that is already in sorted order. +The first form assumes Forward ordering. No duplicate +keys allowed. Time: O(*cn*).
""" -SortedDict(kv, o::Ordering=Forward) = SortedDict(o, kv) -function SortedDict(o::Ordering, kv) - try - _sorted_dict_with_eltype(o, kv, eltype(kv)) - catch e - if not_iterator_of_pairs(kv) - throw(ArgumentError("SortedDict(kv): kv needs to be an iterator of tuples or pairs")) - else - rethrow(e) - end - end +SortedDict{K,D}(::Val{true}, iterable) where {K,D} = + SortedDict{K,D}(Val(true), iterable, Forward) + +function SortedDict{K,D}(::Val{true}, + iterable, + ord::Ord) where {K,D,Ord <: Ordering} + SortedDict{K, D, Ord}(BalancedTree23{K,D,Ord}(Val(true), iterable, ord, false)) end -_sorted_dict_with_eltype(o::Ord, ps, ::Type{Pair{K,D}}) where {K,D,Ord} = SortedDict{ K, D,Ord}(o, ps) -_sorted_dict_with_eltype(o::Ord, kv, ::Type{Tuple{K,D}}) where {K,D,Ord} = SortedDict{ K, D,Ord}(o, kv) -_sorted_dict_with_eltype(o::Ord, ps, ::Type{Pair{K}} ) where {K, Ord} = SortedDict{ K,Any,Ord}(o, ps) -_sorted_dict_with_eltype(o::Ord, kv, ::Type ) where { Ord} = SortedDict{Any,Any,Ord}(o, kv) -## TODO: It seems impossible (or at least very challenging) to create the eltype below. -## If deemed possible, please create a test and uncomment this definition. -# _sorted_dict_with_eltype{ D,Ord}(o::Ord, ps, ::Type{Pair{K,D} where K}) = SortedDict{Any, D,Ord}(o, ps) +## The following is needed to resolve ambiguities -const SDSemiToken = IntSemiToken +SortedDict(::Ordering, ::Ordering) = + throw(ArgumentError("Not a valid SortedDict constructor")) +SortedDict{K,D}(::Ordering, ::Ordering) where {K,D} = + throw(ArgumentError("Not a valid SortedDict constructor")) +SortedDict(::Val{true}, ::Ordering) = throw(ArgumentError("Not a valid SortedDict constructor")) +SortedDict{K,D}(::Val{true}, ::Ordering) where {K,D} = + throw(ArgumentError("Not a valid SortedDict constructor")) -const SDToken = Tuple{SortedDict,IntSemiToken} -## This function implements m[k]; it returns the -## data item associated with key k. 
""" - v = sd[k] + Base.getindex(sd::SortedDict, k) -Argument `sd` is a SortedDict and `k` is a key. In an expression, -this retrieves the value (`v`) associated with the key (or `KeyError` if -none). On the left-hand side of an assignment, this assigns or -reassigns the value associated with the key. (For assigning and -reassigning, see also `insert!` below.) Time: O(*c* log *n*) +Retrieve the value associated with key `k` in SortedDict `sc`. +Yields a `KeyError` if `k` is not found. The following +functions do not throw an error if the key is not found: +[`Base.get(sd::SortedDict,k,v)`](@ref) and +[`findkey(sd::SortedDict, k)`](@ref). +Time: O(*c* log *n*) """ @inline function Base.getindex(m::SortedDict, k_) i, exactfound = findkey(m.bt, convert(keytype(m),k_)) !exactfound && throw(KeyError(k_)) - return m.bt.data[i].d + @inbounds return m.bt.data[i].d end -## This function implements m[k]=d; it sets the -## data item associated with key k equal to d. """ - sc[st] = v + Base.setindex!(sd::SortedDict, newvalue, k) -If `st` is a semitoken and `sc` is a SortedDict or SortedMultiDict, -then `sc[st]` refers to the value field of the (key,value) pair that -the full token `(sc,st)` refers to. This expression may occur on -either side of an assignment statement. Time: O(1) +Assign or +reassign the value associated with the key `k` to `newvalue`. Note +that the key is also overwritten; this is not necessarily a no-op +since the equivalence in the sort-order does not imply equality. +See also [`sd_push!(sd::SortedDict, p::Pair)`](@ref). +Time: O(*c* log *n*) """ -@inline function Base.setindex!(m::SortedDict{K,D,Ord}, d_, k_) where {K, D, Ord <: Ordering} - insert!(m.bt, convert(K,k_), convert(D,d_), false) +@inline function Base.setindex!(m::SortedDict, d_, k_) + insert!(m.bt, convert(keytype(m),k_), convert(valtype(m),d_), false) return m end -## push! is an alternative to insert!; it returns the container. 
+ """ - push!(sc, k=>v) + Base.push!(sd::SortedDict, p::Pair) -Argument `sc` is a SortedDict or SortedMultiDict and `k=>v` is a -key-value pair. This inserts the key-value pair into the container. -If the key is already present, this overwrites the old value. The -return value is `sc`. Time: O(*c* log *n*) +Insert key-value pair `p`, i.e., a `k=>v` pair, into `sd`. +If the key `k` is already present, this overwrites the old value. +The key is also overwritten (not necessarily a no-op, since +sort-order equivalence may differ from equality). +The return value is `sd`. See also [`sd_push!(sd::SortedDict, p::Pair)`](@ref). +Time: O(*c* log *n*) """ @inline function Base.push!(m::SortedDict{K,D}, pr::Pair) where {K,D} insert!(m.bt, convert(K, pr[1]), convert(D, pr[2]), false) return m end -## This function looks up a key in the tree; -## if not found, then it returns a marker for the -## end of the tree. """ - findkey(sd, k) + findkey(sd::SortedDict, k) -Argument `sd` is a SortedDict and argument `k` is a key. This -function returns the semitoken that refers to the item whose key is -`k`, or past-end semitoken if `k` is absent. Time: O(*c* log *n*) +Return the semitoken that +points to the item whose key is +`k`, or past-end semitoken if `k` is absent. +See also [`Base.getindex(sd::SortedDict, k)`](@ref). +Time: O(*c* log *n*) """ @inline function findkey(m::SortedDict, k_) ll, exactfound = findkey(m.bt, convert(keytype(m),k_)) IntSemiToken(exactfound ? ll : 2) end -## This function inserts an item into the tree. -## Unlike m[k]=d, it also returns a bool and a token. -## The bool is true if the inserted item is new. -## It is false if there was already an item -## with that key. -## The token points to the newly inserted item. """ - insert!(sc, k) + sd_push!(sd::SortedDict, p::Pair) -Argument `sc` is a SortedDict or SortedMultiDict, `k` is a key and -`v` is the corresponding value. This inserts the `(k,v)` pair into -the container.
If the key is already present in a SortedDict, this -overwrites the old value. In the case of SortedMultiDict, no -overwriting takes place (since SortedMultiDict allows the same key -to associate with multiple values). In the case of SortedDict, the -return value is a pair whose first entry is boolean and indicates +Insert pair `p` of the form `k=>v` into `sd`. +If the key is already present in `sd`, this +overwrites the old value. Note that the key is also overwritten, +which is not necessarily a no-op because equivalence in the sort +order does not necessarily imply equality. Unlike `push!`, +the +return value is a 2-tuple whose first entry is boolean and indicates whether the insertion was new (i.e., the key was not previously -present) and the second entry is the semitoken of the new entry. In -the case of SortedMultiDict, a semitoken is returned (but no -boolean). Time: O(*c* log *n*) +present) and whose second entry is the semitoken of the new entry. +This function replaces the deprecated `insert!(sd,k,v)`. + Time: O(*c* log *n*) """ -@inline function Base.insert!(m::SortedDict{K,D,Ord}, k_, d_) where {K,D, Ord <: Ordering} - b, i = insert!(m.bt, convert(K,k_), convert(D,d_), false) +@inline function sd_push!(m::SortedDict{K,D,Ord}, pr::Pair) where {K,D, Ord <: Ordering} + b, i = insert!(m.bt, convert(K,pr.first), convert(D,pr.second), false) b, IntSemiToken(i) end -""" - eltype(sc) - -Returns the (key,value) type (a 2-entry pair, i.e., `Pair{K,V}`) for -SortedDict and SortedMultiDict. Returns the key type for SortedSet. -This function may also be applied to the type itself. Time: O(1) -""" -@inline Base.eltype(m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} = Pair{K,D} -@inline Base.eltype(::Type{SortedDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = Pair{K,D} - -""" - in(p, sc) +@deprecate insert!(m::SortedDict, k, d) sd_push!(m::SortedDict, k=>d) -Returns true if `p` is in `sc`. 
In the case that `sc` is a -SortedDict or SortedMultiDict, `p` is a key=>value pair. In the -case that `sc` is a SortedSet, `p` should be a key. Time: O(*c* log -*n* + *d*) for SortedDict and SortedSet, where *d* stands for the -time to compare two values. In the case of SortedMultiDict, the time -is O(*c* log *n* + *dl*), and *l* stands for the number of entries -that have the key of the given pair. (So therefore this call is -inefficient if the same key addresses a large number of values, and -an alternative should be considered.) -""" -@inline function Base.in(pr::Pair, m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} - i, exactfound = findkey(m.bt,convert(K,pr[1])) - return exactfound && isequal(m.bt.data[i].d,convert(D,pr[2])) -end - -@inline Base.in(::Tuple{Any,Any}, ::SortedDict) = - throw(ArgumentError("'(k,v) in sorteddict' not supported in Julia 0.4 or 0.5. See documentation")) -""" - keytype(sc) -Returns the key type for SortedDict, SortedMultiDict and SortedSet. -This function may also be applied to the type itself. Time: O(1) -""" +@inline Base.eltype(m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} = Pair{K,D} +@inline Base.eltype(::Type{SortedDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = Pair{K,D} @inline Base.keytype(m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} = K @inline Base.keytype(::Type{SortedDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = K - -""" - valtype(sc) - -Returns the value type for SortedDict and SortedMultiDict. This -function may also be applied to the type itself. Time: O(1) -""" @inline Base.valtype(m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} = D @inline Base.valtype(::Type{SortedDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = D """ - ordtype(sc) - -Returns the order type for SortedDict, SortedMultiDict and -SortedSet. This function may also be applied to the type itself. 
-Time: O(1) -""" -@inline ordtype(m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} = Ord -@inline ordtype(::Type{SortedDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = Ord - - -""" - orderobject(sc) - -Returns the order object used to construct the container. Time: O(1) -""" -@inline orderobject(m::SortedDict) = m.bt.ord + Base.in(p::Pair, sd::SortedDict) - -## First and last return the first and last (key,data) pairs -## in the SortedDict. It is an error to invoke them on an -## empty SortedDict. - -""" - first(sc) - -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. This -function returns the first item (a `k=>v` pair for SortedDict and -SortedMultiDict or a key for SortedSet) according to the sorted -order in the container. Thus, `first(sc)` is equivalent to -`deref((sc,startof(sc)))`. It is an error to call this function on -an empty container. Time: O(log *n*) +Return true if `p` is in `sd`. Here, `p` is a key=>value pair. +Time: O(*c* log *n* + *d*) where *d* stands for the +time to compare two values. """ -@inline function Base.first(m::SortedDict) - i = beginloc(m.bt) - i == 2 && throw(BoundsError()) - return Pair(m.bt.data[i].k, m.bt.data[i].d) +@inline function Base.in(pr::Pair, m::SortedDict) + i, exactfound = findkey(m.bt,convert(keytype(m),pr[1])) + return exactfound && isequal(m.bt.data[i].d, convert(valtype(m),pr[2])) end -""" - last(sc) - -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. This -function returns the last item (a `k=>v` pair for SortedDict and -SortedMultiDict or a key for SortedSet) according to the sorted -order in the container. Thus, `last(sc)` is equivalent to -`deref((sc,lastindex(sc)))`. It is an error to call this function on an -empty container. 
Time: O(log *n*) -""" -@inline function Base.last(m::SortedDict) - i = endloc(m.bt) - i == 1 && throw(BoundsError()) - return Pair(m.bt.data[i].k, m.bt.data[i].d) -end """ - haskey(sc,k) + Base.get(sd::SortedDict,k,default) + Base.get(default_f::Union{Function,Type}, sd::SortedDict, k) -Returns true if key `k` is present for SortedDict, SortedMultiDict -or SortedSet `sc`. For SortedSet, `haskey(sc,k)` is a synonym for -`in(k,sc)`. For SortedDict and SortedMultiDict, `haskey(sc,k)` is -equivalent to `in(k,keys(sc))`. Time: O(*c* log *n*) +Return the value associated with key `k` where `sd` is a +SortedDict, or else returns `default` if `k` is not in `sd`. +The second form obtains `default` as the return argument of +the function/type-constructor `default_f` (with no arguments) +when the key is not present. +Time: O(*c* log *n*) """ -@inline function Base.haskey(m::SortedDict, k_) - i, exactfound = findkey(m.bt, convert(keytype(m), k_)) - exactfound +@inline function Base.get(m::SortedDict, k_, default_) + get(()->default_, m, k_) end -""" - get(sd,k,v) -Returns the value associated with key `k` where `sd` is a -SortedDict, or else returns `v` if `k` is not in `sd`. Time: O(*c* -log *n*) -""" function Base.get(default_::Union{Function,Type}, m::SortedDict{K,D}, k_) where {K,D} i, exactfound = findkey(m.bt, convert(K, k_)) return exactfound ? m.bt.data[i].d : default_() end -Base.get(m::SortedDict, k_, default_) = get(()->default_, m, k_) +Base.get(m::SortedDict, n::SortedDict, ::Any) = + throw_error("Ambiguous invocation of 'get'; please select the correct version using Base.invoke") + """ - get!(sd,k,v) + Base.get!(sd::SortedDict,k,default) + Base.get!(default_f::Union{Function,Type}, sd::SortedDict, k) -Returns the value associated with key `k` where `sd` is a -SortedDict, or else returns `v` if `k` is not in `sd`, and in the -latter case, inserts `(k,v)` into `sd`. 
Time: O(*c* log *n*) +Return the value associated with key `k` where `sd` is a +SortedDict, or else return `default` if `k` is not in `sd`, and in the +latter case, insert `(k,default)` into `sd`. +The +second form computes a default value by calling +the function `default_f` (with no arguments) or the constructor of +type `default_f` when the key is not present. +Time: O(*c* log *n*) """ +Base.get!(m::SortedDict, k_, default_) = get!(()->default_, m, k_) + function Base.get!(default_::Union{Function,Type}, m::SortedDict{K,D}, k_) where {K,D} k = convert(K,k_) i, exactfound = findkey(m.bt, k) @@ -420,12 +303,12 @@ function Base.get!(default_::Union{Function,Type}, m::SortedDict{K,D}, k_) where end end -Base.get!(m::SortedDict, k_, default_) = get!(()->default_, m, k_) + """ - getkey(sd,k,defaultk) + Base.getkey(sd::SortedDict,k,defaultk) -Returns key `k` where `sd` is a SortedDict, if `k` is in `sd` else +Return the key `k` where `sd` is a SortedDict, if `k` is in `sd` else it returns `defaultk`. If the container uses in its ordering an `eq` method different from isequal (e.g., case-insensitive ASCII strings illustrated below), then the return value is the actual key stored @@ -444,13 +327,13 @@ end ## Function delete! deletes an item at a given ## key """ - delete!(sc, k) + Base.delete!(sd::SortedDict, k) -Argument `sc` is a SortedDict or SortedSet and `k` is a key. This -operation deletes the item whose key is `k`. It is a `KeyError` if -`k` is not a key of an item in the container. After this operation +Delete the item whose key is `k` in `sd`. +After this operation is complete, any token addressing the deleted item is invalid. -Returns `sc`. Time: O(*c* log *n*) +Returns `sd`. This is a no-op if `k` is not present in `sd`. + Time: O(*c* log *n*) """ @inline function Base.delete!(m::SortedDict, k_) i, exactfound = findkey(m.bt, convert(keytype(m), k_)) @@ -461,18 +344,19 @@ Returns `sc`.
Time: O(*c* log *n*) end """ - pop!(sc, k[, default]) + Base.pop!(sd::SortedDict, k) + Base.pop!(sd::SortedDict, k, default) -Deletes the item with key `k` in SortedDict or SortedSet `sc` and -returns the value that was associated with `k` in the case of -SortedDict or `k` itself in the case of SortedSet. If `k` is not in `sc` +Delete the item with key `k` in `sd` and +return the value that was associated with `k`. +If `k` is not in `sd` return `default`, or throw a `KeyError` if `default` is not specified. Time: O(*c* log *n*) """ @inline function Base.pop!(m::SortedDict, k_) i, exactfound = findkey(m.bt, convert(keytype(m), k_)) !exactfound && throw(KeyError(k_)) - d = m.bt.data[i].d + @inbounds d = m.bt.data[i].d delete!(m.bt, i) return d end @@ -480,144 +364,164 @@ end @inline function Base.pop!(m::SortedDict, k_, default) i, exactfound = findkey(m.bt, convert(keytype(m), k_)) !exactfound && return default - d = m.bt.data[i].d + @inbounds d = m.bt.data[i].d delete!(m.bt, i) return d end -## Check if two SortedDicts are equal in the sense of containing -## the same (K,D) pairs. This sense of equality does not mean -## that semitokens valid for one are also valid for the other. """ - isequal(sc1,sc2) + Base.isequal(sd1::SortedDict{K,V,Ord}, sd2::SortedDict{K,V,Ord}) where {K, V, Ord <: Ordering} -Checks if two containers are equal in the sense that they contain +Check if two SortedDicts are equal in the sense that they contain the same items; the keys are compared using the `eq` method, while -the values are compared with the `isequal` function. In the case of -SortedMultiDict, equality requires that the values associated with a -particular key have same order (that is, the same insertion order). -Note that `isequal` in this sense does not imply any correspondence -between semitokens for items in `sc1` with those for `sc2`. 
If the -equality-testing method associated with the keys and values implies -hash-equivalence in the case of SortedDict, then `isequal` of the -entire containers implies hash-equivalence of the containers. Time: -O(*cn* + *n* log *n*) -""" -function Base.isequal(m1::SortedDict, m2::SortedDict) +the values are compared with the `isequal` function. +Note that `isequal` in this sense does not imply correspondence +between semitokens for items in `sd1` with those for `sd2`. +Time: O(*cn*). Note +that if `K`, `V`, `Ord`, or the +order objects of sd1 and sd2 are different, then a fallback routine +`Base.isequal(::AbstractDict, ::AbstractDict)` is invoked. +Time: O(*cn*) +""" +function Base.isequal(m1::SortedDict{K, D, Ord}, m2::SortedDict{K, D, Ord}) where +{K, D, Ord <: Ordering} + ord = orderobject(m1) - if !isequal(ord, orderobject(m2)) || !isequal(eltype(m1), eltype(m2)) - throw(ArgumentError("Cannot use isequal for two SortedDicts unless their element types and ordering objects are equal")) + if ord != orderobject(m2) + return invoke((==), Tuple{AbstractDict, AbstractDict}, m1, m2) end p1 = startof(m1) p2 = startof(m2) while true - if p1 == pastendsemitoken(m1) - return p2 == pastendsemitoken(m2) - end - if p2 == pastendsemitoken(m2) - return false - end - k1,d1 = deref((m1,p1)) - k2,d2 = deref((m2,p2)) - if !eq(ord,k1,k2) || !isequal(d1,d2) - return false - end - p1 = advance((m1,p1)) - p2 = advance((m2,p2)) + p1 == pastendsemitoken(m1) && return p2 == pastendsemitoken(m2) + p2 == pastendsemitoken(m2) && return false + k1,d1 = deref_nocheck((m1,p1)) + k2,d2 = deref_nocheck((m2,p2)) + (!eq(ord,k1,k2) || !isequal(d1,d2)) && return false + p1 = advance_nocheck((m1,p1)) + p2 = advance_nocheck((m2,p2)) end end function mergetwo!(m::SortedDict{K,D,Ord}, - m2::AbstractDict{K,D}) where {K,D,Ord <: Ordering} + m2) where {K,D,Ord <: Ordering} for (k,v) in m2 m[convert(K,k)] = convert(D,v) end end -# Standard copy functions use packcopy - that is, they retain elements but not 
-# the identical structure. Base.copymutable(m::SortedDict) = packcopy(m) Base.copy(m::SortedDict) = packcopy(m) -""" - packcopy(sc) +# See sorted_set for the docstrings for packcopy and packdeepcopy -This returns a copy of `sc` in which the data is packed. When -deletions take place, the previously allocated memory is not -returned. This function can be used to reclaim memory after many -deletions. Time: O(*cn* log *n*) -""" function packcopy(m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} - w = SortedDict(Dict{K,D}(), orderobject(m)) - mergetwo!(w,m) - return w + SortedDict{K,D}(Val(true), m, orderobject(m)) +end +function packdeepcopy(m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} + m2 = deepcopy(m) + SortedDict{K,D}(Val(true), m2, orderobject(m)) end -""" - packdeepcopy(sc) -This returns a packed copy of `sc` in which the keys and values are -deep-copied. This function can be used to reclaim memory after many -deletions. Time: O(*cn* log *n*) -""" -function packdeepcopy(m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} - w = SortedDict(Dict{K,D}(),orderobject(m)) - for (k,v) in m - newk = deepcopy(k) - newv = deepcopy(v) - w[newk] = newv +struct MergeManySortedDicts{K, D, Ord <: Ordering} + vec::Vector{SortedDict{K,D,Ord}} +end + +function Base.iterate(sds::MergeManySortedDicts{K, D, Ord}, + state = [startof(sds.vec[i]) for i=1:length(sds.vec)]) where +{K, D, Ord <: Ordering} + ord = orderobject(sds.vec[1]) + firsti = 0 + N = length(sds.vec) + for i = 1 : N + if state[i] != pastendsemitoken(sds.vec[i]) + firsti = i + break + end + end + firsti == 0 && return nothing + foundi = firsti + firstk = deref_key_nocheck((sds.vec[firsti], state[firsti])) + for i = firsti + 1 : N + if state[i] != pastendsemitoken(sds.vec[i]) + k2 = deref_key_nocheck((sds.vec[i], state[i])) + if !lt(ord, firstk, k2) + foundi = i + firstk = k2 + end + end + end + foundsemitoken = state[foundi] + for i = firsti : N + if state[i] != pastendsemitoken(sds.vec[i]) && + eq(ord, 
deref_key_nocheck((sds.vec[i], state[i])), firstk) + state[i] = advance_nocheck((sds.vec[i], state[i])) + end end - return w + (deref_nocheck((sds.vec[foundi], foundsemitoken)), state) end """ - merge!(sc, sc1...) + Base.merge!(sd::SortedDict{K,V,Ord}, d1::AbstractDict{K,V}...) where {K,V,Ord<:Ordering} -This updates `sc` by merging SortedDicts or SortedMultiDicts `sc1`, -etc. into `sc`. These must all must have the same key-value types. +Merge one or more dicts `d1`, etc. into `sd`. +These must all must have the same key-value types. In the case of keys duplicated among the arguments, the rightmost -argument that owns the key gets its value stored for SortedDict. In -the case of SortedMultiDict all the key-value pairs are stored, and -for overlapping keys the ordering is left-to-right. This function is -not available for SortedSet, but the `union!` function (see below) -provides equivalent functionality. Time: O(*cN* log *N*), where *N* +argument that owns the key gets its value stored. +Time: O(*cN* log *N*), where *N* is the total size of all the arguments. """ function Base.merge!(m::SortedDict{K,D,Ord}, - others::AbstractDict{K,D}...) where {K,D,Ord <: Ordering} + others::AbstractDict{K,D}...) where {K,D,Ord <: Ordering} for o in others mergetwo!(m,o) end end + """ - merge(sc1, sc2...) + Base.merge(sd::SortedDict{K,V,Ord}, d1::AbstractDict{K,V}...) where {K,V,Ord <: Ordering} -This returns a SortedDict or SortedMultiDict that results from -merging SortedDicts or SortedMultiDicts `sc1`, `sc2`, etc., which -all must have the same key-value-ordering types. In the case of keys +Merge one or more dicts into a single SortedDict +and return the new SortedDict. Arguments `d1` etc. +must have the same key-value type as `sd`. +In the case of keys duplicated among the arguments, the rightmost argument that owns the -key gets its value stored for SortedDict. 
In the case of -SortedMultiDict all the key-value pairs are stored, and for keys -shared between `sc1` and `sc2` the ordering is left-to-right. This -function is not available for SortedSet, but the `union` function -(see below) provides equivalent functionality. Time: O(*cN* log -*N*), where *N* is the total size of all the arguments. +key gets its value stored. Time: O(*cN* log +*N*), where *N* is the total size of all the arguments. If all +the arguments are SortedDicts with the same +key, value, and order object, then the time is O(*cN*). """ function Base.merge(m::SortedDict{K,D,Ord}, - others::AbstractDict{K,D}...) where {K,D,Ord <: Ordering} + others::AbstractDict{K,D}...) where {K,D,Ord <: Ordering} result = packcopy(m) merge!(result, others...) return result end +function Base.merge(m::SortedDict{K,D,Ord}, + others::SortedDict{K,D,Ord}...) where {K, D, Ord <: Ordering} + sds = MergeManySortedDicts{K, D, Ord}(SortedDict{K,D,Ord}[m]) + for sd in others + if orderobject(sd) != orderobject(m) + return invoke(merge, + Tuple{SortedDict{K,D,Ord}, Vararg{AbstractDict{K,D}}}, + m, others...) + end + push!(sds.vec, sd) + end + SortedDict{K,D}(Val(true), sds, orderobject(m)) +end + + """ - empty(sc) + Base.empty(sc) -Returns a new `SortedDict`, `SortedMultiDict`, or `SortedSet` of the same +Return a new `SortedDict`, `SortedMultiDict`, or `SortedSet` of the same type and with the same ordering as `sc` but with no entries (i.e., empty). Time: O(1) """ diff --git a/src/sorted_multi_dict.jl b/src/sorted_multi_dict.jl index 96a280bd9..87b4a5908 100644 --- a/src/sorted_multi_dict.jl +++ b/src/sorted_multi_dict.jl @@ -4,179 +4,180 @@ mutable struct SortedMultiDict{K, D, Ord <: Ordering} bt::BalancedTree23{K,D,Ord} - - ## Base constructors - - """ - SortedMultiDict{K,V,Ord}(o) - - Construct an empty sorted multidict in which type parameters are - explicitly listed; ordering object is explicitly specified. (See - below for discussion of ordering.) 
An empty SortedMultiDict may also - be constructed via `SortedMultiDict(K[], V[], o)` where the `o` - argument is optional. - """ - SortedMultiDict{K,D,Ord}(o::Ord) where {K,D,Ord} = new{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) - function SortedMultiDict{K,D,Ord}(o::Ord, kv) where {K,D,Ord} - smd = new{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) - - if eltype(kv) <: Pair - # It's (possibly?) more efficient to access the first and second - # elements of Pairs directly, rather than destructure - for p in kv - insert!(smd, p.first, p.second) - end - else - for (k,v) in kv - insert!(smd, k, v) - end - end - return smd - - end end """ - SortedMultiDict() - -Construct an empty `SortedMultiDict` with key type `Any` and value type -`Any`. Ordering defaults to `Forward` ordering. + SortedMultiDict{K,V,Ord}(o::Ord=Forward) where {K, V, Ord <: Ordering} + SortedMultiDict{K,V,Ord}(o::Ord, iterable) where {K, V, Ord <: Ordering} -**Note that a key type of `Any` or any other abstract type will lead -to slow performance.** +Construct a sorted multidict in which type parameters are +explicitly listed; ordering object is explicitly specified. +Time: O(*cn* log *n*) """ -SortedMultiDict() = SortedMultiDict{Any,Any,ForwardOrdering}(Forward) +SortedMultiDict{K,D,Ord}(o::Ord=Forward) where {K,D,Ord<:Ordering} = + SortedMultiDict{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) + +function SortedMultiDict{K,D,Ord}(o::Ord, kv) where {K,D,Ord<:Ordering} + smd = SortedMultiDict{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) + for (k,v) in kv + smd_push!(smd, k=>v) + end + return smd + +end """ - SortedMultiDict(o) + SortedMultiDict(o::Ord=Forward) where {Ord <: Ordering} + SortedMultiDict{K,V}(o::Ordering=Forward) where {K,V} -Construct an empty `SortedMultiDict` with key type `Any` and value type -`Any`, ordered using `o`. +Construct an empty `SortedMultiDict` with key type `K` and value type +`V` with `o` ordering (default to `Forward` ordering). 
If +`K` and `V` are not specified as in the +first form, then they are assumed to both be `Any`. +Time: O(1). **Note that a key type of `Any` or any other abstract type will lead -to slow performance.** +to slow performance, as the values are stored boxed (i.e., as +pointers), and insertion will require a run-time lookup of the +appropriate comparison function. It is recommended to always specify +a concrete key type, or to use one of the constructors in +which the key type is inferred.** """ -SortedMultiDict(o::O) where {O<:Ordering} = SortedMultiDict{Any,Any,O}(o) +SortedMultiDict(o::Ord=Forward) where {Ord <: Ordering} = + SortedMultiDict{Any,Any,typeof(o)}(o) +SortedMultiDict{K,D}(o::Ordering=Forward) where {K, D} = + SortedMultiDict{K,D,typeof(o)}(o) # Construction from Pairs """ - SortedMultiDict(k1=>v1, k2=>v2, ...) + SortedMultiDict(ps::Pair...) + SortedMultiDict(o, ps::Pair...) + SortedMultiDict{K,V}(ps::Pair...) + SortedMultiDict{K,V}(o, ps::Pair...) -Arguments are key-value pairs for insertion into the multidict. The -keys must be of the same type as one another; the values must also -be of one type. +Construct a `SortedMultiDict` from the given key-value pairs. +The key type and value type are inferred from the +given key-value pairs in the first two form. +The ordering is assumed to be `Forward` +ordering in the first and third forms. +The first two forms involve copying the data three times to +infer the types and so are less efficient than the third and fourth +form where `{K,V}` are specified explicitly. Time: O(*cn* log *n*) """ SortedMultiDict(ps::Pair...) = SortedMultiDict(Forward, ps) - -""" - SortedMultiDict(o, k1=>v1, k2=>v2, ...) - -The first argument `o` is an ordering object. The remaining -arguments are key-value pairs for insertion into the multidict. The -keys must be of the same type as one another; the values must also -be of one type. -""" -SortedMultiDict(o::Ordering, ps::Pair...) 
= SortedMultiDict(o, ps) SortedMultiDict{K,D}(ps::Pair...) where {K,D} = SortedMultiDict{K,D,ForwardOrdering}(Forward, ps) +SortedMultiDict(o::Ordering, ps::Pair...) = SortedMultiDict(o, ps) SortedMultiDict{K,D}(o::Ord, ps::Pair...) where {K,D,Ord<:Ordering} = SortedMultiDict{K,D,Ord}(o, ps) -# Construction from AbstractDicts -SortedMultiDict(o::Ord, d::AbstractDict{K,D}) where {K,D,Ord<:Ordering} = SortedMultiDict{K,D,Ord}(o, d) - -## Construction from iteratables of Pairs/Tuples -# Construction specifying Key/Value types -# e.g., SortedMultiDict{Int,Float64}([1=>1, 2=>2.0]) """ - SortedMultiDict{K,D}(iter) + SortedMultiDict(iter, o::Ord=Forward) where {Ord <: Ordering} + SortedMultiDict(o::Ordering, iter) + SortedMultiDict{K,V}(iter, o::Ordering=Forward) where {K, V} + SortedMultiDict{K,V}(o::Ordering, iter) where {K, V} -Takes an arbitrary iterable object of key=>value pairs with -key type `K` and value type `D`. The default Forward ordering is used. -""" -SortedMultiDict{K,D}(kv) where {K,D} = SortedMultiDict{K,D}(Forward, kv) +Construct a `SortedMultiDict` from an arbitrary iterable object of +`key=>value` pairs or (key,value) tuples with order object `o`. The key type +and value type are inferred from the given iterable in the +first two forms. The first two forms copy the +data three times, so +it is more efficient to explicitly specify `K` and `V` as in the +second two forms. Time: O(*cn* log *n*) """ - SortedMultiDict{K,D}(o, iter) +SortedMultiDict(kv, o::Ord=Forward) where {Ord <: Ordering} = + SortedMultiDict(o, kv) +SortedMultiDict{K,D}(kv, o::Ordering=Forward) where {K,D} = + SortedMultiDict{K,D, typeof(o)}(o, kv) +SortedMultiDict{K,D}(o::Ordering, kv) where {K,D} = + SortedMultiDict{K,D, typeof(o)}(o, kv) -Takes an arbitrary iterable object of key=>value pairs with -key type `K` and value type `D`. The ordering object `o` is explicitly given. 
-""" -function SortedMultiDict{K,D}(o::Ord, kv) where {K,D,Ord<:Ordering} - try - SortedMultiDict{K,D,Ord}(o, kv) - catch e - if not_iterator_of_pairs(kv) - throw(ArgumentError("SortedMultiDict(kv): kv needs to be an iterator of tuples or pairs")) - else - rethrow(e) - end +# TODO: figure out how to infer type without three copies +function SortedMultiDict(o::Ordering, kv) + c = collect(kv) + if eltype(c) <: Pair + c2 = collect((t.first, t.second) for t in c) + elseif eltype(c) <: Tuple + c2 = collect((t[1], t[2]) for t in c) + else + throw(ArgumentError("In SortedMultiDict(o,kv), kv should contain either pairs or 2-tuples")) end + SortedMultiDict{eltype(c2).parameters[1], eltype(c2).parameters[2], typeof(o)}(o, c2) end -# Construction inferring Key/Value types from input -# e.g. SortedMultiDict{} -SortedMultiDict(o1::Ordering, o2::Ordering) = throw(ArgumentError("SortedMultiDict with two parameters must be called with an Ordering and an interable of pairs")) -SortedMultiDict(kv, o::Ordering=Forward) = SortedMultiDict(o, kv) -function SortedMultiDict(o::Ordering, kv) - try - _sorted_multidict_with_eltype(o, kv, eltype(kv)) - catch e - if not_iterator_of_pairs(kv) - throw(ArgumentError("SortedMultiDict(kv): kv needs to be an iterator of tuples or pairs")) - else - rethrow(e) - end - end +""" + SortedMultiDict{K,V}(::Val{true}, iterable) where {K,V} + SortedMultiDict{K,V}(::Val{true}, iterable, ord::Ord) where {K,V,Ord<:Ordering} + +Construct a `SortedMultiDict` from an iterable whose eltype is +Tuple{K,V} or Pair{K,V} and that is already in sorted order. +The first form assumes Forward ordering. +Duplicate keys are +allowed. Time: O(*cn*).
+""" +SortedMultiDict{K,D}(::Val{true},iterable) where {K, D} = + SortedMultiDict{K,D}(Val(true), iterable, Forward) + +function SortedMultiDict{K,D}(::Val{true}, + iterable, + ord::Ord) where {K, D, Ord<:Ordering} + SortedMultiDict{K, D, Ord}(BalancedTree23{K, D, Ord}(Val(true), + iterable, + ord, + true)) end -_sorted_multidict_with_eltype(o::Ord, ps, ::Type{Pair{K,D}}) where {K,D,Ord} = SortedMultiDict{ K, D,Ord}(o, ps) -_sorted_multidict_with_eltype(o::Ord, kv, ::Type{Tuple{K,D}}) where {K,D,Ord} = SortedMultiDict{ K, D,Ord}(o, kv) -_sorted_multidict_with_eltype(o::Ord, ps, ::Type{Pair{K}} ) where {K, Ord} = SortedMultiDict{ K,Any,Ord}(o, ps) -_sorted_multidict_with_eltype(o::Ord, kv, ::Type ) where { Ord} = SortedMultiDict{Any,Any,Ord}(o, kv) -## TODO: It seems impossible (or at least very challenging) to create the eltype below. -## If deemed possible, please create a test and uncomment this definition. -# _sorted_multi_dict_with_eltype{ D,Ord}(o::Ord, ps, ::Type{Pair{K,D} where K}) = SortedMultiDict{Any, D,Ord}(o, ps) +## The following is needed to resolve ambiguities + +SortedMultiDict(::Ordering, ::Ordering) = + throw(ArgumentError("Not a valid SortedMultiDict constructor")) +SortedMultiDict{K,D}(::Ordering, ::Ordering) where {K,D} = + throw(ArgumentError("Not a valid SortedMultiDict constructor")) +SortedMultiDict(::Val{true}, ::Ordering) = + throw(ArgumentError("Not a valid SortedMultiDict constructor")) +SortedMultiDict{K,D}(::Val{true}, ::Ordering) where {K,D}= +throw(ArgumentError("Not a valid SortedMultiDict constructor")) + + + const SMDSemiToken = IntSemiToken const SMDToken = Tuple{SortedMultiDict, IntSemiToken} -## This function inserts an item into the tree. -## It returns a token that -## points to the newly inserted item. - """ - insert!(sc, k) - -Argument `sc` is a SortedDict or SortedMultiDict, `k` is a key and -`v` is the corresponding value. This inserts the `(k,v)` pair into -the container. 
If the key is already present in a SortedDict, this -overwrites the old value. In the case of SortedMultiDict, no -overwriting takes place (since SortedMultiDict allows the same key -to associate with multiple values). In the case of SortedDict, the -return value is a pair whose first entry is boolean and indicates -whether the insertion was new (i.e., the key was not previously -present) and the second entry is the semitoken of the new entry. In -the case of SortedMultiDict, a semitoken is returned (but no -boolean). Time: O(*c* log *n*) + smd_push!(smd::SortedMultiDict, pr::Pair) + +Insert the key-value pair `pr`, i.e., `k=>v`, into `smd`. +If `k` already appears as a key +in `smd`, then `k=>v` is inserted in the rightmost position after existing +items with key `k`. Unlike `push!`, `smd_push!` returns +a semitoken referring to the new item. This function replaces +the deprecated `insert!`. +Time: O(*c* log *n*) """ -@inline function Base.insert!(m::SortedMultiDict{K,D,Ord}, k_, d_) where {K, D, Ord <: Ordering} - b, i = insert!(m.bt, convert(K,k_), convert(D,d_), true) +@inline function smd_push!(m::SortedMultiDict{K,D,Ord}, pr::Pair) where {K, D, Ord <: Ordering} + b, i = insert!(m.bt, convert(K,pr.first), convert(D,pr.second), true) IntSemiToken(i) end -## push! is an alternative to insert!; it returns the container. + +@deprecate insert!(m::SortedMultiDict, k, d) smd_push!(m::SortedMultiDict, k=>d) + + """ - push!(sc, k=>v) + Base.push!(smd::SortedMultiDict, p::Pair) -Argument `sc` is a SortedDict or SortedMultiDict and `k=>v` is a -key-value pair. This inserts the key-value pair into the container. -If the key is already present, this overwrites the old value. The -return value is `sc`. Time: O(*c* log *n*) +Insert the pair `p`, i.e., a `k=>v` into `smd`. +If `k` already appears as a key +in `smd`, then `k=>v` is inserted in the rightmost position after existing +items with key `k`. Returns the container. 
+See also [`smd_push!(smd::SortedMultiDict, pr::Pair)`](@ref). +Time: O(*c* log *n*) """ @inline function Base.push!(m::SortedMultiDict{K,D}, pr::Pair) where {K,D} insert!(m.bt, convert(K,pr[1]), convert(D,pr[2]), true) @@ -184,43 +185,15 @@ return value is `sc`. Time: O(*c* log *n*) end -## First and last return the first and last (key,data) pairs -## in the SortedMultiDict. It is an error to invoke them on an -## empty SortedMultiDict. - """ - first(sc) - -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. This -function returns the first item (a `k=>v` pair for SortedDict and -SortedMultiDict or a key for SortedSet) according to the sorted -order in the container. Thus, `first(sc)` is equivalent to -`deref((sc,startof(sc)))`. It is an error to call this function on -an empty container. Time: O(log *n*) -""" -@inline function Base.first(m::SortedMultiDict) - i = beginloc(m.bt) - i == 2 && throw(BoundsError()) - return Pair(m.bt.data[i].k, m.bt.data[i].d) -end + searchequalrange(smd::SortedMultiDict, k) +Return two semitokens that correspond to the first and last +items in the SortedMultiDict that have key exactly equal +to `k`. If `k` is not found, then it returns +(pastendsemitoken(smd), beforestartsemitoken(smd)). +Time: O(*c* log *n*) """ - last(sc) - -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. This -function returns the last item (a `k=>v` pair for SortedDict and -SortedMultiDict or a key for SortedSet) according to the sorted -order in the container. Thus, `last(sc)` is equivalent to -`deref((sc,lastindex(sc)))`. It is an error to call this function on an -empty container.
Time: O(log *n*) -""" -@inline function Base.last(m::SortedMultiDict) - i = endloc(m.bt) - i == 1 && throw(BoundsError()) - return Pair(m.bt.data[i].k, m.bt.data[i].d) -end - - function searchequalrange(m::SortedMultiDict, k_) k = convert(keytype(m),k_) i1 = findkeyless(m.bt, k) @@ -234,9 +207,15 @@ function searchequalrange(m::SortedMultiDict, k_) end -## '(k,d) in m' checks whether a key-data pair is in -## a sorted multidict. This requires a loop over -## all data items whose key is equal to k. +""" + Base.eltype(sc) + +Returns the (key,value) type (a 2-entry pair, i.e., `Pair{K,V}`) for +SortedDict and SortedMultiDict. Returns the key type for SortedSet. +This function may also be applied to the type itself. Time: O(1) +""" +@inline Base.eltype(m::SortedMultiDict{K,D,Ord}) where {K,D,Ord <: Ordering} = Pair{K,D} +@inline Base.eltype(::Type{SortedMultiDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = Pair{K,D} function in_(k_, d_, m::SortedMultiDict) @@ -248,111 +227,51 @@ function in_(k_, d_, m::SortedMultiDict) ord = m.bt.ord while true i1 = nextloc0(m.bt, i1) - @assert(eq(ord, m.bt.data[i1].k, k)) + @invariant eq(ord, m.bt.data[i1].k, k) m.bt.data[i1].d == d && return true i1 == i2 && return false end end -""" - eltype(sc) -Returns the (key,value) type (a 2-entry pair, i.e., `Pair{K,V}`) for -SortedDict and SortedMultiDict. Returns the key type for SortedSet. -This function may also be applied to the type itself. Time: O(1) """ -@inline Base.eltype(m::SortedMultiDict{K,D,Ord}) where {K,D,Ord <: Ordering} = Pair{K,D} -@inline Base.eltype(::Type{SortedMultiDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = Pair{K,D} + Base.in(p::Pair, smd::SortedMultiDict) -""" - in(p, sc) - -Returns true if `p` is in `sc`. In the case that `sc` is a -SortedDict or SortedMultiDict, `p` is a key=>value pair. In the -case that `sc` is a SortedSet, `p` should be a key. Time: O(*c* log -*n* + *d*) for SortedDict and SortedSet, where *d* stands for the -time to compare two values. 
In the case of SortedMultiDict, the time -is O(*c* log *n* + *dl*), and *l* stands for the number of entries +Return true if `p` is in `smd`. Here, `p` is a key=>value pair. +The time is +O(*c* log *n* + *dl*) where *d* is the time +to compare two values and *l* stands for the number of entries that have the key of the given pair. (So therefore this call is inefficient if the same key addresses a large number of values, and an alternative should be considered.) """ @inline Base.in(pr::Pair, m::SortedMultiDict) = in_(pr[1], pr[2], m) -@inline Base.in(::Tuple{Any,Any}, ::SortedMultiDict) = - throw(ArgumentError("'(k,v) in sortedmultidict' not supported in Julia 0.4 or 0.5. See documentation")) - -""" - keytype(sc) -Returns the key type for SortedDict, SortedMultiDict and SortedSet. -This function may also be applied to the type itself. Time: O(1) -""" @inline Base.keytype(m::SortedMultiDict{K,D,Ord}) where {K,D,Ord <: Ordering} = K @inline Base.keytype(::Type{SortedMultiDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = K - -""" - valtype(sc) - -Returns the value type for SortedDict and SortedMultiDict. This -function may also be applied to the type itself. Time: O(1) -""" @inline Base.valtype(m::SortedMultiDict{K,D,Ord}) where {K,D,Ord <: Ordering} = D @inline Base.valtype(::Type{SortedMultiDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = D -""" - ordtype(sc) - -Returns the order type for SortedDict, SortedMultiDict and -SortedSet. This function may also be applied to the type itself. -Time: O(1) -""" -@inline ordtype(m::SortedMultiDict{K,D,Ord}) where {K,D,Ord <: Ordering} = Ord -@inline ordtype(::Type{SortedMultiDict{K,D,Ord}}) where {K,D,Ord <: Ordering} = Ord - -""" - orderobject(sc) -Returns the order object used to construct the container. Time: O(1) """ -@inline orderobject(m::SortedMultiDict) = m.bt.ord - -""" - haskey(sc,k) - -Returns true if key `k` is present for SortedDict, SortedMultiDict -or SortedSet `sc`.
For SortedSet, `haskey(sc,k)` is a synonym for -`in(k,sc)`. For SortedDict and SortedMultiDict, `haskey(sc,k)` is -equivalent to `in(k,keys(sc))`. Time: O(*c* log *n*) -""" -@inline function Base.haskey(m::SortedMultiDict, k_) - i, exactfound = findkey(m.bt,convert(keytype(m),k_)) - return exactfound -end - + Base.isequal(smd1::SortedMultiDict{K,V,Ord}, smd2::SortedMultiDict{K,V,Ord}) where {K, V, Ord <: Ordering} + -## Check if two SortedMultiDicts are equal in the sense of containing -## the same (K,D) pairs in the same order. This sense of equality does not mean -## that semitokens valid for one are also valid for the other. -""" - isequal(sc1,sc2) - -Checks if two containers are equal in the sense that they contain -the same items; the keys are compared using the `eq` method, while -the values are compared with the `isequal` function. In the case of -SortedMultiDict, equality requires that the values associated with a -particular key have same order (that is, the same insertion order). +Check if two SortedMultiDicts are equal in the sense that they contain +the same items in the same order (that is, the same insertion order). +They must have the same order object, else they compare unequal. +The keys are compared using the `eq` method, while +the values are compared with the `isequal` function. Note that `isequal` in this sense does not imply any correspondence -between semitokens for items in `sc1` with those for `sc2`. If the -equality-testing method associated with the keys and values implies -hash-equivalence in the case of SortedDict, then `isequal` of the -entire containers implies hash-equivalence of the containers. Time: -O(*cn* + *n* log *n*) +between semitokens for items in `smd1` with those for `smd2`. 
+Time: O(*cn*) """ -function Base.isequal(m1::SortedMultiDict, m2::SortedMultiDict) +function Base.isequal(m1::SortedMultiDict{K, D, Ord}, + m2::SortedMultiDict{K, D, Ord}) where {K, D, Ord <: Ordering} ord = orderobject(m1) - if !isequal(ord, orderobject(m2)) || !isequal(eltype(m1), eltype(m2)) - throw(ArgumentError("Cannot use isequal for two SortedMultiDicts unless their element types and ordering objects are equal")) + if ord != orderobject(m2) + return false end p1 = startof(m1) p2 = startof(m2) @@ -363,115 +282,130 @@ function Base.isequal(m1::SortedMultiDict, m2::SortedMultiDict) if p2 == pastendsemitoken(m2) return false end - k1,d1 = deref((m1,p1)) - k2,d2 = deref((m2,p2)) - if !eq(ord,k1,k2) || !isequal(d1,d2) - return false - end - p1 = advance((m1,p1)) - p2 = advance((m2,p2)) + k1,d1 = deref_nocheck((m1,p1)) + k2,d2 = deref_nocheck((m2,p2)) + (!eq(ord,k1,k2) || !isequal(d1,d2)) && return false + p1 = advance_nocheck((m1,p1)) + p2 = advance_nocheck((m2,p2)) end end -const SDorAbstractDict = Union{AbstractDict,SortedMultiDict} -function mergetwo!(m::SortedMultiDict{K,D,Ord}, - m2::SDorAbstractDict) where {K,D,Ord <: Ordering} - for (k,v) in m2 +function mergetwo!(m::SortedMultiDict{K,D,Ord}, iterable) where {K,D,Ord <: Ordering} + for (k,v) in iterable insert!(m.bt, convert(K,k), convert(D,v), true) end end -# Standard copy functions use packcopy - that is, they retain elements but not -# the identical structure. Base.copymutable(m::SortedMultiDict) = packcopy(m) Base.copy(m::SortedMultiDict) = packcopy(m) -""" - packcopy(sc) -This returns a copy of `sc` in which the data is packed. When -deletions take place, the previously allocated memory is not -returned. This function can be used to reclaim memory after many -deletions. 
Time: O(*cn* log *n*) -""" +# See sorted_set.jl for the docstrings on packcopy and packdeepcopy function packcopy(m::SortedMultiDict{K,D,Ord}) where {K,D,Ord <: Ordering} - w = SortedMultiDict{K,D}(orderobject(m)) - mergetwo!(w,m) - return w + SortedMultiDict{K, D}(Val(true), m, orderobject(m)) end -""" - packdeepcopy(sc) - -This returns a packed copy of `sc` in which the keys and values are -deep-copied. This function can be used to reclaim memory after many -deletions. Time: O(*cn* log *n*) -""" function packdeepcopy(m::SortedMultiDict{K,D,Ord}) where {K,D,Ord <: Ordering} - w = SortedMultiDict{K,D}(orderobject(m)) - for (k,v) in m - insert!(w.bt, deepcopy(k), deepcopy(v), true) + SortedMultiDict{K, D}(Val(true), deepcopy(m), orderobject(m)) +end + + +struct MergeManySortedMultiDicts{K, D, Ord <: Ordering} + vec::Vector{SortedMultiDict{K,D,Ord}} +end + +function Base.iterate(sds::MergeManySortedMultiDicts{K, D, Ord}, + state = [startof(sds.vec[k]) for k=1:length(sds.vec)]) where +{K, D, Ord <: Ordering} + ord = orderobject(sds.vec[1]) + firsti = 0 + N = length(sds.vec) + for i = 1 : N + if state[i] != pastendsemitoken(sds.vec[i]) + firsti = i + break + end end - return w + firsti == 0 && return nothing + foundi = firsti + firstk = deref_key_nocheck((sds.vec[firsti], state[firsti])) + for i = firsti + 1 : N + if state[i] != pastendsemitoken(sds.vec[i]) + k2 = deref_key_nocheck((sds.vec[i], state[i])) + if lt(ord, k2, firstk) + foundi = i + firstk = k2 + end + end + end + foundsemitoken = state[foundi] + state[foundi] = advance_nocheck((sds.vec[foundi], foundsemitoken)) + (deref_nocheck((sds.vec[foundi], foundsemitoken)), state) end """ - merge!(sc, sc1...) - -This updates `sc` by merging SortedDicts or SortedMultiDicts `sc1`, -etc. into `sc`. These must all must have the same key-value types. -In the case of keys duplicated among the arguments, the rightmost -argument that owns the key gets its value stored for SortedDict. 
In -the case of SortedMultiDict all the key-value pairs are stored, and -for overlapping keys the ordering is left-to-right. This function is -not available for SortedSet, but the `union!` function (see below) -provides equivalent functionality. Time: O(*cN* log *N*), where *N* + Base.merge!(smd::SortedMultiDict, iter...) + +Merge one or more iterables `iter`, etc. into `smd`. +These must all have the same key-value types. +Items with equal keys are stored +with left-to-right ordering. Time: O(*cN* log *N*), where *N* is the total size of all the arguments. """ function Base.merge!(m::SortedMultiDict{K,D,Ord}, - others::SDorAbstractDict...) where {K,D,Ord <: Ordering} + others...) where {K,D,Ord <: Ordering} for o in others mergetwo!(m,o) end end + """ - merge(sc1, sc2...) - -This returns a SortedDict or SortedMultiDict that results from -merging SortedDicts or SortedMultiDicts `sc1`, `sc2`, etc., which -all must have the same key-value-ordering types. In the case of keys -duplicated among the arguments, the rightmost argument that owns the -key gets its value stored for SortedDict. In the case of -SortedMultiDict all the key-value pairs are stored, and for keys -shared between `sc1` and `sc2` the ordering is left-to-right. This -function is not available for SortedSet, but the `union` function -(see below) provides equivalent functionality. Time: O(*cN* log -*N*), where *N* is the total size of all the arguments. + Base.merge(smd::SortedMultiDict, iter...) + +Merge `smd` and one or more iterables and return +the resulting new SortedMultiDict. +The iterables must have +the same key-value type as `smd`. +Items with equal keys are stored +with left-to-right ordering. +Time: O(*cN* log +*N*), where *N* is the total size of all the arguments. If all +the arguments are SortedMultiDicts with the same +key, value, and order object, then the time is O(*cN*). """ function Base.merge(m::SortedMultiDict{K,D,Ord}, - others::SDorAbstractDict...)
where {K,D,Ord <: Ordering} + others...) where {K,D,Ord <: Ordering} result = packcopy(m) merge!(result, others...) return result end -function Base.show(io::IO, m::SortedMultiDict{K,D,Ord}) where {K,D,Ord <: Ordering} - print(io, "SortedMultiDict(") - print(io, orderobject(m), ",") - l = length(m) - for (count,(k,v)) in enumerate(m) - print(io, k, " => ", v) - if count < l - print(io, ", ") +function Base.merge(m::SortedMultiDict{K,D,Ord}, + others::SortedMultiDict{K,D,Ord}...) where {K, D, Ord <: Ordering} + sds = MergeManySortedMultiDicts{K, D, Ord}(SortedMultiDict{K,D,Ord}[m]) + for sd in others + if orderobject(sd) != orderobject(m) + return invoke(merge, Tuple{SortedMultiDict, Vararg{Any}}, + m, others...) end + push!(sds.vec, sd) end + SortedMultiDict{K, D}(Val(true), sds, orderobject(m)) +end + +function Base.show(io::IO, m::SortedMultiDict{K,D,Ord}) where {K,D,Ord <: Ordering} + print(io, "SortedMultiDict{", K, + ",\n ", D, + ",\n ", Ord, "}(") + print(io, orderobject(m), ",\n") + print(io, collect(m)) print(io, ")") end """ - empty(sc) + Base.empty(sc) Returns a new `SortedDict`, `SortedMultiDict`, or `SortedSet` of the same type and with the same ordering as `sc` but with no entries (i.e., diff --git a/src/sorted_set.jl b/src/sorted_set.jl index e79a65456..b19ebb1bc 100644 --- a/src/sorted_set.jl +++ b/src/sorted_set.jl @@ -1,129 +1,150 @@ ## A SortedSet is a wrapper around balancedTree with ## methods similiar to those of the julia Set. +mutable struct SortedSet{K, Ord <: Ordering} <: AbstractSet{K} + bt::BalancedTree23{K,Nothing,Ord} +end -""" - SortedSet(iter, o=Forward) -and - `SortedSet{K}(iter, o=Forward)` -and - `SortedSet(o, iter)` -and - `SortedSet{K}(o, iter)` -Construct a SortedSet using keys given by iterable `iter` (e.g., an -array) and ordering object `o`. The ordering object defaults to -`Forward` if not specified. 
""" -mutable struct SortedSet{K, Ord <: Ordering} <: AbstractSet{K} - bt::BalancedTree23{K,Nothing,Ord} - - function SortedSet{K,Ord}(o::Ord=Forward, iter=Tuple{}()) where {K,Ord<:Ordering} - sorted_set = new{K,Ord}(BalancedTree23{K,Nothing,Ord}(o)) + SortedSet{K,Ord}(o::Ord=Forward) where {K, Ord<:Ordering} + SortedSet{K,Ord}(o::Ord, iter) where {K, Ord<:Ordering} - for item in iter - push!(sorted_set, item) - end +Construct a SortedSet of eltype `K`using from elements +produced by iterable `iter` (e.g., an +array) and ordering object `o`. Running time: O(*cn* log *n*) +where *n* is the length of iterable. +""" +SortedSet{K,Ord}(o::Ord=Forward) where{K,Ord<:Ordering} = + SortedSet{K,Ord}(BalancedTree23{K,Nothing,Ord}(o)) - return sorted_set +function SortedSet{K,Ord}(o::Ord, iter) where {K,Ord<:Ordering} + sorted_set = SortedSet{K,Ord}(BalancedTree23{K,Nothing,Ord}(o)) + for item in iter + push!(sorted_set, item) end + return sorted_set end -""" - SortedSet() - -Construct a `SortedSet{Any}` with `Forward` ordering. -**Note that a key type of `Any` or any other abstract type will lead -to slow performance.** """ -SortedSet() = SortedSet{Any,ForwardOrdering}(Forward) + SortedSet(o::Ord=Forward) where {Ord <: Ordering} + SortedSet{K}(o::Ord=Forward) where {K, Ord<:Ordering} -""" - SortedSet(o) -Construct a `SortedSet{Any}` with `o` ordering. +Construct an empty `SortedSet` with `Forward` ordering. The first form +assumes element type of `Any`. Time: O(1). 
-**Note that a key type of `Any` or any other abstract type will lead +**Note that an element type of `Any` or any other abstract type will lead to slow performance.** """ -SortedSet(o::O) where {O<:Ordering} = SortedSet{Any,O}(o) +SortedSet(o::Ord=Forward) where {Ord<:Ordering} = SortedSet{Any,Ord}(o) +SortedSet{K}(o::Ord=Forward) where {K,Ord <: Ordering} = + SortedSet{K,Ord}(o) -# To address ambiguity warnings on Julia v0.4 -SortedSet(o1::Ordering, o2::Ordering) = - throw(ArgumentError("SortedSet with two parameters must be called with an Ordering and an interable")) -SortedSet(o::Ordering, iter) = sortedset_with_eltype(o, iter, eltype(iter)) -SortedSet(iter, o::Ordering=Forward) = sortedset_with_eltype(o, iter, eltype(iter)) """ - SortedSet{K}() + SortedSet(o::Ordering, iter) + SortedSet(iter, o::Ordering=Forward) + SortedSet{K}(o::Ordering, iter) + SortedSet{K}(iter, o::Ordering=Forward) -Construct a `SortedSet` of keys of type `K` with `Forward` ordering. +Construct a sorted set from an iterable `iter` using order `o`. In +the first two forms, the element type is inferred from the +iterable, which requires copying the data twice. Therefore, +the second two forms (specifying `K` explicitly) are more efficient. +Time: O(*cn* log *n*) """ -SortedSet{K}() where {K} = SortedSet{K,ForwardOrdering}(Forward) +function SortedSet(o::Ordering, iter) + c = collect(iter) + SortedSet{eltype(c),typeof(o)}(o, c) +end +SortedSet(iter, o::Ordering=Forward) = SortedSet(o, iter) +SortedSet{K}(o::Ordering, iter) where {K} = SortedSet{K,typeof(o)}(o, iter) +SortedSet{K}(iter, o::Ordering=Forward) where {K} = SortedSet{K}(o, iter) + + """ - SortedSet{K}(o) + SortedSet{K}(::Val{true}, iterable) where {K} + SortedSet{K}(::Val{true}, iterable, ord::Ord) where {K, Ord <: Ordering} -Construct a `SortedSet` of keys of type `K` with ordering given according -`o` parameter. +Construct a `SortedSet` from an iterable whose entries +have type `K` and that is already in sorted order.
No duplicates +allowed. The first form assumes Forward ordering. +Time: O(*cn*). """ -SortedSet{K}(o::O) where {K,O<:Ordering} = SortedSet{K,O}(o) +SortedSet{K}(::Val{true}, iterable) where {K} = + SortedSet{K}(Val(true), iterable, Forward) +function SortedSet{K}(::Val{true}, + iterable, + ord::Ord) where {K, Ord <: Ordering} + SortedSet{K, Ord}(BalancedTree23{K,Nothing,Ord}(Val(true), + ((k,nothing) for k in iterable), + ord, + false)) +end -# To address ambiguity warnings on Julia v0.4 -SortedSet{K}(o1::Ordering,o2::Ordering) where {K} = - throw(ArgumentError("SortedSet with two parameters must be called with an Ordering and an interable")) -SortedSet{K}(o::Ordering, iter) where {K} = sortedset_with_eltype(o, iter, K) -SortedSet{K}(iter, o::Ordering=Forward) where {K} = sortedset_with_eltype(o, iter, K) +# The following is needed to resolve ambiguities + +SortedSet{K}(::Val{true}, ::Ordering) where {K} = + throw(ArgumentError("Not a valid SortedSet constructor")) +SortedSet(::Ordering, ::Ordering) = + throw(ArgumentError("Not a valid SortedSet constructor")) +SortedSet{K}(::Ordering,::Ordering) where {K} = + throw(ArgumentError("Not a valid SortedSet constructor")) -sortedset_with_eltype(o::Ord, iter, ::Type{K}) where {K,Ord} = SortedSet{K,Ord}(o, iter) const SetSemiToken = IntSemiToken -# The following definition was moved to tokens2.jl -# const SetToken = Tuple{SortedSet, IntSemiToken} -## This function looks up a key in the tree; -## if not found, then it returns a marker for the -## end of the tree. +""" + findkey(m::SortedSet, k) +Return the semitoken of the element `k` in sorted set `m`. +If the element is not present in `m`, then the past-end semitoken +is returned. Time: O(*c* log *n*) +""" @inline function findkey(m::SortedSet, k_) ll, exactfound = findkey(m.bt, convert(keytype(m),k_)) IntSemiToken(exactfound ? ll : 2) end -## This function inserts an item into the tree. -## It returns a bool and a token. -## The bool is true if the inserted item is new. 
-## It is false if there was already an item -## with that key. -## The token points to the newly inserted item. """ - insert!(sc, k) + ss_push!(ss::SortedSet, k) -Argument `sc` is a SortedSet and `k` is a key. This inserts the key -into the container. If the key is already present, this overwrites -the old value. (This is not necessarily a no-op; see below for -remarks about the customizing the sort order.) The return value is a +Insert the element `k` +into the SortedSet `ss`. +If `k` is already present, this overwrites +the old value. (This is not necessarily a no-op; see the remarks about +customizing the sort order.) Unlike `push!`, +the return value is a pair whose first entry is boolean and indicates whether the insertion was new (i.e., the key was not previously present) and the -second entry is the semitoken of the new entry. Time: O(*c* log *n*) +second entry is the semitoken of the new entry. This function +replaces the deprecated `insert!`. +Time: O(*c* log *n*) """ -@inline function Base.insert!(m::SortedSet, k_) +@inline function ss_push!(m::SortedSet, k_) b, i = insert!(m.bt, convert(keytype(m),k_), nothing, false) return b, IntSemiToken(i) end -## push! is similar to insert but returns the set +@deprecate insert!(m::SortedSet, k) ss_push!(m::SortedSet, k) + + + """ - push!(sc, k) + Base.push!(ss::SortedSet, k) -Argument `sc` is a SortedSet and `k` is a key. This inserts the key -into the container. If the key is already present, this overwrites -the old value. (This is not necessarily a no-op; see below for -remarks about the customizing the sort order.) The return value is +Insert the element `k` into the sorted set `ss`. +If `k` is already present, this overwrites +the old value. (This is not necessarily a no-op; see the remarks about +customizing the sort order.) +See also [`ss_push!(ss::SortedSet, k)`](@ref). +The return value is
Time: O(*c* log *n*) """ @inline function Base.push!(m::SortedSet, k_) @@ -132,100 +153,34 @@ remarks about the customizing the sort order.) The return value is end -## First and last return the first and last (key,data) pairs -## in the SortedDict. It is an error to invoke them on an -## empty SortedDict. """ - first(sc) + Base.in(k,m::SortedSet) -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. This -function returns the first item (a `k=>v` pair for SortedDict and -SortedMultiDict or a key for SortedSet) according to the sorted -order in the container. Thus, `first(sc)` is equivalent to -`deref((sc,startof(sc)))`. It is an error to call this function on -an empty container. Time: O(log *n*) +Return `true` iff +element `k` is in sorted set `m` is a sorted set. +Unlike the `in` function for +`Set`, this routine will thrown an error if `k` is not +convertible to `eltype(m)`. Time: O(*c* log *n*) """ -@inline function Base.first(m::SortedSet) - i = beginloc(m.bt) - i == 2 && throw(BoundsError()) - return m.bt.data[i].k -end - -""" - last(sc) - -Argument `sc` is a SortedDict, SortedMultiDict or SortedSet. This -function returns the last item (a `k=>v` pair for SortedDict and -SortedMultiDict or a key for SortedSet) according to the sorted -order in the container. Thus, `last(sc)` is equivalent to -`deref((sc,lastindex(sc)))`. It is an error to call this function on an -empty container. Time: O(log *n*) -""" -@inline function Base.last(m::SortedSet) - i = endloc(m.bt) - i == 1 && throw(BoundsError()) - return m.bt.data[i].k -end - - @inline function Base.in(k_, m::SortedSet) i, exactfound = findkey(m.bt, convert(keytype(m),k_)) return exactfound end -""" - eltype(sc) - -Returns the key type for SortedSet. -This function may also be applied to the type itself. Time: O(1) -""" @inline Base.eltype(::Type{SortedSet{K,Ord}}) where {K,Ord <: Ordering} = K - -""" - keytype(sc) - -Returns the key type for SortedDict, SortedMultiDict and SortedSet. 
-This function may also be applied to the type itself. Time: O(1) -""" @inline Base.keytype(m::SortedSet{K,Ord}) where {K,Ord <: Ordering} = K @inline Base.keytype(::Type{SortedSet{K,Ord}}) where {K,Ord <: Ordering} = K -""" - ordtype(sc) - -Returns the order type for SortedDict, SortedMultiDict and -SortedSet. This function may also be applied to the type itself. -Time: O(1) -""" -@inline ordtype(m::SortedSet{K,Ord}) where {K,Ord <: Ordering} = Ord -@inline ordtype(::Type{SortedSet{K,Ord}}) where {K,Ord <: Ordering} = Ord - -""" - orderobject(sc) - -Returns the order object used to construct the container. Time: O(1) -""" -@inline orderobject(m::SortedSet) = m.bt.ord - -""" - haskey(sc,k) -Returns true if key `k` is present for SortedDict, SortedMultiDict -or SortedSet `sc`. For SortedSet, `haskey(sc,k)` is a synonym for -`in(k,sc)`. For SortedDict and SortedMultiDict, `haskey(sc,k)` is -equivalent to `in(k,keys(sc))`. Time: O(*c* log *n*) -""" -Base.haskey(m::SortedSet, k_) = in(k_, m) """ - delete!(sc, k) + Base.delete!(ss::SortedSet, k) -Argument `sc` is a SortedDict or SortedSet and `k` is a key. This -operation deletes the item whose key is `k`. It is a `KeyError` if -`k` is not a key of an item in the container. After this operation -is complete, any token addressing the deleted item is invalid. -Returns `sc`. Time: O(*c* log *n*) +Delete element `k` from `sc`. After this operation +is complete, a token addressing the deleted item is invalid. +Returns `sc`. if `k` is not present, this operation is a no-op. +Time: O(*c* log *n*) """ @inline function Base.delete!(m::SortedSet, k_) i, exactfound = findkey(m.bt,convert(keytype(m),k_)) @@ -237,11 +192,13 @@ end """ - pop!(sc, k[, default]) + Base.pop!(ss::SortedSet, k) + Base.pop!(ss::SortedSet, k, default) -Deletes the item with key `k` in SortedDict or SortedSet `sc` and -returns the value that was associated with `k` in the case of -SortedDict or `k` itself in the case of SortedSet. 
If `k` is not in `sc` +Delete the item with key `k` in `ss` and +return the item that compares equal to `k` according to the +sort order (which is not necessarily `k`, since equality in the +sort-order does not necessarily imply hash-equality). If `k` is not found, return `default`, or throw a `KeyError` if `default` is not specified. Time: O(*c* log *n*) """ @@ -249,27 +206,27 @@ Time: O(*c* log *n*) k = convert(keytype(m),k_) i, exactfound = findkey(m.bt, k) !exactfound && throw(KeyError(k_)) - d = m.bt.data[i].d + k2 = m.bt.data[i].k delete!(m.bt, i) - return k + return k2 end @inline function Base.pop!(m::SortedSet, k_, default) k = convert(keytype(m),k_) i, exactfound = findkey(m.bt, k) !exactfound && return default - d = m.bt.data[i].d delete!(m.bt, i) return k end """ - pop!(ss) + Base.popfirst!(ss::SortedSet) -Deletes the item with first key in SortedSet `ss` and returns the -key. A `BoundsError` results if `ss` is empty. Time: O(*c* log *n*) +Delete the item with first key in SortedSet `ss` and returns the +key. This function was named `pop!` in a previous version of the package. + A `BoundsError` results if `ss` is empty. Time: O(log *n*) """ -@inline function Base.pop!(m::SortedSet) +@inline function Base.popfirst!(m::SortedSet) i = beginloc(m.bt) i == 2 && throw(BoundsError()) k = m.bt.data[i].k @@ -277,71 +234,132 @@ key. A `BoundsError` results if `ss` is empty. Time: O(*c* log *n*) return k end +Base.pop!(m::SortedSet) = error("pop!(::SortedSet) is disabled in this version; refer to popfirst! and `poplast! in the docs") + + + +""" + poplast!(ss::SortedSet) + +Delete the item with last key in SortedSet `ss` and returns the +key. A `BoundsError` results if `ss` is empty. This function will +be renamed `Base.pop!` in a future version of the package. 
+Time: O(log *n*) +""" +@inline function poplast!(m::SortedSet) + i = endloc(m.bt) + i == 2 && throw(BoundsError()) + k = m.bt.data[i].k + delete!(m.bt, i) + return k +end + + ## Check if two SortedSets are equal in the sense of containing ## the same K entries. This sense of equality does not mean ## that semitokens valid for one are also valid for the other. """ - isequal(sc1,sc2) - -Checks if two containers are equal in the sense that they contain -the same items; the keys are compared using the `eq` method, while -the values are compared with the `isequal` function. In the case of -SortedMultiDict, equality requires that the values associated with a -particular key have same order (that is, the same insertion order). -Note that `isequal` in this sense does not imply any correspondence -between semitokens for items in `sc1` with those for `sc2`. If the -equality-testing method associated with the keys and values implies -hash-equivalence in the case of SortedDict, then `isequal` of the -entire containers implies hash-equivalence of the containers. Time: -O(*cn* + *n* log *n*) -""" -function Base.isequal(m1::SortedSet, m2::SortedSet) + Base.isequal(ss1::SortedSet{K,Ord}, ss2::SortedSet{K,Ord}) where {K,Ord <: Ordering} + Base.issetequal(ss1::SortedSet{K,Ord}, ss2::SortedSet{K,Ord}) where {K,Ord <: Ordering} + +Check if two sorted sets are equal in the sense that they contain +the same items. +Note that `isequal` in this sense does not imply correspondence +between semitokens for items in `ss1` with those for `ss2`. Time: +O(*cn*) where *n* is the size of the smaller container. +If the two sorted sets have different `K`, different `Ord`, or +different order objects, then a +fallback routine `isequal(::AbstractSet, ::AbstractSet)` is invoked.
+""" +function Base.isequal(m1::SortedSet{K, Ord}, m2::SortedSet{K, Ord}) where {K, Ord <: Ordering} ord = orderobject(m1) - if !isequal(ord, orderobject(m2)) || !isequal(eltype(m1), eltype(m2)) - throw(ArgumentError("Cannot use isequal for two SortedSets unless their element types and ordering objects are equal")) + if ord != orderobject(m2) + return invoke(issetequal, Tuple{AbstractSet, AbstractSet}, m1, m2) end p1 = startof(m1) p2 = startof(m2) while true - if p1 == pastendsemitoken(m1) - return p2 == pastendsemitoken(m2) - end - if p2 == pastendsemitoken(m2) - return false - end - k1 = deref((m1,p1)) - k2 = deref((m2,p2)) - if !eq(ord,k1,k2) - return false - end - p1 = advance((m1,p1)) - p2 = advance((m2,p2)) + p1 == pastendsemitoken(m1) && return p2 == pastendsemitoken(m2) + p2 == pastendsemitoken(m2) && return false + k1 = deref_nocheck((m1,p1)) + k2 = deref_nocheck((m2,p2)) + !eq(ord,k1,k2) && return false + p1 = advance_nocheck((m1,p1)) + p2 = advance_nocheck((m2,p2)) end end + +Base.issetequal(m1::SortedSet, m2::SortedSet) = isequal(m1::SortedSet, m2::SortedSet) + + """ - union!(ss, iterable) + Base.union!(ss::SortedSet, iterable...) -This function inserts each item from the second argument (which must +Insert each item among the second and following +arguments (which must be iterable) into the SortedSet `ss`. The items must be convertible to -the key-type of `ss`. Time: O(*ci* log *n*) where *i* is the number -of items in the iterable argument. +the key-type of `ss`. Time: O(*cN* log *N*) where *N* is the total number +of items in the iterable arguments. """ -function Base.union!(m1::SortedSet{K,Ord}, iterable_item) where {K, Ord <: Ordering} - for k in iterable_item - push!(m1,convert(K,k)) +function Base.union!(m1::SortedSet, iterable...) 
+ for iter in iterable + for k in iter + push!(m1,convert(eltype(m1),k)) + end end return m1 end +struct UnionManySortedSets{K, Ord <: Ordering} + vec::Vector{SortedSet{K, Ord}} +end + +function Base.iterate(ss::UnionManySortedSets{K, Ord}, + state = [startof(ss.vec[k]) for k=1:length(ss.vec)]) where +{K, Ord <: Ordering} + ord = orderobject(ss.vec[1]) + N = length(ss.vec) + firsti = 0 + for i = 1 : N + if state[i] != pastendsemitoken(ss.vec[i]) + firsti = i + break + end + end + firsti == 0 && return nothing + foundi = firsti + firstk = deref_nocheck((ss.vec[firsti], state[firsti])) + for i = firsti + 1 : N + if state[i] != pastendsemitoken(ss.vec[i]) + k2 = deref_nocheck((ss.vec[i], state[i])) + if !lt(ord, firstk, k2) + foundi = i + firstk = k2 + end + end + end + for i = firsti : N + if state[i] != pastendsemitoken(ss.vec[i]) && + eq(ord, deref_nocheck((ss.vec[i], state[i])), firstk) + state[i] = advance_nocheck((ss.vec[i], state[i])) + end + end + (firstk, state) +end + + + """ - union(ss, iterable...) + Base.union(ss::SortedSet, iterable...) + +Compute and return +the union of a sorted set and one or more iterables. They must have the same +keytype. If they are all sorted sets with the same order object, then the +required time is O(*cn*), where *n* is the total size. If not, +then the fallback routine requires time O(*cn* log *n*). -This function creates a new SortedSet (the return argument) and -inserts each item from `ss` and each item from each iterable -argument into the returned SortedSet. Time: O(*cn* log *n*) where -*n* is the total number of items in all the arguments. """ function Base.union(m1::SortedSet, others...) mr = packcopy(m1) @@ -351,227 +369,363 @@ function Base.union(m1::SortedSet, others...) return mr end -function intersect2(m1::SortedSet{K, Ord}, m2::SortedSet{K, Ord}) where {K, Ord <: Ordering} +function Base.union(s1::SortedSet{K,Ord}, others::SortedSet{K,Ord}...) 
where +{K, Ord <: Ordering} + ss = UnionManySortedSets{K, Ord}(SortedSet{K,Ord}[s1]) + for s in others + if orderobject(s) != orderobject(s1) + return invoke(union, + Tuple{SortedSet, Vararg{Any}}, + s1, others...) + end + push!(ss.vec, s) + end + SortedSet{K}(Val(true), ss, orderobject(s1)) +end + + + + + +struct IntersectTwoSortedSets{K, Ord <: Ordering} + m1::SortedSet{K,Ord} + m2::SortedSet{K,Ord} +end + +struct TwoSortedSets_State + p1::IntSemiToken + p2::IntSemiToken +end + +function Base.iterate(twoss::IntersectTwoSortedSets, + state = TwoSortedSets_State(startof(twoss.m1), + startof(twoss.m2))) + m1 = twoss.m1 + m2 = twoss.m2 ord = orderobject(m1) - mi = SortedSet(K[], ord) - p1 = startof(m1) - p2 = startof(m2) - while true - if p1 == pastendsemitoken(m1) || p2 == pastendsemitoken(m2) - return mi + p1 = state.p1 + p2 = state.p2 + while p1 != pastendsemitoken(m1) && p2 != pastendsemitoken(m2) + k1 = deref_nocheck((m1, p1)) + k2 = deref_nocheck((m2, p2)) + if lt(ord, k1, k2) + p1 = advance_nocheck((m1, p1)) + continue end - k1 = deref((m1,p1)) - k2 = deref((m2,p2)) - if lt(ord,k1,k2) - p1 = advance((m1,p1)) - elseif lt(ord,k2,k1) - p2 = advance((m2,p2)) - else - push!(mi,k1) - p1 = advance((m1,p1)) - p2 = advance((m2,p2)) + if lt(ord, k2, k1) + p2 = advance_nocheck((m2, p2)) + continue end + return (k1, TwoSortedSets_State(advance_nocheck((m1, p1)), + advance_nocheck((m2, p2)))) + end + return nothing +end + + +function intersect2(m1::SortedSet{K, Ord}, m2::SortedSet{K, Ord}) where {K, Ord <: Ordering} + if orderobject(m1) != orderobject(m2) + return invoke(intersect2, Tuple{SortedSet{K, Ord}, Any}, m1, m2) + end + SortedSet{K}(Val(true), + IntersectTwoSortedSets(m1, m2), + orderobject(m1)) +end + + +function intersect2(m1::SortedSet{K,Ord}, iterable) where {K, Ord <: Ordering} + mi = SortedSet{K, Ord}(orderobject(m1)) + for k_ in iterable + k = convert(K,k_) + k in m1 && push!(mi, k) end + mi end + + + """ - intersect(ss, others...) 
+ Base.intersect(ss::SortedSet, others...) + +Intersect SortedSets with other SortedSets or other iterables and return +the intersection as a new SortedSet. +Time: O(*cn*), where *n* is the +total number of items in all the arguments if all the arguments are +SortedSets of the same type and same order object. Otherwise, the +time is O(*cn* log *n*) -Each argument is a SortedSet with the same key and order type. The -return variable is a new SortedSet that is the intersection of all -the sets that are input. Time: O(*cn* log *n*), where *n* is the -total number of items in all the arguments. """ -function Base.intersect(m1::SortedSet{K,Ord}, others::SortedSet{K,Ord}...) where {K, Ord <: Ordering} +function Base.intersect(m1::SortedSet{K,Ord}, others...) where {K, Ord <: Ordering} + if length(others) == 0 + return packcopy(m1) + end + mi = intersect2(m1, others[1]) + for s2 = others[2:end] + mi = intersect2(mi, s2) + end + mi +end + + +struct SymdiffTwoSortedSets{K,Ord <: Ordering} + m1::SortedSet{K,Ord} + m2::SortedSet{K,Ord} +end + +function Base.iterate(twoss::SymdiffTwoSortedSets, + state = TwoSortedSets_State(startof(twoss.m1), + startof(twoss.m2))) + m1 = twoss.m1 + m2 = twoss.m2 ord = orderobject(m1) - for s2 in others - if !isequal(ord, orderobject(s2)) - throw(ArgumentError("Cannot intersect two SortedSets unless their ordering objects are equal")) + p1 = state.p1 + p2 = state.p2 + while true + m1end = p1 == pastendsemitoken(m1) + m2end = p2 == pastendsemitoken(m2) + if m1end && m2end + return nothing end - end - if length(others) == 0 - return m1 - else - mi = intersect2(m1, others[1]) - for s2 = others[2:end] - mi = intersect2(mi, s2) + if m1end + return (deref_nocheck((m2, p2)), + TwoSortedSets_State(p1, advance_nocheck((m2,p2)))) + end + if m2end + return (deref_nocheck((m1, p1)), + TwoSortedSets_State(advance_nocheck((m1,p1)), p2)) + end + k1 = deref_nocheck((m1, p1)) + k2 = deref_nocheck((m2, p2)) + if lt(ord, k1, k2) + return (k1, 
TwoSortedSets_State(advance_nocheck((m1,p1)), p2)) end - return mi + if lt(ord, k2, k1) + return (k2, TwoSortedSets_State(p1, advance_nocheck((m2,p2)))) + end + p1 = advance_nocheck((m1,p1)) + p2 = advance_nocheck((m2,p2)) end end """ - symdiff(ss1, ss2) + Base.symdiff(ss1::SortedSet, iterable) -The two argument are sorted sets with the same key and order type. -This operation computes the symmetric difference, i.e., a sorted set -containing entries that are in one of `ss1`, `ss2` but not both. -Time: O(*cn* log *n*), where *n* is the total size of the two -containers. +Compute and +return the symmetric difference of `ss1` and `iterable`, i.e., a sorted set +containing entries that are in one of `ss1` or `iterable` but not both. +Time: O(*cn*), where *n* is the total size of the two +containers if both are sorted sets with the same key and order objects. +Otherwise, the time is O(*cn* log *n*) """ +function Base.symdiff(m1::SortedSet{K,Ord}, iterable) where {K, Ord <: Ordering} + ms = SortedSet{K,Ord}(orderobject(m1)) + m1seen = SortedSet{K,Ord}(orderobject(m1)) + for k_ in iterable + k = convert(K,k_) + if k in m1 + push!(m1seen, k) + else + push!(ms, k) + end + end + for k in m1 + if !(k in m1seen) + push!(ms, k) + end + end + ms +end + function Base.symdiff(m1::SortedSet{K,Ord}, m2::SortedSet{K,Ord}) where {K, Ord <: Ordering} ord = orderobject(m1) - if !isequal(ord, orderobject(m2)) - throw(ArgumentError("Cannot apply symdiff to two SortedSets unless their ordering objects are equal")) + if ord != orderobject(m2) + return invoke(symdiff, Tuple{SortedSet{K,Ord}, Any}, m1, m2) end - mi = SortedSet(K[], ord) - p1 = startof(m1) - p2 = startof(m2) + SortedSet{K}(Val(true), SymdiffTwoSortedSets(m1, m2), ord) +end + + +struct SetdiffTwoSortedSets{K, Ord <: Ordering} + m1::SortedSet{K,Ord} + m2::SortedSet{K,Ord} +end + +function Base.iterate(twoss::SetdiffTwoSortedSets, + state = TwoSortedSets_State(startof(twoss.m1), + startof(twoss.m2))) + m1 = twoss.m1 + m2 = 
twoss.m2 + ord = orderobject(m1) + p1 = state.p1 + p2 = state.p2 while true m1end = p1 == pastendsemitoken(m1) m2end = p2 == pastendsemitoken(m2) - if m1end && m2end - return mi - elseif m1end - push!(mi, deref((m2,p2))) - p2 = advance((m2,p2)) - elseif m2end - push!(mi, deref((m1,p1))) - p1 = advance((m1,p1)) - else - k1 = deref((m1,p1)) - k2 = deref((m2,p2)) - if lt(ord,k1,k2) - push!(mi, k1) - p1 = advance((m1,p1)) - elseif lt(ord,k2,k1) - push!(mi, k2) - p2 = advance((m2,p2)) - else - p1 = advance((m1,p1)) - p2 = advance((m2,p2)) - end + if m1end + return nothing + end + if m2end + return (deref_nocheck((m1, p1)), TwoSortedSets_State(advance_nocheck((m1,p1)), p2)) end + k1 = deref_nocheck((m1, p1)) + k2 = deref_nocheck((m2, p2)) + if lt(ord, k1, k2) + return (k1, TwoSortedSets_State(advance_nocheck((m1,p1)), p2)) + end + if !lt(ord, k2, k1) + p1 = advance_nocheck((m1,p1)) + end + p2 = advance_nocheck((m2, p2)) end end + """ - setdiff(ss1, ss2) + Base.setdiff(ss1::SortedSet{K,Ord}, ss2::SortedSet{K,Ord}) where {K, Ord<:Ordering} + Base.setdiff(ss1::SortedSet, others...) -The two arguments are sorted sets with the same key and order type. -This operation computes the difference, i.e., a sorted set -containing entries that in are in `ss1` but not `ss2`. Time: O(*cn* -log *n*), where *n* is the total size of the two containers. +Return the set difference, i.e., a sorted set containing entries in `ss1` but not +in `ss2` or successive arguments. Time for the first form: O(*cn*) +where *n* is the total size of both sets provided that they are both +sorted sets of the same type and order object. +The second form computes the set difference +between `ss1` and all the others, which are all iterables. The second +form requires O(*cn* log *n*) time. 
""" function Base.setdiff(m1::SortedSet{K,Ord}, m2::SortedSet{K,Ord}) where {K, Ord <: Ordering} ord = orderobject(m1) - if !isequal(ord, orderobject(m2)) - throw(ArgumentError("Cannot apply setdiff to two SortedSets unless their ordering objects are equal")) - end - mi = SortedSet(K[], ord) - p1 = startof(m1) - p2 = startof(m2) - while true - if p1 == pastendsemitoken(m1) - return mi - elseif p2 == pastendsemitoken(m2) - push!(mi, deref((m1,p1))) - p1 = advance((m1,p1)) - else - k1 = deref((m1,p1)) - k2 = deref((m2,p2)) - if lt(ord,k1,k2) - push!(mi, deref((m1,p1))) - p1 = advance((m1,p1)) - elseif lt(ord,k2,k1) - p2 = advance((m2,p2)) - else - p1 = advance((m1,p1)) - p2 = advance((m2,p2)) - end - end + if ord != orderobject(m2) + return invoke(setdiff, Tuple{SortedSet{K,Ord}, Vararg{Any}}, m1, m2) end + SortedSet{K}(Val(true), SetdiffTwoSortedSets(m1,m2), ord) +end + + +function Base.setdiff(m1::SortedSet{K,Ord}, others...) where {K, Ord <: Ordering} + ms = packcopy(m1) + setdiff!(ms, others...) + ms end + """ - setdiff!(ss, iterable) + Base.setdiff!(ss::SortedSet, iterable..) -This function deletes items in `ss` that appear in the second -argument. The second argument must be iterable and its entries must -be convertible to the key type of m1. Time: O(*cm* log *n*), where -*n* is the size of `ss` and *m* is the number of items in -`iterable`. +Delete items in `ss` that appear in any of the +iterables. The arguments after the first must be iterables each +of whose entries must convertible to the key type of m1. +Time: O(*cm* log *n*), where *n* is the size of `ss` and *m* is the +total number of items in iterable. """ -function Base.setdiff!(m1::SortedSet, iterable) - for p in iterable - i = findkey(m1, p) - if i != pastendsemitoken(m1) - delete!((m1,i)) +function Base.setdiff!(m1::SortedSet, others...) 
+ for iterable in others + for p in iterable + i = findkey(m1, convert(eltype(m1), p)) + i != pastendsemitoken(m1) && delete!((m1,i)) end end end + +# TODO: implement a jump-forward method so that issubset runs in time +# O(*m* log (*n*/*m*)) when both sets are sorted sets. + """ - issubset(iterable, ss) + Base.issubset(iterable, ss::SortedSet) -This function checks whether each item of the first argument is an -element of the SortedSet `ss`. The entries must be convertible to -the key-type of `ss`. Time: O(*cm* log *n*), where *n* is the sizes -of `ss` and *m* is the number of items in `iterable`. +Check whether each item of the first argument is an +element of `ss`. The entries must be convertible to +the key-type of `ss`. Time: O(*cm* log *n*), where *n* is the size +of `ss` and *m* is the number of items in `iterable`. If both are +sorted sets of the same keytype and order object and if *m* > *n* / log *n*, +then an algorithm whose running time is O(*c*(*m*+*n*)) is used. """ function Base.issubset(iterable, m2::SortedSet) for k in iterable - if !in(k, m2) + if !(k in m2) return false end end return true end +function Base.issubset(m1::SortedSet{K,Ord}, m2::SortedSet{K,Ord}) where {K, Ord <: Ordering} + ord = orderobject(m1) + if ord != orderobject(m2) || + length(m1) < length(m2) / log2(length(m2) + 2) + return invoke(issubset, Tuple{Any, SortedSet}, m1, m2) + end + p1 = startof(m1) + p2 = startof(m2) + while p1 != pastendsemitoken(m1) + p2 == pastendsemitoken(m2) && return false + k1 = deref_nocheck((m1, p1)) + k2 = deref_nocheck((m2, p2)) + if eq(ord, k1, k2) + p1 = advance_nocheck((m1,p1)) + p2 = advance_nocheck((m2,p2)) + elseif lt(ord, k1,k2) + return false + else + p2 = advance_nocheck((m2,p2)) + end + end + return true +end + # Standard copy functions use packcopy - that is, they retain elements but not # the identical structure. 
Base.copymutable(m::SortedSet) = packcopy(m) Base.copy(m::SortedSet) = packcopy(m) """ - packcopy(sc) + copy(sc::SortedSet) + copy(sc::SortedDict) + copy(sc::SortedMultiDict) + packcopy(sc::SortedSet) + packcopy(sc::SortedDict) + packcopy(sc::SortedMultiDict) -This returns a copy of `sc` in which the data is packed. When +Return a copy of `sc`, where `sc` is a sorted +container, in which the data is packed. When deletions take place, the previously allocated memory is not returned. This function can be used to reclaim memory after many -deletions. Time: O(*cn* log *n*) +deletions. Time: O(*cn*) + +Note that the semitokens valid for the original container are no +longer valid for the copy because the indexing structure is rebuilt +by these copies. If an exact copy is needed in which semitokens +remain valid, use `Base.deepcopy`. """ function packcopy(m::SortedSet{K,Ord}) where {K,Ord <: Ordering} - w = SortedSet(K[], orderobject(m)) - for k in m - push!(w, k) - end - return w + SortedSet{K}(Val(true), m, orderobject(m)) end + """ - packdeepcopy(sc) + packdeepcopy(sc::SortedSet) + packdeepcopy(sc::SortedDict) + packdeepcopy(sc::SortedMultiDict) -This returns a packed copy of `sc` in which the keys and values are +Return a packed copy of `sc`, where `sc` is a sorted +container in which the keys and values are deep-copied. This function can be used to reclaim memory after many -deletions.
Time: O(*cn*) """ -function packdeepcopy(m::SortedSet{K,Ord}) where {K, Ord <: Ordering} - w = SortedSet(K[], orderobject(m)) - for k in m - newk = deepcopy(k) - push!(w, newk) - end - return w -end +packdeepcopy(m::SortedSet{K,Ord}) where {K, Ord <: Ordering} = + SortedSet{K}(Val(true), deepcopy(m), orderobject(m)) function Base.show(io::IO, m::SortedSet{K,Ord}) where {K,Ord <: Ordering} - print(io, "SortedSet(") - keys = K[] - for k in m - push!(keys, k) - end - print(io, keys) - println(io, ",") - print(io, orderobject(m)) - print(io, ")") + print(io, "SortedSet{", K, ",", Ord, "}(") + print(io, collect(m), ",", orderobject(m), ")") end """ - empty(sc) + Base.empty(sc) -Returns a new `SortedDict`, `SortedMultiDict`, or `SortedSet` of the same +Return a new `SortedDict`, `SortedMultiDict`, or `SortedSet` of the same type and with the same ordering as `sc` but with no entries (i.e., empty). Time: O(1) """ diff --git a/src/tokens2.jl b/src/tokens2.jl deleted file mode 100644 index 5e9cec5ee..000000000 --- a/src/tokens2.jl +++ /dev/null @@ -1,181 +0,0 @@ -const SDMContainer = Union{SortedDict, SortedMultiDict} -const SAContainer = Union{SDMContainer, SortedSet} - -const Token = Tuple{SAContainer, IntSemiToken} -const SDMToken = Tuple{SDMContainer, IntSemiToken} -const SetToken = Tuple{SortedSet, IntSemiToken} - - -## Function startof returns the semitoken that points -## to the first sorted order of the tree. It returns -## the past-end token if the tree is empty. - -@inline startof(m::SAContainer) = IntSemiToken(beginloc(m.bt)) - -## Function lastindex returns the semitoken that points -## to the last item in the sorted order, -## or the before-start marker if the tree is empty. - -@inline Base.lastindex(m::SAContainer) = IntSemiToken(endloc(m.bt)) - -## Function pastendsemitoken returns the token past the end of the data. - -@inline pastendsemitoken(::SAContainer) = IntSemiToken(2) - -## Function beforestarttoken returns the token before the start of the data. 
- -@inline beforestartsemitoken(::SAContainer) = IntSemiToken(1) - -## delete! deletes an item given a token. - -@inline function Base.delete!(ii::Token) - has_data(ii) - delete!(ii[1].bt, ii[2].address) -end - -## Function advances takes a token and returns the -## next token in the sorted order. - -@inline function advance(ii::Token) - not_pastend(ii) - IntSemiToken(nextloc0(ii[1].bt, ii[2].address)) -end - - -## Function regresss takes a token and returns the -## previous token in the sorted order. - -@inline function regress(ii::Token) - not_beforestart(ii) - IntSemiToken(prevloc0(ii[1].bt, ii[2].address)) -end - - -## status of a token is 0 if the token is invalid, 1 if it points to -## ordinary data, 2 if it points to the before-start location and 3 if -## it points to the past-end location. - - -@inline status(ii::Token) = - !(ii[2].address in ii[1].bt.useddatacells) ? 0 : - ii[2].address == 1 ? 2 : - ii[2].address == 2 ? 3 : 1 - -""" - compare(m::SAContainer, s::IntSemiToken, t::IntSemiToken) - -Determines the relative positions of the data items indexed -by `(m,s)` and `(m,t)` in the sorted order. The return value is `-1` -if `(m,s)` precedes `(m,t)`, `0` if they are equal, and `1` if `(m,s)` -succeeds `(m,t)`. `s` and `t` are semitokens for the same container `m`. -""" -@inline compare(m::SAContainer, s::IntSemiToken, t::IntSemiToken) = - compareInd(m.bt, s.address, t.address) - - -@inline function deref(ii::SDMToken) - has_data(ii) - return Pair(ii[1].bt.data[ii[2].address].k, ii[1].bt.data[ii[2].address].d) -end - -@inline function deref(ii::SetToken) - has_data(ii) - return ii[1].bt.data[ii[2].address].k -end - -@inline function deref_key(ii::SDMToken) - has_data(ii) - return ii[1].bt.data[ii[2].address].k -end - -@inline function deref_value(ii::SDMToken) - has_data(ii) - return ii[1].bt.data[ii[2].address].d -end - - -## Functions setindex! and getindex for semitokens. 
-## Note that we can't use SDMContainer here; we have -## to spell it out otherwise there is an ambiguity. - -@inline function Base.getindex(m::SortedDict, - i::IntSemiToken) - has_data((m,i)) - return m.bt.data[i.address].d -end - -@inline function Base.getindex(m::SortedMultiDict, - i::IntSemiToken) - has_data((m,i)) - return m.bt.data[i.address].d -end - -@inline function Base.setindex!(m::SortedDict, - d_, - i::IntSemiToken) - has_data((m,i)) - m.bt.data[i.address] = KDRec{keytype(m),valtype(m)}(m.bt.data[i.address].parent, - m.bt.data[i.address].k, - convert(valtype(m),d_)) - return m -end - -@inline function Base.setindex!(m::SortedMultiDict, - d_, - i::IntSemiToken) - has_data((m,i)) - m.bt.data[i.address] = KDRec{keytype(m),valtype(m)}(m.bt.data[i.address].parent, - m.bt.data[i.address].k, - convert(valtype(m),d_)) - return m -end - - -## This function takes a key and returns the token -## of the first item in the tree that is >= the given -## key in the sorted order. It returns the past-end marker -## if there is none. - -@inline function Base.searchsortedfirst(m::SAContainer, k_) - i = findkeyless(m.bt, convert(keytype(m), k_)) - IntSemiToken(nextloc0(m.bt, i)) -end - -## This function takes a key and returns a token -## to the first item in the tree that is > the given -## key in the sorted order. It returns the past-end marker -## if there is none. - - -@inline function searchsortedafter(m::SAContainer, k_) - i, exactfound = findkey(m.bt, convert(keytype(m), k_)) - IntSemiToken(nextloc0(m.bt, i)) -end - -## This function takes a key and returns a token -## to the last item in the tree that is <= the given -## key in the sorted order. It returns the before-start marker -## if there is none. - -@inline function Base.searchsortedlast(m::SAContainer, k_) - i, exactfound = findkey(m.bt, convert(keytype(m),k_)) - IntSemiToken(i) -end - - -## The next four are correctness-checking routines. They are -## not exported. 
- - -@inline not_beforestart(i::Token) = - (!(i[2].address in i[1].bt.useddatacells) || - i[2].address == 1) && throw(BoundsError()) - -@inline not_pastend(i::Token) = - (!(i[2].address in i[1].bt.useddatacells) || - i[2].address == 2) && throw(BoundsError()) - - -@inline has_data(i::Token) = - (!(i[2].address in i[1].bt.useddatacells) || - i[2].address < 3) && throw(BoundsError()) diff --git a/test/test_deprecations.jl b/test/test_deprecations.jl index 565e42883..09d9827ad 100644 --- a/test/test_deprecations.jl +++ b/test/test_deprecations.jl @@ -1,3 +1,5 @@ +using DataStructures: IntSemiToken + # These are the tests for deprecated features, they should be deleted along with them @testset "Trie: path iterator" begin @@ -116,3 +118,12 @@ end push!(pq, 1 => 1) @test peek(pq) == (1=>1) end + +@testset "insert!" begin + s = SortedDict{Int,String}(); + @test isa(insert!(s, 5, "hello"), Tuple{Bool, IntSemiToken}) + s2 = SortedMultiDict{Int,String}(); + @test isa(insert!(s2, 5, "hello"), IntSemiToken) + s3 = SortedSet{Int}() + @test isa(insert!(s3, 5), Tuple{Bool, IntSemiToken}) +end diff --git a/test/test_sorted_containers.jl b/test/test_sorted_containers.jl index 8a246bc77..7b46b6e98 100644 --- a/test/test_sorted_containers.jl +++ b/test/test_sorted_containers.jl @@ -1,11 +1,9 @@ -import Base.Ordering -import Base.Forward -import Base.Reverse -import DataStructures.eq +using Base: Ordering, Forward, Reverse, ForwardOrdering, + ReverseOrdering +using DataStructures: IntSemiToken import Base.lt -import Base.ForwardOrdering -import Base.ReverseOrdering -import DataStructures.IntSemiToken +import DataStructures.eq + struct CaseInsensitive <: Ordering end @@ -13,6 +11,14 @@ end lt(::CaseInsensitive, a, b) = isless(lowercase(a), lowercase(b)) eq(::CaseInsensitive, a, b) = isequal(lowercase(a), lowercase(b)) + +struct ForBack <: Ordering + flag::Bool +end + +lt(o::ForBack, a, b) = o.flag ? isless(a,b) : isless(b,a) + + @noinline my_assert(stmt) = stmt ? 
nothing : throw(AssertionError("assertion failed")) function my_primes(N) @@ -32,6 +38,17 @@ function my_primes(N) p end +function remove_spaces(s::String) + b = Vector{UInt8}() + for c in s + if !isspace(c) + push!(b,UInt8(c)) + end + end + String(b) +end + + @@ -222,6 +239,8 @@ function checkcorrectness(t::DataStructures.BalancedTree23{K,D,Ord}, end + + function testSortedDictBasic() # a few basic tests of SortedDict to start m1 = SortedDict((Dict{String,String}()), Forward) @@ -269,15 +288,15 @@ function testSortedDictMethods() m05 = SortedDict{Int,Float64}(Reverse, Pair[1=>1, 2=>2.0]) my_assert(typeof(m05) == SortedDict{Int,Float64,ReverseOrdering{ForwardOrdering}}) m06a = SortedDict(Pair[1=>2.0, 3=>'a']) - my_assert(typeof(m06a) == SortedDict{Any,Any,ForwardOrdering}) + my_assert(typeof(m06a) == SortedDict{Int,Any,ForwardOrdering}) m06b = SortedDict([(1,2.0), (3,'a')]) my_assert(typeof(m06b) == SortedDict{Int,Any,ForwardOrdering}) m07a = SortedDict(Pair[1.0=>2, 2=>3]) - my_assert(typeof(m07a) == SortedDict{Any,Any,ForwardOrdering}) + my_assert(typeof(m07a) == SortedDict{Real, Int, ForwardOrdering}) m07b = SortedDict([(1.0,2), (2,3)]) my_assert(typeof(m07b) == SortedDict{Real,Int,ForwardOrdering}) m08a = SortedDict(Pair[1.0=>2, 2=>'a']) - my_assert(typeof(m08a) == SortedDict{Any,Any,ForwardOrdering}) + my_assert(typeof(m08a) == SortedDict{Real,Any,ForwardOrdering}) m08b = SortedDict([(1.0,2), (2,'a')]) my_assert(typeof(m08b) == SortedDict{Real,Any,ForwardOrdering}) m09a = SortedDict(Pair{Int}[1=>2, 3=>'a']) @@ -294,7 +313,7 @@ function testSortedDictMethods() # Test Exceptions @test_throws ArgumentError SortedDict([1,2,3,4]) - @test_throws ArgumentError SortedDict{Int,Int}([1,2,3,4]) + @test_throws BoundsError SortedDict{Int,Int}([1,2,3,4]) expected = ([6,8,12], [18.2, 32.0, 33.1]) @@ -411,7 +430,7 @@ function testSortedDictMethods() checkcorrectness(m1.bt, false) my_assert(ii == pastendsemitoken(m1)) my_assert(status((m1,ii)) == 3) - 
my_assert(status((m1,SDSemiToken(-1))) == 0) + my_assert(status((m1,IntSemiToken(-1))) == 0) t = 0 u = 0.0 for pr in m1 @@ -428,7 +447,7 @@ function testSortedDictMethods() m1[6] = 49.0 my_assert(length(m1) == numprimes + 1) my_assert(m1[6] == 49.0) - b, i6 = insert!(m1, 6, 50.0) + b, i6 = sd_push!(m1, 6=>50.0) my_assert(length(m1) == numprimes + 1) my_assert(!b) p = deref((m1,i6)) @@ -437,7 +456,7 @@ function testSortedDictMethods() p = deref((m1,i6)) my_assert(p[1] == 6 && p[2] == 9.0) my_assert(m1[i6] == 9.0) - b2, i7 = insert!(m1, 8, 51.0) + b2, i7 = sd_push!(m1, 8=>51.0) my_assert(b2) my_assert(length(m1) == numprimes + 2) p = deref((m1,i7)) @@ -560,6 +579,35 @@ function testSortedDictMethods() my_assert(!haskey(dfc, 3)) my_assert([43] == get(dfc, 4, [43])) my_assert(!haskey(dfc, 4)) + @test_throws ArgumentError SortedDict(Forward, Forward) + @test_throws ArgumentError SortedDict{Int,Int}(Forward, Forward) + @test_throws ArgumentError SortedDict(Val(true), Forward) + @test_throws ArgumentError SortedDict{Int,Int}(Val(true), Forward) + my_assert(isequal(SortedDict{Int,Int}(ForBack(true), 1=>3,2=>7), + SortedDict{Int,Int}(ForBack(false), 1=>3, 2=>7))) + my_assert(!isequal(SortedDict{Int,Int}(ForBack(true), 1=>3,2=>7), + SortedDict{Int,Int}(ForBack(false), 1=>3, 2=>8))) + my_assert(isequal(SortedDict(1=>3,2=>7), + Dict(1=>3,2=>7))) + my_assert(!isequal(SortedDict(1=>3,2=>7), + Dict(1=>3, 2=>8))) + + my_assert(isequal(merge(SortedDict{Float64,Int}(ForBack(true), 3.5=>2, 4.5=>8), + SortedDict{Float64,Int}(ForBack(false), 2.3=>2, 9.9=>8)), + SortedDict{Float64,Int}(ForBack(true), 3.5 => 2, 4.5 => 8, + 2.3 => 2, 9.9 => 8))) + my_assert(!isequal(merge(SortedDict{Float64,Int}(ForBack(true), 3.5=>2, 4.5=>8), + SortedDict{Float64,Int}(ForBack(false), 2.3=>2, 9.9=>9)), + SortedDict{Float64,Int}(ForBack(true), 3.5 => 2, 4.5 => 8, + 2.3 => 2, 9.9 => 8))) + my_assert(isequal(merge(SortedDict{Float64,Int}(ForBack(true), 3.5=>2, 4.5=>8), + Dict(2.3=>2, 9.9=>8)), + 
SortedDict{Float64,Int}(ForBack(true), 3.5 => 2, 4.5 => 8, + 2.3 => 2, 9.9 => 8))) + my_assert(!isequal(merge(SortedDict{Float64,Int}(ForBack(true), 3.5=>2, 4.5=>8), + Dict(2.3=>2, 9.9=>9)), + SortedDict{Float64,Int}(ForBack(true), 3.5 => 2, 4.5 => 8, + 2.3 => 2, 9.9 => 8))) true end @@ -740,7 +788,7 @@ function testSortedDictLoops() sum2 = 0 for factor = 1 : N for multiple = factor : factor : N - insert!(factors, multiple, factor) + smd_push!(factors, multiple=>factor) sum1 += multiple sum2 += factor len += 1 @@ -1214,17 +1262,27 @@ function testSortedMultiDict() test_pair_array = Pair{Char}['a'=>1, 'b'=>2, 'c'=>3] factors5 = SortedMultiDict(test_pair_array) - my_assert(typeof(factors5) == SortedMultiDict{Char,Any,ForwardOrdering}) + + + my_assert(typeof(factors5) == SortedMultiDict{Char,Int,ForwardOrdering}) + # my_assert(remove_spaces(repr(MIME{Symbol("text/plain")}(), factors5)) == + # "SortedMultiDict{Char,Int64,Base.Order.ForwardOrdering}" * + # "(Base.Order.ForwardOrdering(),['a'=>1,'b'=>2,'c'=>3])") + my_assert(remove_spaces(repr(MIME{Symbol("text/plain")}(), factors5)) == + "SortedMultiDict{Char,Int64,ForwardOrdering}" * + "(ForwardOrdering(),['a'=>1,'b'=>2,'c'=>3])") #@test factors2 == factors3 # Broken! TODO: fix me... 
my_assert(isequal(factors2, factors3)) + my_assert(!isequal(SortedMultiDict{Int,Int,ForBack}(ForBack(true), [1=>2, 1=>3, 0=>1]), + SortedMultiDict{Int,Int,ForBack}(ForBack(false), [1=>2, 1=>3, 0=>1]))) N = 1000 checkcorrectness(factors.bt, true) len = 0 for factor = 1 : N for multiple = factor : factor : N - insert!(factors, multiple, factor) + smd_push!(factors, multiple=>factor) len += 1 end end @@ -1243,6 +1301,7 @@ function testSortedMultiDict() my_assert(ordtype(factors) == ForwardOrdering) my_assert(ordtype(typeof(factors)) == ForwardOrdering) + my_assert(2 in values(factors)) push!(factors, 70 => 3) my_assert(length(factors) == len+1) my_assert(Pair(70,3) in factors) @@ -1296,7 +1355,7 @@ function testSortedMultiDict() my_assert(compare(factors,i,i2) != 0) my_assert(compare(factors,regress((factors,i)),i2) == 0) my_assert(compare(factors,i,i1) != 0) - insert!(factors, 80, 6) + smd_push!(factors, 80=>6) my_assert(length(factors) == len + 1) checkcorrectness(factors.bt, true) expected1 = deepcopy(expected) @@ -1341,7 +1400,7 @@ function testSortedMultiDict() my_assert(isequal(m1,m2)) my_assert(!isequal(m1,m3)) my_assert(!isequal(m1, SortedMultiDict("apples"=>2.0))) - stok = insert!(m2, "cherries", 6.1) + stok = smd_push!(m2, "cherries"=>6.1) checkcorrectness(m2.bt, true) my_assert(!isequal(m1,m2)) delete!((m2,stok)) @@ -1379,10 +1438,10 @@ function testSortedMultiDict() m7 = SortedMultiDict{Int,Int}() n1 = 10000 for k = 1 : n1 - insert!(m7, k, k+1) + smd_push!(m7, k=>k+1) end for k = 1 : n1 - insert!(m7, k, k+2) + smd_push!(m7, k=>k+2) end for k = 1 : n1 i1, i2 = searchequalrange(m7, k) @@ -1401,33 +1460,57 @@ function testSortedMultiDict() end my_assert(count == 2) end + my_assert(isequal(merge(SortedMultiDict{Int,Float64}(1=>3.0, + 2=>2.2), + Dict(1=>2.0, 2=>2.3)), + SortedMultiDict{Int,Float64}(1=>3.0, 1=>2.0, 2=>2.2, 2=>2.3))) + my_assert(!isequal(merge(SortedMultiDict{Int,Float64}(1=>3.0, + 2=>2.2), + Dict(1=>2.0, 2=>2.3)), + 
SortedMultiDict{Int,Float64}(1=>2.0, 1=>3.0, 2=>2.2, 2=>2.3))) + my_assert(isequal(merge(SortedMultiDict{Int,Float64,ForBack}(ForBack(true), + [1=>3.0,2=>2.2]), + SortedMultiDict{Int,Float64,ForBack}(ForBack(false), + [1=>2.0,2=>2.3])), + SortedMultiDict{Int,Float64,ForBack}(ForBack(true), + [1=>3.0, 1=>2.0, + 2=>2.2, 2=>2.3]))) + my_assert(!isequal(merge(SortedMultiDict{Int,Float64,ForBack}(ForBack(true), + [1=>3.0, 2=>2.2]), + SortedMultiDict{Int,Float64,ForBack}(ForBack(false), + [1=>2.0,2=>2.3])), + SortedMultiDict{Int,Float64,ForBack}(ForBack(true), + [1=>3.0, 1=>2.0, + 2=>2.3, 2=>2.2]))) + + # issue #216 my_assert(DataStructures.isordered(SortedMultiDict{Int, String})) # issue #773 s = SortedMultiDict{Int, Int}() - insert!(s, 4, 41) - insert!(s, 3, 31) - insert!(s, 2, 21) - insert!(s, 2, 22) - insert!(s, 2, 23) - insert!(s, 2, 24) - insert!(s, 2, 25) - insert!(s, 2, 26) - insert!(s, 1, 11) - insert!(s, 1, 12) - st1 = insert!(s, 1, 13) - st2 = insert!(s, 1, 14) - st3 = insert!(s, 1, 15) - st4 = insert!(s, 1, 16) - st5 = insert!(s, 1, 17) - st6 = insert!(s, 1, 18) + smd_push!(s, 4=>41) + smd_push!(s, 3=>31) + smd_push!(s, 2=>21) + smd_push!(s, 2=>22) + smd_push!(s, 2=>23) + smd_push!(s, 2=>24) + smd_push!(s, 2=>25) + smd_push!(s, 2=>26) + smd_push!(s, 1=>11) + smd_push!(s, 1=>12) + st1 = smd_push!(s, 1=>13) + st2 = smd_push!(s, 1=>14) + st3 = smd_push!(s, 1=>15) + st4 = smd_push!(s, 1=>16) + st5 = smd_push!(s, 1=>17) + st6 = smd_push!(s, 1=>18) delete!((s, st6)) delete!((s, st5)) delete!((s, st4)) delete!((s, st3)) delete!((s, st2)) delete!((s, st1)) - insert!(s, 1, 19) + smd_push!(s, 1=>19) checkcorrectness(s.bt, true) true end @@ -1456,8 +1539,27 @@ function testSortedSet() my_assert(typeof(SortedSet([1,2,3], Reverse)) == SortedSet{Int, ReverseOrdering{ForwardOrdering}}) my_assert(typeof(SortedSet{Float32}([1,2,3], Reverse)) == SortedSet{Float32, ReverseOrdering{ForwardOrdering}}) - # @test_throws ArgumentError SortedSet(Reverse, Reverse) - # @test_throws 
ArgumentError SortedSet{Int}(Reverse, Reverse) + ss1 = SortedSet{String}(["berry", "cherry", "apple", "grape"]) + q = popfirst!(ss1) + my_assert(q == "apple") + my_assert(ss1 == Set(["berry", "cherry", "grape"])) + q2 = poplast!(ss1) + my_assert(q2 == "grape") + my_assert(ss1 == Set(["cherry", "berry"])) + my_assert(isequal(SortedSet{Int,ForBack}(ForBack(true), [5,6,4,8]), + SortedSet{Int,ForBack}(ForBack(false), [5,6,4,8]))) + my_assert(!isequal(SortedSet{Int,ForBack}(ForBack(true), [5,6,4,8]), + SortedSet{Int,ForBack}(ForBack(false), [5,6,6,8]))) + my_assert(isequal(SortedSet{Int,ForBack}(ForBack(true), [5,6,4,8]), + SortedSet{Int,ForBack}(ForBack(true), [5,6,4,8]))) + my_assert(!isequal(SortedSet{Int,ForBack}(ForBack(true), [5,6,4,8]), + SortedSet{Int,ForBack}(ForBack(true), [5,6,6,8]))) + + + + @test_throws ArgumentError SortedSet(Reverse, Reverse) + @test_throws ArgumentError SortedSet{Int}(Reverse, Reverse) + @test_throws ArgumentError SortedSet{Int}(Val(true), Reverse) smallest = 10.0 largest = -10.0 @@ -1469,11 +1571,11 @@ function testSortedSet() smallest = min(smallest,ui) largest = max(largest,ui) end - isnew,st = insert!(m, 72.5) + isnew,st = ss_push!(m, 72.5) my_assert(isnew) my_assert(deref((m,st)) == 72.5) delete!((m,st)) - isnew,st = insert!(m, 73.5) + isnew,st = ss_push!(m, 73.5) my_assert(isnew) my_assert(deref((m,st)) == 73.5) delete!(m, 73.5) @@ -1548,7 +1650,7 @@ function testSortedSet() pop!(m, smallest) checkcorrectness(m.bt, false) my_assert(length(m) == N - dcount - 1) - key1 = pop!(m) + key1 = popfirst!(m) my_assert(key1 == secondsmallest) my_assert(length(m) == N - dcount - 2) checkcorrectness(m.bt, false) @@ -1607,6 +1709,17 @@ function testSortedSet() m9a = intersect(m8, SortedSet(["yellow", "red", "white"]), m8) my_assert(typeof(m9a) == SortedSet{String, ForwardOrdering}) my_assert(isequal(m9a, SortedSet(["red", "yellow"]))) + my_assert(intersect(SortedSet{Int}([5,4,8,9]), + [5,6,9,2], [5,10,10,9]) == Set{Int}([5,9])) + 
my_assert(intersect(SortedSet{Int}([5,4,8,9]), + [5,6,9,2], [5,10,10,8]) != Set{Int}([5,9])) + my_assert(isequal(intersect(SortedSet{Int,ForBack}(ForBack(true), [5,4,8,9]), + SortedSet{Int,ForBack}(ForBack(false), [5,7,8,9])), + SortedSet{Int}([5,8,9]))) + my_assert(!isequal(intersect(SortedSet{Int,ForBack}(ForBack(true), [5,4,8,9]), + SortedSet{Int,ForBack}(ForBack(false), [5,7,8,9])), + SortedSet{Int}([5,8]))) + m10 = symdiff(m8, SortedSet(["yellow", "red", "white"])) my_assert(typeof(m10) == SortedSet{String, ForwardOrdering}) my_assert(isequal(m10, SortedSet(["white", "blue", "orange"]))) @@ -1618,16 +1731,54 @@ function testSortedSet() "zinc"]), m8) my_assert(isequal(m12, SortedSet(["zinc"]))) my_assert(typeof(m12) == SortedSet{String, ForwardOrdering}) + my_assert(isequal(symdiff(SortedSet{String}(["yellow","red", "green", "iron"]), + ["iron", "yellow", "reed"]), + SortedSet{String}(["red", "reed", "green"]))) + my_assert(isequal(symdiff(SortedSet{String,ForBack}(ForBack(true), + ["yellow","red", "green", "iron"]), + SortedSet{String,ForBack}(ForBack(false), + ["iron", "yellow", "reed"])), + SortedSet{String,ForBack}(ForBack(false), ["red", "reed", "green"]))) m13 = setdiff(m8, SortedSet(["yellow", "red", "white"])) my_assert(typeof(m13) == SortedSet{String, ForwardOrdering}) my_assert(isequal(m13, SortedSet(["blue", "orange"]))) m14 = setdiff(m8, SortedSet(["blue"])) my_assert(typeof(m14) == SortedSet{String, ForwardOrdering}) my_assert(isequal(m14, SortedSet(["orange", "yellow", "red"]))) + my_assert(isequal(setdiff(SortedSet(["red", "purple", "pearl", "black"]), + ["red", "white", "white"], + ["blue", "black", "green"]), + SortedSet(["purple", "pearl"]))) + my_assert(isequal(setdiff(SortedSet{String,ForBack}(ForBack(true), + ["red", "purple", "pearl", "black"]), + SortedSet{String,ForBack}(ForBack(false), + ["purple", "hue", "pearl"])), + SortedSet{String}(["red", "black"]))) + my_assert(issubset(["yellow", "blue"], m8)) my_assert(!issubset(["blue", 
"green"], m8)) + my_assert(issubset(SortedSet(["green", "white", "red"]), + SortedSet(["blue","white", "red", "green"]))) + my_assert(issubset(SortedSet(collect(1:5)), + SortedSet(collect(0:500)))) + my_assert(!issubset(SortedSet(collect(0:5)), + SortedSet(collect(1:500)))) + my_assert(issubset(SortedSet(collect(1:499)), + SortedSet(collect(0:500)))) + my_assert(!issubset(SortedSet(collect(0:499)), + SortedSet(collect(1:500)))) + my_assert(!issubset(SortedSet(collect(1:501)), + SortedSet(collect(1:500)))) + my_assert(issubset(SortedSet{Int,ForBack}(ForBack(false), [1,5]), + SortedSet{Int,ForBack}(ForBack(true), [1,5]))) + my_assert(!issubset(SortedSet{Int,ForBack}(ForBack(false), [1,5,6]), + SortedSet{Int,ForBack}(ForBack(true), [1,5]))) setdiff!(m8, SortedSet(["yellow", "red", "white"])) my_assert(isequal(m8, SortedSet(["blue", "orange"]))) + my_assert(isequal(union(SortedSet{Int,ForBack}(ForBack(true), [1,2,3]), + SortedSet{Int,ForBack}(ForBack(false), [6,4,2])), + SortedSet{Int}([1,2,3,4,6]))) + true end @@ -1688,6 +1839,185 @@ function testSortedMultiDictConstructors() true end +function testTokens() + ss1 = SortedSet(["turkey", "chicken", "duck", "goose"]) + ss2 = SortedSet(["turkey", "chicken", "duck", "goose"]) + t1 = token_firstindex(ss1) + my_assert(t1 == (ss1, firstindex(ss1))) + my_assert(t1 != (ss2, firstindex(ss1))) + my_assert(t1 != (ss1, lastindex(ss1))) + my_assert(advance(beforestarttoken(ss1)) == firstindex(ss1)) + my_assert(regress(pastendtoken(ss1)) == lastindex(ss1)) + + # test the IteratorSize functions + my_assert(length(collect(SortedSet([1,2,2,3]))) == 3) + my_assert(length(collect(keys(SortedDict([1=>2,2=>3,2=>4,3=>4])))) == 3) + ss3 = SortedSet([1,2,2,3]) + my_assert(length(collect(inclusive(ss3, firstindex(ss3), lastindex(ss3)))) == 3) + my_assert(length(collect(exclusive(ss3, firstindex(ss3), lastindex(ss3)))) == 2) + smd1 = SortedMultiDict(5=>"j", 1=>"a", 3=>"b", 1=>"c", 3=>"s") + my_assert(typeof(smd1) == SortedMultiDict{Int,String, 
ForwardOrdering}) + result = String[] + for (k,v) = exclusive_key(smd1, 1, 5) + push!(result,v) + end + my_assert(result == ["a", "c", "b", "s"]) + result2 = String[] + for (k,v) = inclusive_key(smd1, 1, 5) + push!(result2, v) + end + my_assert(result2 == ["a", "c", "b", "s", "j"]) + count = 0 + for (t,k,v) in tokens(exclusive_key(smd1, 1, 5)) + count += 1 + my_assert(deref(t) == (k=>v)) + my_assert(deref_value(t) == result[count]) + end + my_assert(count == 4) + my_assert(eltype(tokens(exclusive_key(smd1, 1, 5))) == + Tuple{Tuple{SortedMultiDict{Int,String,ForwardOrdering},IntSemiToken}, + Int, String}) + count = 0 + for (t,k,v) in tokens(smd1) + count += 1 + my_assert(deref(t) == (k=>v)) + my_assert(deref_value(t) == result2[count]) + end + my_assert(count == 5) + count = 0 + for (t,k) in tokens(keys(smd1)) + count += 1 + my_assert(deref_key(t) == k) + my_assert(deref_value(t) == result2[count]) + end + my_assert(count == 5) + my_assert(eltype(tokens(keys(smd1))) == + Tuple{Tuple{SortedMultiDict{Int,String,ForwardOrdering}, IntSemiToken},Int}) + count = 0 + for (t,v) in tokens(values(smd1)) + count += 1 + my_assert(deref_value(t) == v) + my_assert(deref_value(t) == result2[count]) + end + my_assert(count == 5) + my_assert(eltype(tokens(values(smd1))) == + Tuple{Tuple{SortedMultiDict{Int,String,ForwardOrdering}, IntSemiToken},String}) + count = 0 + for t in onlytokens(smd1) + count += 1 + my_assert(deref_value(t) == result2[count]) + end + my_assert(count == 5) + my_assert(eltype(onlytokens(smd1)) == Tuple{SortedMultiDict{Int,String,ForwardOrdering}, + IntSemiToken}) + + + count = 0 + for t in onlytokens(exclusive_key(smd1, 1, 5)) + count += 1 + my_assert(deref_value(t) == result[count]) + end + my_assert(count == 4) + + count = 4 + for t in onlytokens(Iterators.reverse(exclusive_key(smd1, 1, 5))) + my_assert(deref_value(t) == result[count]) + count -= 1 + end + my_assert(count == 0) + count = 5 + for t in onlytokens(Iterators.reverse(smd1)) + 
my_assert(deref_value(t) == result2[count]) + count -= 1 + end + my_assert(count == 0) + + count = 0 + for (k,v) in Iterators.reverse(exclusive(smd1, firstindex(smd1), firstindex(smd1))) + count +=1 + end + my_assert(count == 0) + + + count = 5 + for (k,v) in Iterators.reverse(inclusive(smd1, firstindex(smd1), lastindex(smd1))) + my_assert(v == result2[count]) + count -= 1 + end + my_assert(count == 0) + + count = 0 + for (k,v) in Iterators.reverse(inclusive(smd1, + advance((smd1, firstindex(smd1))), + firstindex(smd1))) + count += 1 + end + my_assert(count == 0) + + + count = 0 + for t in Iterators.reverse(exclusive_key(smd1, 1, 1)) + count += 1 + end + my_assert(count == 0) + + count = 5 + for t in onlytokens(Iterators.reverse(inclusive(smd1, firstindex(smd1), lastindex(smd1)))) + my_assert(deref_value(t) == result2[count]) + count -= 1 + end + my_assert(count == 0) + + count = 0 + for t in onlytokens(Iterators.reverse(inclusive(smd1, lastindex(smd1), firstindex(smd1)))) + my_assert(deref_value(t) == result2[count]) + end + my_assert(count == 0) + + + + + count = 0 + for t in Iterators.reverse(onlytokens(Iterators.reverse(exclusive_key(smd1, 1, 5)))) + count += 1 + my_assert(deref_value(t) == result[count]) + end + my_assert(count == 4) + my_assert(keytype(exclusive_key(smd1, 1, 5)) == Int) + my_assert(valtype(exclusive_key(smd1, 1, 5)) == String) + count = 0 + for (k,v) in pairs(exclusive_key(smd1, 1, 5)) + count += 1 + my_assert(v == result[count]) + end + my_assert(keytype(exclusive_key(smd1,1, 5)) == Int) + my_assert(keytype(typeof(exclusive_key(smd1,1, 5))) == Int) + my_assert(valtype(exclusive_key(smd1,1, 5)) == String) + my_assert(valtype(typeof(exclusive_key(smd1,1, 5))) == String) + my_assert(count == 4) + count = 0 + for (k,v) in pairs(smd1) + count += 1 + my_assert(v == result2[count]) + end + my_assert(count == 5) + count = 0 + for t in token_firstindex(smd1) : token_lastindex(smd1) + count += 1 + my_assert(deref_value(t) == result2[count]) + end + 
my_assert(count == 5) + count = 1 + for t in token_firstindex(smd1) + 1 : token_lastindex(smd1) - 1 + count += 1 + my_assert(deref_value(t) == result2[count]) + end + my_assert(count == 4) + true +end + + + @testset "SortedContainers" begin @test testSortedDictBasic() @@ -1698,6 +2028,7 @@ end @test testSortedSet() @test testSortedDictConstructors() @test testSortedMultiDictConstructors() + @test testTokens() # test all the errors of sorted containers @@ -1714,18 +2045,20 @@ end @test m === delete!(m,"a") # Okay to delete! nonexistent keys @test_throws KeyError pop!(m,"a") m3 = SortedDict((Dict{String, Int}()), Reverse) - @test_throws ArgumentError isequal(m2, m3) @test_throws BoundsError m[i1] @test_throws BoundsError regress((m,beforestartsemitoken(m))) @test_throws BoundsError advance((m,pastendsemitoken(m))) m1 = SortedMultiDict{Int,Int}() @test_throws ArgumentError SortedMultiDict([1,2,3]) @test_throws ArgumentError SortedMultiDict(Forward, [1,2,3]) - @test_throws ArgumentError SortedMultiDict{Int,Int}([1,2,3]) - @test_throws ArgumentError SortedMultiDict{Int,Int}(Forward, [1,2,3]) + @test_throws BoundsError SortedMultiDict{Int,Int}([1,2,3]) + @test_throws BoundsError SortedMultiDict{Int,Int}(Forward, [1,2,3]) @test_throws ArgumentError SortedMultiDict(Forward, Reverse) - @test_throws ArgumentError isequal(SortedMultiDict("a"=>1), SortedMultiDict("b"=>1.0)) - @test_throws ArgumentError isequal(SortedMultiDict(["a"=>1],Reverse), SortedMultiDict(["b"=>1])) + @test_throws ArgumentError SortedMultiDict{Int,Int}(Forward, Reverse) + @test_throws ArgumentError SortedMultiDict(Val(true), Forward) + @test_throws ArgumentError SortedMultiDict{Int,Int}(Val(true), Forward) + #@test_throws ArgumentError isequal(SortedMultiDict("a"=>1), SortedMultiDict("b"=>1.0)) + #@test_throws ArgumentError isequal(SortedMultiDict(["a"=>1],Reverse), SortedMultiDict(["b"=>1])) @test_throws MethodError SortedMultiDict{Char,Int}(Forward, ["aa"=>2, "bbb"=>5]) @test_throws MethodError 
SortedMultiDict(Forward, [("aa",2)=>2, "bbb"=>5]) @test_throws BoundsError first(m1) @@ -1734,15 +2067,15 @@ end s = SortedSet([3,5]) @test s === delete!(s,7) # Okay to delete! nonexistent keys @test_throws KeyError pop!(s, 7) - pop!(s) - pop!(s) - @test_throws BoundsError pop!(s) + popfirst!(s) + popfirst!(s) + @test_throws BoundsError popfirst!(s) @test_throws BoundsError first(s) @test_throws BoundsError last(s) - @test_throws ArgumentError isequal(SortedSet(["a"]), SortedSet([1])) - @test_throws ArgumentError isequal(SortedSet(["a"]), SortedSet(["b"],Reverse)) - @test_throws ArgumentError (("a",6) in m) - @test_throws ArgumentError ((2,5) in m1) + @test_throws MethodError isequal(SortedSet(["a"]), SortedSet([1])) + #@test_throws ArgumentError isequal(SortedSet(["a"]), SortedSet(["b"],Reverse)) + #@test_throws ErrorException (("a",6) in m) + #@test_throws ArgumentError ((2,5) in m1) s = SortedSet([10,30,50]) @test pop!(s,10) == 10 @@ -1751,6 +2084,7 @@ end @test pop!(s,50, nothing) == 50 @test pop!(s,50, nothing) == nothing @test isempty(s) + @test_throws ErrorException pop!(s) # Test AbstractSet/AbstractDict interface for m in [SortedSet([1,2]), SortedDict(1=>2, 2=>3), SortedMultiDict(1=>2, 1=>3)] @@ -1765,3 +2099,4 @@ end end end end + From 4b8c108d9943735494613cf786bf7e7aad286637 Mon Sep 17 00:00:00 2001 From: Stephen Vavasis Date: Thu, 20 Jan 2022 21:57:36 -0500 Subject: [PATCH 2/8] added some print statements to debug CI failures --- test/test_sorted_containers.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/test_sorted_containers.jl b/test/test_sorted_containers.jl index 7b46b6e98..b4a520818 100644 --- a/test/test_sorted_containers.jl +++ b/test/test_sorted_containers.jl @@ -1266,10 +1266,11 @@ function testSortedMultiDict() my_assert(typeof(factors5) == SortedMultiDict{Char,Int,ForwardOrdering}) - # my_assert(remove_spaces(repr(MIME{Symbol("text/plain")}(), factors5)) == - # 
"SortedMultiDict{Char,Int64,Base.Order.ForwardOrdering}" * - # "(Base.Order.ForwardOrdering(),['a'=>1,'b'=>2,'c'=>3])") - my_assert(remove_spaces(repr(MIME{Symbol("text/plain")}(), factors5)) == + str1 = repr(MIME{Symbol("text/plain")}(), factors5) + str2 = remove_spaces(str1) + println("str1 = ", str1) + println("str2 = ", str2) + my_assert(str2 == "SortedMultiDict{Char,Int64,ForwardOrdering}" * "(ForwardOrdering(),['a'=>1,'b'=>2,'c'=>3])") #@test factors2 == factors3 # Broken! TODO: fix me... From 95c24789190da36378b1249cb1f8fc26aeb4b2a2 Mon Sep 17 00:00:00 2001 From: Stephen Vavasis Date: Fri, 21 Jan 2022 09:20:31 -0500 Subject: [PATCH 3/8] Changed test for exact output from `show` to more forgiving test of successful parse --- test/test_sorted_containers.jl | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/test/test_sorted_containers.jl b/test/test_sorted_containers.jl index b4a520818..ac047de76 100644 --- a/test/test_sorted_containers.jl +++ b/test/test_sorted_containers.jl @@ -1262,17 +1262,10 @@ function testSortedMultiDict() test_pair_array = Pair{Char}['a'=>1, 'b'=>2, 'c'=>3] factors5 = SortedMultiDict(test_pair_array) - - - my_assert(typeof(factors5) == SortedMultiDict{Char,Int,ForwardOrdering}) + # test for `show` method for sortedmultidict str1 = repr(MIME{Symbol("text/plain")}(), factors5) - str2 = remove_spaces(str1) - println("str1 = ", str1) - println("str2 = ", str2) - my_assert(str2 == - "SortedMultiDict{Char,Int64,ForwardOrdering}" * - "(ForwardOrdering(),['a'=>1,'b'=>2,'c'=>3])") + my_assert(Meta.parse(str1).head == :call) #@test factors2 == factors3 # Broken! TODO: fix me... 
my_assert(isequal(factors2, factors3)) my_assert(!isequal(SortedMultiDict{Int,Int,ForBack}(ForBack(true), [1=>2, 1=>3, 0=>1]), From 2aa37f44e59b206803fd000e3460d21aa08f6902 Mon Sep 17 00:00:00 2001 From: Stephen Vavasis Date: Sat, 30 Apr 2022 10:09:53 -0400 Subject: [PATCH 4/8] Addressing round 1 of comments from oxinabox --- docs/Project.toml | 3 + docs/src/sorted_containers.md | 266 ++---------------------------- src/DataStructures.jl | 12 +- src/deprecations.jl | 6 + src/sorted_container_iteration.jl | 167 ++++++++++--------- src/sorted_dict.jl | 45 +++-- src/sorted_multi_dict.jl | 49 +++--- src/sorted_set.jl | 111 ++++++------- test/test_deprecations.jl | 3 + test/test_sorted_containers.jl | 138 ++++++++-------- 10 files changed, 291 insertions(+), 509 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index dc779ba28..5c51683f0 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -2,3 +2,6 @@ Coverage = "a2441757-f6aa-5fb2-8edb-039e3f45d037" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" + +[compat] +Documenter = "0.23" diff --git a/docs/src/sorted_containers.md b/docs/src/sorted_containers.md index fdca9fb2f..c65034d68 100644 --- a/docs/src/sorted_containers.md +++ b/docs/src/sorted_containers.md @@ -22,7 +22,9 @@ according to the sorted order for keys, and key=>value pairs with the same key are stored in order of insertion. The containers internally use a 2-3 tree, which is a kind of balanced -tree and is described in data structure textbooks. +tree and is described in data structure textbooks. Internally, one `Vector` is +used to store key/data pairs (the leaves of the tree) while a second holds the +tree structure. The containers require two functions to compare keys: a _less-than_ and _equals_ function. With the default ordering argument, the comparison @@ -69,13 +71,13 @@ semitokens because the first entry of a token (i.e., the container) is not a bits-type. 
If code profiling indicates that statements using tokens are allocating memory, then it may be advisable to rewrite the application code using semitokens -more than tokens. +rather than tokens. ## Complexity of Sorted Containers In the list of functions below, the running time of the various operations is provided. In these running times, _n_ denotes the -size (number of items) in the container, +number of items in the container, and _c_ denotes the time needed to compare two keys. @@ -86,22 +88,9 @@ and _c_ denotes the time needed to compare two keys. ```@docs SortedDict{K,V,Ord}(o::Ord=Forward) where {K, V, Ord <: Ordering} -``` - - -```@docs SortedDict(o::Ord=Forward) where {Ord <: Ordering} -``` - -```@docs SortedDict(iter, o::Ord=Forward) where {Ord <: Ordering} -``` - -```@docs SortedDict(ps::Pair...) -``` - -```@docs SortedDict{K,V}(::Val{true}, iterable) where {K,V} ``` @@ -110,38 +99,17 @@ SortedDict{K,V}(::Val{true}, iterable) where {K,V} ```@docs SortedMultiDict{K,V,Ord}(o::Ord=Forward) where {K, V, Ord <: Ordering} -``` - -```@docs SortedMultiDict(o::Ord=Forward) where {Ord <: Ordering} - -``` - -```@docs SortedMultiDict(ps::Pair...) 
-``` - -```@docs SortedMultiDict(iter, o::Ord=Forward) where {Ord <: Ordering} -``` - -```@docs SortedMultiDict{K,V}(::Val{true}, iterable) where {K,V} ``` -### `SortedSets` constructors +### `SortedSet` constructors ```@docs SortedSet{K,Ord}(o::Ord=Forward) where {K, Ord<:Ordering} -``` - -```@docs SortedSet(o::Ord=Forward) where {Ord <: Ordering} -``` - -```@docs SortedSet(o::Ordering, iter) -``` -```@docs SortedSet{K}(::Val{true}, iterable) where {K} ``` @@ -150,197 +118,61 @@ SortedSet{K}(::Val{true}, iterable) where {K} ```@docs Base.getindex(sd::SortedDict, k) -``` - -```@docs Base.getindex(m::SortedDict, st::IntSemiToken) -``` - -```@docs Base.setindex!(m::SortedDict, newvalue, st::IntSemiToken) -``` - -```@docs Base.setindex!(sd::SortedDict, newvalue, k) - -``` - -```@docs deref(token::Token) -``` - - -```@docs deref_key(token::Token) -``` -```@docs deref_value(token::Token) -``` - -```@docs Base.firstindex(m::SortedContainer) -``` - -```@docs Base.lastindex(m::SortedContainer) -``` - -```@docs token_firstindex(m::SortedContainer) -``` - -```@docs token_lastindex(m::SortedContainer) -``` - -```@docs Base.first(sc::SortedContainer) -``` - -```@docs Base.last(sc::SortedContainer) -``` - -```@docs pastendsemitoken(sc::SortedContainer) -``` -```@docs beforestartsemitoken(sc::SortedContainer) -``` - -```@docs pastendtoken(sc::SortedContainer) -``` - - -```@docs beforestarttoken(sc::SortedContainer) -``` - -```@docs advance(token::Token) -``` - -```@docs regress(token::Token) - -``` - -```@docs +(t::Token, k::Integer) - -``` - -```@docs Base.searchsortedfirst(m::SortedContainer, k) -``` - -```@docs Base.searchsortedlast(m::SortedContainer, k) -``` - -```@docs searchsortedafter(m::SortedContainer, k) -``` - -```@docs searchequalrange(smd::SortedMultiDict, k) -``` - -```@docs findkey(m::SortedSet, k) -``` - -```@docs findkey(sd::SortedDict, k) ``` ## Inserting & Deleting in Sorted Containers -```@docs -ss_push!(ss::SortedSet, k) -``` - -```@docs 
-sd_push!(sd::SortedDict, p::Pair) -``` - -```@docs -smd_push!(smd::SortedMultiDict, p::Pair) -``` - ```@docs Base.push!(ss::SortedSet, k) -``` - -```@docs Base.push!(sd::SortedDict, p::Pair) -``` - -```@docs Base.push!(smd::SortedMultiDict, p::Pair) -``` - -```@docs +push_return_token!(ss::SortedSet, k) +push_return_token!(sd::SortedDict, p::Pair) +push_return_token!(smd::SortedMultiDict, p::Pair) Base.delete!(token::Token) -``` - -```@docs Base.delete!(ss::SortedSet, k) -``` - -```@docs Base.delete!(sc::SortedDict, k) -``` -```@docs -Base.popfirst!(ss::SortedSet) -``` -```@docs -poplast!(ss::SortedSet) -``` - -```@docs Base.pop!(ss::SortedSet, k) -``` - -```@docs Base.pop!(sd::SortedDict, k) +Base.popfirst!(ss::SortedSet) +poplast!(ss::SortedSet) ``` -## Token Manipulation +## Iteration and Token Manipulation ```@docs compare(m::SortedContainer, s::IntSemiToken, t::IntSemiToken) -``` - -```@docs status(token::Token) -``` - - -## Iteration Over Sorted Containers - -```@docs - Base.iterate(sci::SortedContainerIterable) -``` - -## `in` function - -```@docs +Base.iterate(sci::SortedContainerIterable) Base.in(k,m::SortedSet) -``` - -```@docs Base.in(p::Pair, sd::SortedDict) -``` - -```@docs Base.in(p::Pair, smd::SortedMultiDict) -``` - - -```@docs Base.in(x, iter::SortedContainerIterable) ``` @@ -370,72 +202,27 @@ Time: O(1) ```@docs ordtype(sc::SortedContainer) -``` - -```@docs orderobject(sc::SortedContainer) -``` - -```@docs Base.haskey(sc::SortedContainer, k) -``` - -```@docs Base.get(sd::SortedDict,k,v) -``` - -```@docs Base.get!(sd::SortedDict,k,v) -``` - -```@docs Base.getkey(sd::SortedDict,k,defaultk) -``` - -```@docs Base.isequal(ss1::SortedSet{K,Ord}, ss2::SortedSet{K,Ord}) where {K, Ord <: Ordering} -``` - -```@docs Base.isequal(sc1::SortedDict{K,V,Ord}, sc2::SortedDict{K,V,Ord}) where {K, V, Ord <: Ordering} -``` - -```@docs Base.isequal(smd1::SortedMultiDict{K,V,Ord}, smd2::SortedMultiDict{K,V,Ord}) where {K, V, Ord <: Ordering} - -``` - -```@docs 
packcopy(sc::SortedSet) -``` - -```@docs packdeepcopy(sc::SortedSet) -``` - -```@docs Base.merge(sd::SortedDict{K,V,Ord}, d1::AbstractDict{K,V}...) where {K,V,Ord <: Ordering} -``` - -```@docs Base.merge!(sd::SortedDict{K,V,Ord}, d1::AbstractDict{K,V}...) where {K,V,Ord <: Ordering} -``` - -```@docs Base.merge(smd::SortedMultiDict, iter...) -``` - -```@docs Base.merge!(smd::SortedMultiDict, iter...) ``` - - ## Set operations The SortedSet container supports the following set operations. Note that -in the case of intersect, symdiff and setdiff, the two SortedSets should +in the case of `intersect`, `symdiff` and `setdiff`, the two SortedSets should have the same key and ordering object. If they have different key or ordering types, no error message is produced; instead, the built-in default versions of these functions (that can be applied to `Any` @@ -443,30 +230,11 @@ iterables and that return arrays) are invoked. ```@docs Base.union!(ss::SortedSet, iterable...) -``` - -```@docs Base.union(ss::SortedSet, iterable...) -``` - -```@docs Base.intersect(ss::SortedSet, others...) -``` - -```@docs Base.symdiff(ss1::SortedSet, iterable) - -``` - -```@docs Base.setdiff(ss1::SortedSet{K,Ord}, ss2::SortedSet{K,Ord}) where {K, Ord<:Ordering} -``` - -```@docs Base.setdiff!(m1::SortedSet, iterable) -``` - -```@docs Base.issubset(iterable, ss::SortedSet) ``` @@ -565,14 +333,14 @@ Dicts, keys for the sorted containers can be either mutable or immutable. In the case of mutable keys, it is important that the keys not be mutated once they are in the container else the indexing structure will be corrupted. (The same restriction applies to Dict.) For -example, suppose `sd` has type `SortedDict{Vector{Int},Int,ForwardOrdering}`, in -other words, keys of type `Vector{Int}`, values of type `Int`, and -lexicographic ordering (default ordering of vectors). 
Then the +example, the following sequence of statements leaves `sd` in a corrupted state: ```julia +sd = SortedDict{Vector{Int},Int}() k = [1,2,3] sd[k] = 19 +sd[[6,4]] = 12 k[1] = 7 ``` diff --git a/src/DataStructures.jl b/src/DataStructures.jl index 0c0594544..4d2103785 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -42,14 +42,14 @@ module DataStructures export SortedDict, SortedMultiDict, SortedSet export SDToken, SDSemiToken, SMDToken, SMDSemiToken export SetToken, SetSemiToken - export startof + export startof, endof ## both are deprecated export pastendsemitoken, beforestartsemitoken export pastendtoken, beforestarttoken export searchsortedafter, searchequalrange export packcopy, packdeepcopy export exclusive, inclusive, semitokens, inclusive_key, exclusive_key export orderobject, ordtype, Lt, compare, onlysemitokens - export tokens, onlytokens + export tokens, onlytokens, poplast! export MultiDict, enumerateall export RobinDict @@ -65,12 +65,7 @@ module DataStructures export findkey - import Base.== - import Base.(:) - import Base.+ - import Base.- - import Base.isequal - export sd_push!, ss_push!, smd_push!, poplast! + export push_return_token! 
export token_firstindex, token_lastindex include("delegate.jl") @@ -128,4 +123,5 @@ module DataStructures include("deprecations.jl") + end diff --git a/src/deprecations.jl b/src/deprecations.jl index 673fafe7e..42be1e0fe 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -17,6 +17,12 @@ Base.@deprecate_binding IntDisjointSets IntDisjointSet @deprecate dequeue_pair!(q::PriorityQueue) Base.popfirst!(q) @deprecate dequeue_pair!(q::PriorityQueue, key) popat!(q, key) +@deprecate startof(m::SortedContainer) firstindex(m::SortedContainer) +@deprecate endof(m::SortedContainer) lastindex(m::SortedContainer) +@deprecate insert!(m::SortedSet, k) push_return_token!(m::SortedSet, k) +@deprecate insert!(m::SortedDict, k, d) push_return_token!(m::SortedDict, k=>d) +@deprecate insert!(m::SortedMultiDict, k, d) (push_return_token!(m::SortedMultiDict, k=>d))[2] + function Base.peek(q::PriorityQueue) Expr(:meta, :noinline) Base.depwarn("`peek(q::PriorityQueue)` is deprecated, use `first(q)` instead.", :peek) diff --git a/src/sorted_container_iteration.jl b/src/sorted_container_iteration.jl index 90a617755..8f4208a48 100644 --- a/src/sorted_container_iteration.jl +++ b/src/sorted_container_iteration.jl @@ -19,19 +19,19 @@ const SortedMultiDictToken = Tuple{SortedMultiDict, IntSemiToken} const SDMToken = Tuple{SDMContainer, IntSemiToken} const SortedSetToken = Tuple{SortedSet, IntSemiToken} -(==)(t1::Token, t2::Token) = (t1[1] === t2[1] && t1[2] == t2[2]) +Base.:(==)(t1::Token, t2::Token) = (t1[1] === t2[1] && t1[2] == t2[2]) """ Base.firstindex(m::SortedContainer) - startof(m::SortedContainer) Return the semitoken of the first entry of the container `m`, or the past-end semitoken -if the container is empty. Time: O(log *n*) +if the container is empty. This function was called +`startof` (now deprecated) in previous versions of the package. 
+Time: O(log *n*) """ -Base.firstindex(m::SortedContainer) = startof(m) -startof(m::SortedContainer) = IntSemiToken(beginloc(m.bt)) +Base.firstindex(m::SortedContainer) = IntSemiToken(beginloc(m.bt)) """ @@ -48,14 +48,14 @@ token_firstindex(m::SortedContainer) = (m, firstindex(m)) """ Base.lastindex(m::SortedContainer) - endof(m::SortedContainer) Return the semitoken of the last entry of the sorted container `m`, or the before-start semitoken -if the container is empty. Time: O(log *n*) +if the container is empty. This function was called `endof` (now +deprecated) in previous versions of the package. +Time: O(log *n*) """ -Base.lastindex(m::SortedContainer) = endof(m) -endof(m::SortedContainer) = IntSemiToken(endloc(m.bt)) +Base.lastindex(m::SortedContainer) = IntSemiToken(endloc(m.bt)) """ @@ -109,42 +109,43 @@ sorted container `m`. Time: O(1) beforestarttoken(m::SortedContainer) = (m, beforestartsemitoken(m)) -delete_nocheck!(ii::Token) = delete!(ii[1].bt, ii[2].address) - """ Base.delete!(token::Token) - Base.delete!((m,st)) -Delete the item indexed by the token from a sorted container. The -token must point to live data. The second form creates the token -in-place as a tuple of a container `m` and a semitoken `st`. +Delete the item indexed by the token from a sorted container. +A `BoundsError` is thrown if the token is invalid. +Prepending with +`@inbounds` may elide the correctness check and will result +in undefined behavior if the token is invalid. Time: O(log *n*). """ -function Base.delete!(ii::Token) - has_data(ii) - delete_nocheck!(ii) +Base.@propagate_inbounds function Base.delete!(ii::Token) + Base.@boundscheck has_data(ii) + delete!(ii[1].bt, ii[2].address) end -advance_nocheck(ii::Token) = IntSemiToken(nextloc0(ii[1].bt, ii[2].address)) - """ advance(token::Token) advance((m,st)) Return the semitoken of the item in a sorted container one after the given token. A `BoundsError` is thrown if the token is -the past-end token. 
The second form creates the token +the past-end token. +Prepending with +`@inbounds` may elide the correctness check and will result +in undefined behavior if the token is invalid or +points to the past-end token. +The second form creates the token in-place as a tuple of a container `m` and a semitoken `st`. Time: O(log *n*) """ -function advance(ii::Token) - not_pastend(ii) - advance_nocheck(ii) +Base.@propagate_inbounds function advance(ii::Token) + Base.@boundscheck not_pastend(ii) + IntSemiToken(nextloc0(ii[1].bt, ii[2].address)) end -regress_nocheck(ii::Token) = IntSemiToken(prevloc0(ii[1].bt, ii[2].address)) """ regress(token::Token) @@ -152,13 +153,18 @@ regress_nocheck(ii::Token) = IntSemiToken(prevloc0(ii[1].bt, ii[2].address)) Return the semitoken of the item in a sorted container one before the given token. A `BoundsError` is thrown if the token is -the before-start token. The second form creates the token +the before-start token. +Prepending with +`@inbounds` may elide the correctness check and will result +in undefined behavior if the token is invalid or +points to the before-start token. +The second form creates the token in-place as a tuple of a container `m` and a semitoken `st`. Time: O(log *n*) """ -function regress(ii::Token) - not_beforestart(ii) - regress_nocheck(ii) +Base.@propagate_inbounds function regress(ii::Token) + Base.@boundscheck not_beforestart(ii) + IntSemiToken(prevloc0(ii[1].bt, ii[2].address)) end @@ -228,8 +234,13 @@ Return the order object used to construct the container. Time: O(1) Return the data item indexed by the token. If the container is a `SortedSet`, then this is a key in the set. If the container is a `SortedDict` or `SortedMultiDict`, then -this is a key=>value pair. It is a BoundsError() if the token -is invalid or is the before-start or past-end token. The +this is a key=>value pair. It is a `BoundsError` if the token +is invalid or is the before-start or past-end token. 
+Prepending with +`@inbounds` may elide the correctness check and will result +in undefined behavior if the token is invalid or +points to the before-start or past-end token. +The second form creates the token in-place as a tuple of a sorted container `m` and a semitoken `st`. Time: O(1) @@ -238,36 +249,25 @@ function deref(ii::Token) error("This is not reachable because the specialized methods below will always be selected but is here to make the doc work") end -@inline function deref_nocheck(ii::SortedDictToken) +Base.@propagate_inbounds function deref(ii::SortedDictToken) + Base.@boundscheck has_data(ii) @inbounds kdrec = ii[1].bt.data[ii[2].address] return Pair(kdrec.k, kdrec.d) end -function deref(ii::SortedDictToken) - has_data(ii) - deref_nocheck(ii) -end - -@inline function deref_nocheck(ii::SortedMultiDictToken) +Base.@propagate_inbounds function deref(ii::SortedMultiDictToken) + Base.@boundscheck has_data(ii) @inbounds kdrec = ii[1].bt.data[ii[2].address] - return kdrec.k => kdrec.d + return Pair(kdrec.k, kdrec.d) end -function deref(ii::SortedMultiDictToken) - has_data(ii) - deref_nocheck(ii) -end -@inline function deref_nocheck(ii::SortedSetToken) +Base.@propagate_inbounds function deref(ii::SortedSetToken) + Base.@boundscheck has_data(ii) @inbounds k = ii[1].bt.data[ii[2].address].k return k end -function deref(ii::SortedSetToken) - has_data(ii) - deref_nocheck(ii) -end - """ deref_key(token::Token) @@ -275,8 +275,13 @@ end Return the key portion of a data item (a key=>value pair) in a `SortedDict` or `SortedMultiDict` indexed by the token. -It is a BoundsError() if the token -is invalid or is the before-start or past-end token. The +It is a `BoundsError` if the token +is invalid or is the before-start or past-end token. +Prepending with +`@inbounds` may elide the correctness check and will result +in undefined behavior if the token is invalid or +points to the before-start or past-end token. 
+The second form creates the token in-place as a tuple of a container `m` and a semitoken `st`. Time: O(1) """ @@ -285,17 +290,12 @@ function deref_key(ii::Token) end -@inline function deref_key_nocheck(ii::SDMToken) +Base.@propagate_inbounds function deref_key(ii::SDMToken) + Base.@boundscheck has_data(ii) @inbounds k = ii[1].bt.data[ii[2].address].k return k end -function deref_key(ii::SDMToken) - has_data(ii) - deref_key_nocheck(ii) -end - - """ deref_value(token::Token) @@ -304,8 +304,13 @@ end Returns the value portion of a data item (a key=>value pair) in a `SortedDict` or `SortedMultiDict` indexed by the token. -It is a BoundsError() if the token -is invalid or is the before-start or past-end token. The +It is a `BoundsError` if the token +is invalid or is the before-start or past-end token. +Prepending with +`@inbounds` may elide the correctness check and will result +in undefined behavior if the token is invalid or +points to the before-start or past-end token. +The second form creates the token in-place as a tuple of a container `m` and a semitoken `st`. Time: O(1) """ @@ -313,15 +318,12 @@ function deref_value(ii::Token) error("Cannot invoke deref_key on a SortedSet") end -@inline function deref_value_nocheck(ii::SDMToken) +Base.@propagate_inbounds function deref_value(ii::SDMToken) + Base.@boundscheck has_data(ii) @inbounds d = ii[1].bt.data[ii[2].address].d return d end -function deref_value(ii::SDMToken) - has_data(ii) - deref_value_nocheck(ii) -end """ Base.first(sc::SortedContainer) @@ -354,16 +356,20 @@ Base.last(m::SortedContainer) = deref(token_lastindex(m)) Retrieve value portion of item from SortedDict or SortedMultiDict `m` indexed by `st`, a semitoken. Notation `m[st]` appearing in an expression -is equivalent to [`deref_value(token::Token)`](@ref) where `token=(m,st)`. Time: O(1) +is equivalent to [`deref_value(token::Token)`](@ref) where `token=(m,st)`. +It is a `BoundsError` if the token is invalid. 
Prepending with +`@inbounds` may elide the correctness check and results in undefined +behavior if the token is invalid. +Time: O(1) """ -function Base.getindex(m::SortedDict, +Base.@propagate_inbounds function Base.getindex(m::SortedDict, i::IntSemiToken) @boundscheck has_data((m,i)) @inbounds d = m.bt.data[i.address].d return d end # Must repeat this to break ambiguity; cannot use SDMContainer. -function Base.getindex(m::SortedMultiDict, +Base.@propagate_inbounds function Base.getindex(m::SortedMultiDict, i::IntSemiToken) @boundscheck has_data((m,i)) @inbounds d = m.bt.data[i.address].d @@ -377,9 +383,13 @@ end Base.setindex!(m::SortedMultiDict, newvalue, st::IntSemiToken) Set the value portion of item from SortedDict or SortedMultiDict -`m` indexed by `st`, a semitoken to `newvalue`. Time: O(1) +`m` indexed by `st`, a semitoken to `newvalue`. +A `BoundsError` is thrown if the token is invalid. +Prepending with `@inbounds` may elide the correctness check and +results in undefined behavior if the token is invalid. 
+Time: O(1) """ -function Base.setindex!(m::SortedDict, +Base.@propagate_inbounds function Base.setindex!(m::SortedDict, d_, i::IntSemiToken) @boundscheck has_data((m,i)) @@ -389,8 +399,9 @@ function Base.setindex!(m::SortedDict, convert(valtype(m),d_)) return m end + ## Must repeat this to break ambiguity; cannot use SDMContainer -function Base.setindex!(m::SortedMultiDict, +Base.@propagate_inbounds function Base.setindex!(m::SortedMultiDict, d_, i::IntSemiToken) @boundscheck has_data((m,i)) @@ -1115,9 +1126,7 @@ Base.length(m::SortedContainer) = length(m.bt.data) - length(m.bt.freedatainds) Base.isempty(m::SortedContainer) = length(m) == 0 -(:)(t1::Token, t2::Token) = _colon(t1,t2) - -function _colon(t1::Token, t2::Token) +function Base.:(:)(t1::Token, t2::Token) t1[1] !== t2[1] && throw(ArgumentError("First and second arguments of colon operator on sorted container tokens must refer to the same container")) IterableObject{typeof(t1[1]), InclusiveRange, default_KVIterType(t1[1]), OnlyTokenIter, @@ -1126,8 +1135,8 @@ end """ - +(t::Token, j::Integer) - -(t::Token, j::Integer) + Base.+(t::Token, j::Integer) + Base.-(t::Token, j::Integer) Return the token that is `j` positions ahead (if `+`) or behind (if `-`) of `t`. Here, `t` is a token for a sorted container and `j` is an integer. If `j` is negative, then `+` regresses while `-` advances. @@ -1136,11 +1145,9 @@ or past-end positions in the container, then the before-start/past-end tokens are returned (and there is no error). Time: O(*j*+log *n*), so this function is not optimized for long jumps. """ -+(t1::Token, numstep::Integer) = - numstep >= 0 ? stepforward(t1, numstep) : stepback(t1, -numstep) +Base.:(+)(t1::Token, numstep::Integer) = numstep >= 0 ? stepforward(t1, numstep) : stepback(t1, -numstep) --(t1::Token, numstep::Integer) = - numstep >= 0 ? stepback(t1, numstep) : stepforward(t1, -numstep) +Base.:(-)(t1::Token, numstep::Integer) = numstep >= 0 ? 
stepback(t1, numstep) : stepforward(t1, -numstep) function stepforward(t1::Token, numstep::Integer) diff --git a/src/sorted_dict.jl b/src/sorted_dict.jl index 6b6fd2105..81ca82ea7 100644 --- a/src/sorted_dict.jl +++ b/src/sorted_dict.jl @@ -1,5 +1,5 @@ -## A SortedDict is a wrapper around balancedTree with -## methods similiar to those of Julia container Dict. +## A SortedDict is a wrapper around balancedTree with methods similar +## to those of Julia container Dict. mutable struct SortedDict{K, D, Ord <: Ordering} <: AbstractDict{K,D} bt::BalancedTree23{K,D,Ord} @@ -167,7 +167,7 @@ Assign or reassign the value associated with the key `k` to `newvalue`. Note that the key is also overwritten; this is not necessarily a no-op since the equivalence in the sort-order does not imply equality. -See also [`sd_push!(sd::SortedDict, p::Pair)`](@ref). +See also [`push_return_token!(sd::SortedDict, p::Pair)`](@ref). Time: O(*c* log *n*) """ @inline function Base.setindex!(m::SortedDict, d_, k_) @@ -183,7 +183,7 @@ Insert key-vaue pair `p`, i.e., a `k=>v` pair, into `sd`. If the key `k` is already present, this overwrites the old value. The key is also overwritten (not necessarily a no-op, since sort-order equivalence may differ from equality). -The return value is `sd`. See also [`sd_push!(sd::SortedDict, p::Pair)`]@ref. +The return value is `sd`. See also [`push_return_token!(sd::SortedDict, p::Pair)`](@ref). Time: O(*c* log *n*) """ @inline function Base.push!(m::SortedDict{K,D}, pr::Pair) where {K,D} @@ -193,7 +193,7 @@ end """ - findkey(sd::SortedDict, k) + DataStructures.findkey(sd::SortedDict, k) Return the semitoken that points to the item whose key is @@ -208,7 +208,7 @@ end """ - sd_push!(sd::SortedDict, p::Pair) + DataStructures.push_return_token!(sd::SortedDict, p::Pair) Insert pair `p` of the form `k=>v` into `sd`. If the key is already present in `sd`, this @@ -222,12 +222,11 @@ present) and whose second entry is the semitoken of the new entry.
This function replaces the deprecated `insert!(sd,k,v)`. Time: O(*c* log *n*) """ -@inline function sd_push!(m::SortedDict{K,D,Ord}, pr::Pair) where {K,D, Ord <: Ordering} - b, i = insert!(m.bt, convert(K,pr.first), convert(D,pr.second), false) +@inline function push_return_token!(m::SortedDict, pr::Pair) + b, i = insert!(m.bt, convert(keytype(m), pr.first), convert(valtype(m), pr.second), false) b, IntSemiToken(i) end -@deprecate insert!(m::SortedDict, k, d) sd_push!(m::SortedDict, k=>d) @@ -273,7 +272,7 @@ function Base.get(default_::Union{Function,Type}, m::SortedDict{K,D}, k_) where end Base.get(m::SortedDict, n::SortedDict, ::Any) = - throw_error("Ambiguous invocation of 'get'; please select the correct version using Base.invoke") + error("Ambiguous invocation of 'get'; please select the correct version using Base.invoke") """ @@ -391,16 +390,16 @@ function Base.isequal(m1::SortedDict{K, D, Ord}, m2::SortedDict{K, D, Ord}) wher if ord != orderobject(m2) return invoke((==), Tuple{AbstractDict, AbstractDict}, m1, m2) end - p1 = startof(m1) - p2 = startof(m2) + p1 = firstindex(m1) + p2 = firstindex(m2) while true p1 == pastendsemitoken(m1) && return p2 == pastendsemitoken(m2) p2 == pastendsemitoken(m2) && return false - k1,d1 = deref_nocheck((m1,p1)) - k2,d2 = deref_nocheck((m2,p2)) + @inbounds k1,d1 = deref((m1,p1)) + @inbounds k2,d2 = deref((m2,p2)) (!eq(ord,k1,k2) || !isequal(d1,d2)) && return false - p1 = advance_nocheck((m1,p1)) - p2 = advance_nocheck((m2,p2)) + @inbounds p1 = advance((m1,p1)) + @inbounds p2 = advance((m2,p2)) end end @@ -431,7 +430,7 @@ struct MergeManySortedDicts{K, D, Ord <: Ordering} end function Base.iterate(sds::MergeManySortedDicts{K, D, Ord}, - state = [startof(sds.vec[i]) for i=1:length(sds.vec)]) where + state = [firstindex(sds.vec[i]) for i=1:length(sds.vec)]) where {K, D, Ord <: Ordering} ord = orderobject(sds.vec[1]) firsti = 0 @@ -444,10 +443,10 @@ function Base.iterate(sds::MergeManySortedDicts{K, D, Ord}, end firsti == 0 && 
return nothing foundi = firsti - firstk = deref_key_nocheck((sds.vec[firsti], state[firsti])) + @inbounds firstk = deref_key((sds.vec[firsti], state[firsti])) for i = firsti + 1 : N if state[i] != pastendsemitoken(sds.vec[i]) - k2 = deref_key_nocheck((sds.vec[i], state[i])) + @inbounds k2 = deref_key((sds.vec[i], state[i])) if !lt(ord, firstk, k2) foundi = i firstk = k2 @@ -456,12 +455,12 @@ function Base.iterate(sds::MergeManySortedDicts{K, D, Ord}, end foundsemitoken = state[foundi] for i = firsti : N - if state[i] != pastendsemitoken(sds.vec[i]) && - eq(ord, deref_key_nocheck((sds.vec[i], state[i])), firstk) - state[i] = advance_nocheck((sds.vec[i], state[i])) + @inbounds if state[i] != pastendsemitoken(sds.vec[i]) && + eq(ord, deref_key((sds.vec[i], state[i])), firstk) + state[i] = advance((sds.vec[i], state[i])) end end - (deref_nocheck((sds.vec[foundi], foundsemitoken)), state) + @inbounds return (deref((sds.vec[foundi], foundsemitoken)), state) end """ diff --git a/src/sorted_multi_dict.jl b/src/sorted_multi_dict.jl index 87b4a5908..564d836f4 100644 --- a/src/sorted_multi_dict.jl +++ b/src/sorted_multi_dict.jl @@ -20,10 +20,9 @@ SortedMultiDict{K,D,Ord}(o::Ord=Forward) where {K,D,Ord<:Ordering} = function SortedMultiDict{K,D,Ord}(o::Ord, kv) where {K,D,Ord<:Ordering} smd = SortedMultiDict{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) for (k,v) in kv - smd_push!(smd, k=>v) + push_return_token!(smd, k=>v) end return smd - end @@ -150,25 +149,27 @@ const SMDToken = Tuple{SortedMultiDict, IntSemiToken} """ - smd_push!(smd::SortedMultiDict, pr::Pair) + DataStructures.push_return_token!(smd::SortedMultiDict, pr::Pair) Insert the key-value pair `pr`, i.e., `k=>v`, into `smd`. If `k` already appears as a key in `smd`, then `k=>v` is inserted in the rightmost position after existing -items with key `k`. Unlike `push!`, `smd_push!` returns -a semitoken referring to the new item. This function replaces +items with key `k`. 
Unlike `push!`, +the +return value is a 2-tuple whose first entry is boolean +always equal to `true` and whose second entry is the semitoken of the new entry. +(The reason for returning a bool whose value is always `true` is for consistency +with `push_return_token!` for SortedDict and SortedSet.) +This function replaces the deprecated `insert!`. Time: O(*c* log *n*) """ -@inline function smd_push!(m::SortedMultiDict{K,D,Ord}, pr::Pair) where {K, D, Ord <: Ordering} - b, i = insert!(m.bt, convert(K,pr.first), convert(D,pr.second), true) - IntSemiToken(i) +@inline function push_return_token!(m::SortedMultiDict, pr::Pair) + b, i = insert!(m.bt, convert(keytype(m),pr.first), convert(valtype(m),pr.second), true) + b, IntSemiToken(i) end -@deprecate insert!(m::SortedMultiDict, k, d) smd_push!(m::SortedMultiDict, k=>d) - - """ Base.push!(smd::SortedMultiDict, p::Pair) @@ -176,7 +177,7 @@ Insert the pair `p`, i.e., a `k=>v` into `smd`. If `k` already appears as a key in `smd`, then `k=>v` is inserted in the rightmost position after existing items with key `k`. Returns the container. -See also [`smd_push(smd::SortedMultiDict, p::Pair`]@ref. +See also [`push_return_token!(smd::SortedMultiDict, p::Pair)`](@ref). 
Time: O(*c* log *n*) """ @inline function Base.push!(m::SortedMultiDict{K,D}, pr::Pair) where {K,D} @@ -186,7 +187,7 @@ end """ - searchequalrange(smd::SortedMultiDict, k) + DataStructures.searchequalrange(smd::SortedMultiDict, k) Return two semitokens that correspond to the first and last items in the SortedMultiDict that have key exactly equal @@ -273,8 +274,8 @@ function Base.isequal(m1::SortedMultiDict{K, D, Ord}, if ord != orderobject(m2) return false end - p1 = startof(m1) - p2 = startof(m2) + p1 = firstindex(m1) + p2 = firstindex(m2) while true if p1 == pastendsemitoken(m1) return p2 == pastendsemitoken(m2) @@ -282,11 +283,11 @@ function Base.isequal(m1::SortedMultiDict{K, D, Ord}, if p2 == pastendsemitoken(m2) return false end - k1,d1 = deref_nocheck((m1,p1)) - k2,d2 = deref_nocheck((m2,p2)) + @inbounds k1,d1 = deref((m1,p1)) + @inbounds k2,d2 = deref((m2,p2)) (!eq(ord,k1,k2) || !isequal(d1,d2)) && return false - p1 = advance_nocheck((m1,p1)) - p2 = advance_nocheck((m2,p2)) + @inbounds p1 = advance((m1,p1)) + @inbounds p2 = advance((m2,p2)) end end @@ -316,7 +317,7 @@ struct MergeManySortedMultiDicts{K, D, Ord <: Ordering} end function Base.iterate(sds::MergeManySortedMultiDicts{K, D, Ord}, - state = [startof(sds.vec[k]) for k=1:length(sds.vec)]) where + state = [firstindex(sds.vec[k]) for k=1:length(sds.vec)]) where {K, D, Ord <: Ordering} ord = orderobject(sds.vec[1]) firsti = 0 @@ -329,10 +330,10 @@ function Base.iterate(sds::MergeManySortedMultiDicts{K, D, Ord}, end firsti == 0 && return nothing foundi = firsti - firstk = deref_key_nocheck((sds.vec[firsti], state[firsti])) + @inbounds firstk = deref_key((sds.vec[firsti], state[firsti])) for i = firsti + 1 : N if state[i] != pastendsemitoken(sds.vec[i]) - k2 = deref_key_nocheck((sds.vec[i], state[i])) + @inbounds k2 = deref_key((sds.vec[i], state[i])) if lt(ord, k2, firstk) foundi = i firstk = k2 @@ -340,8 +341,8 @@ function Base.iterate(sds::MergeManySortedMultiDicts{K, D, Ord}, end end foundsemitoken = 
state[foundi] - state[foundi] = advance_nocheck((sds.vec[foundi], foundsemitoken)) - (deref_nocheck((sds.vec[foundi], foundsemitoken)), state) + @inbounds state[foundi] = advance((sds.vec[foundi], foundsemitoken)) + @inbounds return (deref((sds.vec[foundi], foundsemitoken)), state) end """ diff --git a/src/sorted_set.jl b/src/sorted_set.jl index b19ebb1bc..b3d55e0de 100644 --- a/src/sorted_set.jl +++ b/src/sorted_set.jl @@ -111,9 +111,8 @@ is returned. Time: O(*c* log *n*) end - """ - ss_push!(ss::SortedSet, k) + DataStructures.push_return_token!(ss::SortedSet, k) Insert the element `k` into the SortedSet `sc`. @@ -127,14 +126,11 @@ second entry is the semitoken of the new entry. This function replaces the deprecated `insert!`. Time: O(*c* log *n*) """ -@inline function ss_push!(m::SortedSet, k_) +@inline function push_return_token!(m::SortedSet, k_) b, i = insert!(m.bt, convert(keytype(m),k_), nothing, false) return b, IntSemiToken(i) end -@deprecate insert!(m::SortedSet, k) ss_push!(m::SortedSet, k) - - """ Base.push!(ss::SortedSet, k) @@ -143,9 +139,9 @@ Insert the element `k` into the sorted set `ss`. If the `k` is already present, this overwrites the old value. (This is not necessarily a no-op; see remarks about the customizing the sort order.) -See also [`ss_push!(ss::SortedSet, k)`]@ref. +See also [`push_return_token!(ss::SortedSet, k)`](@ref). The return value is -`sc`. Time: O(*c* log *n*) +`ss`. 
Time: O(*c* log *n*) """ @inline function Base.push!(m::SortedSet, k_) b, i = insert!(m.bt, convert(keytype(m),k_), nothing, false) @@ -277,21 +273,21 @@ function Base.isequal(m1::SortedSet{K, Ord}, m2::SortedSet{K, Ord}) where {K, Or if ord != orderobject(m2) return invoke(issetequal, Tuple{AbstractSet, AbstractSet}, m1, m2) end - p1 = startof(m1) - p2 = startof(m2) + p1 = firstindex(m1) + p2 = firstindex(m2) while true p1 == pastendsemitoken(m1) && return p2 == pastendsemitoken(m2) p2 == pastendsemitoken(m2) && return false - k1 = deref_nocheck((m1,p1)) - k2 = deref_nocheck((m2,p2)) + @inbounds k1 = deref((m1,p1)) + @inbounds k2 = deref((m2,p2)) !eq(ord,k1,k2) && return false - p1 = advance_nocheck((m1,p1)) - p2 = advance_nocheck((m2,p2)) + @inbounds p1 = advance((m1,p1)) + @inbounds p2 = advance((m2,p2)) end end -Base.issetequal(m1::SortedSet, m2::SortedSet) = isequal(m1::SortedSet, m2::SortedSet) +Base.issetequal(m1::SortedSet, m2::SortedSet) = isequal(m1, m2) """ @@ -317,7 +313,7 @@ struct UnionManySortedSets{K, Ord <: Ordering} end function Base.iterate(ss::UnionManySortedSets{K, Ord}, - state = [startof(ss.vec[k]) for k=1:length(ss.vec)]) where + state = [firstindex(ss.vec[k]) for k=1:length(ss.vec)]) where {K, Ord <: Ordering} ord = orderobject(ss.vec[1]) N = length(ss.vec) @@ -330,10 +326,10 @@ function Base.iterate(ss::UnionManySortedSets{K, Ord}, end firsti == 0 && return nothing foundi = firsti - firstk = deref_nocheck((ss.vec[firsti], state[firsti])) + @inbounds firstk = deref((ss.vec[firsti], state[firsti])) for i = firsti + 1 : N if state[i] != pastendsemitoken(ss.vec[i]) - k2 = deref_nocheck((ss.vec[i], state[i])) + @inbounds k2 = deref((ss.vec[i], state[i])) if !lt(ord, firstk, k2) foundi = i firstk = k2 @@ -342,8 +338,8 @@ function Base.iterate(ss::UnionManySortedSets{K, Ord}, end for i = firsti : N if state[i] != pastendsemitoken(ss.vec[i]) && - eq(ord, deref_nocheck((ss.vec[i], state[i])), firstk) - state[i] = advance_nocheck((ss.vec[i], 
state[i])) + @inbounds eq(ord, deref((ss.vec[i], state[i])), firstk) + @inbounds state[i] = advance((ss.vec[i], state[i])) end end (firstk, state) @@ -398,26 +394,26 @@ struct TwoSortedSets_State end function Base.iterate(twoss::IntersectTwoSortedSets, - state = TwoSortedSets_State(startof(twoss.m1), - startof(twoss.m2))) + state = TwoSortedSets_State(firstindex(twoss.m1), + firstindex(twoss.m2))) m1 = twoss.m1 m2 = twoss.m2 ord = orderobject(m1) p1 = state.p1 p2 = state.p2 while p1 != pastendsemitoken(m1) && p2 != pastendsemitoken(m2) - k1 = deref_nocheck((m1, p1)) - k2 = deref_nocheck((m2, p2)) + @inbounds k1 = deref((m1, p1)) + @inbounds k2 = deref((m2, p2)) if lt(ord, k1, k2) - p1 = advance_nocheck((m1, p1)) + @inbounds p1 = advance((m1, p1)) continue end if lt(ord, k2, k1) - p2 = advance_nocheck((m2, p2)) + @inbounds p2 = advance((m2, p2)) continue end - return (k1, TwoSortedSets_State(advance_nocheck((m1, p1)), - advance_nocheck((m2, p2)))) + @inbounds return (k1, TwoSortedSets_State(advance((m1, p1)), + advance((m2, p2)))) end return nothing end @@ -474,8 +470,8 @@ struct SymdiffTwoSortedSets{K,Ord <: Ordering} end function Base.iterate(twoss::SymdiffTwoSortedSets, - state = TwoSortedSets_State(startof(twoss.m1), - startof(twoss.m2))) + state = TwoSortedSets_State(firstindex(twoss.m1), + firstindex(twoss.m2))) m1 = twoss.m1 m2 = twoss.m2 ord = orderobject(m1) @@ -488,23 +484,23 @@ function Base.iterate(twoss::SymdiffTwoSortedSets, return nothing end if m1end - return (deref_nocheck((m2, p2)), - TwoSortedSets_State(p1, advance_nocheck((m2,p2)))) + @inbounds return (deref((m2, p2)), + TwoSortedSets_State(p1, advance((m2,p2)))) end if m2end - return (deref_nocheck((m1, p1)), - TwoSortedSets_State(advance_nocheck((m1,p1)), p2)) + @inbounds return (deref((m1, p1)), + TwoSortedSets_State(advance((m1,p1)), p2)) end - k1 = deref_nocheck((m1, p1)) - k2 = deref_nocheck((m2, p2)) + @inbounds k1 = deref((m1, p1)) + @inbounds k2 = deref((m2, p2)) if lt(ord, k1, k2) - 
return (k1, TwoSortedSets_State(advance_nocheck((m1,p1)), p2)) + @inbounds return (k1, TwoSortedSets_State(advance((m1,p1)), p2)) end if lt(ord, k2, k1) - return (k2, TwoSortedSets_State(p1, advance_nocheck((m2,p2)))) + @inbounds return (k2, TwoSortedSets_State(p1, advance((m2,p2)))) end - p1 = advance_nocheck((m1,p1)) - p2 = advance_nocheck((m2,p2)) + @inbounds p1 = advance((m1,p1)) + @inbounds p2 = advance((m2,p2)) end end @@ -552,8 +548,8 @@ struct SetdiffTwoSortedSets{K, Ord <: Ordering} end function Base.iterate(twoss::SetdiffTwoSortedSets, - state = TwoSortedSets_State(startof(twoss.m1), - startof(twoss.m2))) + state = TwoSortedSets_State(firstindex(twoss.m1), + firstindex(twoss.m2))) m1 = twoss.m1 m2 = twoss.m2 ord = orderobject(m1) @@ -566,17 +562,17 @@ function Base.iterate(twoss::SetdiffTwoSortedSets, return nothing end if m2end - return (deref_nocheck((m1, p1)), TwoSortedSets_State(advance_nocheck((m1,p1)), p2)) + @inbounds return (deref((m1, p1)), TwoSortedSets_State(advance((m1,p1)), p2)) end - k1 = deref_nocheck((m1, p1)) - k2 = deref_nocheck((m2, p2)) + @inbounds k1 = deref((m1, p1)) + @inbounds k2 = deref((m2, p2)) if lt(ord, k1, k2) - return (k1, TwoSortedSets_State(advance_nocheck((m1,p1)), p2)) + @inbounds return (k1, TwoSortedSets_State(advance((m1,p1)), p2)) end if !lt(ord, k2, k1) - p1 = advance_nocheck((m1,p1)) + @inbounds p1 = advance((m1,p1)) end - p2 = advance_nocheck((m2, p2)) + @inbounds p2 = advance((m2, p2)) end end @@ -656,19 +652,19 @@ function Base.issubset(m1::SortedSet{K,Ord}, m2::SortedSet{K,Ord}) where {K, Ord length(m1) < length(m2) / log2(length(m2) + 2) return invoke(issubset, Tuple{Any, SortedSet}, m1, m2) end - p1 = startof(m1) - p2 = startof(m2) + p1 = firstindex(m1) + p2 = firstindex(m2) while p1 != pastendsemitoken(m1) p2 == pastendsemitoken(m2) && return false - k1 = deref_nocheck((m1, p1)) - k2 = deref_nocheck((m2, p2)) + @inbounds k1 = deref((m1, p1)) + @inbounds k2 = deref((m2, p2)) if eq(ord, k1, k2) - p1 = 
advance_nocheck((m1,p1)) - p2 = advance_nocheck((m2,p2)) + @inbounds p1 = advance((m1,p1)) + @inbounds p2 = advance((m2,p2)) elseif lt(ord, k1,k2) return false else - p2 = advance_nocheck((m2,p2)) + @inbounds p2 = advance((m2,p2)) end end return true @@ -717,11 +713,6 @@ packdeepcopy(m::SortedSet{K,Ord}) where {K, Ord <: Ordering} = SortedSet{K}(Val(true), deepcopy(m), orderobject(m)) -function Base.show(io::IO, m::SortedSet{K,Ord}) where {K,Ord <: Ordering} - print(io, "SortedSet{", K, ",", Ord, "}(") - print(io, collect(m), ",", orderobject(m), ")") -end - """ Base.empty(sc) diff --git a/test/test_deprecations.jl b/test/test_deprecations.jl index 09d9827ad..3234a2f7a 100644 --- a/test/test_deprecations.jl +++ b/test/test_deprecations.jl @@ -126,4 +126,7 @@ end @test isa(insert!(s2, 5, "hello"), IntSemiToken) s3 = SortedSet{Int}() @test isa(insert!(s3, 5), Tuple{Bool, IntSemiToken}) + s4 = SortedDict{Int,String}(3=>"o", 4=>"p") + @test deref_key((s4,startof(s4))) == 3 + @test deref_key((s4,endof(s4))) == 4 end diff --git a/test/test_sorted_containers.jl b/test/test_sorted_containers.jl index ac047de76..5383d84b0 100644 --- a/test/test_sorted_containers.jl +++ b/test/test_sorted_containers.jl @@ -1,5 +1,4 @@ -using Base: Ordering, Forward, Reverse, ForwardOrdering, - ReverseOrdering +using Base: Ordering, Forward, Reverse, ForwardOrdering, ReverseOrdering using DataStructures: IntSemiToken import Base.lt import DataStructures.eq @@ -12,6 +11,9 @@ lt(::CaseInsensitive, a, b) = isless(lowercase(a), lowercase(b)) eq(::CaseInsensitive, a, b) = isequal(lowercase(a), lowercase(b)) + +# For testing order objects + struct ForBack <: Ordering flag::Bool end @@ -38,18 +40,7 @@ function my_primes(N) p end -function remove_spaces(s::String) - b = Vector{UInt8}() - for c in s - if !isspace(c) - push!(b,UInt8(c)) - end - end - String(b) -end - - - +remove_spaces(s::String) = replace(s, r"\s+"=>"") @@ -256,7 +247,7 @@ function testSortedDictBasic() checkcorrectness(m1.bt, false) 
# fulldump(m1.bt) end - i1 = startof(m1) + i1 = firstindex(m1) count = 0 while i1 != pastendsemitoken(m1) count += 1 @@ -301,7 +292,7 @@ function testSortedDictMethods() my_assert(typeof(m08b) == SortedDict{Real,Any,ForwardOrdering}) m09a = SortedDict(Pair{Int}[1=>2, 3=>'a']) my_assert(typeof(m09a) == SortedDict{Int,Any,ForwardOrdering}) - m09b = SortedDict([(1,2), (3,'a')]) + m09b = SortedDict([(1,2), (3,'a')]) # test issue 239 my_assert(typeof(m09a) == SortedDict{Int,Any,ForwardOrdering}) my_assert(m0 == m02) @@ -318,7 +309,7 @@ function testSortedDictMethods() expected = ([6,8,12], [18.2, 32.0, 33.1]) checkcorrectness(m1.bt, false) - ii = startof(m1) + ii = firstindex(m1) m2 = packdeepcopy(m1) m3 = packcopy(m1) p = first(m1) @@ -344,7 +335,7 @@ function testSortedDictMethods() checkcorrectness(m1.bt, false) checkcorrectness(m2.bt, false) my_assert(length(m2) == 3) - ii = startof(m2) + ii = firstindex(m2) for j = 1 : 3 pr = deref((m2,ii)) my_assert(pr[1] == expected[1][j] && pr[2] == expected[2][j]) @@ -353,7 +344,7 @@ function testSortedDictMethods() checkcorrectness(m3.bt, false) my_assert(length(m3) == 3) - ii = startof(m3) + ii = firstindex(m3) for j = 1 : 3 pr = deref((m3,ii)) my_assert(pr[1] == expected[1][j] && pr[2] == expected[2][j]) @@ -447,7 +438,7 @@ function testSortedDictMethods() m1[6] = 49.0 my_assert(length(m1) == numprimes + 1) my_assert(m1[6] == 49.0) - b, i6 = sd_push!(m1, 6=>50.0) + b, i6 = push_return_token!(m1, 6=>50.0) my_assert(length(m1) == numprimes + 1) my_assert(!b) p = deref((m1,i6)) @@ -456,7 +447,7 @@ function testSortedDictMethods() p = deref((m1,i6)) my_assert(p[1] == 6 && p[2] == 9.0) my_assert(m1[i6] == 9.0) - b2, i7 = sd_push!(m1, 8=>51.0) + b2, i7 = push_return_token!(m1, 8=>51.0) my_assert(b2) my_assert(length(m1) == numprimes + 2) p = deref((m1,i7)) @@ -465,7 +456,7 @@ function testSortedDictMethods() z = pop!(m1, 6) checkcorrectness(m1.bt, false) my_assert(z == 9.0) - i8 = startof(m1) + i8 = firstindex(m1) p = 
deref((m1,i8)) my_assert(p[1] == 2 && p[2] == 4.0) my_assert(i8 != beforestartsemitoken(m1)) @@ -642,8 +633,8 @@ function testSortedDictLoops() m1[bitreverse(lUi)] = lUi end count = 0 - for (stok,k,v) in semitokens(inclusive(m1, startof(m1), lastindex(m1))) - for (stok2,k2,v2) in semitokens(exclusive(m1, startof(m1), pastendsemitoken(m1))) + for (stok,k,v) in semitokens(inclusive(m1, firstindex(m1), lastindex(m1))) + for (stok2,k2,v2) in semitokens(exclusive(m1, firstindex(m1), pastendsemitoken(m1))) c = compare(m1,stok,stok2) if c < 0 my_assert(deref_key((m1,stok)) < deref_key((m1,stok2))) @@ -655,7 +646,7 @@ function testSortedDictLoops() count += 1 end end - my_assert(eltype(semitokens(exclusive(m1, startof(m1), pastendsemitoken(m1)))) == + my_assert(eltype(semitokens(exclusive(m1, firstindex(m1), pastendsemitoken(m1)))) == Tuple{IntSemiToken, T, T}) my_assert(count == N^2) N = 1000 @@ -734,25 +725,25 @@ function testSortedDictLoops() pos1 = searchsortedfirst(m1, div(N,2)) sk2 = zero1 - for k in keys(exclusive(m1, startof(m1), pos1)) + for k in keys(exclusive(m1, firstindex(m1), pos1)) sk2 += k end my_assert(sk2 == skhalf) - my_assert(eltype(keys(exclusive(m1, startof(m1), pos1))) == T) + my_assert(eltype(keys(exclusive(m1, firstindex(m1), pos1))) == T) sk2a = zero1 - for k in eachindex(exclusive(m1, startof(m1), pos1)) + for k in eachindex(exclusive(m1, firstindex(m1), pos1)) sk2a += k end my_assert(sk2a == skhalf) - my_assert(eltype(eachindex(exclusive(m1, startof(m1), pos1))) == T) + my_assert(eltype(eachindex(exclusive(m1, firstindex(m1), pos1))) == T) sv2 = zero1 - for v in values(exclusive(m1, startof(m1), pos1)) + for v in values(exclusive(m1, firstindex(m1), pos1)) sv2 += v end my_assert(sv2 == svhalf) @@ -760,26 +751,26 @@ function testSortedDictLoops() for (k,v) in exclusive(m1, pastendsemitoken(m1), pastendsemitoken(m1)) count += 1 end - my_assert(eltype(keys(exclusive(m1, startof(m1), pos1))) == T) + my_assert(eltype(keys(exclusive(m1, 
firstindex(m1), pos1))) == T) my_assert(count == 0) count = 0 - for (k,v) in inclusive(m1, startof(m1), beforestartsemitoken(m1)) + for (k,v) in inclusive(m1, firstindex(m1), beforestartsemitoken(m1)) count += 1 end my_assert(count == 0) - my_assert(eltype(keys(inclusive(m1, startof(m1), beforestartsemitoken(m1)))) == T) + my_assert(eltype(keys(inclusive(m1, firstindex(m1), beforestartsemitoken(m1)))) == T) count = 0 sk5 = zero1 - for k in eachindex(inclusive(m1, startof(m1), startof(m1))) + for k in eachindex(inclusive(m1, firstindex(m1), firstindex(m1))) sk5 += k count += 1 end - my_assert(count == 1 && sk5 == deref_key((m1,startof(m1)))) - my_assert(eltype(eachindex(inclusive(m1, startof(m1), startof(m1)))) == T) + my_assert(count == 1 && sk5 == deref_key((m1,firstindex(m1)))) + my_assert(eltype(eachindex(inclusive(m1, firstindex(m1), firstindex(m1)))) == T) factors = SortedMultiDict{Int,Int}() N = 1000 @@ -788,7 +779,7 @@ function testSortedDictLoops() sum2 = 0 for factor = 1 : N for multiple = factor : factor : N - smd_push!(factors, multiple=>factor) + push_return_token!(factors, multiple=>factor) sum1 += multiple sum2 += factor len += 1 @@ -1276,7 +1267,7 @@ function testSortedMultiDict() len = 0 for factor = 1 : N for multiple = factor : factor : N - smd_push!(factors, multiple=>factor) + push_return_token!(factors, multiple=>factor) len += 1 end end @@ -1317,7 +1308,7 @@ function testSortedMultiDict() my_assert(60 in keys(factors)) my_assert(!(-1 in keys(factors))) checkcorrectness(factors.bt, true) - i = startof(factors) + i = firstindex(factors) i = advance((factors,i)) my_assert(deref((factors,i)) == Pair(2,1)) my_assert(deref_key((factors,i)) == 2) @@ -1349,7 +1340,7 @@ function testSortedMultiDict() my_assert(compare(factors,i,i2) != 0) my_assert(compare(factors,regress((factors,i)),i2) == 0) my_assert(compare(factors,i,i1) != 0) - smd_push!(factors, 80=>6) + push_return_token!(factors, 80=>6) my_assert(length(factors) == len + 1) 
checkcorrectness(factors.bt, true) expected1 = deepcopy(expected) @@ -1377,7 +1368,7 @@ function testSortedMultiDict() checkcorrectness(factors.bt, true) my_assert(length(factors) == 0) my_assert(isempty(factors)) - i = startof(factors) + i = firstindex(factors) my_assert(i == pastendsemitoken(factors)) i = lastindex(factors) my_assert(i == beforestartsemitoken(factors)) @@ -1394,7 +1385,8 @@ function testSortedMultiDict() my_assert(isequal(m1,m2)) my_assert(!isequal(m1,m3)) my_assert(!isequal(m1, SortedMultiDict("apples"=>2.0))) - stok = smd_push!(m2, "cherries"=>6.1) + b,stok = push_return_token!(m2, "cherries"=>6.1) + my_assert(b) checkcorrectness(m2.bt, true) my_assert(!isequal(m1,m2)) delete!((m2,stok)) @@ -1425,6 +1417,8 @@ function testSortedMultiDict() length(m3empty) == 0) m4 = merge(m1, m2) my_assert(isequal(m3, m4)) + m5a = SortedMultiDict(Pair{Any,Any}[1=>4.5, 2=>8, 8=>6//7]) #address issue 239 + my_assert(eltype(m5a) == Pair{Int,Real}) m5 = merge(m2, m1) my_assert(!isequal(m3, m5)) merge!(m1, m2) @@ -1432,10 +1426,10 @@ function testSortedMultiDict() m7 = SortedMultiDict{Int,Int}() n1 = 10000 for k = 1 : n1 - smd_push!(m7, k=>k+1) + push_return_token!(m7, k=>k+1) end for k = 1 : n1 - smd_push!(m7, k=>k+2) + push_return_token!(m7, k=>k+2) end for k = 1 : n1 i1, i2 = searchequalrange(m7, k) @@ -1482,30 +1476,32 @@ function testSortedMultiDict() my_assert(DataStructures.isordered(SortedMultiDict{Int, String})) # issue #773 s = SortedMultiDict{Int, Int}() - smd_push!(s, 4=>41) - smd_push!(s, 3=>31) - smd_push!(s, 2=>21) - smd_push!(s, 2=>22) - smd_push!(s, 2=>23) - smd_push!(s, 2=>24) - smd_push!(s, 2=>25) - smd_push!(s, 2=>26) - smd_push!(s, 1=>11) - smd_push!(s, 1=>12) - st1 = smd_push!(s, 1=>13) - st2 = smd_push!(s, 1=>14) - st3 = smd_push!(s, 1=>15) - st4 = smd_push!(s, 1=>16) - st5 = smd_push!(s, 1=>17) - st6 = smd_push!(s, 1=>18) + push_return_token!(s, 4=>41) + push_return_token!(s, 3=>31) + push_return_token!(s, 2=>21) + push_return_token!(s, 
2=>22) + push_return_token!(s, 2=>23) + push_return_token!(s, 2=>24) + push_return_token!(s, 2=>25) + push_return_token!(s, 2=>26) + push_return_token!(s, 1=>11) + push_return_token!(s, 1=>12) + _,st1 = push_return_token!(s, 1=>13) + _,st2 = push_return_token!(s, 1=>14) + _,st3 = push_return_token!(s, 1=>15) + _,st4 = push_return_token!(s, 1=>16) + _,st5 = push_return_token!(s, 1=>17) + _,st6 = push_return_token!(s, 1=>18) delete!((s, st6)) delete!((s, st5)) delete!((s, st4)) delete!((s, st3)) delete!((s, st2)) delete!((s, st1)) - smd_push!(s, 1=>19) + push_return_token!(s, 1=>19) checkcorrectness(s.bt, true) + my_assert(length(SortedMultiDict{Int,Int}(Val(true), [1=>2,3=>7,3=>15])) == 3) + my_assert(length(SortedMultiDict{Int,Int}(Val(true), [8=>2,3=>7,3=>15], Reverse)) == 3) true end @@ -1533,6 +1529,9 @@ function testSortedSet() my_assert(typeof(SortedSet([1,2,3], Reverse)) == SortedSet{Int, ReverseOrdering{ForwardOrdering}}) my_assert(typeof(SortedSet{Float32}([1,2,3], Reverse)) == SortedSet{Float32, ReverseOrdering{ForwardOrdering}}) + my_assert(length(SortedSet{Int}(Val(true), [1,3,7])) == 3) + my_assert(length(SortedSet{Int}(Val(true), [8,3,1], Reverse)) == 3) + ss1 = SortedSet{String}(["berry", "cherry", "apple", "grape"]) q = popfirst!(ss1) my_assert(q == "apple") @@ -1542,6 +1541,8 @@ function testSortedSet() my_assert(ss1 == Set(["cherry", "berry"])) my_assert(isequal(SortedSet{Int,ForBack}(ForBack(true), [5,6,4,8]), SortedSet{Int,ForBack}(ForBack(false), [5,6,4,8]))) + my_assert(issetequal(SortedSet{Int,ForBack}(ForBack(true), [5,6,4,8]), + SortedSet{Int,ForBack}(ForBack(false), [5,6,4,8]))) my_assert(!isequal(SortedSet{Int,ForBack}(ForBack(true), [5,6,4,8]), SortedSet{Int,ForBack}(ForBack(false), [5,6,6,8]))) my_assert(isequal(SortedSet{Int,ForBack}(ForBack(true), [5,6,4,8]), @@ -1554,6 +1555,8 @@ function testSortedSet() @test_throws ArgumentError SortedSet(Reverse, Reverse) @test_throws ArgumentError SortedSet{Int}(Reverse, Reverse) @test_throws 
ArgumentError SortedSet{Int}(Val(true), Reverse) + @test_throws ErrorException deref_key(token_firstindex(SortedSet{Int}([6]))) + @test_throws ErrorException deref_value(token_firstindex(SortedSet{Int}([6]))) smallest = 10.0 largest = -10.0 @@ -1565,11 +1568,11 @@ function testSortedSet() smallest = min(smallest,ui) largest = max(largest,ui) end - isnew,st = ss_push!(m, 72.5) + isnew,st = push_return_token!(m, 72.5) my_assert(isnew) my_assert(deref((m,st)) == 72.5) delete!((m,st)) - isnew,st = ss_push!(m, 73.5) + isnew,st = push_return_token!(m, 73.5) my_assert(isnew) my_assert(deref((m,st)) == 73.5) delete!(m, 73.5) @@ -1586,7 +1589,7 @@ function testSortedSet() my_assert(count == N) my_assert(length(m) == N) ii2 = searchsortedfirst(m, 0.5) - i3 = startof(m) + i3 = firstindex(m) v = first(m) my_assert(v == smallest) my_assert(deref((m,i3)) == v) @@ -1664,6 +1667,7 @@ function testSortedSet() Base.emptymutable(m3) == m3empty Base.emptymutable(m3, Char) == m3empty_char end + my_assert(eltype(SortedSet((4,4.5,6//7))) == Real) # issue 239 m3_reverse = SortedSet(DataStructures.FasterReverse(), ["orange", "yellow", "red"]) let m3empty_reverse_char = empty(m3_reverse, Char) @test typeof(m3empty_reverse_char) == SortedSet{Char, DataStructures.FasterReverse} @@ -1802,6 +1806,9 @@ function testSortedDictConstructors() my_assert(typeof(SortedDict{Int,Int}(Reverse)) == SortedDict{Int,Int,ReverseOrdering{ForwardOrdering}}) my_assert(typeof(SortedDict{Int,Int}(Reverse, 1=>2)) == SortedDict{Int,Int,ReverseOrdering{ForwardOrdering}}) my_assert(typeof(SortedDict{Int,Int}(1=>2)) == SortedDict{Int,Int,ForwardOrdering}) + my_assert(length(SortedDict{Int,Int}(Val(true), [1=>2,3=>7,8=>15])) == 3) + my_assert(length(SortedDict{Int,Int}(Val(true), [8=>2,3=>7,1=>15], Reverse)) == 3) + # @test_throws ArgumentError SortedDict(Reverse, Reverse) true @@ -1996,7 +2003,7 @@ function testTokens() end my_assert(count == 5) count = 0 - for t in token_firstindex(smd1) : token_lastindex(smd1) + for 
t in token_firstindex(smd1) : token_lastindex(smd1) # address issue 669 count += 1 my_assert(deref_value(t) == result2[count]) end @@ -2070,6 +2077,7 @@ end #@test_throws ArgumentError isequal(SortedSet(["a"]), SortedSet(["b"],Reverse)) #@test_throws ErrorException (("a",6) in m) #@test_throws ArgumentError ((2,5) in m1) + @test_throws ErrorException get(SortedDict(1=>3), SortedDict(1=>3), 1) s = SortedSet([10,30,50]) @test pop!(s,10) == 10 From 879703e6140896e2ef91101ed60342901827e99c Mon Sep 17 00:00:00 2001 From: Stephen Vavasis Date: Thu, 19 May 2022 09:28:11 -0400 Subject: [PATCH 5/8] Two fixes: Renamed push_return_token! to push_return_semitoken!, which accurately describes the operation. Documented the fact that values(s) is not currently writeable. --- src/sorted_container_iteration.jl | 15 +++++++++ src/sorted_dict.jl | 8 ++--- src/sorted_set.jl | 4 +-- test/test_sorted_containers.jl | 54 +++++++++++++++---------------- 4 files changed, 48 insertions(+), 33 deletions(-) diff --git a/src/sorted_container_iteration.jl b/src/sorted_container_iteration.jl index 8f4208a48..9f841c939 100644 --- a/src/sorted_container_iteration.jl +++ b/src/sorted_container_iteration.jl @@ -996,6 +996,21 @@ while `k` will be the corresponding keys. The returned keys lie between `key1` ``` Same as above, except the iteration is in the reverse order. +Writing on the objects returned by `values` is not currently supported, e.g., +the following `map!` statement is not implemented even though the +analogous statement is available for `Dict` in Base. +```julia + s = SortedDict(3=>4) + map!(x -> x*2, values(s)) +``` +The workaround is an explicit loop: +```julia + s = SortedDict(3=>4) + for t in onlysemitokens(s) + s[t] *= 2 + end +``` + Running time for all iterations: O(*c*(*s* + log *n*)), where *s* is the number of steps from start to end of the iteration. 
""" diff --git a/src/sorted_dict.jl b/src/sorted_dict.jl index 81ca82ea7..203a9e164 100644 --- a/src/sorted_dict.jl +++ b/src/sorted_dict.jl @@ -167,7 +167,7 @@ Assign or reassign the value associated with the key `k` to `newvalue`. Note that the key is also overwritten; this is not necessarily a no-op since the equivalence in the sort-order does not imply equality. -See also [`push_return_token!(sd::SortedDict, p::Pair)`](@ref). +See also [`push_return_semitoken!(sd::SortedDict, p::Pair)`](@ref). Time: O(*c* log *n*) """ @inline function Base.setindex!(m::SortedDict, d_, k_) @@ -183,7 +183,7 @@ Insert key-vaue pair `p`, i.e., a `k=>v` pair, into `sd`. If the key `k` is already present, this overwrites the old value. The key is also overwritten (not necessarily a no-op, since sort-order equivalence may differ from equality). -The return value is `sd`. See also [`push_return_token!(sd::SortedDict, p::Pair)`](@ref). +The return value is `sd`. See also [`push_return_semitoken!(sd::SortedDict, p::Pair)`](@ref). Time: O(*c* log *n*) """ @inline function Base.push!(m::SortedDict{K,D}, pr::Pair) where {K,D} @@ -208,7 +208,7 @@ end """ - DataStructures.push_return_token!(sd::SortedDict, p::Pair) + DataStructures.push_return_semitoken!(sd::SortedDict, p::Pair) Insert pair `p` of the form `k=>v` into `sd`. If the key is already present in `sd`, this @@ -222,7 +222,7 @@ present) and whose second entry is the semitoken of the new entry. This function replaces the deprecated `insert!(sd,k,v)`. 
Time: O(*c* log *n*) """ -@inline function push_return_token!(m::SortedDict, pr::Pair) +@inline function push_return_semitoken!(m::SortedDict, pr::Pair) b, i = insert!(m.bt, convert(keytype(m), pr.first), convert(valtype(m), pr.second), false) b, IntSemiToken(i) end diff --git a/src/sorted_set.jl b/src/sorted_set.jl index b3d55e0de..47c481eb7 100644 --- a/src/sorted_set.jl +++ b/src/sorted_set.jl @@ -112,7 +112,7 @@ end """ - DataStructures.push_return_token!(ss::SortedSet, k) + DataStructures.push_return_semitoken!(ss::SortedSet, k) Insert the element `k` into the SortedSet `sc`. @@ -126,7 +126,7 @@ second entry is the semitoken of the new entry. This function replaces the deprecated `insert!`. Time: O(*c* log *n*) """ -@inline function push_return_token!(m::SortedSet, k_) +@inline function push_return_semitoken!(m::SortedSet, k_) b, i = insert!(m.bt, convert(keytype(m),k_), nothing, false) return b, IntSemiToken(i) end diff --git a/test/test_sorted_containers.jl b/test/test_sorted_containers.jl index 5383d84b0..03515547f 100644 --- a/test/test_sorted_containers.jl +++ b/test/test_sorted_containers.jl @@ -438,7 +438,7 @@ function testSortedDictMethods() m1[6] = 49.0 my_assert(length(m1) == numprimes + 1) my_assert(m1[6] == 49.0) - b, i6 = push_return_token!(m1, 6=>50.0) + b, i6 = push_return_semitoken!(m1, 6=>50.0) my_assert(length(m1) == numprimes + 1) my_assert(!b) p = deref((m1,i6)) @@ -447,7 +447,7 @@ function testSortedDictMethods() p = deref((m1,i6)) my_assert(p[1] == 6 && p[2] == 9.0) my_assert(m1[i6] == 9.0) - b2, i7 = push_return_token!(m1, 8=>51.0) + b2, i7 = push_return_semitoken!(m1, 8=>51.0) my_assert(b2) my_assert(length(m1) == numprimes + 2) p = deref((m1,i7)) @@ -779,7 +779,7 @@ function testSortedDictLoops() sum2 = 0 for factor = 1 : N for multiple = factor : factor : N - push_return_token!(factors, multiple=>factor) + push_return_semitoken!(factors, multiple=>factor) sum1 += multiple sum2 += factor len += 1 @@ -1267,7 +1267,7 @@ function 
testSortedMultiDict() len = 0 for factor = 1 : N for multiple = factor : factor : N - push_return_token!(factors, multiple=>factor) + push_return_semitoken!(factors, multiple=>factor) len += 1 end end @@ -1340,7 +1340,7 @@ function testSortedMultiDict() my_assert(compare(factors,i,i2) != 0) my_assert(compare(factors,regress((factors,i)),i2) == 0) my_assert(compare(factors,i,i1) != 0) - push_return_token!(factors, 80=>6) + push_return_semitoken!(factors, 80=>6) my_assert(length(factors) == len + 1) checkcorrectness(factors.bt, true) expected1 = deepcopy(expected) @@ -1385,7 +1385,7 @@ function testSortedMultiDict() my_assert(isequal(m1,m2)) my_assert(!isequal(m1,m3)) my_assert(!isequal(m1, SortedMultiDict("apples"=>2.0))) - b,stok = push_return_token!(m2, "cherries"=>6.1) + b,stok = push_return_semitoken!(m2, "cherries"=>6.1) my_assert(b) checkcorrectness(m2.bt, true) my_assert(!isequal(m1,m2)) @@ -1426,10 +1426,10 @@ function testSortedMultiDict() m7 = SortedMultiDict{Int,Int}() n1 = 10000 for k = 1 : n1 - push_return_token!(m7, k=>k+1) + push_return_semitoken!(m7, k=>k+1) end for k = 1 : n1 - push_return_token!(m7, k=>k+2) + push_return_semitoken!(m7, k=>k+2) end for k = 1 : n1 i1, i2 = searchequalrange(m7, k) @@ -1476,29 +1476,29 @@ function testSortedMultiDict() my_assert(DataStructures.isordered(SortedMultiDict{Int, String})) # issue #773 s = SortedMultiDict{Int, Int}() - push_return_token!(s, 4=>41) - push_return_token!(s, 3=>31) - push_return_token!(s, 2=>21) - push_return_token!(s, 2=>22) - push_return_token!(s, 2=>23) - push_return_token!(s, 2=>24) - push_return_token!(s, 2=>25) - push_return_token!(s, 2=>26) - push_return_token!(s, 1=>11) - push_return_token!(s, 1=>12) - _,st1 = push_return_token!(s, 1=>13) - _,st2 = push_return_token!(s, 1=>14) - _,st3 = push_return_token!(s, 1=>15) - _,st4 = push_return_token!(s, 1=>16) - _,st5 = push_return_token!(s, 1=>17) - _,st6 = push_return_token!(s, 1=>18) + push_return_semitoken!(s, 4=>41) + 
push_return_semitoken!(s, 3=>31) + push_return_semitoken!(s, 2=>21) + push_return_semitoken!(s, 2=>22) + push_return_semitoken!(s, 2=>23) + push_return_semitoken!(s, 2=>24) + push_return_semitoken!(s, 2=>25) + push_return_semitoken!(s, 2=>26) + push_return_semitoken!(s, 1=>11) + push_return_semitoken!(s, 1=>12) + _,st1 = push_return_semitoken!(s, 1=>13) + _,st2 = push_return_semitoken!(s, 1=>14) + _,st3 = push_return_semitoken!(s, 1=>15) + _,st4 = push_return_semitoken!(s, 1=>16) + _,st5 = push_return_semitoken!(s, 1=>17) + _,st6 = push_return_semitoken!(s, 1=>18) delete!((s, st6)) delete!((s, st5)) delete!((s, st4)) delete!((s, st3)) delete!((s, st2)) delete!((s, st1)) - push_return_token!(s, 1=>19) + push_return_semitoken!(s, 1=>19) checkcorrectness(s.bt, true) my_assert(length(SortedMultiDict{Int,Int}(Val(true), [1=>2,3=>7,3=>15])) == 3) my_assert(length(SortedMultiDict{Int,Int}(Val(true), [8=>2,3=>7,3=>15], Reverse)) == 3) @@ -1568,11 +1568,11 @@ function testSortedSet() smallest = min(smallest,ui) largest = max(largest,ui) end - isnew,st = push_return_token!(m, 72.5) + isnew,st = push_return_semitoken!(m, 72.5) my_assert(isnew) my_assert(deref((m,st)) == 72.5) delete!((m,st)) - isnew,st = push_return_token!(m, 73.5) + isnew,st = push_return_semitoken!(m, 73.5) my_assert(isnew) my_assert(deref((m,st)) == 73.5) delete!(m, 73.5) From f540e4ef5406512886de8aa02f03408f524624cd Mon Sep 17 00:00:00 2001 From: Stephen Vavasis Date: Thu, 19 May 2022 10:57:10 -0400 Subject: [PATCH 6/8] Previous commit was premature - had bugs --- docs/src/sorted_containers.md | 6 +++--- src/DataStructures.jl | 2 +- src/deprecations.jl | 6 +++--- src/sorted_multi_dict.jl | 10 +++++----- src/sorted_set.jl | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/src/sorted_containers.md b/docs/src/sorted_containers.md index c65034d68..6c3e788a8 100644 --- a/docs/src/sorted_containers.md +++ b/docs/src/sorted_containers.md @@ -152,9 +152,9 @@ findkey(sd::SortedDict, k) 
Base.push!(ss::SortedSet, k) Base.push!(sd::SortedDict, p::Pair) Base.push!(smd::SortedMultiDict, p::Pair) -push_return_token!(ss::SortedSet, k) -push_return_token!(sd::SortedDict, p::Pair) -push_return_token!(smd::SortedMultiDict, p::Pair) +push_return_semitoken!(ss::SortedSet, k) +push_return_semitoken!(sd::SortedDict, p::Pair) +push_return_semitoken!(smd::SortedMultiDict, p::Pair) Base.delete!(token::Token) Base.delete!(ss::SortedSet, k) Base.delete!(sc::SortedDict, k) diff --git a/src/DataStructures.jl b/src/DataStructures.jl index 4d2103785..1439f4e3e 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -65,7 +65,7 @@ module DataStructures export findkey - export push_return_token! + export push_return_semitoken! export token_firstindex, token_lastindex include("delegate.jl") diff --git a/src/deprecations.jl b/src/deprecations.jl index 42be1e0fe..78eac122c 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -19,9 +19,9 @@ Base.@deprecate_binding IntDisjointSets IntDisjointSet @deprecate startof(m::SortedContainer) firstindex(m::SortedContainer) @deprecate endof(m::SortedContainer) lastindex(m::SortedContainer) -@deprecate insert!(m::SortedSet, k) push_return_token!(m::SortedSet, k) -@deprecate insert!(m::SortedDict, k, d) push_return_token!(m::SortedDict, k=>d) -@deprecate insert!(m::SortedMultiDict, k, d) (push_return_token!(m::SortedMultiDict, k=>d))[2] +@deprecate insert!(m::SortedSet, k) push_return_semitoken!(m::SortedSet, k) +@deprecate insert!(m::SortedDict, k, d) push_return_semitoken!(m::SortedDict, k=>d) +@deprecate insert!(m::SortedMultiDict, k, d) (push_return_semitoken!(m::SortedMultiDict, k=>d))[2] function Base.peek(q::PriorityQueue) Expr(:meta, :noinline) diff --git a/src/sorted_multi_dict.jl b/src/sorted_multi_dict.jl index 564d836f4..7e9ed34b9 100644 --- a/src/sorted_multi_dict.jl +++ b/src/sorted_multi_dict.jl @@ -20,7 +20,7 @@ SortedMultiDict{K,D,Ord}(o::Ord=Forward) where {K,D,Ord<:Ordering} = function 
SortedMultiDict{K,D,Ord}(o::Ord, kv) where {K,D,Ord<:Ordering} smd = SortedMultiDict{K,D,Ord}(BalancedTree23{K,D,Ord}(o)) for (k,v) in kv - push_return_token!(smd, k=>v) + push_return_semitoken!(smd, k=>v) end return smd end @@ -149,7 +149,7 @@ const SMDToken = Tuple{SortedMultiDict, IntSemiToken} """ - DataStructures.push_return_token!(smd::SortedMultiDict, pr::Pair) + DataStructures.push_return_semitoken!(smd::SortedMultiDict, pr::Pair) Insert the key-value pair `pr`, i.e., `k=>v`, into `smd`. If `k` already appears as a key @@ -159,12 +159,12 @@ the return value is a 2-tuple whose first entry is boolean always equal to `true` and whose second entry is the semitoken of the new entry. (The reason for returning a bool whose value is always `true` is for consistency -with `push_return_token!` for SortedDict and SortedSet.) +with `push_return_semitoken!` for SortedDict and SortedSet.) This function replaces the deprecated `insert!`. Time: O(*c* log *n*) """ -@inline function push_return_token!(m::SortedMultiDict, pr::Pair) +@inline function push_return_semitoken!(m::SortedMultiDict, pr::Pair) b, i = insert!(m.bt, convert(keytype(m),pr.first), convert(valtype(m),pr.second), true) b, IntSemiToken(i) end @@ -177,7 +177,7 @@ Insert the pair `p`, i.e., a `k=>v` into `smd`. If `k` already appears as a key in `smd`, then `k=>v` is inserted in the rightmost position after existing items with key `k`. Returns the container. -See also [`push_return_token!(smd::SortedMultiDict, p::Pair)`](@ref). +See also [`push_return_semitoken!(smd::SortedMultiDict, p::Pair)`](@ref). Time: O(*c* log *n*) """ @inline function Base.push!(m::SortedMultiDict{K,D}, pr::Pair) where {K,D} diff --git a/src/sorted_set.jl b/src/sorted_set.jl index 47c481eb7..87e474ca4 100644 --- a/src/sorted_set.jl +++ b/src/sorted_set.jl @@ -139,7 +139,7 @@ Insert the element `k` into the sorted set `ss`. If the `k` is already present, this overwrites the old value. 
(This is not necessarily a no-op; see remarks about the customizing the sort order.) -See also [`push_return_token!(ss::SortedSet, k)`](@ref). +See also [`push_return_semitoken!(ss::SortedSet, k)`](@ref). The return value is `ss`. Time: O(*c* log *n*) """ From 1c63d2abff64bb3ef044d3e3de8c5fadce027200 Mon Sep 17 00:00:00 2001 From: Stephen Vavasis Date: Wed, 6 Jul 2022 18:05:57 -0400 Subject: [PATCH 7/8] updated testset with comments to refer to issues that have been resolved --- test/test_deprecations.jl | 3 +++ test/test_sorted_containers.jl | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/test/test_deprecations.jl b/test/test_deprecations.jl index 3234a2f7a..29dbd3702 100644 --- a/test/test_deprecations.jl +++ b/test/test_deprecations.jl @@ -120,6 +120,9 @@ end end @testset "insert!" begin + # issues 479 and 767: deprecate insert! (in favor of push_return_semitoken!) + # deprecate startof in favor of firstindex + # deprecate endof in favor of lastindex s = SortedDict{Int,String}(); @test isa(insert!(s, 5, "hello"), Tuple{Bool, IntSemiToken}) s2 = SortedMultiDict{Int,String}(); diff --git a/test/test_sorted_containers.jl b/test/test_sorted_containers.jl index 03515547f..0878cfad5 100644 --- a/test/test_sorted_containers.jl +++ b/test/test_sorted_containers.jl @@ -257,6 +257,12 @@ function testSortedDictBasic() checkcorrectness(m1.bt, false) end my_assert(count == 4) + # Check that issue 239 (cf 781) is resolved + ks = ["a", "c", "b"] + vs = [1,2,3] + s = SortedDict(zip(ks,vs)) + my_assert(keytype(s) == String) + my_assert(valtype(s) == Int) true end @@ -633,6 +639,7 @@ function testSortedDictLoops() m1[bitreverse(lUi)] = lUi end count = 0 + # issue 767 for (stok,k,v) in semitokens(inclusive(m1, firstindex(m1), lastindex(m1))) for (stok2,k2,v2) in semitokens(exclusive(m1, firstindex(m1), pastendsemitoken(m1))) c = compare(m1,stok,stok2) @@ -1921,6 +1928,7 @@ function testTokens() my_assert(count == 4) count = 4 + # Issue 671 for t in 
onlytokens(Iterators.reverse(exclusive_key(smd1, 1, 5))) my_assert(deref_value(t) == result[count]) count -= 1 From ebf63ca7c3a1e7353d52e7ca427245572b12f4b9 Mon Sep 17 00:00:00 2001 From: Stephen Vavasis Date: Fri, 15 Jul 2022 04:30:13 -0400 Subject: [PATCH 8/8] Remove explicit export of startof and endof --- src/DataStructures.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/DataStructures.jl b/src/DataStructures.jl index 1439f4e3e..5c6438bd6 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -42,7 +42,6 @@ module DataStructures export SortedDict, SortedMultiDict, SortedSet export SDToken, SDSemiToken, SMDToken, SMDSemiToken export SetToken, SetSemiToken - export startof, endof ## both are deprecated export pastendsemitoken, beforestartsemitoken export pastendtoken, beforestarttoken export searchsortedafter, searchequalrange