diff --git a/src/accumulator.jl b/src/accumulator.jl index 2d3e0861a..455bdf9e0 100644 --- a/src/accumulator.jl +++ b/src/accumulator.jl @@ -1,4 +1,4 @@ -# A counter type +#A counter type struct Accumulator{T, V<:Number} <: AbstractDict{T,V} map::Dict{T,V} @@ -137,6 +137,50 @@ Returns its former count. """ reset!(ct::Accumulator, x) = pop!(ct.map, x) +""" + nlargest(acc::Accumulator, [n]) + +Returns a sorted vector of the `n` most common elements, with their counts. +If `n` is omitted, the full sorted collection is returned. + +This corresponds to Python's `Counter.most_common` function. + +Example +``` +julia> nlargest(counter("abbbccddddda")) + +4-element Array{Pair{Char,Int64},1}: + 'd'=>5 + 'b'=>3 + 'c'=>2 + 'a'=>2 + + +julia> nlargest(counter("abbbccddddda"),2) + +2-element Array{Pair{Char,Int64},1}: + 'd'=>5 + 'b'=>3 + +``` +""" +nlargest(acc::Accumulator) = sort!(collect(acc), by=last, rev=true) +nlargest(acc::Accumulator, n) = partialsort!(collect(acc), 1:n, by=last, rev=true) + + +""" + nsmallest(acc::Accumulator, [n]) + +Returns a sorted vector of the `n` least common elements, with their counts. +If `n` is omitted, the full sorted collection is returned. + +This is the opposite of the `nlargest` function. +For obvious reasons this will not include zero counts for items not encountered. 
+(unless those elements are added to the accumulator directly, e.g. via `acc[foo]=0`) +""" +nsmallest(acc::Accumulator) = sort!(collect(acc), by=last, rev=false) +nsmallest(acc::Accumulator, n) = partialsort!(collect(acc), 1:n, by=last, rev=false) + ## Deprecations diff --git a/test/test_accumulator.jl b/test/test_accumulator.jl index 2807248da..f3689fb30 100644 --- a/test/test_accumulator.jl +++ b/test/test_accumulator.jl @@ -30,7 +30,7 @@ @test ct["b"] == 1 dec!(ct, "b", 16) @test ct["b"] == -15 - ct["b"] = 2 + ct["b"] = 2 # Test convert inc!(ct, "b", 0x3) @@ -112,33 +112,59 @@ @test ct6["b"] == 0 @test ct6["c"] == 4 - s = ["y", "el", "sol", "se", "fue"] - @test counter(length(x) for x in s) == counter(map(length, s)) + + @testset "Generators" begin + s = ["y", "el", "sol", "se", "fue"] + @test counter(length(x) for x in s) == counter(map(length, s)) + end + + @testset "non-integer uses" begin + acc = Accumulator(Symbol, Float16) + acc[:a] = 1.5 + @test acc[:a] ≈ 1.5 + push!(acc, :a, 2.5) + @test acc[:a] ≈ 4.0 + dec!(acc, :a) + @test acc[:a] ≈ 3.0 + end + + @testset "ambiguity resolution" begin + ct7 = counter(Int) + @test_throws MethodError push!(ct7, 1=>2) + end + + @testset "nlargest" begin + @test nlargest(counter("abbbcddddda")) == ['d'=>5, 'b'=>3, 'a'=>2, 'c'=>1] + @test nlargest(counter("abbbccddddda"),2) == ['d'=>5, 'b'=>3] + @test nlargest(counter("a")) == ['a'=>1] + + @test nlargest(counter("aaabbcc")) ∈ (['a'=>3,'b'=>2, 'c'=>2], ['a'=>3,'c'=>2, 'b'=>2]) - # non-integer uses - acc = Accumulator(Symbol, Float16) - acc[:a] = 1.5 - @test acc[:a] ≈ 1.5 - push!(acc, :a, 2.5) - @test acc[:a] ≈ 4.0 - dec!(acc, :a) - @test acc[:a] ≈ 3.0 + @test_throws BoundsError nlargest(counter("a"),2) + end - # ambiguity resolution - ct7 = counter(Int) - @test_throws MethodError push!(ct7, 1=>2) + @testset "nsmallest" begin + acc = counter("aabbbcccc") + @test nsmallest(acc) == ['a'=>2, 'b'=>3, 'c'=>4] + @test nsmallest(acc,2) == ['a'=>2, 'b'=>3] + acc['d']=0 + @test 
nsmallest(acc,2) == ['d'=>0, 'a'=>2] + @test nsmallest(counter("aaabbcc")) ∈ (['b'=>2, 'c'=>2, 'a'=>3], ['c'=>2, 'b'=>2, 'a'=>3]) - #deprecations - ctd = counter([1,2,3]) - @test ctd[3]==1 + @test_throws BoundsError nsmallest(counter("a"),2) + end - println("\nThe following warning is expected:") - @test pop!(ctd, 3)==1 - println("\nThe following warning is expected:") - @test push!(counter([1,2,3]),counter([1,2,3])) == merge!(counter([1,2,3]), counter([1,2,3])) + @testset "deprecations" begin + ctd = counter([1,2,3]) + @test ctd[3]==1 + println("\nThe following warning is expected:") + @test pop!(ctd, 3)==1 + println("\nThe following warning is expected:") + @test push!(counter([1,2,3]),counter([1,2,3])) == merge!(counter([1,2,3]), counter([1,2,3])) + end end # @testset Accumulators