This repository has been archived by the owner on Jun 1, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 31
/
weightedhistogram.go
190 lines (156 loc) · 4.33 KB
/
weightedhistogram.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
// Package gohistogram contains implementations of weighted and exponential histograms.
package gohistogram
// Copyright (c) 2013 VividCortex, Inc. All rights reserved.
// Please see the LICENSE file for applicable license terms.
import "fmt"
// A WeightedHistogram implements Histogram. A WeightedHistogram has bins that have values
// which are exponentially weighted moving averages. This allows you keep inserting large
// amounts of data into the histogram and approximate quantiles with recency factored in.
type WeightedHistogram struct {
bins []bin
maxbins int
total float64
alpha float64
}
// NewWeightedHistogram returns a new WeightedHistogram with a maximum of n bins with a decay factor
// of alpha.
//
// There is no "optimal" bin count, but somewhere between 20 and 80 bins should be
// sufficient.
//
// Alpha should be set to 2 / (N+1), where N represents the average age of the moving window.
// For example, a 60-second window with an average age of 30 seconds would yield an
// alpha of 0.064516129.
func NewWeightedHistogram(n int, alpha float64) *WeightedHistogram {
return &WeightedHistogram{
bins: make([]bin, 0),
maxbins: n,
total: 0,
alpha: alpha,
}
}
func ewma(existingVal float64, newVal float64, alpha float64) (result float64) {
result = newVal*(1-alpha) + existingVal*alpha
return
}
func (h *WeightedHistogram) scaleDown(except int) {
for i := range h.bins {
if i != except {
h.bins[i].count = ewma(h.bins[i].count, 0, h.alpha)
}
}
}
func (h *WeightedHistogram) Add(n float64) {
defer h.trim()
for i := range h.bins {
if h.bins[i].value == n {
h.bins[i].count++
defer h.scaleDown(i)
return
}
if h.bins[i].value > n {
newbin := bin{value: n, count: 1}
head := append(make([]bin, 0), h.bins[0:i]...)
head = append(head, newbin)
tail := h.bins[i:]
h.bins = append(head, tail...)
defer h.scaleDown(i)
return
}
}
h.bins = append(h.bins, bin{count: 1, value: n})
}
func (h *WeightedHistogram) Quantile(q float64) float64 {
count := q * h.total
for i := range h.bins {
count -= float64(h.bins[i].count)
if count <= 0 {
return h.bins[i].value
}
}
return -1
}
// CDF returns the value of the cumulative distribution function
// at x
func (h *WeightedHistogram) CDF(x float64) float64 {
count := 0.0
for i := range h.bins {
if h.bins[i].value <= x {
count += float64(h.bins[i].count)
}
}
return count / h.total
}
// Mean returns the sample mean of the distribution
func (h *WeightedHistogram) Mean() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
for i := range h.bins {
sum += h.bins[i].value * h.bins[i].count
}
return sum / h.total
}
// Variance returns the variance of the distribution
func (h *WeightedHistogram) Variance() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
mean := h.Mean()
for i := range h.bins {
sum += (h.bins[i].count * (h.bins[i].value - mean) * (h.bins[i].value - mean))
}
return sum / h.total
}
func (h *WeightedHistogram) Count() float64 {
return h.total
}
func (h *WeightedHistogram) trim() {
total := 0.0
for i := range h.bins {
total += h.bins[i].count
}
h.total = total
for len(h.bins) > h.maxbins {
// Find closest bins in terms of value
minDelta := 1e99
minDeltaIndex := 0
for i := range h.bins {
if i == 0 {
continue
}
if delta := h.bins[i].value - h.bins[i-1].value; delta < minDelta {
minDelta = delta
minDeltaIndex = i
}
}
// We need to merge bins minDeltaIndex-1 and minDeltaIndex
totalCount := h.bins[minDeltaIndex-1].count + h.bins[minDeltaIndex].count
mergedbin := bin{
value: (h.bins[minDeltaIndex-1].value*
h.bins[minDeltaIndex-1].count +
h.bins[minDeltaIndex].value*
h.bins[minDeltaIndex].count) /
totalCount, // weighted average
count: totalCount, // summed heights
}
head := append(make([]bin, 0), h.bins[0:minDeltaIndex-1]...)
tail := append([]bin{mergedbin}, h.bins[minDeltaIndex+1:]...)
h.bins = append(head, tail...)
}
}
// String returns a string reprentation of the histogram,
// which is useful for printing to a terminal.
func (h *WeightedHistogram) String() (str string) {
str += fmt.Sprintln("Total:", h.total)
for i := range h.bins {
var bar string
for j := 0; j < int(float64(h.bins[i].count)/float64(h.total)*200); j++ {
bar += "."
}
str += fmt.Sprintln(h.bins[i].value, "\t", bar)
}
return
}