summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/beorn7/perks/histogram/histogram.go
blob: bef05c70cf78db064e6c6e2087aa5344e734b74d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
// Package histogram provides a Go implementation of BigML's histogram package
// for Clojure/Java. It is currently experimental.
package histogram

import (
	"container/heap"
	"math"
	"sort"
)

type Bin struct {
	Count int
	Sum   float64
}

func (b *Bin) Update(x *Bin) {
	b.Count += x.Count
	b.Sum += x.Sum
}

func (b *Bin) Mean() float64 {
	return b.Sum / float64(b.Count)
}

type Bins []*Bin

func (bs Bins) Len() int           { return len(bs) }
func (bs Bins) Less(i, j int) bool { return bs[i].Mean() < bs[j].Mean() }
func (bs Bins) Swap(i, j int)      { bs[i], bs[j] = bs[j], bs[i] }

func (bs *Bins) Push(x interface{}) {
	*bs = append(*bs, x.(*Bin))
}

func (bs *Bins) Pop() interface{} {
	return bs.remove(len(*bs) - 1)
}

func (bs *Bins) remove(n int) *Bin {
	if n < 0 || len(*bs) < n {
		return nil
	}
	x := (*bs)[n]
	*bs = append((*bs)[:n], (*bs)[n+1:]...)
	return x
}

type Histogram struct {
	res *reservoir
}

func New(maxBins int) *Histogram {
	return &Histogram{res: newReservoir(maxBins)}
}

func (h *Histogram) Insert(f float64) {
	h.res.insert(&Bin{1, f})
	h.res.compress()
}

func (h *Histogram) Bins() Bins {
	return h.res.bins
}

type reservoir struct {
	n       int
	maxBins int
	bins    Bins
}

func newReservoir(maxBins int) *reservoir {
	return &reservoir{maxBins: maxBins}
}

func (r *reservoir) insert(bin *Bin) {
	r.n += bin.Count
	i := sort.Search(len(r.bins), func(i int) bool {
		return r.bins[i].Mean() >= bin.Mean()
	})
	if i < 0 || i == r.bins.Len() {
		// TODO(blake): Maybe use an .insert(i, bin) instead of
		// performing the extra work of a heap.Push.
		heap.Push(&r.bins, bin)
		return
	}
	r.bins[i].Update(bin)
}

func (r *reservoir) compress() {
	for r.bins.Len() > r.maxBins {
		minGapIndex := -1
		minGap := math.MaxFloat64
		for i := 0; i < r.bins.Len()-1; i++ {
			gap := gapWeight(r.bins[i], r.bins[i+1])
			if minGap > gap {
				minGap = gap
				minGapIndex = i
			}
		}
		prev := r.bins[minGapIndex]
		next := r.bins.remove(minGapIndex + 1)
		prev.Update(next)
	}
}

func gapWeight(prev, next *Bin) float64 {
	return next.Mean() - prev.Mean()
}