summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/beorn7/perks/topk/topk.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/beorn7/perks/topk/topk.go')
-rw-r--r--vendor/github.com/beorn7/perks/topk/topk.go90
1 files changed, 90 insertions, 0 deletions
diff --git a/vendor/github.com/beorn7/perks/topk/topk.go b/vendor/github.com/beorn7/perks/topk/topk.go
new file mode 100644
index 000000000..5ac3d9904
--- /dev/null
+++ b/vendor/github.com/beorn7/perks/topk/topk.go
@@ -0,0 +1,90 @@
+package topk
+
+import (
+ "sort"
+)
+
+// http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf
+
+type Element struct {
+ Value string
+ Count int
+}
+
+type Samples []*Element
+
+func (sm Samples) Len() int {
+ return len(sm)
+}
+
+func (sm Samples) Less(i, j int) bool {
+ return sm[i].Count < sm[j].Count
+}
+
+func (sm Samples) Swap(i, j int) {
+ sm[i], sm[j] = sm[j], sm[i]
+}
+
+type Stream struct {
+ k int
+ mon map[string]*Element
+
+ // the minimum Element
+ min *Element
+}
+
+func New(k int) *Stream {
+ s := new(Stream)
+ s.k = k
+ s.mon = make(map[string]*Element)
+ s.min = &Element{}
+
+ // Track k+1 so that less frequenet items contended for that spot,
+ // resulting in k being more accurate.
+ return s
+}
+
+func (s *Stream) Insert(x string) {
+ s.insert(&Element{x, 1})
+}
+
+func (s *Stream) Merge(sm Samples) {
+ for _, e := range sm {
+ s.insert(e)
+ }
+}
+
+func (s *Stream) insert(in *Element) {
+ e := s.mon[in.Value]
+ if e != nil {
+ e.Count++
+ } else {
+ if len(s.mon) < s.k+1 {
+ e = &Element{in.Value, in.Count}
+ s.mon[in.Value] = e
+ } else {
+ e = s.min
+ delete(s.mon, e.Value)
+ e.Value = in.Value
+ e.Count += in.Count
+ s.min = e
+ }
+ }
+ if e.Count < s.min.Count {
+ s.min = e
+ }
+}
+
+func (s *Stream) Query() Samples {
+ var sm Samples
+ for _, e := range s.mon {
+ sm = append(sm, e)
+ }
+ sort.Sort(sort.Reverse(sm))
+
+ if len(sm) < s.k {
+ return sm
+ }
+
+ return sm[:s.k]
+}