summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/beorn7/perks/topk/topk.go
blob: 5ac3d9904ff3683babf8c08ab5e939fc9c7fc44b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package topk

import (
	"sort"
)

// http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf

type Element struct {
	Value string
	Count int
}

type Samples []*Element

func (sm Samples) Len() int {
	return len(sm)
}

func (sm Samples) Less(i, j int) bool {
	return sm[i].Count < sm[j].Count
}

func (sm Samples) Swap(i, j int) {
	sm[i], sm[j] = sm[j], sm[i]
}

type Stream struct {
	k   int
	mon map[string]*Element

	// the minimum Element
	min *Element
}

func New(k int) *Stream {
	s := new(Stream)
	s.k = k
	s.mon = make(map[string]*Element)
	s.min = &Element{}

	// Track k+1 so that less frequenet items contended for that spot,
	// resulting in k being more accurate.
	return s
}

func (s *Stream) Insert(x string) {
	s.insert(&Element{x, 1})
}

func (s *Stream) Merge(sm Samples) {
	for _, e := range sm {
		s.insert(e)
	}
}

func (s *Stream) insert(in *Element) {
	e := s.mon[in.Value]
	if e != nil {
		e.Count++
	} else {
		if len(s.mon) < s.k+1 {
			e = &Element{in.Value, in.Count}
			s.mon[in.Value] = e
		} else {
			e = s.min
			delete(s.mon, e.Value)
			e.Value = in.Value
			e.Count += in.Count
			s.min = e
		}
	}
	if e.Count < s.min.Count {
		s.min = e
	}
}

func (s *Stream) Query() Samples {
	var sm Samples
	for _, e := range s.mon {
		sm = append(sm, e)
	}
	sort.Sort(sort.Reverse(sm))

	if len(sm) < s.k {
		return sm
	}

	return sm[:s.k]
}