diff options
Diffstat (limited to 'vendor/github.com/hashicorp/memberlist/suspicion.go')
-rw-r--r-- | vendor/github.com/hashicorp/memberlist/suspicion.go | 130 |
1 files changed, 130 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/memberlist/suspicion.go b/vendor/github.com/hashicorp/memberlist/suspicion.go new file mode 100644 index 000000000..5f573e1fc --- /dev/null +++ b/vendor/github.com/hashicorp/memberlist/suspicion.go @@ -0,0 +1,130 @@ +package memberlist + +import ( + "math" + "sync/atomic" + "time" +) + +// suspicion manages the suspect timer for a node and provides an interface +// to accelerate the timeout as we get more independent confirmations that +// a node is suspect. +type suspicion struct { + // n is the number of independent confirmations we've seen. This must + // be updated using atomic instructions to prevent contention with the + // timer callback. + n int32 + + // k is the number of independent confirmations we'd like to see in + // order to drive the timer to its minimum value. + k int32 + + // min is the minimum timer value. + min time.Duration + + // max is the maximum timer value. + max time.Duration + + // start captures the timestamp when we began the timer. This is used + // so we can calculate durations to feed the timer during updates in + // a way the achieves the overall time we'd like. + start time.Time + + // timer is the underlying timer that implements the timeout. + timer *time.Timer + + // f is the function to call when the timer expires. We hold on to this + // because there are cases where we call it directly. + timeoutFn func() + + // confirmations is a map of "from" nodes that have confirmed a given + // node is suspect. This prevents double counting. + confirmations map[string]struct{} +} + +// newSuspicion returns a timer started with the max time, and that will drive +// to the min time after seeing k or more confirmations. The from node will be +// excluded from confirmations since we might get our own suspicion message +// gossiped back to us. The minimum time will be used if no confirmations are +// called for (k <= 0). +func newSuspicion(from string, k int, min time.Duration, max time.Duration, fn func(int)) *suspicion { + s := &suspicion{ + k: int32(k), + min: min, + max: max, + confirmations: make(map[string]struct{}), + } + + // Exclude the from node from any confirmations. + s.confirmations[from] = struct{}{} + + // Pass the number of confirmations into the timeout function for + // easy telemetry. + s.timeoutFn = func() { + fn(int(atomic.LoadInt32(&s.n))) + } + + // If there aren't any confirmations to be made then take the min + // time from the start. + timeout := max + if k < 1 { + timeout = min + } + s.timer = time.AfterFunc(timeout, s.timeoutFn) + + // Capture the start time right after starting the timer above so + // we should always err on the side of a little longer timeout if + // there's any preemption that separates this and the step above. + s.start = time.Now() + return s +} + +// remainingSuspicionTime takes the state variables of the suspicion timer and +// calculates the remaining time to wait before considering a node dead. The +// return value can be negative, so be prepared to fire the timer immediately in +// that case. +func remainingSuspicionTime(n, k int32, elapsed time.Duration, min, max time.Duration) time.Duration { + frac := math.Log(float64(n)+1.0) / math.Log(float64(k)+1.0) + raw := max.Seconds() - frac*(max.Seconds()-min.Seconds()) + timeout := time.Duration(math.Floor(1000.0*raw)) * time.Millisecond + if timeout < min { + timeout = min + } + + // We have to take into account the amount of time that has passed so + // far, so we get the right overall timeout. + return timeout - elapsed +} + +// Confirm registers that a possibly new peer has also determined the given +// node is suspect. This returns true if this was new information, and false +// if it was a duplicate confirmation, or if we've got enough confirmations to +// hit the minimum. +func (s *suspicion) Confirm(from string) bool { + // If we've got enough confirmations then stop accepting them. + if atomic.LoadInt32(&s.n) >= s.k { + return false + } + + // Only allow one confirmation from each possible peer. + if _, ok := s.confirmations[from]; ok { + return false + } + s.confirmations[from] = struct{}{} + + // Compute the new timeout given the current number of confirmations and + // adjust the timer. If the timeout becomes negative *and* we can cleanly + // stop the timer then we will call the timeout function directly from + // here. + n := atomic.AddInt32(&s.n, 1) + elapsed := time.Now().Sub(s.start) + remaining := remainingSuspicionTime(n, s.k, elapsed, s.min, s.max) + if s.timer.Stop() { + if remaining > 0 { + s.timer.Reset(remaining) + } else { + go s.timeoutFn() + } + } + return true +} |