diff --git a/lib/tun2/server.go b/lib/tun2/server.go index d4d294a..64472d3 100644 --- a/lib/tun2/server.go +++ b/lib/tun2/server.go @@ -17,6 +17,7 @@ import ( "git.xeserv.us/xena/route/database" "github.com/Xe/ln" + failure "github.com/dgryski/go-failure" "github.com/mtneug/pkg/ulid" cmap "github.com/streamrail/concurrent-map" kcp "github.com/xtaci/kcp-go" @@ -55,6 +56,7 @@ type Connection struct { user string domain string cancel context.CancelFunc + detector *failure.Detector } func (c *Connection) F() ln.F { @@ -155,57 +157,77 @@ func (s *Server) ListenAndServe() error { go func() { for { - time.Sleep(5 * time.Second) - s.pingLoopInner() + time.Sleep(time.Second) + + now := time.Now() + + s.connlock.Lock() + for _, c := range s.conns { + failureChance := c.detector.Phi(now) + + ln.Log(c.F(), ln.F{ + "action": "phi_failure_detection", + "value": failureChance, + }) + } } }() return nil } -func (s *Server) pingLoopInner() { - s.connlock.Lock() - defer s.connlock.Unlock() - - for _, c := range s.conns { - req, err := http.NewRequest("GET", "http://backend/health", nil) - if err != nil { - panic(err) - } - - c.conn.SetDeadline(time.Now().Add(time.Second)) - stream, err := c.session.OpenStream() - if err != nil { - ln.Error(err, c.F()) - c.cancel() - return - } - c.conn.SetDeadline(time.Time{}) - - stream.SetWriteDeadline(time.Now().Add(time.Second)) - err = req.Write(stream) - if err != nil { - ln.Error(err, c.F()) - c.cancel() - return - } - - stream.SetReadDeadline(time.Now().Add(time.Second)) - _, err = stream.Read(make([]byte, 30)) - if err != nil { - ln.Error(err, c.F()) - c.cancel() - return - } - - stream.Close() - - /* - ln.Log(ln.F{ - "action": "ping_health_is_good", - }, c.F()) - */ +// Ping ends a "ping" to the client. If the client doesn't respond or the connection +// dies, then the connection needs to be cleaned up. 
+func (c *Connection) Ping() error { + req, err := http.NewRequest("GET", "http://backend/health", nil) + if err != nil { + panic(err) } + + stream, err := c.OpenStream() + if err != nil { + ln.Error(err, c.F()) + defer c.cancel() + return err + } + defer stream.Close() + + stream.SetWriteDeadline(time.Now().Add(time.Second)) + err = req.Write(stream) + if err != nil { + ln.Error(err, c.F()) + defer c.cancel() + return err + } + + stream.SetReadDeadline(time.Now().Add(5 * time.Second)) + _, err = stream.Read(make([]byte, 30)) + if err != nil { + ln.Error(err, c.F()) + defer c.cancel() + return err + } + + c.detector.Ping(time.Now()) + + return nil +} + +// OpenStream creates a new stream (connection) to the backend server. +func (c *Connection) OpenStream() (net.Conn, error) { + err := c.conn.SetDeadline(time.Now().Add(time.Second)) + if err != nil { + ln.Error(err, c.F()) + return nil, err + } + + stream, err := c.session.OpenStream() + if err != nil { + ln.Error(err, c.F()) + return nil, err + } + + return stream, c.conn.SetDeadline(time.Time{}) } func (s *Server) HandleConn(c net.Conn, isKCP bool) { @@ -288,13 +310,14 @@ func (s *Server) HandleConn(c net.Conn, isKCP bool) { } connection := &Connection{ - id: ulid.New().String(), - conn: c, - isKCP: isKCP, - session: session, - user: defaultUser, // XXX RIP replace this with the actual token user once users are implemented - domain: auth.Domain, - cancel: cancel, + id: ulid.New().String(), + conn: c, + isKCP: isKCP, + session: session, + user: defaultUser, // XXX RIP replace this with the actual token user once users are implemented + domain: auth.Domain, + cancel: cancel, + detector: failure.New(15, 3), } ln.Log(ln.F{ @@ -321,7 +344,15 @@ func (s *Server) HandleConn(c net.Conn, isKCP bool) { s.domains.Set(auth.Domain, conns) + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + select { + case <-ticker.C: + err := connection.Ping() + if err != nil { + cancel() + } case <-ctx.Done(): 
s.connlock.Lock() delete(s.conns, c) diff --git a/vendor-log b/vendor-log index eeeba56..239f976 100644 --- a/vendor-log +++ b/vendor-log @@ -95,3 +95,6 @@ a1a5df8f92af764f378f07d6a3dd8eb3f7aa190a github.com/xtaci/smux 6c23252515492caf9b228a9d5cabcdbde29f7f82 golang.org/x/net/ipv4 8bf1e9bacbf65b10c81d0f4314cf2b1ebef728b5 github.com/streamrail/concurrent-map a00a8beb369cafd88bb7b32f31fc4ff3219c3565 github.com/Xe/gopreload +033754ab1fee508c9f98f2785eec2365964e0b05 github.com/aclements/go-moremath/mathx +4963dbd58fd03ebc6672b18f9237a9045e6ef479 github.com/dgryski/go-failure +91a8fc22ba247b57c243ab90b49049fb734c47c3 github.com/dgryski/go-onlinestats diff --git a/vendor/github.com/aclements/go-moremath/mathx/beta.go b/vendor/github.com/aclements/go-moremath/mathx/beta.go new file mode 100644 index 0000000..49f8722 --- /dev/null +++ b/vendor/github.com/aclements/go-moremath/mathx/beta.go @@ -0,0 +1,93 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mathx + +import "math" + +func lgamma(x float64) float64 { + y, _ := math.Lgamma(x) + return y +} + +// Beta returns the value of the complete beta function B(a, b). +func Beta(a, b float64) float64 { + // B(x,y) = Γ(x)Γ(y) / Γ(x+y) + return math.Exp(lgamma(a) + lgamma(b) - lgamma(a+b)) +} + +// BetaInc returns the value of the regularized incomplete beta +// function Iₓ(a, b). +// +// This is not to be confused with the "incomplete beta function", +// which can be computed as BetaInc(x, a, b)*Beta(a, b). +// +// If x < 0 or x > 1, returns NaN. +func BetaInc(x, a, b float64) float64 { + // Based on Numerical Recipes in C, section 6.4. 
This uses the + // continued fraction definition of I: + // + // (xᵃ*(1-x)ᵇ)/(a*B(a,b)) * (1/(1+(d₁/(1+(d₂/(1+...)))))) + // + // where B(a,b) is the beta function and + // + // d_{2m+1} = -(a+m)(a+b+m)x/((a+2m)(a+2m+1)) + // d_{2m} = m(b-m)x/((a+2m-1)(a+2m)) + if x < 0 || x > 1 { + return math.NaN() + } + bt := 0.0 + if 0 < x && x < 1 { + // Compute the coefficient before the continued + // fraction. + bt = math.Exp(lgamma(a+b) - lgamma(a) - lgamma(b) + + a*math.Log(x) + b*math.Log(1-x)) + } + if x < (a+1)/(a+b+2) { + // Compute continued fraction directly. + return bt * betacf(x, a, b) / a + } else { + // Compute continued fraction after symmetry transform. + return 1 - bt*betacf(1-x, b, a)/b + } +} + +// betacf is the continued fraction component of the regularized +// incomplete beta function Iₓ(a, b). +func betacf(x, a, b float64) float64 { + const maxIterations = 200 + const epsilon = 3e-14 + + raiseZero := func(z float64) float64 { + if math.Abs(z) < math.SmallestNonzeroFloat64 { + return math.SmallestNonzeroFloat64 + } + return z + } + + c := 1.0 + d := 1 / raiseZero(1-(a+b)*x/(a+1)) + h := d + for m := 1; m <= maxIterations; m++ { + mf := float64(m) + + // Even step of the recurrence. + numer := mf * (b - mf) * x / ((a + 2*mf - 1) * (a + 2*mf)) + d = 1 / raiseZero(1+numer*d) + c = raiseZero(1 + numer/c) + h *= d * c + + // Odd step of the recurrence. + numer = -(a + mf) * (a + b + mf) * x / ((a + 2*mf) * (a + 2*mf + 1)) + d = 1 / raiseZero(1+numer*d) + c = raiseZero(1 + numer/c) + hfac := d * c + h *= hfac + + if math.Abs(hfac-1) < epsilon { + return h + } + } + panic("betainc: a or b too big; failed to converge") +} diff --git a/vendor/github.com/aclements/go-moremath/mathx/choose.go b/vendor/github.com/aclements/go-moremath/mathx/choose.go new file mode 100644 index 0000000..54dc27c --- /dev/null +++ b/vendor/github.com/aclements/go-moremath/mathx/choose.go @@ -0,0 +1,62 @@ +// Copyright 2015 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mathx + +import "math" + +const smallFactLimit = 20 // 20! => 62 bits +var smallFact [smallFactLimit + 1]int64 + +func init() { + smallFact[0] = 1 + fact := int64(1) + for n := int64(1); n <= smallFactLimit; n++ { + fact *= n + smallFact[n] = fact + } +} + +// Choose returns the binomial coefficient of n and k. +func Choose(n, k int) float64 { + if k == 0 || k == n { + return 1 + } + if k < 0 || n < k { + return 0 + } + if n <= smallFactLimit { // Implies k <= smallFactLimit + // It's faster to do several integer multiplications + // than it is to do an extra integer division. + // Remarkably, this is also faster than pre-computing + // Pascal's triangle (presumably because this is very + // cache efficient). + numer := int64(1) + for n1 := int64(n - (k - 1)); n1 <= int64(n); n1++ { + numer *= n1 + } + denom := smallFact[k] + return float64(numer / denom) + } + + return math.Exp(lchoose(n, k)) +} + +// Lchoose returns math.Log(Choose(n, k)). +func Lchoose(n, k int) float64 { + if k == 0 || k == n { + return 0 + } + if k < 0 || n < k { + return math.NaN() + } + return lchoose(n, k) +} + +func lchoose(n, k int) float64 { + a, _ := math.Lgamma(float64(n + 1)) + b, _ := math.Lgamma(float64(k + 1)) + c, _ := math.Lgamma(float64(n - k + 1)) + return a - b - c +} diff --git a/vendor/github.com/aclements/go-moremath/mathx/gamma.go b/vendor/github.com/aclements/go-moremath/mathx/gamma.go new file mode 100644 index 0000000..d11096e --- /dev/null +++ b/vendor/github.com/aclements/go-moremath/mathx/gamma.go @@ -0,0 +1,96 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package mathx + +import "math" + +// GammaInc returns the value of the incomplete gamma function (also +// known as the regularized gamma function): +// +// P(a, x) = 1 / Γ(a) * ∫₀ˣ exp(-t) t**(a-1) dt +func GammaInc(a, x float64) float64 { + // Based on Numerical Recipes in C, section 6.2. + + if a <= 0 || x < 0 || math.IsNaN(a) || math.IsNaN(x) { + return math.NaN() + } + + if x < a+1 { + // Use the series representation, which converges more + // rapidly in this range. + return gammaIncSeries(a, x) + } else { + // Use the continued fraction representation. + return 1 - gammaIncCF(a, x) + } +} + +// GammaIncComp returns the complement of the incomplete gamma +// function 1 - GammaInc(a, x). This is more numerically stable for +// values near 0. +func GammaIncComp(a, x float64) float64 { + if a <= 0 || x < 0 || math.IsNaN(a) || math.IsNaN(x) { + return math.NaN() + } + + if x < a+1 { + return 1 - gammaIncSeries(a, x) + } else { + return gammaIncCF(a, x) + } +} + +func gammaIncSeries(a, x float64) float64 { + const maxIterations = 200 + const epsilon = 3e-14 + + if x == 0 { + return 0 + } + + ap := a + del := 1 / a + sum := del + for n := 0; n < maxIterations; n++ { + ap++ + del *= x / ap + sum += del + if math.Abs(del) < math.Abs(sum)*epsilon { + return sum * math.Exp(-x+a*math.Log(x)-lgamma(a)) + } + } + panic("a too large; failed to converge") +} + +func gammaIncCF(a, x float64) float64 { + const maxIterations = 200 + const epsilon = 3e-14 + + raiseZero := func(z float64) float64 { + if math.Abs(z) < math.SmallestNonzeroFloat64 { + return math.SmallestNonzeroFloat64 + } + return z + } + + b := x + 1 - a + c := math.MaxFloat64 + d := 1 / b + h := d + + for i := 1; i <= maxIterations; i++ { + an := -float64(i) * (float64(i) - a) + b += 2 + d = raiseZero(an*d + b) + c = raiseZero(b + an/c) + d = 1 / d + del := d * c + h *= del + if math.Abs(del-1) < epsilon { + return math.Exp(-x+a*math.Log(x)-lgamma(a)) * h + } + } + panic("a too large; failed to converge") +} 
diff --git a/vendor/github.com/aclements/go-moremath/mathx/package.go b/vendor/github.com/aclements/go-moremath/mathx/package.go new file mode 100644 index 0000000..9d5de0d --- /dev/null +++ b/vendor/github.com/aclements/go-moremath/mathx/package.go @@ -0,0 +1,11 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package mathx implements special functions not provided by the +// standard math package. +package mathx // import "github.com/aclements/go-moremath/mathx" + +import "math" + +var nan = math.NaN() diff --git a/vendor/github.com/aclements/go-moremath/mathx/sign.go b/vendor/github.com/aclements/go-moremath/mathx/sign.go new file mode 100644 index 0000000..372e92f --- /dev/null +++ b/vendor/github.com/aclements/go-moremath/mathx/sign.go @@ -0,0 +1,18 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mathx + +// Sign returns the sign of x: -1 if x < 0, 0 if x == 0, 1 if x > 0. +// If x is NaN, it returns NaN. +func Sign(x float64) float64 { + if x == 0 { + return 0 + } else if x < 0 { + return -1 + } else if x > 0 { + return 1 + } + return nan +} diff --git a/vendor/github.com/dgryski/go-failure/failure.go b/vendor/github.com/dgryski/go-failure/failure.go new file mode 100644 index 0000000..df6fbf2 --- /dev/null +++ b/vendor/github.com/dgryski/go-failure/failure.go @@ -0,0 +1,73 @@ +// Package failure implements the Phi Accrual Failure Detector +/* + +This package implements the paper "The φ Accrual Failure Detector" (2004) +(available at http://hdl.handle.net/10119/4784). + +To use the failure detection algorithm, you need a heartbeat loop that will +call Ping() at regular intervals. 
At any point, you can call Phi() which will +report how suspicious it is that a heartbeat has not been heard since the last +time Ping() was called. + +*/ +package failure + +import ( + "math" + "sync" + "time" + + "github.com/dgryski/go-onlinestats" +) + +// Detector is a failure detector +type Detector struct { + w *onlinestats.Windowed + last time.Time + minSamples int + mu sync.Mutex +} + +// New returns a new failure detector that considers the last windowSize +// samples, and ensures there are at least minSamples in the window before +// returning an answer +func New(windowSize, minSamples int) *Detector { + + d := &Detector{ + w: onlinestats.NewWindowed(windowSize), + minSamples: minSamples, + } + + return d +} + +// Ping registers a heart-beat at time now +func (d *Detector) Ping(now time.Time) { + d.mu.Lock() + defer d.mu.Unlock() + if !d.last.IsZero() { + d.w.Push(now.Sub(d.last).Seconds()) + } + d.last = now +} + +// Phi calculates the suspicion level at time 'now' that the remote end has failed +func (d *Detector) Phi(now time.Time) float64 { + d.mu.Lock() + defer d.mu.Unlock() + if d.w.Len() < d.minSamples { + return 0 + } + + t := now.Sub(d.last).Seconds() + pLater := 1 - cdf(d.w.Mean(), d.w.Stddev(), t) + phi := -math.Log10(pLater) + + return phi +} + +// cdf is the cumulative distribution function of a normally distributed random +// variable with the given mean and standard deviation +func cdf(mean, stddev, x float64) float64 { + return 0.5 + 0.5*math.Erf((x-mean)/(stddev*math.Sqrt2)) +} diff --git a/vendor/github.com/dgryski/go-onlinestats/basics.go b/vendor/github.com/dgryski/go-onlinestats/basics.go new file mode 100644 index 0000000..580e5d7 --- /dev/null +++ b/vendor/github.com/dgryski/go-onlinestats/basics.go @@ -0,0 +1,54 @@ +package onlinestats + +import "math" + +func SumSq(a []float64) float64 { + var t float64 + for _, aa := range a { + t += (aa * aa) + } + return t +} + +func Sum(a []float64) float64 { + var t float64 + for _, aa := 
range a { + t += aa + } + return t +} + +func Mean(a []float64) float64 { + return Sum(a) / float64(len(a)) +} + +func variance(a []float64, sample bool) float64 { + + var adjustN float64 + + if sample { + adjustN = -1 + } + + m := Mean(a) + var sum2 float64 + var sum3 float64 + for _, aa := range a { + sum2 += (aa - m) * (aa - m) + sum3 += (aa - m) + } + n := float64(len(a)) + return (sum2 - (sum3*sum3)/n) / (n + adjustN) +} + +func Variance(a []float64) float64 { + return variance(a, false) +} + +func SampleVariance(a []float64) float64 { + return variance(a, true) +} + +func SampleStddev(a []float64) float64 { + return math.Sqrt(SampleVariance(a)) +} diff --git a/vendor/github.com/dgryski/go-onlinestats/dea.go b/vendor/github.com/dgryski/go-onlinestats/dea.go new file mode 100644 index 0000000..add3471 --- /dev/null +++ b/vendor/github.com/dgryski/go-onlinestats/dea.go @@ -0,0 +1,54 @@ +package onlinestats + +import ( + "math" +) + +// http://www.drdobbs.com/tools/discontiguous-exponential-averaging/184410671 +type DEA struct { + sumOfWeights float64 + sumOfData float64 + sumOfSquaredData float64 + previousTime float64 + alpha float64 + newDataWeightUpperBound float64 +} + +func NewDEA(alpha float64, maxDt float64) *DEA { + return &DEA{ + alpha: alpha, + newDataWeightUpperBound: 1 - math.Pow(alpha, float64(maxDt)), + } +} + +func (ew *DEA) Update(newData float64, t float64) { + weightReductionFactor := math.Pow(ew.alpha, float64(t-ew.previousTime)) + newDataWeight := minf(1-weightReductionFactor, ew.newDataWeightUpperBound) + ew.sumOfWeights = weightReductionFactor*ew.sumOfWeights + newDataWeight + ew.sumOfData = weightReductionFactor*ew.sumOfData + newDataWeight*newData + ew.sumOfSquaredData = weightReductionFactor*ew.sumOfSquaredData + (newDataWeight * (newData * newData)) + ew.previousTime = t +} + +func (ew *DEA) CompletenessFraction(t float64) float64 { + return math.Pow(ew.alpha, float64(t-ew.previousTime)*ew.sumOfWeights) +} +func (ew *DEA) Mean() float64 
{ + return (ew.sumOfData / ew.sumOfWeights) +} + +func (ew *DEA) Var() float64 { + m := ew.Mean() + return (ew.sumOfSquaredData/ew.sumOfWeights - m*m) +} + +func (ew *DEA) Stddev() float64 { + return math.Sqrt(ew.Var()) +} + +func minf(a, b float64) float64 { + if a < b { + return a + } + return b +} diff --git a/vendor/github.com/dgryski/go-onlinestats/expweight.go b/vendor/github.com/dgryski/go-onlinestats/expweight.go new file mode 100644 index 0000000..9a01f5b --- /dev/null +++ b/vendor/github.com/dgryski/go-onlinestats/expweight.go @@ -0,0 +1,47 @@ +package onlinestats + +// From http://queue.acm.org/detail.cfm?id=2534976 + +import "math" + +type ExpWeight struct { + n int + m1 float64 + v float64 + alpha float64 +} + +func NewExpWeight(alpha float64) *ExpWeight { + return &ExpWeight{alpha: alpha} +} + +func (e *ExpWeight) Push(x float64) { + + if e.n == 0 { + e.m1 = x + e.v = 1 + } else { + e.m1 = (1-e.alpha)*x + e.alpha*e.m1 + v := (x - e.m1) + e.v = (1-e.alpha)*(v*v) + e.alpha*e.v + } + + e.n++ + +} + +func (e *ExpWeight) Len() int { + return e.n +} + +func (e *ExpWeight) Mean() float64 { + return e.m1 +} + +func (e *ExpWeight) Var() float64 { + return e.v +} + +func (e *ExpWeight) Stddev() float64 { + return math.Sqrt(e.v) +} diff --git a/vendor/github.com/dgryski/go-onlinestats/kstest.go b/vendor/github.com/dgryski/go-onlinestats/kstest.go new file mode 100644 index 0000000..775ce74 --- /dev/null +++ b/vendor/github.com/dgryski/go-onlinestats/kstest.go @@ -0,0 +1,95 @@ +package onlinestats + +// https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test +// http://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm + +import ( + "math" + "sort" +) + +// From NR + +// KS performs a Kolmogorov-Smirnov test for the two datasets, and returns the +// p-value for the null hypothesis that the two sets come from the same distribution. 
+func KS(data1, data2 []float64) float64 { + + sort.Float64s(data1) + sort.Float64s(data2) + + for math.IsNaN(data1[0]) { + data1 = data1[1:] + } + + for math.IsNaN(data2[0]) { + data2 = data2[1:] + } + + n1, n2 := len(data1), len(data2) + en1, en2 := float64(n1), float64(n2) + + var d float64 + var fn1, fn2 float64 + + j1, j2 := 0, 0 + for j1 < n1 && j2 < n2 { + d1 := data1[j1] + d2 := data2[j2] + + if d1 <= d2 { + for j1 < n1 && d1 == data1[j1] { + j1++ + fn1 = float64(j1) / en1 + } + } + + if d2 <= d1 { + for j2 < n2 && d2 == data2[j2] { + j2++ + fn2 = float64(j2) / en2 + } + } + + if dt := math.Abs(fn2 - fn1); dt > d { + d = dt + } + + } + en := math.Sqrt((en1 * en2) / (en1 + en2)) + // R and Octave don't use this approximation that NR does + //return qks((en + 0.12 + 0.11/en) * d) + return qks(en * d) +} + +func qks(z float64) float64 { + + if z < 0. { + panic("bad z in qks") + } + + if z == 0. { + return 1. + } + if z < 1.18 { + return 1. - pks(z) + } + x := math.Exp(-2. * (z * z)) + return 2. * (x - math.Pow(x, 4) + math.Pow(x, 9)) +} + +func pks(z float64) float64 { + + if z < 0. { + panic("bad z in KSdist") + } + if z == 0. { + return 0. + } + if z < 1.18 { + y := math.Exp(-1.23370055013616983 / (z * z)) + return 2.25675833419102515 * math.Sqrt(-math.Log(y)) * (y + math.Pow(y, 9) + math.Pow(y, 25) + math.Pow(y, 49)) + } + + x := math.Exp(-2. * (z * z)) + return 1. 
- 2.*(x-math.Pow(x, 4)+math.Pow(x, 9)) +} diff --git a/vendor/github.com/dgryski/go-onlinestats/pearson.go b/vendor/github.com/dgryski/go-onlinestats/pearson.go new file mode 100644 index 0000000..176a05b --- /dev/null +++ b/vendor/github.com/dgryski/go-onlinestats/pearson.go @@ -0,0 +1,35 @@ +package onlinestats + +import "math" + +func Pearson(a, b []float64) float64 { + + if len(a) != len(b) { + panic("len(a) != len(b)") + } + + var abar, bbar float64 + var n int + for i := range a { + if !math.IsNaN(a[i]) && !math.IsNaN(b[i]) { + abar += a[i] + bbar += b[i] + n++ + } + } + nf := float64(n) + abar, bbar = abar/nf, bbar/nf + + var numerator float64 + var sumAA, sumBB float64 + + for i := range a { + if !math.IsNaN(a[i]) && !math.IsNaN(b[i]) { + numerator += (a[i] - abar) * (b[i] - bbar) + sumAA += (a[i] - abar) * (a[i] - abar) + sumBB += (b[i] - bbar) * (b[i] - bbar) + } + } + + return numerator / (math.Sqrt(sumAA) * math.Sqrt(sumBB)) +} diff --git a/vendor/github.com/dgryski/go-onlinestats/reservoir.go b/vendor/github.com/dgryski/go-onlinestats/reservoir.go new file mode 100644 index 0000000..ddeb34f --- /dev/null +++ b/vendor/github.com/dgryski/go-onlinestats/reservoir.go @@ -0,0 +1,76 @@ +package onlinestats + +import ( + "math" + "math/rand" +) + +type Reservoir struct { + data []float64 + + n int + sum float64 +} + +func NewReservoir(capacity int) *Reservoir { + return &Reservoir{ + data: make([]float64, 0, capacity), + } +} + +func (r *Reservoir) Push(n float64) { + + index := r.n + + r.n++ + + // not enough samples yet -- add it + if index < cap(r.data) { + r.data = append(r.data, n) + return + } + + ridx := rand.Intn(r.n) // == index+1, so we're 0..index inclusive + + if ridx >= len(r.data) { + // ignore this one + return + } + + // add to our sample + old := r.data[ridx] + + r.data[ridx] = n + + r.sum -= old + r.sum += n +} + +func (r *Reservoir) Len() int { + return len(r.data) +} + +func (r *Reservoir) Mean() float64 { + return r.sum / float64(r.Len()) 
+} + +func (r *Reservoir) Var() float64 { + n := float64(r.Len()) + mean := r.Mean() + l := r.Len() + + sum1 := 0.0 + sum2 := 0.0 + + for i := 0; i < l; i++ { + xm := r.data[i] - mean + sum1 += xm * xm + sum2 += xm + } + + return (sum1 - (sum2*sum2)/n) / (n - 1) +} + +func (r *Reservoir) Stddev() float64 { + return math.Sqrt(r.Var()) +} diff --git a/vendor/github.com/dgryski/go-onlinestats/spearman.go b/vendor/github.com/dgryski/go-onlinestats/spearman.go new file mode 100644 index 0000000..239cc5d --- /dev/null +++ b/vendor/github.com/dgryski/go-onlinestats/spearman.go @@ -0,0 +1,84 @@ +package onlinestats + +import ( + "math" + "sort" + + "github.com/aclements/go-moremath/mathx" +) + +type sort2 struct { + x []float64 + y []float64 +} + +func (s sort2) Len() int { return len(s.x) } +func (s sort2) Less(i, j int) bool { return s.x[i] < s.x[j] } +func (s sort2) Swap(i, j int) { + s.x[i], s.x[j] = s.x[j], s.x[i] + s.y[i], s.y[j] = s.y[j], s.y[i] +} + +// crank overwrites the entries in with their ranks +func crank(w []float64) float64 { + j, ji, jt, n := 1, 0, 0, len(w) + var rank float64 + + var s float64 + + for j < n { + if w[j] != w[j-1] { + w[j-1] = float64(j) + j++ + } else { + for jt = j + 1; jt <= n && w[jt-1] == w[j-1]; jt++ { + // empty + } + rank = 0.5 * (float64(j) + float64(jt) - 1) + for ji = j; ji <= (jt - 1); ji++ { + w[ji-1] = rank + } + t := float64(jt - j) + s += (t*t*t - t) + j = jt + } + } + if j == n { + w[n-1] = float64(n) + } + return s +} + +// Spearman returns the rank correlation coefficient between data1 and data2, and the associated p-value +func Spearman(data1, data2 []float64) (rs float64, p float64) { + n := len(data1) + wksp1, wksp2 := make([]float64, n), make([]float64, n) + copy(wksp1, data1) + copy(wksp2, data2) + + sort.Sort(sort2{wksp1, wksp2}) + sf := crank(wksp1) + sort.Sort(sort2{wksp2, wksp1}) + sg := crank(wksp2) + d := 0.0 + for j := 0; j < n; j++ { + sq := wksp1[j] - wksp2[j] + d += (sq * sq) + } + + en := float64(n) + 
en3n := en*en*en - en + + fac := (1.0 - sf/en3n) * (1.0 - sg/en3n) + rs = (1.0 - (6.0/en3n)*(d+(sf+sg)/12.0)) / math.Sqrt(fac) + + if fac = (rs + 1.0) * (1.0 - rs); fac > 0 { + t := rs * math.Sqrt((en-2.0)/fac) + df := en - 2.0 + p = mathx.BetaInc(df/(df+t*t), 0.5*df, 0.5) + } + + return rs, p +} + +func sqr(x float64) float64 { return x * x } diff --git a/vendor/github.com/dgryski/go-onlinestats/stats.go b/vendor/github.com/dgryski/go-onlinestats/stats.go new file mode 100644 index 0000000..d527b3e --- /dev/null +++ b/vendor/github.com/dgryski/go-onlinestats/stats.go @@ -0,0 +1,137 @@ +// Package onlinestats provides online, one-pass algorithms for descriptive statistics. +/* + +The implementation is based on the public domain code available at http://www.johndcook.com/skewness_kurtosis.html . + +The linear regression code is from http://www.johndcook.com/running_regression.html . + +*/ +package onlinestats + +import "math" + +type Running struct { + n int + m1, m2, m3, m4 float64 +} + +func NewRunning() *Running { + return &Running{} +} + +func (r *Running) Push(x float64) { + + n1 := float64(r.n) + r.n++ + delta := x - r.m1 + delta_n := delta / float64(r.n) + delta_n2 := delta_n * delta_n + term1 := delta * delta_n * n1 + r.m1 += delta_n + r.m4 += term1*delta_n2*float64(r.n*r.n-3*r.n+3) + 6*delta_n2*r.m2 - 4*delta_n*r.m3 + r.m3 += term1*delta_n*float64(r.n-2) - 3*delta_n*r.m2 + r.m2 += term1 +} + +func (r *Running) Len() int { + return r.n +} + +func (r *Running) Mean() float64 { + return r.m1 +} + +func (r *Running) Var() float64 { + return r.m2 / float64(r.n-1) +} + +func (r *Running) Stddev() float64 { + return math.Sqrt(r.Var()) +} + +func (r *Running) Skewness() float64 { + return math.Sqrt(float64(r.n)) * r.m3 / math.Pow(r.m2, 1.5) +} + +func (r *Running) Kurtosis() float64 { + return float64(r.n)*r.m4/(r.m2*r.m2) - 3.0 +} + +func CombineRunning(a, b *Running) *Running { + + var combined Running + + an := float64(a.n) + bn := float64(b.n) + cn := 
float64(an + bn) + + combined.n = a.n + b.n + + delta := b.m1 - a.m1 + delta2 := delta * delta + delta3 := delta * delta2 + delta4 := delta2 * delta2 + + combined.m1 = (an*a.m1 + bn*b.m1) / cn + + combined.m2 = a.m2 + b.m2 + delta2*an*bn/cn + + combined.m3 = a.m3 + b.m3 + delta3*an*bn*(an-bn)/(cn*cn) + combined.m3 += 3.0 * delta * (an*b.m2 - bn*a.m2) / cn + + combined.m4 = a.m4 + b.m4 + delta4*an*bn*(an*an-an*bn+bn*bn)/(cn*cn*cn) + combined.m4 += 6.0*delta2*(an*an*b.m2+bn*bn*a.m2)/(cn*cn) + 4.0*delta*(an*b.m3-bn*a.m3)/cn + + return &combined +} + +type Regression struct { + xstats Running + ystats Running + sxy float64 + n int +} + +func NewRegression() *Regression { + return &Regression{} +} + +func (r *Regression) Push(x, y float64) { + r.sxy += (r.xstats.Mean() - x) * (r.ystats.Mean() - y) * float64(r.n) / float64(r.n+1) + + r.xstats.Push(x) + r.ystats.Push(y) + r.n++ +} + +func (r *Regression) Len() int { + return r.n +} + +func (r *Regression) Slope() float64 { + sxx := r.xstats.Var() * float64(r.n-1) + return r.sxy / sxx +} + +func (r *Regression) Intercept() float64 { + return r.ystats.Mean() - r.Slope()*r.xstats.Mean() +} + +func (r *Regression) Correlation() float64 { + t := r.xstats.Stddev() * r.ystats.Stddev() + return r.sxy / (float64(r.n-1) * t) +} + +func CombineRegressions(a, b Regression) *Regression { + var combined Regression + + combined.xstats = *CombineRunning(&a.xstats, &b.xstats) + combined.ystats = *CombineRunning(&a.ystats, &b.ystats) + combined.n = a.n + b.n + + delta_x := b.xstats.Mean() - a.xstats.Mean() + delta_y := b.ystats.Mean() - a.ystats.Mean() + combined.sxy = a.sxy + b.sxy + float64(a.n*b.n)*delta_x*delta_y/float64(combined.n) + + return &combined +} diff --git a/vendor/github.com/dgryski/go-onlinestats/swilk.go b/vendor/github.com/dgryski/go-onlinestats/swilk.go new file mode 100644 index 0000000..0312a93 --- /dev/null +++ b/vendor/github.com/dgryski/go-onlinestats/swilk.go @@ -0,0 +1,424 @@ +package onlinestats + +import ( + 
"math" + "sort" + "strconv" +) + +func SWilk(x []float64) (float64, float64, error) { + + data := make([]float64, len(x)+1) + copy(data[1:], x) + sort.Float64s(data[1:]) + data[0] = math.NaN() + + length := len(x) + w, pw, err := swilkHelper(data, length, nil) + return w, pw, err +} + +// Calculate the Shapiro-Wilk W test and its significance level +// Based on the public domain code at https://joinup.ec.europa.eu/svn/sextante/soft/sextante_lib/trunk/algorithms/src/es/unex/sextante/tables/normalityTest/SWilk.java + +/* + * Constants and polynomial coefficients for swilk(). NOTE: FORTRAN counts the elements of the array x[length] as + * x[1] through x[length], not x[0] through x[length-1]. To avoid making pervasive, subtle changes to the algorithm + * (which would inevitably introduce pervasive, subtle bugs) the referenced arrays are padded with an unused 0th + * element, and the algorithm is ported so as to continue accessing from [1] through [length]. + */ +var c1 = []float64{math.NaN(), 0.0E0, 0.221157E0, -0.147981E0, -0.207119E1, 0.4434685E1, -0.2706056E1} +var c2 = []float64{math.NaN(), 0.0E0, 0.42981E-1, -0.293762E0, -0.1752461E1, 0.5682633E1, -0.3582633E1} +var c3 = []float64{math.NaN(), 0.5440E0, -0.39978E0, 0.25054E-1, -0.6714E-3} +var c4 = []float64{math.NaN(), 0.13822E1, -0.77857E0, 0.62767E-1, -0.20322E-2} +var c5 = []float64{math.NaN(), -0.15861E1, -0.31082E0, -0.83751E-1, 0.38915E-2} +var c6 = []float64{math.NaN(), -0.4803E0, -0.82676E-1, 0.30302E-2} +var c7 = []float64{math.NaN(), 0.164E0, 0.533E0} +var c8 = []float64{math.NaN(), 0.1736E0, 0.315E0} +var c9 = []float64{math.NaN(), 0.256E0, -0.635E-2} +var g = []float64{math.NaN(), -0.2273E1, 0.459E0} + +const ( + z90 = 0.12816E1 + z95 = 0.16449E1 + z99 = 0.23263E1 + zm = 0.17509E1 + zss = 0.56268E0 + bf1 = 0.8378E0 + xx90 = 0.556E0 + xx95 = 0.622E0 + sqrth = 0.70711E0 + th = 0.375E0 + small = 1E-19 + pi6 = 0.1909859E1 + stqr = 0.1047198E1 + upper = true +) + +/** +* ALGORITHM AS R94 APPL. STATIST. 
(1995) VOL.44, NO.4
+*
+* Calculates Shapiro-Wilk normality test and P-value for sample sizes 3 <= n <= 5000 .
+* Corrects AS 181, which was found to be inaccurate for n > 50.
+*
+* As described above with the constants, the data arrays x[] and a[] are referenced with a base element of 1 (like FORTRAN)
+* instead of 0 (like Java) to avoid screwing up the algorithm. To pass in 100 data points, declare x[101] and fill elements
+* x[1] through x[100] with data. x[0] will be ignored.
+*
+* @param x
+* Input; Data set to analyze; 100 points go in x[101] array from x[1] through x[100]
+* @param n
+* Input; Number of data points in x
+* @param a
+* Shapiro-Wilk coefficients. Can be nil, or pre-computed by swilkCoeffs and passed in.
+ */
+
+// SwilkFault is a numeric failure code returned by swilkHelper. The codes used
+// below are 1 (n < 3), 2 (n > 5000) and 6 (zero sample range); they appear to
+// mirror the FORTRAN IFAULT values — see the TODO about ifault=7 below.
+type SwilkFault int
+
+func (s SwilkFault) Error() string {
+	return "swilk fault " + strconv.Itoa(int(s))
+}
+
+// swilkHelper is the AS R94 core: given the 1-based, sorted sample x[1..n] it
+// returns the W statistic and its significance level pw. a may be nil, in
+// which case the coefficients are computed via SwilkCoeffs.
+func swilkHelper(x []float64, n int, a []float64) (w float64, pw float64, err error) {
+
+	if n > 5000 {
+		return 0, 0, SwilkFault(2)
+	}
+
+	pw = 1.0
+	// NOTE(review): w is a named return value, so it is always 0 here and this
+	// branch always sets w = 1 — a leftover from the FORTRAN in/out W parameter.
+	if w >= 0.0 {
+		w = 1.0
+	}
+	an := float64(n)
+	if n < 3 {
+		return 0, 0, SwilkFault(1)
+	}
+
+	if a == nil {
+		a = SwilkCoeffs(n)
+	}
+
+	// NOTE(review): unreachable — the n < 3 case already returned above.
+	if n < 3 {
+		return
+	}
+
+	// If W input as negative, calculate significance level of -W
+	var w1, xx float64
+	if w < 0.0 {
+		w1 = 1.0 + w
+	} else {
+
+		// Check for zero range
+
+		range_ := x[n] - x[1]
+		if range_ < small {
+			return 0, 0, SwilkFault(6)
+		}
+
+		// Check for correct sort order on range - scaled X
+		// TODO(dgryski): did the FORTRAN code puke on out-of-order X ? with ifault=7 ?
+		xx = x[1] / range_
+		sx := xx
+		sa := -a[1]
+		j := n - 1
+		for i := 2; i <= n; i++ {
+			xi := x[i] / range_
+			// IF (XX-XI .GT. SMALL) PRINT *,' ANYTHING'
+			sx += xi
+			if i != j {
+				sa += float64(sign(1, i-j)) * a[imin(i, j)]
+			}
+			xx = xi
+			j--
+		}
+
+		// Calculate W statistic as squared correlation between data and coefficients
+		sa /= float64(n)
+		sx /= float64(n)
+		ssa := 0.0
+		ssx := 0.0
+		sax := 0.0
+		j = n
+		var asa float64
+		for i := 1; i <= n; i++ {
+			if i != j {
+				asa = float64(sign(1, i-j))*a[imin(i, j)] - sa
+			} else {
+				asa = -sa
+			}
+			xsx := x[i]/range_ - sx
+			ssa += asa * asa
+			ssx += xsx * xsx
+			sax += asa * xsx
+			j--
+		}
+
+		// W1 equals (1-W) calculated to avoid excessive rounding error
+		// for W very near 1 (a potential problem in very large samples)
+
+		ssassx := math.Sqrt(ssa * ssx)
+		w1 = (ssassx - sax) * (ssassx + sax) / (ssa * ssx)
+	}
+	w = 1.0 - w1
+
+	// Calculate significance level for W (exact for N=3)
+
+	if n == 3 {
+		pw = pi6 * (math.Asin(math.Sqrt(w)) - stqr)
+		return w, pw, nil
+	}
+	y := math.Log(w1)
+	xx = math.Log(an)
+	m := 0.0
+	s := 1.0
+	if n <= 11 {
+		gamma := poly(g, 2, an)
+		if y >= gamma {
+			pw = small
+			return w, pw, nil
+		}
+		y = -math.Log(gamma - y)
+		m = poly(c3, 4, an)
+		s = math.Exp(poly(c4, 4, an))
+	} else {
+		m = poly(c5, 4, xx)
+		s = math.Exp(poly(c6, 3, xx))
+	}
+	pw = alnorm((y-m)/s, upper)
+
+	return w, pw, nil
+}
+
+// Precomputes the coefficients array a for SWilk
+func SwilkCoeffs(n int) []float64 {
+
+	// a is 1-based like everything else in this port; a[0] stays unused.
+	a := make([]float64, n+1)
+
+	an := float64(n)
+
+	n2 := n / 2
+
+	if n == 3 {
+		a[1] = sqrth
+	} else {
+		an25 := an + 0.25
+		summ2 := 0.0
+		for i := 1; i <= n2; i++ {
+			a[i] = ppnd((float64(i) - th) / an25)
+			summ2 += a[i] * a[i]
+		}
+		summ2 *= 2.0
+		ssumm2 := math.Sqrt(summ2)
+		rsn := 1.0 / math.Sqrt(an)
+		a1 := poly(c1, 6, rsn) - a[1]/ssumm2
+
+		// Normalize coefficients
+
+		var i1 int
+		var fac float64
+		if n > 5 {
+			i1 = 3
+			a2 := -a[2]/ssumm2 + poly(c2, 6, rsn)
+			fac = math.Sqrt((summ2 - 2.0*a[1]*a[1] - 2.0*a[2]*a[2]) / (1.0 - 2.0*a1*a1 - 2.0*a2*a2))
+			a[1] = a1
+			a[2] = a2
+		} else {
+			i1 = 2
+			fac = math.Sqrt((summ2 - 2.0*a[1]*a[1]) / (1.0 - 2.0*a1*a1))
+			a[1] = a1
+		}
+		for i := i1; i <= n2; i++ {
+			a[i] = -a[i] / fac
+		}
+	}
+
+	return a
+}
+
+/**
+ * Constructs an int with the absolute value of x and the sign of y
+ *
+ * @param x
+ * int to copy absolute value from
+ * @param y
+ * int to copy sign from
+ * @return int with absolute value of x and sign of y
+ */
+func sign(x int, y int) int {
+	var result = x
+	if x < 0 {
+		result = -x
+	}
+	if y < 0 {
+		result = -result
+	}
+	return result
+}
+
+// Constants & polynomial coefficients for ppnd(), slightly renamed to avoid conflicts. Could define
+// them inside ppnd(), but static constants are more efficient.
+
+// Coefficients for P close to 0.5
+const (
+	a0_p = 3.3871327179E+00
+	a1_p = 5.0434271938E+01
+	a2_p = 1.5929113202E+02
+	a3_p = 5.9109374720E+01
+	b1_p = 1.7895169469E+01
+	b2_p = 7.8757757664E+01
+	b3_p = 6.7187563600E+01
+
+	// Coefficients for P not close to 0, 0.5 or 1 (names changed to avoid conflict with swilk())
+	c0_p = 1.4234372777E+00
+	c1_p = 2.7568153900E+00
+	c2_p = 1.3067284816E+00
+	c3_p = 1.7023821103E-01
+	d1_p = 7.3700164250E-01
+	d2_p = 1.2021132975E-01
+
+	// Coefficients for P near 0 or 1.
+	e0_p = 6.6579051150E+00
+	e1_p = 3.0812263860E+00
+	e2_p = 4.2868294337E-01
+	e3_p = 1.7337203997E-02
+	f1_p = 2.4197894225E-01
+	f2_p = 1.2258202635E-02
+
+	split1 = 0.425
+	split2 = 5.0
+	const1 = 0.180625
+	const2 = 1.6
+)
+
+/**
+ * ALGORITHM AS 241 APPL. STATIST. (1988) VOL. 37, NO. 3, 477-484.
+ *
+ * Produces the normal deviate Z corresponding to a given lower tail area of P; Z is accurate to about 1 part in 10**7.
+ *
+ * @param p
+ * @return
+ */
+// ppnd returns the normal deviate z for the given lower-tail probability p
+// (AS 241 above); it returns 0.0 when p lies outside (0, 1).
+func ppnd(p float64) float64 {
+	q := p - 0.5
+	var r float64
+	if math.Abs(q) <= split1 {
+		// p close to 0.5: rational approximation in q*q
+		r = const1 - q*q
+		return q * (((a3_p*r+a2_p)*r+a1_p)*r + a0_p) / (((b3_p*r+b2_p)*r+b1_p)*r + 1.0)
+	} else {
+		// r is the tail probability on whichever side q falls
+		if q < 0.0 {
+			r = p
+		} else {
+			r = 1.0 - p
+		}
+		if r <= 0.0 {
+			return 0.0
+		}
+		r = math.Sqrt(-math.Log(r))
+		var normal_dev float64
+		if r <= split2 {
+			r -= const2
+			normal_dev = (((c3_p*r+c2_p)*r+c1_p)*r + c0_p) / ((d2_p*r+d1_p)*r + 1.0)
+		} else {
+			r -= split2
+			normal_dev = (((e3_p*r+e2_p)*r+e1_p)*r + e0_p) / ((f2_p*r+f1_p)*r + 1.0)
+		}
+		if q < 0.0 {
+			normal_dev = -normal_dev
+		}
+		return normal_dev
+	}
+}
+
+/**
+ * Algorithm AS 181.2 Appl. Statist. (1982) Vol. 31, No. 2
+ *
+ * Calculates the algebraic polynomial of order nord-1 with array of coefficients c. Zero order coefficient is c[1]
+ *
+ * @param c
+ * @param nord
+ * @param x
+ * @return
+ */
+func poly(c []float64, nord int, x float64) float64 {
+	poly := c[1]
+	if nord == 1 {
+		return poly
+	}
+	p := x * c[nord]
+	if nord != 2 {
+		// Horner evaluation from the highest coefficient down to c[2]
+		n2 := nord - 2
+		j := n2 + 1
+		for i := 1; i <= n2; i++ {
+			p = (p + c[j]) * x
+			j--
+		}
+	}
+	poly += p
+	return poly
+}
+
+// Constants & polynomial coefficients for alnorm(), slightly renamed to avoid conflicts.
+const (
+	con_a    = 1.28
+	ltone_a  = 7.0
+	utzero_a = 18.66
+	p_a      = 0.398942280444
+	q_a      = 0.39990348504
+	r_a      = 0.398942280385
+	a1_a     = 5.75885480458
+	a2_a     = 2.62433121679
+	a3_a     = 5.92885724438
+
+	b1_a = -29.8213557807
+	b2_a = 48.6959930692
+
+	c1_a = -3.8052E-8
+	c2_a = 3.98064794E-4
+	c3_a = -0.151679116635
+	c4_a = 4.8385912808
+	c5_a = 0.742380924027
+	c6_a = 3.99019417011
+
+	d1_a = 1.00000615302
+	d2_a = 1.98615381364
+	d3_a = 5.29330324926
+	d4_a = -15.1508972451
+	d5_a = 30.789933034
+)
+
+/**
+ * Algorithm AS66 Applied Statistics (1973) vol.22, no.3
+ *
+ * Evaluates the tail area of the standardised normal curve from x to infinity if upper is true or from minus infinity to x if
+ * upper is false.
+ */
+func alnorm(x float64, upper bool) float64 {
+	up := upper
+	z := x
+	// Work with |x| and flip the tail if x was negative.
+	if z < 0.0 {
+		up = !up
+		z = -z
+	}
+	var fn_val float64
+	if z > ltone_a && (!up || z > utzero_a) {
+		fn_val = 0.0
+	} else {
+		y := 0.5 * z * z
+		if z <= con_a {
+			fn_val = 0.5 - z*(p_a-q_a*y/(y+a1_a+b1_a/(y+a2_a+b2_a/(y+a3_a))))
+		} else {
+			fn_val = r_a * math.Exp(-y) / (z + c1_a + d1_a/(z+c2_a+d2_a/(z+c3_a+d3_a/(z+c4_a+d4_a/(z+c5_a+d5_a/(z+c6_a))))))
+		}
+	}
+	if !up {
+		fn_val = 1.0 - fn_val
+	}
+	return fn_val
+}
+
+func imin(i, j int) int {
+	if i < j {
+		return i
+	}
+	return j
+}
diff --git a/vendor/github.com/dgryski/go-onlinestats/ttest.go b/vendor/github.com/dgryski/go-onlinestats/ttest.go
new file mode 100644
index 0000000..7e6f1e4
--- /dev/null
+++ b/vendor/github.com/dgryski/go-onlinestats/ttest.go
@@ -0,0 +1,85 @@
+package onlinestats
+
+import "math"
+
+type Stats interface {
+	Mean() float64
+	Var() float64
+	Len() int
+}
+
+// Welch implements a Welch's t-test. Returns the probability that the difference in means is due to chance.
+func Welch(xs, ys Stats) float64 {
+
+	xvar := xs.Var()
+	yvar := ys.Var()
+
+	xn := float64(xs.Len())
+	yn := float64(ys.Len())
+
+	wvar := (xvar/xn + yvar/yn)
+
+	vnum := wvar * wvar
+	vdenom := (xvar*xvar)/(xn*xn*(xn-1)) + (yvar*yvar)/(yn*yn*(yn-1))
+
+	// both have 0 variance, the difference is the difference of the means
+	var v float64
+	if vdenom == 0 {
+		v = xn + yn - 2
+	} else {
+		v = vnum / vdenom
+	}
+
+	// if the variances are 0, we return if the means are different
+	if wvar == 0 {
+		if math.Abs(ys.Mean()-xs.Mean()) > 0.0000000001 {
+			return 1
+		} else {
+			return 0
+		}
+	}
+
+	t := (ys.Mean() - xs.Mean()) / math.Sqrt(wvar)
+
+	return pt(t, v)
+}
+
+// translated from https://bitbucket.org/petermr/euclid-dev/src/9936542c1f30/src/main/java/org/xmlcml/euclid/Univariate.java
+
+func pt(t, df float64) float64 {
+	// ALGORITHM AS 3 APPL. STATIST. (1968) VOL.17, P.189
+	// Computes P(T<t)
+	//
+	// NOTE(review): the stored text of this function was damaged — everything
+	// between "P(T" and "= 2 {" was swallowed by tag-stripping ("<t) ... if
+	// im2 >" parsed as a pseudo-HTML tag). The declarations below were
+	// reconstructed from Algorithm AS 3 and from the variables the surviving
+	// code uses; verify against the upstream go-onlinestats ttest.go.
+	var k, ks, im2, ioe int
+	g1 := 1.0 / math.Pi
+	idf := df
+	a := t / math.Sqrt(idf)
+	b := idf / (idf + t*t)
+	im2 = int(df) - 2
+	ioe = int(idf) % 2
+	s := 1.0
+	c := 1.0
+	idf = 1
+	ks = 2 + ioe
+	fk := float64(ks)
+	if im2 >= 2 {
+		for k = ks; k <= im2; k += 2 {
+			c = c * b * (fk - 1) / fk
+			s += c
+			if s != idf {
+				idf = s
+				fk += 2
+			}
+		}
+	}
+	if ioe != 1 {
+		return 0.5 + 0.5*a*math.Sqrt(b)*s
+	}
+	if df == 1 {
+		s = 0
+	}
+	return 0.5 + (a*b*s+math.Atan(a))*g1
+}
diff --git a/vendor/github.com/dgryski/go-onlinestats/tukey.go b/vendor/github.com/dgryski/go-onlinestats/tukey.go
new file mode 100644
index 0000000..2d29d4e
--- /dev/null
+++ b/vendor/github.com/dgryski/go-onlinestats/tukey.go
@@ -0,0 +1,27 @@
+package onlinestats
+
+import "sort"
+
+// Tukey returns the slice (sorted) with all the 1.5 IQR outliers removed
+func Tukey(data []float64) []float64 {
+
+	sort.Float64s(data)
+
+	first := int(0.25 * float64(len(data)))
+	third := int(0.75 * float64(len(data)))
+
+	iqr := data[third] - data[first]
+
+	min := data[first] - 1.5*iqr
+	max := data[third] + 1.5*iqr
+
+	// TODO(dgryski): our data is sorted; we should be able to trim these elements faster than O(n)
+	result := make([]float64, 0, len(data))
+	for _, r := range data {
+		if r >= min && r <= max {
+			result = append(result, r)
+		}
+	}
+
+	return result
+}
diff --git a/vendor/github.com/dgryski/go-onlinestats/utest.go b/vendor/github.com/dgryski/go-onlinestats/utest.go
new file mode 100644
index 0000000..3aff7c9
--- /dev/null
+++ b/vendor/github.com/dgryski/go-onlinestats/utest.go
@@ -0,0 +1,71 @@
+package onlinestats
+
+// https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U
+
+import (
+	"math"
+	"sort"
+)
+
+// MannWhitney performs a Mann-Whitney U test for the two samples xs and ys.
+// It returns the two-tailed p-value for the null hypothesis that the medians
+// of the two samples are the same. This uses the normal approximation which
+// is more accurate if the number of samples is >30.
+func MannWhitney(xs, ys []float64) float64 {
+
+	// floats in a map.. this feels dubious?
+	data := make(map[float64][]int)
+
+	for _, x := range xs {
+		data[x] = append(data[x], 0)
+	}
+
+	for _, y := range ys {
+		data[y] = append(data[y], 1)
+	}
+
+	floats := make([]float64, 0, len(data))
+
+	for k := range data {
+		floats = append(floats, k)
+	}
+
+	sort.Float64s(floats)
+
+	var r [2]float64
+
+	var idx = 1
+	for _, f := range floats {
+		dataf := data[f]
+		l := len(dataf)
+		var rank float64
+		if l == 1 {
+			rank = float64(idx)
+		} else {
+			rank = float64(idx) + float64(l-1)/2.0
+		}
+		for _, xy := range dataf {
+			r[xy] += rank
+		}
+		idx += l
+	}
+
+	n1n2 := len(xs) * len(ys)
+
+	idx = 0
+	u := float64(n1n2+(len(xs)*(len(xs)+1))/2.0) - r[0]
+	if u1 := float64(n1n2+(len(ys)*(len(ys)+1))/2.0) - r[1]; u > u1 {
+		idx = 1
+		u = u1
+	}
+
+	mu := float64(n1n2) / 2.0
+	sigu := math.Sqrt(float64(n1n2*(len(xs)+len(ys)+1)) / 12.0)
+	zu := math.Abs(float64(u)-mu) / sigu
+
+	return 2 - 2*cdf(0, 1, zu)
+}
+
+func cdf(mean, stddev, x float64) float64 {
+	return 0.5 + 0.5*math.Erf((x-mean)/(stddev*math.Sqrt2))
+}
diff --git a/vendor/github.com/dgryski/go-onlinestats/windexp.go b/vendor/github.com/dgryski/go-onlinestats/windexp.go
new file mode 100644
index 0000000..ee7b0bb
--- /dev/null
+++ b/vendor/github.com/dgryski/go-onlinestats/windexp.go
@@ -0,0 +1,72 @@
+package onlinestats
+
+import "math"
+
+// WindExp maintains a window of values, followed by an exponentially-weighted
+// region for the items that have left the window. This is similar to the
+// Average Loss Interval from "Equation-Based Congestion Control for Unicast
+// Applications" ( http://www.icir.org/tfrc/tcp-friendly.pdf ), but with lower
+// update cost. The advantage is that this maintains more local history, but
+// doesn't have the large changes when items leave the window.
+type WindExp struct {
+	// n counts every value ever pushed.
+	n int
+	// w is the fixed-capacity window; exp receives values evicted from it.
+	// (ExpWeight is defined elsewhere in this package.)
+	w   *Windowed
+	exp *ExpWeight
+}
+
+// NewWindExp returns WindExp with the specified window capacity and exponential alpha
+func NewWindExp(capacity int, alpha float64) *WindExp {
+	return &WindExp{
+		w:   NewWindowed(capacity),
+		exp: NewExpWeight(alpha),
+	}
+}
+
+// Push adds n to the window; once more values have been pushed than the
+// window holds, each value the window evicts is fed into the exponential
+// region.
+func (we *WindExp) Push(n float64) {
+
+	old := we.w.Push(n)
+
+	if we.n >= len(we.w.data) {
+		we.exp.Push(old)
+	}
+
+	we.n++
+}
+
+// Len returns the total number of values ever pushed.
+func (we *WindExp) Len() int {
+	return we.n
+}
+
+// Mean returns the combined mean of the window and the exponential region.
+func (we *WindExp) Mean() float64 {
+	// The math says this should be correct
+
+	if we.n <= len(we.w.data) {
+		return we.w.Mean()
+	}
+
+	return (we.w.sum + we.exp.Mean()) / float64(we.w.Len()+1)
+
+}
+
+// Var returns the combined variance of the window and the exponential region.
+func (we *WindExp) Var() float64 {
+	// http://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
+
+	cmean := we.Mean()
+
+	n1 := float64(we.w.Len())
+	s1 := we.w.Var()
+	x1xc := we.w.Mean() - cmean
+
+	n2 := float64(we.exp.Len())
+	s2 := we.exp.Var()
+	x2xc := we.exp.Mean() - cmean
+
+	cvar := (n1*(s1+x1xc*x1xc) + n2*(s2+x2xc*x2xc)) / (n1 + n2)
+
+	return cvar
+
+}
+
+func (we *WindExp) Stddev() float64 {
+	return math.Sqrt(we.Var())
+}
diff --git a/vendor/github.com/dgryski/go-onlinestats/window.go b/vendor/github.com/dgryski/go-onlinestats/window.go
new file mode 100644
index 0000000..379dc17
--- /dev/null
+++ b/vendor/github.com/dgryski/go-onlinestats/window.go
@@ -0,0 +1,86 @@
+package onlinestats
+
+// add http://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm
+
+import "math"
+
+// Windowed is a fixed-capacity ring buffer that maintains a running sum of
+// its contents.
+type Windowed struct {
+	data []float64
+	// head is the index the next Push will overwrite.
+	head int
+
+	// length counts total pushes; sum is the running sum of data.
+	length int
+	sum    float64
+}
+
+func NewWindowed(capacity int) *Windowed {
+	return &Windowed{
+		data: make([]float64, capacity),
+	}
+}
+
+// Push stores n in the ring buffer and returns the value it overwrote (the
+// zero value while the window is still filling).
+func (w *Windowed) Push(n float64) float64 {
+	old := w.data[w.head]
+
+	w.length++
+
+	w.data[w.head] = n
+	w.head++
+	if w.head >= len(w.data) {
+		w.head = 0
+	}
+
+	w.sum -= old
+	w.sum += n
+
+	return old
+}
+
+// Len returns the number of populated slots, capped at the window capacity.
+func (w *Windowed) Len() int {
+	if w.length < len(w.data) {
+		return w.length
+	}
+
+	return len(w.data)
+}
+
+func (w *Windowed) Mean() float64 {
+	return w.sum / float64(w.Len())
+}
+
+// Var returns the sample variance using the corrected two-pass formula
+// (the sum2 term compensates for rounding in the mean).
+func (w *Windowed) Var() float64 {
+	n := float64(w.Len())
+	mean := w.Mean()
+	l := w.Len()
+
+	sum1 := 0.0
+	sum2 := 0.0
+
+	for i := 0; i < l; i++ {
+		xm := w.data[i] - mean
+		sum1 += xm * xm
+		sum2 += xm
+	}
+
+	return (sum1 - (sum2*sum2)/n) / (n - 1)
+}
+
+func (w *Windowed) Stddev() float64 {
+	return math.Sqrt(w.Var())
+}
+
+// Set sets the current windowed data. The length is reset, and Push() will start overwriting the first element of the array.
+func (w *Windowed) Set(data []float64) {
+	if w.data != nil {
+		w.data = w.data[:0]
+	}
+
+	w.data = append(w.data, data...)
+
+	w.sum = 0
+	for _, v := range w.data {
+		w.sum += v
+	}
+
+	w.head = 0
+	w.length = len(data)
+}