tun2: add phi failure detection, move pingloop to be per-connection
This commit is contained in:
parent
0ba19b0259
commit
4d30a5cfb3
|
@ -17,6 +17,7 @@ import (
|
|||
|
||||
"git.xeserv.us/xena/route/database"
|
||||
"github.com/Xe/ln"
|
||||
failure "github.com/dgryski/go-failure"
|
||||
"github.com/mtneug/pkg/ulid"
|
||||
cmap "github.com/streamrail/concurrent-map"
|
||||
kcp "github.com/xtaci/kcp-go"
|
||||
|
@ -55,6 +56,7 @@ type Connection struct {
|
|||
user string
|
||||
domain string
|
||||
cancel context.CancelFunc
|
||||
detector *failure.Detector
|
||||
}
|
||||
|
||||
func (c *Connection) F() ln.F {
|
||||
|
@ -155,57 +157,77 @@ func (s *Server) ListenAndServe() error {
|
|||
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(5 * time.Second)
|
||||
s.pingLoopInner()
|
||||
time.Sleep(time.Second)
|
||||
|
||||
now := time.Now()
|
||||
|
||||
s.connlock.Lock()
|
||||
for _, c := range s.conns {
|
||||
failureChance := c.detector.Phi(now)
|
||||
|
||||
ln.Log(c.F(), ln.F{
|
||||
"action": "phi_failure_detection",
|
||||
"value": failureChance,
|
||||
})
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) pingLoopInner() {
|
||||
s.connlock.Lock()
|
||||
defer s.connlock.Unlock()
|
||||
|
||||
for _, c := range s.conns {
|
||||
req, err := http.NewRequest("GET", "http://backend/health", nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
c.conn.SetDeadline(time.Now().Add(time.Second))
|
||||
stream, err := c.session.OpenStream()
|
||||
if err != nil {
|
||||
ln.Error(err, c.F())
|
||||
c.cancel()
|
||||
return
|
||||
}
|
||||
c.conn.SetDeadline(time.Time{})
|
||||
|
||||
stream.SetWriteDeadline(time.Now().Add(time.Second))
|
||||
err = req.Write(stream)
|
||||
if err != nil {
|
||||
ln.Error(err, c.F())
|
||||
c.cancel()
|
||||
return
|
||||
}
|
||||
|
||||
stream.SetReadDeadline(time.Now().Add(time.Second))
|
||||
_, err = stream.Read(make([]byte, 30))
|
||||
if err != nil {
|
||||
ln.Error(err, c.F())
|
||||
c.cancel()
|
||||
return
|
||||
}
|
||||
|
||||
stream.Close()
|
||||
|
||||
/*
|
||||
ln.Log(ln.F{
|
||||
"action": "ping_health_is_good",
|
||||
}, c.F())
|
||||
*/
|
||||
// Ping sends a "ping" to the client. If the client doesn't respond or the connection
// dies, then the connection needs to be cleaned up.
func (c *Connection) Ping() error {
	// The URL is nominal; only the request/response round trip over
	// the multiplexed stream matters.
	req, err := http.NewRequest("GET", "http://backend/health", nil)
	if err != nil {
		// Static method and URL: construction can only fail on a
		// programming error.
		panic(err)
	}

	stream, err := c.OpenStream()
	if err != nil {
		ln.Error(err, c.F())
		// Tear down the whole connection after returning the error.
		defer c.cancel()
		return err
	}
	defer stream.Close()

	// Bound the write so a dead peer cannot stall the health check.
	stream.SetWriteDeadline(time.Now().Add(time.Second))
	err = req.Write(stream)
	if err != nil {
		ln.Error(err, c.F())
		defer c.cancel()
		return err
	}

	// Read up to 30 bytes just to confirm the peer answered; the
	// response content is not parsed.
	stream.SetReadDeadline(time.Now().Add(5 * time.Second))
	_, err = stream.Read(make([]byte, 30))
	if err != nil {
		ln.Error(err, c.F())
		defer c.cancel()
		return err
	}

	// Record the successful heartbeat for phi-accrual failure detection.
	c.detector.Ping(time.Now())

	return nil
}
|
||||
|
||||
// OpenStream creates a new stream (connection) to the backend server.
// A one-second deadline bounds the stream-open handshake; the
// deadline is cleared again before the stream is handed to the caller.
func (c *Connection) OpenStream() (net.Conn, error) {
	err := c.conn.SetDeadline(time.Now().Add(time.Second))
	if err != nil {
		ln.Error(err, c.F())
		return nil, err
	}

	stream, err := c.session.OpenStream()
	if err != nil {
		ln.Error(err, c.F())
		return nil, err
	}

	// Clear the deadline; any error from doing so is returned to the
	// caller alongside the opened stream.
	return stream, c.conn.SetDeadline(time.Time{})
}
|
||||
|
||||
func (s *Server) HandleConn(c net.Conn, isKCP bool) {
|
||||
|
@ -288,13 +310,14 @@ func (s *Server) HandleConn(c net.Conn, isKCP bool) {
|
|||
}
|
||||
|
||||
connection := &Connection{
|
||||
id: ulid.New().String(),
|
||||
conn: c,
|
||||
isKCP: isKCP,
|
||||
session: session,
|
||||
user: defaultUser, // XXX RIP replace this with the actual token user once users are implemented
|
||||
domain: auth.Domain,
|
||||
cancel: cancel,
|
||||
id: ulid.New().String(),
|
||||
conn: c,
|
||||
isKCP: isKCP,
|
||||
session: session,
|
||||
user: defaultUser, // XXX RIP replace this with the actual token user once users are implemented
|
||||
domain: auth.Domain,
|
||||
cancel: cancel,
|
||||
detector: failure.New(15, 3),
|
||||
}
|
||||
|
||||
ln.Log(ln.F{
|
||||
|
@ -321,7 +344,15 @@ func (s *Server) HandleConn(c net.Conn, isKCP bool) {
|
|||
|
||||
s.domains.Set(auth.Domain, conns)
|
||||
|
||||
ticker := time.NewTicker(5 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
select {
|
||||
case <-ticker.C:
|
||||
err := connection.Ping()
|
||||
if err != nil {
|
||||
cancel()
|
||||
}
|
||||
case <-ctx.Done():
|
||||
s.connlock.Lock()
|
||||
delete(s.conns, c)
|
||||
|
|
|
@ -95,3 +95,6 @@ a1a5df8f92af764f378f07d6a3dd8eb3f7aa190a github.com/xtaci/smux
|
|||
6c23252515492caf9b228a9d5cabcdbde29f7f82 golang.org/x/net/ipv4
|
||||
8bf1e9bacbf65b10c81d0f4314cf2b1ebef728b5 github.com/streamrail/concurrent-map
|
||||
a00a8beb369cafd88bb7b32f31fc4ff3219c3565 github.com/Xe/gopreload
|
||||
033754ab1fee508c9f98f2785eec2365964e0b05 github.com/aclements/go-moremath/mathx
|
||||
4963dbd58fd03ebc6672b18f9237a9045e6ef479 github.com/dgryski/go-failure
|
||||
91a8fc22ba247b57c243ab90b49049fb734c47c3 github.com/dgryski/go-onlinestats
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mathx
|
||||
|
||||
import "math"
|
||||
|
||||
// lgamma returns the natural log of the absolute value of the gamma
// function at x, discarding the sign that math.Lgamma also reports.
func lgamma(x float64) float64 {
	res, _ := math.Lgamma(x)
	return res
}
|
||||
|
||||
// Beta returns the value of the complete beta function B(a, b),
// computed in log space as Γ(a)Γ(b)/Γ(a+b) to avoid overflow.
func Beta(a, b float64) float64 {
	la, _ := math.Lgamma(a)
	lb, _ := math.Lgamma(b)
	lab, _ := math.Lgamma(a + b)
	return math.Exp(la + lb - lab)
}
|
||||
|
||||
// BetaInc returns the value of the regularized incomplete beta
// function Iₓ(a, b).
//
// This is not to be confused with the "incomplete beta function",
// which can be computed as BetaInc(x, a, b)*Beta(a, b).
//
// If x < 0 or x > 1, returns NaN.
func BetaInc(x, a, b float64) float64 {
	// Based on Numerical Recipes in C, section 6.4. This uses the
	// continued fraction definition of I:
	//
	// (xᵃ*(1-x)ᵇ)/(a*B(a,b)) * (1/(1+(d₁/(1+(d₂/(1+...))))))
	//
	// where B(a,b) is the beta function and
	//
	// d_{2m+1} = -(a+m)(a+b+m)x/((a+2m)(a+2m+1))
	// d_{2m} = m(b-m)x/((a+2m-1)(a+2m))
	if x < 0 || x > 1 {
		return math.NaN()
	}
	bt := 0.0
	if 0 < x && x < 1 {
		// Compute the coefficient before the continued
		// fraction: x^a (1-x)^b / B(a,b), evaluated in log
		// space. At the endpoints x=0 and x=1 it stays 0.
		bt = math.Exp(lgamma(a+b) - lgamma(a) - lgamma(b) +
			a*math.Log(x) + b*math.Log(1-x))
	}
	// The continued fraction converges fastest for x below this
	// threshold; above it, apply the symmetry
	// Iₓ(a,b) = 1 - I₁₋ₓ(b,a) and evaluate on the fast side.
	if x < (a+1)/(a+b+2) {
		// Compute continued fraction directly.
		return bt * betacf(x, a, b) / a
	} else {
		// Compute continued fraction after symmetry transform.
		return 1 - bt*betacf(1-x, b, a)/b
	}
}
|
||||
|
||||
// betacf is the continued fraction component of the regularized
// incomplete beta function Iₓ(a, b), evaluated with the modified
// Lentz's method. It panics if the fraction fails to converge
// within 200 iterations.
func betacf(x, a, b float64) float64 {
	const maxIterations = 200
	const epsilon = 3e-14

	// raiseZero keeps intermediate values away from exact zero, as
	// Lentz's method requires (it divides by these quantities).
	raiseZero := func(z float64) float64 {
		if math.Abs(z) < math.SmallestNonzeroFloat64 {
			return math.SmallestNonzeroFloat64
		}
		return z
	}

	c := 1.0
	d := 1 / raiseZero(1-(a+b)*x/(a+1))
	h := d
	for m := 1; m <= maxIterations; m++ {
		mf := float64(m)

		// Even step of the recurrence.
		numer := mf * (b - mf) * x / ((a + 2*mf - 1) * (a + 2*mf))
		d = 1 / raiseZero(1+numer*d)
		c = raiseZero(1 + numer/c)
		h *= d * c

		// Odd step of the recurrence.
		numer = -(a + mf) * (a + b + mf) * x / ((a + 2*mf) * (a + 2*mf + 1))
		d = 1 / raiseZero(1+numer*d)
		c = raiseZero(1 + numer/c)
		hfac := d * c
		h *= hfac

		// Converged when the multiplicative update is ~1.
		if math.Abs(hfac-1) < epsilon {
			return h
		}
	}
	panic("betainc: a or b too big; failed to converge")
}
|
|
@ -0,0 +1,62 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mathx
|
||||
|
||||
import "math"
|
||||
|
||||
// smallFactLimit is the largest n whose factorial still fits in an
// int64 (20! needs 62 bits).
const smallFactLimit = 20

// smallFact[n] holds n! for 0 <= n <= smallFactLimit, filled at init.
var smallFact [smallFactLimit + 1]int64

// init precomputes the small-factorial table used by Choose.
func init() {
	acc := int64(1)
	smallFact[0] = acc
	for i := int64(1); i <= smallFactLimit; i++ {
		acc *= i
		smallFact[i] = acc
	}
}
|
||||
|
||||
// Choose returns the binomial coefficient of n and k.
// Out-of-range k (k < 0 or k > n) yields 0.
func Choose(n, k int) float64 {
	if k == 0 || k == n {
		return 1
	}
	if k < 0 || n < k {
		return 0
	}
	if n <= smallFactLimit { // Implies k <= smallFactLimit
		// It's faster to do several integer multiplications
		// than it is to do an extra integer division.
		// Remarkably, this is also faster than pre-computing
		// Pascal's triangle (presumably because this is very
		// cache efficient).
		//
		// numer = n·(n-1)···(n-k+1) = n!/(n-k)!, which fits in
		// int64 because n <= 20. The division by k! is exact:
		// C(n,k) is always an integer.
		numer := int64(1)
		for n1 := int64(n - (k - 1)); n1 <= int64(n); n1++ {
			numer *= n1
		}
		denom := smallFact[k]
		return float64(numer / denom)
	}

	// For larger n, fall back to log-gamma (approximate).
	return math.Exp(lchoose(n, k))
}
|
||||
|
||||
// Lchoose returns math.Log(Choose(n, k)). It returns NaN for
// out-of-range k (k < 0 or k > n).
func Lchoose(n, k int) float64 {
	if k == 0 || k == n {
		// C(n,0) = C(n,n) = 1, so the log is exactly 0.
		return 0
	}
	if k < 0 || n < k {
		return math.NaN()
	}
	return lchoose(n, k)
}
|
||||
|
||||
// lchoose computes log(C(n, k)) via log-gamma:
// lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1).
func lchoose(n, k int) float64 {
	num, _ := math.Lgamma(float64(n) + 1)
	den1, _ := math.Lgamma(float64(k) + 1)
	den2, _ := math.Lgamma(float64(n-k) + 1)
	return num - den1 - den2
}
|
|
@ -0,0 +1,96 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mathx
|
||||
|
||||
import "math"
|
||||
|
||||
// GammaInc returns the value of the incomplete gamma function (also
// known as the regularized gamma function):
//
//	P(a, x) = 1 / Γ(a) * ∫₀ˣ exp(-t) t**(a-1) dt
//
// It returns NaN for a <= 0, x < 0, or NaN inputs.
func GammaInc(a, x float64) float64 {
	// Based on Numerical Recipes in C, section 6.2.

	if a <= 0 || x < 0 || math.IsNaN(a) || math.IsNaN(x) {
		return math.NaN()
	}

	if x < a+1 {
		// Use the series representation, which converges more
		// rapidly in this range.
		return gammaIncSeries(a, x)
	} else {
		// Use the continued fraction representation, which
		// computes the complement Q(a,x) = 1 - P(a,x).
		return 1 - gammaIncCF(a, x)
	}
}
|
||||
|
||||
// GammaIncComp returns the complement of the incomplete gamma
|
||||
// function 1 - GammaInc(a, x). This is more numerically stable for
|
||||
// values near 0.
|
||||
func GammaIncComp(a, x float64) float64 {
|
||||
if a <= 0 || x < 0 || math.IsNaN(a) || math.IsNaN(x) {
|
||||
return math.NaN()
|
||||
}
|
||||
|
||||
if x < a+1 {
|
||||
return 1 - gammaIncSeries(a, x)
|
||||
} else {
|
||||
return gammaIncCF(a, x)
|
||||
}
|
||||
}
|
||||
|
||||
// gammaIncSeries evaluates the incomplete gamma function P(a, x) by
// its power-series expansion, which converges quickly for x < a+1.
// It panics if the series fails to converge within 200 terms.
func gammaIncSeries(a, x float64) float64 {
	const maxIterations = 200
	const epsilon = 3e-14

	// P(a, 0) is exactly 0.
	if x == 0 {
		return 0
	}

	// Sum terms xⁿ / (a(a+1)···(a+n)) until they stop mattering,
	// then apply the prefactor e⁻ˣ xᵃ / Γ(a).
	ap := a
	del := 1 / a
	sum := del
	for n := 0; n < maxIterations; n++ {
		ap++
		del *= x / ap
		sum += del
		if math.Abs(del) < math.Abs(sum)*epsilon {
			return sum * math.Exp(-x+a*math.Log(x)-lgamma(a))
		}
	}
	panic("a too large; failed to converge")
}
|
||||
|
||||
// gammaIncCF evaluates the complementary incomplete gamma function
// Q(a, x) by its continued-fraction representation using the
// modified Lentz's method, which converges quickly for x >= a+1.
// It panics if it fails to converge within 200 iterations.
func gammaIncCF(a, x float64) float64 {
	const maxIterations = 200
	const epsilon = 3e-14

	// raiseZero keeps denominators away from exact zero, as
	// Lentz's method requires.
	raiseZero := func(z float64) float64 {
		if math.Abs(z) < math.SmallestNonzeroFloat64 {
			return math.SmallestNonzeroFloat64
		}
		return z
	}

	b := x + 1 - a
	c := math.MaxFloat64
	d := 1 / b
	h := d

	for i := 1; i <= maxIterations; i++ {
		an := -float64(i) * (float64(i) - a)
		b += 2
		d = raiseZero(an*d + b)
		c = raiseZero(b + an/c)
		d = 1 / d
		del := d * c
		h *= del
		// Converged when the multiplicative update is ~1.
		if math.Abs(del-1) < epsilon {
			return math.Exp(-x+a*math.Log(x)-lgamma(a)) * h
		}
	}
	panic("a too large; failed to converge")
}
|
|
@ -0,0 +1,11 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package mathx implements special functions not provided by the
|
||||
// standard math package.
|
||||
package mathx // import "github.com/aclements/go-moremath/mathx"
|
||||
|
||||
import "math"
|
||||
|
||||
var nan = math.NaN()
|
|
@ -0,0 +1,18 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mathx
|
||||
|
||||
// Sign returns the sign of x: -1 if x < 0, 0 if x == 0, 1 if x > 0.
|
||||
// If x is NaN, it returns NaN.
|
||||
func Sign(x float64) float64 {
|
||||
if x == 0 {
|
||||
return 0
|
||||
} else if x < 0 {
|
||||
return -1
|
||||
} else if x > 0 {
|
||||
return 1
|
||||
}
|
||||
return nan
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
// Package failure implements the Phi Accrual Failure Detector
|
||||
/*
|
||||
|
||||
This package implements the paper "The φ Accrual Failure Detector" (2004)
|
||||
(available at http://hdl.handle.net/10119/4784).
|
||||
|
||||
To use the failure detection algorithm, you need a heartbeat loop that will
|
||||
call Ping() at regular intervals. At any point, you can call Phi() which will
|
||||
report how suspicious it is that a heartbeat has not been heard since the last
|
||||
time Ping() was called.
|
||||
|
||||
*/
|
||||
package failure
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/dgryski/go-onlinestats"
|
||||
)
|
||||
|
||||
// Detector is a failure detector. It accrues a suspicion level (phi)
// from the distribution of observed heartbeat intervals.
type Detector struct {
	w          *onlinestats.Windowed // sliding window of inter-arrival times, in seconds
	last       time.Time             // timestamp of the most recent Ping
	minSamples int                   // minimum window entries before Phi reports a value
	mu         sync.Mutex            // guards w and last
}
|
||||
|
||||
// New returns a new failure detector that considers the last windowSize
// samples, and ensures there are at least minSamples in the window before
// returning an answer.
func New(windowSize, minSamples int) *Detector {

	// last stays the zero time until the first Ping.
	d := &Detector{
		w:          onlinestats.NewWindowed(windowSize),
		minSamples: minSamples,
	}

	return d
}
|
||||
|
||||
// Ping registers a heart-beat at time now.
func (d *Detector) Ping(now time.Time) {
	d.mu.Lock()
	defer d.mu.Unlock()
	// The very first ping only establishes a baseline; subsequent
	// pings record the inter-arrival interval in seconds.
	if !d.last.IsZero() {
		d.w.Push(now.Sub(d.last).Seconds())
	}
	d.last = now
}
|
||||
|
||||
// Phi calculates the suspicion level at time 'now' that the remote end has failed
// (higher means more suspicious). It returns 0 until at least
// minSamples heartbeat intervals have been recorded.
func (d *Detector) Phi(now time.Time) float64 {
	d.mu.Lock()
	defer d.mu.Unlock()
	if d.w.Len() < d.minSamples {
		return 0
	}

	// pLater is the probability, under a normal model of the
	// observed intervals, that a heartbeat would arrive at least
	// this long after the previous one; phi is its negative log10,
	// so phi grows without bound as the silence lengthens.
	t := now.Sub(d.last).Seconds()
	pLater := 1 - cdf(d.w.Mean(), d.w.Stddev(), t)
	phi := -math.Log10(pLater)

	return phi
}
|
||||
|
||||
// cdf is the cumulative distribution function of a normally
// distributed random variable with the given mean and standard
// deviation, evaluated at x.
func cdf(mean, stddev, x float64) float64 {
	z := (x - mean) / (stddev * math.Sqrt2)
	return 0.5 + 0.5*math.Erf(z)
}
|
|
@ -0,0 +1,54 @@
|
|||
package onlinestats
|
||||
|
||||
import "math"
|
||||
|
||||
// SumSq returns the sum of the squares of the elements of a.
func SumSq(a []float64) float64 {
	total := 0.0
	for _, v := range a {
		total += v * v
	}
	return total
}
|
||||
|
||||
// Sum returns the sum of the elements of a.
func Sum(a []float64) float64 {
	total := 0.0
	for _, v := range a {
		total += v
	}
	return total
}
|
||||
|
||||
func Mean(a []float64) float64 {
|
||||
return Sum(a) / float64(len(a))
|
||||
}
|
||||
|
||||
func variance(a []float64, sample bool) float64 {
|
||||
|
||||
var adjustN float64
|
||||
|
||||
if sample {
|
||||
adjustN = -1
|
||||
}
|
||||
|
||||
m := Mean(a)
|
||||
var sum2 float64
|
||||
var sum3 float64
|
||||
for _, aa := range a {
|
||||
sum2 += (aa - m) * (aa - m)
|
||||
sum3 += (aa - m)
|
||||
}
|
||||
n := float64(len(a))
|
||||
return (sum2 - (sum3*sum3)/n) / (n + adjustN)
|
||||
}
|
||||
|
||||
func Variance(a []float64) float64 {
|
||||
return variance(a, false)
|
||||
}
|
||||
|
||||
func SampleVariance(a []float64) float64 {
|
||||
return variance(a, true)
|
||||
}
|
||||
|
||||
func SampleStddev(a []float64) float64 {
|
||||
return math.Sqrt(SampleVariance(a))
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
package onlinestats
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// DEA tracks a discontiguous exponentially-weighted average and
// variance, after "Discontiguous Exponential Averaging":
// http://www.drdobbs.com/tools/discontiguous-exponential-averaging/184410671
type DEA struct {
	sumOfWeights            float64 // decayed total weight
	sumOfData               float64 // decayed weighted sum of samples
	sumOfSquaredData        float64 // decayed weighted sum of squared samples
	previousTime            float64 // time of the most recent Update
	alpha                   float64 // per-unit-time decay factor
	newDataWeightUpperBound float64 // cap on the weight any one sample can receive
}

// NewDEA returns a DEA with decay factor alpha whose per-sample
// weight is capped as if at most maxDt time elapses between samples.
func NewDEA(alpha float64, maxDt float64) *DEA {
	d := &DEA{alpha: alpha}
	d.newDataWeightUpperBound = 1 - math.Pow(alpha, maxDt)
	return d
}
|
||||
|
||||
// Update folds the sample newData, observed at time t, into the
// averages. The weight of previously seen data decays by alpha per
// unit of elapsed time, and the new sample's weight is capped by
// newDataWeightUpperBound so a long gap cannot give a single sample
// overwhelming influence.
func (ew *DEA) Update(newData float64, t float64) {
	weightReductionFactor := math.Pow(ew.alpha, float64(t-ew.previousTime))
	newDataWeight := minf(1-weightReductionFactor, ew.newDataWeightUpperBound)
	ew.sumOfWeights = weightReductionFactor*ew.sumOfWeights + newDataWeight
	ew.sumOfData = weightReductionFactor*ew.sumOfData + newDataWeight*newData
	ew.sumOfSquaredData = weightReductionFactor*ew.sumOfSquaredData + (newDataWeight * (newData * newData))
	ew.previousTime = t
}
|
||||
|
||||
// CompletenessFraction reports how much of the potential weight at
// time t is actually represented in the average.
//
// NOTE(review): the exponent here is (t-previousTime)*sumOfWeights;
// the referenced Dr. Dobbs article formulates completeness as
// pow(alpha, t-previousTime) * sumOfWeights, i.e. with the multiplier
// outside the Pow call. Verify against the article before relying on
// this value.
func (ew *DEA) CompletenessFraction(t float64) float64 {
	return math.Pow(ew.alpha, float64(t-ew.previousTime)*ew.sumOfWeights)
}
|
||||
// Mean returns the exponentially-weighted mean of the samples.
func (ew *DEA) Mean() float64 {
	return (ew.sumOfData / ew.sumOfWeights)
}

// Var returns the exponentially-weighted variance, computed as
// E[x²] - E[x]².
func (ew *DEA) Var() float64 {
	m := ew.Mean()
	return (ew.sumOfSquaredData/ew.sumOfWeights - m*m)
}

// Stddev returns the exponentially-weighted standard deviation.
func (ew *DEA) Stddev() float64 {
	return math.Sqrt(ew.Var())
}
|
||||
|
||||
// minf returns the smaller of a and b; b is returned when they are
// equal or when either comparison fails (NaN), matching the original
// a < b test.
func minf(a, b float64) float64 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
|
|
@ -0,0 +1,47 @@
|
|||
package onlinestats
|
||||
|
||||
// From http://queue.acm.org/detail.cfm?id=2534976
|
||||
|
||||
import "math"
|
||||
|
||||
// ExpWeight keeps an exponentially-weighted running mean and
// variance of a stream of samples.
type ExpWeight struct {
	n     int     // number of samples pushed so far
	m1    float64 // exponentially-weighted mean
	v     float64 // exponentially-weighted variance
	alpha float64 // smoothing factor: the weight given to history
}

// NewExpWeight returns an ExpWeight that smooths with the given alpha.
func NewExpWeight(alpha float64) *ExpWeight {
	e := ExpWeight{alpha: alpha}
	return &e
}
|
||||
|
||||
// Push folds the sample x into the running exponentially-weighted
// mean and variance. The first sample sets the mean to x and the
// variance to 1 (an arbitrary starting prior — the variance of a
// single sample is otherwise undefined).
func (e *ExpWeight) Push(x float64) {

	if e.n == 0 {
		e.m1 = x
		e.v = 1
	} else {
		// Standard EWMA update; alpha is the weight kept from
		// history. Note the deviation v uses the just-updated mean.
		e.m1 = (1-e.alpha)*x + e.alpha*e.m1
		v := (x - e.m1)
		e.v = (1-e.alpha)*(v*v) + e.alpha*e.v
	}

	e.n++

}
|
||||
|
||||
// Len returns the number of samples pushed so far.
func (e *ExpWeight) Len() int {
	return e.n
}

// Mean returns the exponentially-weighted mean.
func (e *ExpWeight) Mean() float64 {
	return e.m1
}

// Var returns the exponentially-weighted variance.
func (e *ExpWeight) Var() float64 {
	return e.v
}

// Stddev returns the exponentially-weighted standard deviation.
func (e *ExpWeight) Stddev() float64 {
	return math.Sqrt(e.v)
}
|
|
@ -0,0 +1,95 @@
|
|||
package onlinestats
|
||||
|
||||
// https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
|
||||
// http://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// From NR
|
||||
|
||||
// KS performs a Kolmogorov-Smirnov test for the two datasets, and returns the
|
||||
// p-value for the null hypothesis that the two sets come from the same distribution.
|
||||
func KS(data1, data2 []float64) float64 {
|
||||
|
||||
sort.Float64s(data1)
|
||||
sort.Float64s(data2)
|
||||
|
||||
for math.IsNaN(data1[0]) {
|
||||
data1 = data1[1:]
|
||||
}
|
||||
|
||||
for math.IsNaN(data2[0]) {
|
||||
data2 = data2[1:]
|
||||
}
|
||||
|
||||
n1, n2 := len(data1), len(data2)
|
||||
en1, en2 := float64(n1), float64(n2)
|
||||
|
||||
var d float64
|
||||
var fn1, fn2 float64
|
||||
|
||||
j1, j2 := 0, 0
|
||||
for j1 < n1 && j2 < n2 {
|
||||
d1 := data1[j1]
|
||||
d2 := data2[j2]
|
||||
|
||||
if d1 <= d2 {
|
||||
for j1 < n1 && d1 == data1[j1] {
|
||||
j1++
|
||||
fn1 = float64(j1) / en1
|
||||
}
|
||||
}
|
||||
|
||||
if d2 <= d1 {
|
||||
for j2 < n2 && d2 == data2[j2] {
|
||||
j2++
|
||||
fn2 = float64(j2) / en2
|
||||
}
|
||||
}
|
||||
|
||||
if dt := math.Abs(fn2 - fn1); dt > d {
|
||||
d = dt
|
||||
}
|
||||
|
||||
}
|
||||
en := math.Sqrt((en1 * en2) / (en1 + en2))
|
||||
// R and Octave don't use this approximation that NR does
|
||||
//return qks((en + 0.12 + 0.11/en) * d)
|
||||
return qks(en * d)
|
||||
}
|
||||
|
||||
// qks is the complementary CDF (survival function) of the
// Kolmogorov-Smirnov statistic: the probability that the statistic
// exceeds z. Panics if z is negative.
func qks(z float64) float64 {

	if z < 0. {
		panic("bad z in qks")
	}

	if z == 0. {
		return 1.
	}
	// For small z, 1 - pks(z) is more accurate than the tail series.
	if z < 1.18 {
		return 1. - pks(z)
	}
	// Tail series for larger z.
	x := math.Exp(-2. * (z * z))
	return 2. * (x - math.Pow(x, 4) + math.Pow(x, 9))
}
|
||||
|
||||
// pks is the CDF of the Kolmogorov-Smirnov statistic: the probability
// that the statistic is at most z. Panics if z is negative.
func pks(z float64) float64 {
	switch {
	case z < 0.:
		panic("bad z in KSdist")
	case z == 0.:
		return 0.
	case z < 1.18:
		// Series that converges quickly for small z.
		y := math.Exp(-1.23370055013616983 / (z * z))
		return 2.25675833419102515 * math.Sqrt(-math.Log(y)) * (y + math.Pow(y, 9) + math.Pow(y, 25) + math.Pow(y, 49))
	}
	// Series for large z.
	x := math.Exp(-2. * (z * z))
	return 1. - 2.*(x-math.Pow(x, 4)+math.Pow(x, 9))
}
|
|
@ -0,0 +1,35 @@
|
|||
package onlinestats
|
||||
|
||||
import "math"
|
||||
|
||||
// Pearson returns the Pearson product-moment correlation coefficient
// between a and b, ignoring positions where either value is NaN.
// It panics if the slices have different lengths.
func Pearson(a, b []float64) float64 {
	if len(a) != len(b) {
		panic("len(a) != len(b)")
	}

	// First pass: means over positions where both values are present.
	var sumA, sumB float64
	var count int
	for i := range a {
		if !math.IsNaN(a[i]) && !math.IsNaN(b[i]) {
			sumA += a[i]
			sumB += b[i]
			count++
		}
	}
	meanA := sumA / float64(count)
	meanB := sumB / float64(count)

	// Second pass: covariance and the two (unnormalized) variances.
	var cov, varA, varB float64
	for i := range a {
		if !math.IsNaN(a[i]) && !math.IsNaN(b[i]) {
			cov += (a[i] - meanA) * (b[i] - meanB)
			varA += (a[i] - meanA) * (a[i] - meanA)
			varB += (b[i] - meanB) * (b[i] - meanB)
		}
	}

	return cov / (math.Sqrt(varA) * math.Sqrt(varB))
}
|
|
@ -0,0 +1,76 @@
|
|||
package onlinestats
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand"
|
||||
)
|
||||
|
||||
// Reservoir is a fixed-capacity uniform random sample (reservoir
// sample) of a stream of float64 values.
type Reservoir struct {
	data []float64 // current sample; at most cap(data) values

	n   int     // total number of values ever pushed
	sum float64 // running sum of the values currently in data
}

// NewReservoir returns a Reservoir that keeps a uniform sample of up
// to capacity values.
func NewReservoir(capacity int) *Reservoir {
	return &Reservoir{
		data: make([]float64, 0, capacity),
	}
}

// Push adds n to the stream. The first cap(data) values are kept
// verbatim; afterwards each new value replaces a random existing one
// with probability len(data)/total, keeping the sample uniform.
func (r *Reservoir) Push(n float64) {

	index := r.n

	r.n++

	// not enough samples yet -- add it
	if index < cap(r.data) {
		r.data = append(r.data, n)
		// Fix: the running sum must also include values added during
		// the initial fill, otherwise Mean/Var report zeros until the
		// reservoir starts evicting.
		r.sum += n
		return
	}

	ridx := rand.Intn(r.n) // == index+1, so we're 0..index inclusive

	if ridx >= len(r.data) {
		// ignore this one
		return
	}

	// add to our sample
	old := r.data[ridx]

	r.data[ridx] = n

	r.sum -= old
	r.sum += n
}
|
||||
|
||||
// Len returns the number of values currently held in the sample (at
// most the reservoir's capacity).
func (r *Reservoir) Len() int {
	return len(r.data)
}

// Mean returns the mean of the sampled values as r.sum/len; its
// correctness depends on Push maintaining r.sum for every value
// currently in data.
func (r *Reservoir) Mean() float64 {
	return r.sum / float64(r.Len())
}

// Var returns the sample variance (divisor n-1) of the sampled
// values using the corrected two-pass formula; sum2 cancels rounding
// error in the mean.
func (r *Reservoir) Var() float64 {
	n := float64(r.Len())
	mean := r.Mean()
	l := r.Len()

	sum1 := 0.0
	sum2 := 0.0

	for i := 0; i < l; i++ {
		xm := r.data[i] - mean
		sum1 += xm * xm
		sum2 += xm
	}

	return (sum1 - (sum2*sum2)/n) / (n - 1)
}

// Stddev returns the sample standard deviation of the sampled values.
func (r *Reservoir) Stddev() float64 {
	return math.Sqrt(r.Var())
}
|
|
@ -0,0 +1,84 @@
|
|||
package onlinestats
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
"github.com/aclements/go-moremath/mathx"
|
||||
)
|
||||
|
||||
// sort2 sorts x and keeps y in lockstep: reordering driven by x is
// mirrored onto y. Used to co-sort paired samples before ranking.
type sort2 struct {
	x []float64
	y []float64
}

func (s sort2) Len() int           { return len(s.x) }
func (s sort2) Less(i, j int) bool { return s.x[i] < s.x[j] }
func (s sort2) Swap(i, j int) {
	s.x[i], s.x[j] = s.x[j], s.x[i]
	s.y[i], s.y[j] = s.y[j], s.y[i]
}
|
||||
|
||||
// crank overwrites the entries in w (assumed sorted ascending) with
// their 1-based ranks; runs of equal values all receive the mean of
// the ranks they span (midranks). It returns the tie-correction term
// s = Σ(t³ - t), summed over each group of t tied values, which the
// Spearman computation needs.
//
// The indexing is 1-based (ported code in the Numerical Recipes
// style), hence the pervasive w[j-1] accesses.
func crank(w []float64) float64 {
	j, ji, jt, n := 1, 0, 0, len(w)
	var rank float64

	var s float64

	for j < n {
		if w[j] != w[j-1] {
			// Not tied with the next value: the rank is the position.
			w[j-1] = float64(j)
			j++
		} else {
			// Find the extent of the tie group [j, jt), then assign
			// each member the average rank of the group.
			for jt = j + 1; jt <= n && w[jt-1] == w[j-1]; jt++ {
				// empty
			}
			rank = 0.5 * (float64(j) + float64(jt) - 1)
			for ji = j; ji <= (jt - 1); ji++ {
				w[ji-1] = rank
			}
			t := float64(jt - j)
			s += (t*t*t - t)
			j = jt
		}
	}
	if j == n {
		// The last element was not part of a tie group.
		w[n-1] = float64(n)
	}
	return s
}
|
||||
|
||||
// Spearman returns the rank correlation coefficient between data1 and data2, and the associated p-value
// (via a Student's t approximation). The inputs are copied, so the
// caller's slices are not modified; both must have the same length.
func Spearman(data1, data2 []float64) (rs float64, p float64) {
	n := len(data1)
	wksp1, wksp2 := make([]float64, n), make([]float64, n)
	copy(wksp1, data1)
	copy(wksp2, data2)

	// Rank each sample while keeping the pairing intact; sf and sg
	// are the tie-correction terms returned by crank.
	sort.Sort(sort2{wksp1, wksp2})
	sf := crank(wksp1)
	sort.Sort(sort2{wksp2, wksp1})
	sg := crank(wksp2)
	// d accumulates the sum of squared rank differences.
	d := 0.0
	for j := 0; j < n; j++ {
		sq := wksp1[j] - wksp2[j]
		d += (sq * sq)
	}

	en := float64(n)
	en3n := en*en*en - en

	// Tie-corrected Spearman rho.
	fac := (1.0 - sf/en3n) * (1.0 - sg/en3n)
	rs = (1.0 - (6.0/en3n)*(d+(sf+sg)/12.0)) / math.Sqrt(fac)

	// Significance via t with n-2 degrees of freedom; p stays 0
	// when rs is exactly ±1 (fac == 0).
	if fac = (rs + 1.0) * (1.0 - rs); fac > 0 {
		t := rs * math.Sqrt((en-2.0)/fac)
		df := en - 2.0
		p = mathx.BetaInc(df/(df+t*t), 0.5*df, 0.5)
	}

	return rs, p
}
|
||||
|
||||
// sqr returns x squared.
func sqr(x float64) float64 {
	return x * x
}
|
|
@ -0,0 +1,137 @@
|
|||
// Package onlinestats provides online, one-pass algorithms for descriptive statistics.
|
||||
/*
|
||||
|
||||
The implementation is based on the public domain code available at http://www.johndcook.com/skewness_kurtosis.html .
|
||||
|
||||
The linear regression code is from http://www.johndcook.com/running_regression.html .
|
||||
|
||||
*/
|
||||
package onlinestats
|
||||
|
||||
import "math"
|
||||
|
||||
// Running computes one-pass descriptive statistics — mean, variance,
// skewness, and kurtosis — over values fed to Push. Based on the
// public domain code at http://www.johndcook.com/skewness_kurtosis.html .
type Running struct {
	n              int     // number of samples pushed
	m1, m2, m3, m4 float64 // accumulators for the first four moments
}

// NewRunning returns an empty Running accumulator.
func NewRunning() *Running {
	var r Running
	return &r
}
||||
|
||||
// Push folds the sample x into the running moment accumulators using
// the numerically stable one-pass updates from the John D. Cook
// reference implementation.
func (r *Running) Push(x float64) {

	n1 := float64(r.n)
	r.n++
	delta := x - r.m1
	delta_n := delta / float64(r.n)
	delta_n2 := delta_n * delta_n
	term1 := delta * delta_n * n1
	// Update order matters: m4 and m3 consume the previous values of
	// m2 and m3, so they are updated before m2.
	r.m1 += delta_n
	r.m4 += term1*delta_n2*float64(r.n*r.n-3*r.n+3) + 6*delta_n2*r.m2 - 4*delta_n*r.m3
	r.m3 += term1*delta_n*float64(r.n-2) - 3*delta_n*r.m2
	r.m2 += term1
}
||||
|
||||
// Len returns the number of samples pushed.
func (r *Running) Len() int {
	return r.n
}

// Mean returns the running mean.
func (r *Running) Mean() float64 {
	return r.m1
}

// Var returns the sample variance (divisor n-1); it is only
// meaningful once at least two samples have been pushed.
func (r *Running) Var() float64 {
	return r.m2 / float64(r.n-1)
}

// Stddev returns the sample standard deviation.
func (r *Running) Stddev() float64 {
	return math.Sqrt(r.Var())
}

// Skewness returns the skewness of the pushed samples.
func (r *Running) Skewness() float64 {
	return math.Sqrt(float64(r.n)) * r.m3 / math.Pow(r.m2, 1.5)
}

// Kurtosis returns the excess kurtosis of the pushed samples
// (0 for a normal distribution).
func (r *Running) Kurtosis() float64 {
	return float64(r.n)*r.m4/(r.m2*r.m2) - 3.0
}
|
||||
|
||||
// CombineRunning merges two Running accumulators into a new one
// equivalent to having pushed both sample sets into a single
// accumulator (the parallel moment-combination formulas).
func CombineRunning(a, b *Running) *Running {

	var combined Running

	an := float64(a.n)
	bn := float64(b.n)
	cn := float64(an + bn)

	combined.n = a.n + b.n

	// delta is the gap between the two means; its powers drive the
	// cross terms below.
	delta := b.m1 - a.m1
	delta2 := delta * delta
	delta3 := delta * delta2
	delta4 := delta2 * delta2

	// Weighted average of the two means.
	combined.m1 = (an*a.m1 + bn*b.m1) / cn

	combined.m2 = a.m2 + b.m2 + delta2*an*bn/cn

	combined.m3 = a.m3 + b.m3 + delta3*an*bn*(an-bn)/(cn*cn)
	combined.m3 += 3.0 * delta * (an*b.m2 - bn*a.m2) / cn

	combined.m4 = a.m4 + b.m4 + delta4*an*bn*(an*an-an*bn+bn*bn)/(cn*cn*cn)
	combined.m4 += 6.0*delta2*(an*an*b.m2+bn*bn*a.m2)/(cn*cn) + 4.0*delta*(an*b.m3-bn*a.m3)/cn

	return &combined
}
|
||||
|
||||
// Regression accumulates a one-pass simple linear least-squares
// regression. Based on the running-regression code at
// http://www.johndcook.com/running_regression.html .
type Regression struct {
	xstats Running // running stats of the x values
	ystats Running // running stats of the y values
	sxy    float64 // running co-moment Σ(x-x̄)(y-ȳ)
	n      int     // number of (x, y) pairs pushed
}

// NewRegression returns an empty Regression accumulator.
func NewRegression() *Regression {
	return &Regression{}
}
|
||||
|
||||
// Push folds the pair (x, y) into the regression. The co-moment
// update uses the means from before this sample, scaled by n/(n+1),
// which keeps sxy equal to Σ(x-x̄)(y-ȳ) over all pushed pairs.
func (r *Regression) Push(x, y float64) {
	r.sxy += (r.xstats.Mean() - x) * (r.ystats.Mean() - y) * float64(r.n) / float64(r.n+1)

	r.xstats.Push(x)
	r.ystats.Push(y)
	r.n++
}
|
||||
|
||||
// Len returns the number of pairs pushed.
func (r *Regression) Len() int {
	return r.n
}

// Slope returns the least-squares slope estimate sxy/sxx.
func (r *Regression) Slope() float64 {
	// Recover the raw sum of squares from the sample variance.
	sxx := r.xstats.Var() * float64(r.n-1)
	return r.sxy / sxx
}

// Intercept returns the least-squares intercept estimate.
func (r *Regression) Intercept() float64 {
	return r.ystats.Mean() - r.Slope()*r.xstats.Mean()
}

// Correlation returns the correlation coefficient of the pushed pairs.
func (r *Regression) Correlation() float64 {
	t := r.xstats.Stddev() * r.ystats.Stddev()
	return r.sxy / (float64(r.n-1) * t)
}
|
||||
|
||||
// CombineRegressions merges two Regression accumulators into one
// equivalent to having pushed both data sets into a single
// accumulator. The arguments are taken by value and not modified.
func CombineRegressions(a, b Regression) *Regression {
	var combined Regression

	combined.xstats = *CombineRunning(&a.xstats, &b.xstats)
	combined.ystats = *CombineRunning(&a.ystats, &b.ystats)
	combined.n = a.n + b.n

	// The co-moment combines like m2, with a cross delta term.
	delta_x := b.xstats.Mean() - a.xstats.Mean()
	delta_y := b.ystats.Mean() - a.ystats.Mean()
	combined.sxy = a.sxy + b.sxy + float64(a.n*b.n)*delta_x*delta_y/float64(combined.n)

	return &combined
}
|
|
@ -0,0 +1,424 @@
|
|||
package onlinestats
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// SWilk runs the Shapiro-Wilk normality test on x, returning the W
// statistic, its p-value, and any error from the underlying
// computation. The input is copied, sorted, and shifted into a
// 1-based array — element 0 is a NaN sentinel — because the ported
// algorithm uses FORTRAN-style indexing.
func SWilk(x []float64) (float64, float64, error) {

	data := make([]float64, len(x)+1)
	copy(data[1:], x)
	sort.Float64s(data[1:])
	data[0] = math.NaN()

	length := len(x)
	// Passing nil coefficients makes the helper compute its own.
	w, pw, err := swilkHelper(data, length, nil)
	return w, pw, err
}
|
||||
|
||||
// Calculate the Shapiro-Wilk W test and its significance level
|
||||
// Based on the public domain code at https://joinup.ec.europa.eu/svn/sextante/soft/sextante_lib/trunk/algorithms/src/es/unex/sextante/tables/normalityTest/SWilk.java
|
||||
|
||||
/*
 * Constants and polynomial coefficients for swilk(). NOTE: FORTRAN counts the elements of the array x[length] as
 * x[1] through x[length], not x[0] through x[length-1]. To avoid making pervasive, subtle changes to the algorithm
 * (which would inevitably introduce pervasive, subtle bugs) the referenced arrays are padded with an unused 0th
 * element, and the algorithm is ported so as to continue accessing from [1] through [length].
 */

// Polynomial coefficient tables from AS R94; index 0 is NaN padding so the
// 1-based indexing described above reads naturally.
var c1 = []float64{math.NaN(), 0.0E0, 0.221157E0, -0.147981E0, -0.207119E1, 0.4434685E1, -0.2706056E1}
var c2 = []float64{math.NaN(), 0.0E0, 0.42981E-1, -0.293762E0, -0.1752461E1, 0.5682633E1, -0.3582633E1}
var c3 = []float64{math.NaN(), 0.5440E0, -0.39978E0, 0.25054E-1, -0.6714E-3}
var c4 = []float64{math.NaN(), 0.13822E1, -0.77857E0, 0.62767E-1, -0.20322E-2}
var c5 = []float64{math.NaN(), -0.15861E1, -0.31082E0, -0.83751E-1, 0.38915E-2}
var c6 = []float64{math.NaN(), -0.4803E0, -0.82676E-1, 0.30302E-2}
var c7 = []float64{math.NaN(), 0.164E0, 0.533E0}
var c8 = []float64{math.NaN(), 0.1736E0, 0.315E0}
var c9 = []float64{math.NaN(), 0.256E0, -0.635E-2}
var g = []float64{math.NaN(), -0.2273E1, 0.459E0}
|
||||
|
||||
// Scalar constants from AS R94 (scaling factors and numeric guards) used by
// swilkHelper and SwilkCoeffs.
const (
	z90    = 0.12816E1
	z95    = 0.16449E1
	z99    = 0.23263E1
	zm     = 0.17509E1
	zss    = 0.56268E0
	bf1    = 0.8378E0
	xx90   = 0.556E0
	xx95   = 0.622E0
	sqrth  = 0.70711E0 // 1/sqrt(2)
	th     = 0.375E0
	small  = 1E-19 // numeric guard for zero range / vanishing p-values
	pi6    = 0.1909859E1
	stqr   = 0.1047198E1
	upper  = true
)
|
||||
|
||||
/**
|
||||
* ALGORITHM AS R94 APPL. STATIST. (1995) VOL.44, NO.4
|
||||
*
|
||||
* Calculates Shapiro-Wilk normality test and P-value for sample sizes 3 <= n <= 5000 .
|
||||
* Corrects AS 181, which was found to be inaccurate for n > 50.
|
||||
*
|
||||
* As described above with the constants, the data arrays x[] and a[] are referenced with a base element of 1 (like FORTRAN)
|
||||
* instead of 0 (like Java) to avoid screwing up the algorithm. To pass in 100 data points, declare x[101] and fill elements
|
||||
* x[1] through x[100] with data. x[0] will be ignored.
|
||||
*
|
||||
* @param x
|
||||
* Input; Data set to analyze; 100 points go in x[101] array from x[1] through x[100]
|
||||
* @param n
|
||||
* Input; Number of data points in x
|
||||
* @param a
|
||||
* Shapiro-Wilk coefficients. Can be nil, or pre-computed by swilkCoeffs and passed in.
|
||||
*/
|
||||
|
||||
// SwilkFault is the numeric fault code reported by the Shapiro-Wilk
// routines.
type SwilkFault int

// Error implements the error interface.
func (s SwilkFault) Error() string {
	code := strconv.Itoa(int(s))
	return "swilk fault " + code
}
|
||||
|
||||
// swilkHelper computes the Shapiro-Wilk W statistic (w) and its significance
// level (pw) for the sample x, which uses FORTRAN-style 1-based indexing:
// x[1..n] must be sorted ascending and x[0] is ignored. a holds the
// Shapiro-Wilk coefficients from SwilkCoeffs, or nil to compute them here.
// Only 3 <= n <= 5000 is supported; other sizes return a SwilkFault.
func swilkHelper(x []float64, n int, a []float64) (w float64, pw float64, err error) {

	if n > 5000 {
		return 0, 0, SwilkFault(2)
	}

	pw = 1.0
	// NOTE(review): w is a zero-initialized named return, so this condition
	// is always true and w is forced to 1.0. It mirrors the FORTRAN original
	// where W was an input/output parameter; the "w < 0" branch further down
	// is therefore unreachable as written — confirm before relying on it.
	if w >= 0.0 {
		w = 1.0
	}
	an := float64(n)
	if n < 3 {
		return 0, 0, SwilkFault(1)
	}

	if a == nil {
		a = SwilkCoeffs(n)
	}

	// NOTE(review): redundant — n < 3 already returned SwilkFault(1) above.
	if n < 3 {
		return
	}

	// If W input as negative, calculate significance level of -W
	var w1, xx float64
	if w < 0.0 {
		w1 = 1.0 + w
	} else {

		// Check for zero range

		range_ := x[n] - x[1]
		if range_ < small {
			return 0, 0, SwilkFault(6)
		}

		// Check for correct sort order on range - scaled X
		// TODO(dgryski): did the FORTRAN code puke on out-of-order X ? with ifault=7 ?
		// First pass: accumulate the sum of the range-scaled data (sx) and of
		// the antisymmetrically mirrored coefficients (sa). The coefficient
		// table only stores the first half; sign/imin reconstruct the rest.
		xx = x[1] / range_
		sx := xx
		sa := -a[1]
		j := n - 1
		for i := 2; i <= n; i++ {
			xi := x[i] / range_
			// IF (XX-XI .GT. SMALL) PRINT *,' ANYTHING'
			sx += xi
			if i != j {
				sa += float64(sign(1, i-j)) * a[imin(i, j)]
			}
			xx = xi
			j--
		}

		// Calculate W statistic as squared correlation between data and coefficients
		sa /= float64(n)
		sx /= float64(n)
		ssa := 0.0
		ssx := 0.0
		sax := 0.0
		j = n
		var asa float64
		// Second pass: centered sums of squares and cross-products between
		// data and coefficients.
		for i := 1; i <= n; i++ {
			if i != j {
				asa = float64(sign(1, i-j))*a[imin(i, j)] - sa
			} else {
				asa = -sa
			}
			xsx := x[i]/range_ - sx
			ssa += asa * asa
			ssx += xsx * xsx
			sax += asa * xsx
			j--
		}

		// W1 equals (1-W) calculated to avoid excessive rounding error
		// for W very near 1 (a potential problem in very large samples)

		ssassx := math.Sqrt(ssa * ssx)
		w1 = (ssassx - sax) * (ssassx + sax) / (ssa * ssx)
	}
	w = 1.0 - w1

	// Calculate significance level for W (exact for N=3)

	if n == 3 {
		pw = pi6 * (math.Asin(math.Sqrt(w)) - stqr)
		return w, pw, nil
	}
	y := math.Log(w1)
	xx = math.Log(an)
	m := 0.0
	s := 1.0
	// The null distribution of log(1-W) is fitted with one polynomial set
	// for small samples (n <= 11) and another for larger ones.
	if n <= 11 {
		gamma := poly(g, 2, an)
		if y >= gamma {
			pw = small
			return w, pw, nil
		}
		y = -math.Log(gamma - y)
		m = poly(c3, 4, an)
		s = math.Exp(poly(c4, 4, an))
	} else {
		m = poly(c5, 4, xx)
		s = math.Exp(poly(c6, 3, xx))
	}
	// Normalized statistic referred to the upper tail of the standard normal.
	pw = alnorm((y-m)/s, upper)

	return w, pw, nil
}
|
||||
|
||||
// Precomputes the coefficients array a for SWilk
//
// SwilkCoeffs returns the Shapiro-Wilk weights in a[1..n/2] (1-based; a[0]
// and the mirrored upper half are left zero). The result may be passed as
// the a argument of swilkHelper to amortize the cost over repeated tests of
// the same sample size.
func SwilkCoeffs(n int) []float64 {

	a := make([]float64, n+1)

	an := float64(n)

	n2 := n / 2

	if n == 3 {
		// Smallest supported sample: the single coefficient is 1/sqrt(2).
		a[1] = sqrth
	} else {
		// Initial estimates from the expected normal order statistics
		// (ppnd is the inverse normal CDF).
		an25 := an + 0.25
		summ2 := 0.0
		for i := 1; i <= n2; i++ {
			a[i] = ppnd((float64(i) - th) / an25)
			summ2 += a[i] * a[i]
		}
		summ2 *= 2.0
		ssumm2 := math.Sqrt(summ2)
		rsn := 1.0 / math.Sqrt(an)
		// Polynomial-corrected first coefficient (AS R94 improvement over AS 181).
		a1 := poly(c1, 6, rsn) - a[1]/ssumm2

		// Normalize coefficients

		var i1 int
		var fac float64
		if n > 5 {
			// For n > 5 the second coefficient also receives a polynomial
			// correction; normalization then excludes the first two terms.
			i1 = 3
			a2 := -a[2]/ssumm2 + poly(c2, 6, rsn)
			fac = math.Sqrt((summ2 - 2.0*a[1]*a[1] - 2.0*a[2]*a[2]) / (1.0 - 2.0*a1*a1 - 2.0*a2*a2))
			a[1] = a1
			a[2] = a2
		} else {
			i1 = 2
			fac = math.Sqrt((summ2 - 2.0*a[1]*a[1]) / (1.0 - 2.0*a1*a1))
			a[1] = a1
		}
		// Rescale the remaining coefficients by the normalizing factor.
		for i := i1; i <= n2; i++ {
			a[i] = -a[i] / fac
		}
	}

	return a
}
|
||||
|
||||
// sign returns an int carrying the absolute value of x and the sign of y,
// mirroring FORTRAN's SIGN intrinsic.
func sign(x int, y int) int {
	magnitude := x
	if magnitude < 0 {
		magnitude = -magnitude
	}
	if y >= 0 {
		return magnitude
	}
	return -magnitude
}
|
||||
|
||||
// Constants & polynomial coefficients for ppnd(), slightly renamed to avoid conflicts. Could define
// them inside ppnd(), but static constants are more efficient.

// Coefficients for P close to 0.5
const (
	a0_p = 3.3871327179E+00
	a1_p = 5.0434271938E+01
	a2_p = 1.5929113202E+02
	a3_p = 5.9109374720E+01
	b1_p = 1.7895169469E+01
	b2_p = 7.8757757664E+01
	b3_p = 6.7187563600E+01

	// Coefficients for P not close to 0, 0.5 or 1 (names changed to avoid conflict with swilk())
	c0_p = 1.4234372777E+00
	c1_p = 2.7568153900E+00
	c2_p = 1.3067284816E+00
	c3_p = 1.7023821103E-01
	d1_p = 7.3700164250E-01
	d2_p = 1.2021132975E-01

	// Coefficients for P near 0 or 1.
	e0_p = 6.6579051150E+00
	e1_p = 3.0812263860E+00
	e2_p = 4.2868294337E-01
	e3_p = 1.7337203997E-02
	f1_p = 2.4197894225E-01
	f2_p = 1.2258202635E-02

	// Region boundaries and centering constants for the three rational
	// approximations used in ppnd().
	split1 = 0.425
	split2 = 5.0
	const1 = 0.180625
	const2 = 1.6
)
|
||||
|
||||
/**
|
||||
* ALGORITHM AS 241 APPL. STATIST. (1988) VOL. 37, NO. 3, 477-484.
|
||||
*
|
||||
* Produces the normal deviate Z corresponding to a given lower tail area of P; Z is accurate to about 1 part in 10**7.
|
||||
*
|
||||
* @param p
|
||||
* @return
|
||||
*/
|
||||
func ppnd(p float64) float64 {
|
||||
q := p - 0.5
|
||||
var r float64
|
||||
if math.Abs(q) <= split1 {
|
||||
r = const1 - q*q
|
||||
return q * (((a3_p*r+a2_p)*r+a1_p)*r + a0_p) / (((b3_p*r+b2_p)*r+b1_p)*r + 1.0)
|
||||
} else {
|
||||
if q < 0.0 {
|
||||
r = p
|
||||
} else {
|
||||
r = 1.0 - p
|
||||
}
|
||||
if r <= 0.0 {
|
||||
return 0.0
|
||||
}
|
||||
r = math.Sqrt(-math.Log(r))
|
||||
var normal_dev float64
|
||||
if r <= split2 {
|
||||
r -= const2
|
||||
normal_dev = (((c3_p*r+c2_p)*r+c1_p)*r + c0_p) / ((d2_p*r+d1_p)*r + 1.0)
|
||||
} else {
|
||||
r -= split2
|
||||
normal_dev = (((e3_p*r+e2_p)*r+e1_p)*r + e0_p) / ((f2_p*r+f1_p)*r + 1.0)
|
||||
}
|
||||
if q < 0.0 {
|
||||
normal_dev = -normal_dev
|
||||
}
|
||||
return normal_dev
|
||||
}
|
||||
}
|
||||
|
||||
// poly evaluates the algebraic polynomial of order nord-1 at x using
// Horner's scheme. The coefficient slice c is 1-based (FORTRAN layout):
// the zero-order coefficient is c[1].
//
// Algorithm AS 181.2 Appl. Statist. (1982) Vol. 31, No. 2
func poly(c []float64, nord int, x float64) float64 {
	if nord == 1 {
		return c[1]
	}

	// Fold the coefficients from highest order down to c[2].
	p := x * c[nord]
	for j := nord - 1; j >= 2; j-- {
		p = (p + c[j]) * x
	}

	return c[1] + p
}
|
||||
|
||||
// Constants & polynomial coefficients for alnorm(), slightly renamed to avoid conflicts.
const (
	// Cutoffs: con_a selects between the two approximations; beyond
	// ltone_a/utzero_a the tail area is treated as zero.
	con_a    = 1.28
	ltone_a  = 7.0
	utzero_a = 18.66
	p_a      = 0.398942280444
	q_a      = 0.39990348504
	r_a      = 0.398942280385
	a1_a     = 5.75885480458
	a2_a     = 2.62433121679
	a3_a     = 5.92885724438

	b1_a = -29.8213557807
	b2_a = 48.6959930692

	c1_a = -3.8052E-8
	c2_a = 3.98064794E-4
	c3_a = -0.151679116635
	c4_a = 4.8385912808
	c5_a = 0.742380924027
	c6_a = 3.99019417011

	d1_a = 1.00000615302
	d2_a = 1.98615381364
	d3_a = 5.29330324926
	d4_a = -15.1508972451
	d5_a = 30.789933034
)
|
||||
|
||||
/**
|
||||
* Algorithm AS66 Applied Statistics (1973) vol.22, no.3
|
||||
*
|
||||
* Evaluates the tail area of the standardised normal curve from x to infinity if upper is true or from minus infinity to x if
|
||||
* upper is false.
|
||||
*/
|
||||
func alnorm(x float64, upper bool) float64 {
|
||||
up := upper
|
||||
z := x
|
||||
if z < 0.0 {
|
||||
up = !up
|
||||
z = -z
|
||||
}
|
||||
var fn_val float64
|
||||
if z > ltone_a && (!up || z > utzero_a) {
|
||||
fn_val = 0.0
|
||||
} else {
|
||||
y := 0.5 * z * z
|
||||
if z <= con_a {
|
||||
fn_val = 0.5 - z*(p_a-q_a*y/(y+a1_a+b1_a/(y+a2_a+b2_a/(y+a3_a))))
|
||||
} else {
|
||||
fn_val = r_a * math.Exp(-y) / (z + c1_a + d1_a/(z+c2_a+d2_a/(z+c3_a+d3_a/(z+c4_a+d4_a/(z+c5_a+d5_a/(z+c6_a))))))
|
||||
}
|
||||
}
|
||||
if !up {
|
||||
fn_val = 1.0 - fn_val
|
||||
}
|
||||
return fn_val
|
||||
}
|
||||
|
||||
// imin returns the smaller of i and j.
func imin(i, j int) int {
	if j < i {
		return j
	}
	return i
}
|
|
@ -0,0 +1,85 @@
|
|||
package onlinestats
|
||||
|
||||
import "math"
|
||||
|
||||
// Stats is the summary-statistics interface consumed by the hypothesis
// tests in this package.
type Stats interface {
	Mean() float64 // mean of the observed values
	Var() float64  // variance of the observed values
	Len() int      // number of observed values
}
|
||||
|
||||
// Welch implements a Welch's t-test. Returns the probability that the difference in means is due to chance.
|
||||
func Welch(xs, ys Stats) float64 {
|
||||
|
||||
xvar := xs.Var()
|
||||
yvar := ys.Var()
|
||||
|
||||
xn := float64(xs.Len())
|
||||
yn := float64(ys.Len())
|
||||
|
||||
wvar := (xvar/xn + yvar/yn)
|
||||
|
||||
vnum := wvar * wvar
|
||||
vdenom := (xvar*xvar)/(xn*xn*(xn-1)) + (yvar*yvar)/(yn*yn*(yn-1))
|
||||
|
||||
// both have 0 variance, the difference is the difference of the means
|
||||
var v float64
|
||||
if vdenom == 0 {
|
||||
v = xn + yn - 2
|
||||
} else {
|
||||
v = vnum / vdenom
|
||||
}
|
||||
|
||||
// if the variances are 0, we return if the means are different
|
||||
if wvar == 0 {
|
||||
if math.Abs(ys.Mean()-xs.Mean()) > 0.0000000001 {
|
||||
return 1
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
t := (ys.Mean() - xs.Mean()) / math.Sqrt(wvar)
|
||||
|
||||
return pt(t, v)
|
||||
}
|
||||
|
||||
// translated from https://bitbucket.org/petermr/euclid-dev/src/9936542c1f30/src/main/java/org/xmlcml/euclid/Univariate.java

// pt returns P(T < t) for Student's t-distribution with df degrees of
// freedom, or 0 when df < 1.
func pt(t, df float64) float64 {
	// ALGORITHM AS 3 APPL. STATIST. (1968) VOL.17, P.189
	// Computes P(T<t)
	var a, b, idf, im2, ioe, s, c, ks, fk, k float64
	const g1 = 0.3183098862 // 1/Pi
	if df < 1 {
		return 0
	}

	idf = df
	a = t / math.Sqrt(idf)
	b = idf / (idf + t*t)
	im2 = df - 2
	// ioe is 1 for odd df, 0 for even df; the series differs by parity.
	ioe = float64(int(idf) % 2)
	s = 1
	c = 1
	// idf is reused from here on as the previous partial sum, to detect
	// when the series stops making progress.
	idf = 1
	ks = 2 + ioe
	fk = ks
	if im2 >= 2 {
		// Each term is the previous one scaled by b*(fk-1)/fk.
		for k = ks; k <= im2; k += 2 {
			c = c * b * (fk - 1) / fk
			s += c
			// NOTE(review): fk only advances while the partial sum still
			// changes; once s converges the remaining iterations reuse the
			// same fk. This matches the source translation — confirm it is
			// the intended AS 3 behavior.
			if s != idf {
				idf = s
				fk += 2
			}
		}
	}
	if ioe != 1 {
		// Even df: closed form in terms of the series sum.
		return 0.5 + 0.5*a*math.Sqrt(b)*s
	}
	if df == 1 {
		s = 0
	}
	// Odd df: arctangent form.
	return 0.5 + (a*b*s+math.Atan(a))*g1
}
|
|
@ -0,0 +1,27 @@
|
|||
package onlinestats
|
||||
|
||||
import "sort"
|
||||
|
||||
// Tukey returns the slice (sorted) with all the 1.5 IQR outliers removed.
// The input slice is sorted in place; an empty input is returned unchanged.
func Tukey(data []float64) []float64 {

	// Guard: the quartile indexing below would panic on an empty slice.
	if len(data) == 0 {
		return data
	}

	sort.Float64s(data)

	// Approximate quartile positions; exact interpolation isn't needed for
	// outlier trimming.
	first := int(0.25 * float64(len(data)))
	third := int(0.75 * float64(len(data)))

	iqr := data[third] - data[first]

	min := data[first] - 1.5*iqr
	max := data[third] + 1.5*iqr

	// TODO(dgryski): our data is sorted; we should be able to trim these elements faster than O(n)
	result := make([]float64, 0, len(data))
	for _, r := range data {
		if r >= min && r <= max {
			result = append(result, r)
		}
	}

	return result
}
|
|
@ -0,0 +1,71 @@
|
|||
package onlinestats
|
||||
|
||||
// https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// MannWhitney performs a Matt-Whitney U test for the two samples xs and ys.
|
||||
// It returns the two-tailed p-value for the null hypothesis that the medians
|
||||
// of the two samples are the same. This uses the normal approximation which
|
||||
// is more accurate if the number of samples is >30.
|
||||
func MannWhitney(xs, ys []float64) float64 {
|
||||
|
||||
// floats in a map.. this feels dubious?
|
||||
data := make(map[float64][]int)
|
||||
|
||||
for _, x := range xs {
|
||||
data[x] = append(data[x], 0)
|
||||
}
|
||||
|
||||
for _, y := range ys {
|
||||
data[y] = append(data[y], 1)
|
||||
}
|
||||
|
||||
floats := make([]float64, 0, len(data))
|
||||
|
||||
for k := range data {
|
||||
floats = append(floats, k)
|
||||
}
|
||||
|
||||
sort.Float64s(floats)
|
||||
|
||||
var r [2]float64
|
||||
|
||||
var idx = 1
|
||||
for _, f := range floats {
|
||||
dataf := data[f]
|
||||
l := len(dataf)
|
||||
var rank float64
|
||||
if l == 1 {
|
||||
rank = float64(idx)
|
||||
} else {
|
||||
rank = float64(idx) + float64(l-1)/2.0
|
||||
}
|
||||
for _, xy := range dataf {
|
||||
r[xy] += rank
|
||||
}
|
||||
idx += l
|
||||
}
|
||||
|
||||
n1n2 := len(xs) * len(ys)
|
||||
|
||||
idx = 0
|
||||
u := float64(n1n2+(len(xs)*(len(xs)+1))/2.0) - r[0]
|
||||
if u1 := float64(n1n2+(len(ys)*(len(ys)+1))/2.0) - r[1]; u > u1 {
|
||||
idx = 1
|
||||
u = u1
|
||||
}
|
||||
|
||||
mu := float64(n1n2) / 2.0
|
||||
sigu := math.Sqrt(float64(n1n2*(len(xs)+len(ys)+1)) / 12.0)
|
||||
zu := math.Abs(float64(u)-mu) / sigu
|
||||
|
||||
return 2 - 2*cdf(0, 1, zu)
|
||||
}
|
||||
|
||||
// cdf returns the value at x of the normal CDF with the given mean and
// standard deviation.
func cdf(mean, stddev, x float64) float64 {
	z := (x - mean) / (stddev * math.Sqrt2)
	return 0.5 + 0.5*math.Erf(z)
}
|
|
@ -0,0 +1,72 @@
|
|||
package onlinestats
|
||||
|
||||
import "math"
|
||||
|
||||
// WindExp maintains a window of values, followed by a exponentially-weighted
// region for the items that have left the window. This is similar to the
// Average Loss Interval from "Equation-Based Congestion Control for Unicast
// Applications" ( http://www.icir.org/tfrc/tcp-friendly.pdf ), but with lower
// update cost. The advantage is that this maintains more local history, but
// doesn't have the large changes when items leave the window.
type WindExp struct {
	n    int        // total number of values pushed so far
	w    *Windowed  // fixed-capacity window over the most recent values
	exp  *ExpWeight // exponentially-weighted summary fed with evicted values
}
|
||||
|
||||
// NewWindExp returns WindExp with the specified window capacity and exponential alpha
|
||||
func NewWindExp(capacity int, alpha float64) *WindExp {
|
||||
return &WindExp{
|
||||
w: NewWindowed(capacity),
|
||||
exp: NewExpWeight(alpha),
|
||||
}
|
||||
}
|
||||
|
||||
func (we *WindExp) Push(n float64) {
|
||||
|
||||
old := we.w.Push(n)
|
||||
|
||||
if we.n >= len(we.w.data) {
|
||||
we.exp.Push(old)
|
||||
}
|
||||
|
||||
we.n++
|
||||
}
|
||||
|
||||
// Len returns the total number of values pushed, including those that have
// already left the window.
func (we *WindExp) Len() int {
	return we.n
}
|
||||
|
||||
// Mean returns the combined mean of the windowed values and the
// exponentially-weighted region.
func (we *WindExp) Mean() float64 {
	// The math says this should be correct

	// While the window is still filling nothing has been evicted, so the
	// window alone holds everything seen so far.
	if we.n <= len(we.w.data) {
		return we.w.Mean()
	}

	// NOTE(review): the exponential region is collapsed into a single
	// pseudo-observation (its mean), hence the +1 in the denominator —
	// confirm against ExpWeight's intended semantics.
	return (we.w.sum + we.exp.Mean()) / float64(we.w.Len()+1)

}
|
||||
|
||||
func (we *WindExp) Var() float64 {
|
||||
// http://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
|
||||
|
||||
cmean := we.Mean()
|
||||
|
||||
n1 := float64(we.w.Len())
|
||||
s1 := we.w.Var()
|
||||
x1xc := we.w.Mean() - cmean
|
||||
|
||||
n2 := float64(we.exp.Len())
|
||||
s2 := we.exp.Var()
|
||||
x2xc := we.exp.Mean() - cmean
|
||||
|
||||
cvar := (n1*(s1+x1xc*x1xc) + n2*(s2+x2xc*x2xc)) / (n1 + n2)
|
||||
|
||||
return cvar
|
||||
|
||||
}
|
||||
|
||||
// Stddev returns the square root of the combined variance.
func (we *WindExp) Stddev() float64 {
	return math.Sqrt(we.Var())
}
|
|
@ -0,0 +1,86 @@
|
|||
package onlinestats
|
||||
|
||||
// add http://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm
|
||||
|
||||
import "math"
|
||||
|
||||
// Windowed keeps running statistics over a fixed-capacity ring buffer of
// the most recent values.
type Windowed struct {
	data []float64 // ring buffer of the most recent values
	head int       // index of the next slot to overwrite

	length int     // total number of values pushed (may exceed len(data))
	sum    float64 // running sum of the values currently in data
}
|
||||
|
||||
func NewWindowed(capacity int) *Windowed {
|
||||
return &Windowed{
|
||||
data: make([]float64, capacity),
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Windowed) Push(n float64) float64 {
|
||||
old := w.data[w.head]
|
||||
|
||||
w.length++
|
||||
|
||||
w.data[w.head] = n
|
||||
w.head++
|
||||
if w.head >= len(w.data) {
|
||||
w.head = 0
|
||||
}
|
||||
|
||||
w.sum -= old
|
||||
w.sum += n
|
||||
|
||||
return old
|
||||
}
|
||||
|
||||
func (w *Windowed) Len() int {
|
||||
if w.length < len(w.data) {
|
||||
return w.length
|
||||
}
|
||||
|
||||
return len(w.data)
|
||||
}
|
||||
|
||||
// Mean returns the mean of the values currently in the window.
// For an empty window this is 0/0, i.e. NaN.
func (w *Windowed) Mean() float64 {
	return w.sum / float64(w.Len())
}
|
||||
|
||||
func (w *Windowed) Var() float64 {
|
||||
n := float64(w.Len())
|
||||
mean := w.Mean()
|
||||
l := w.Len()
|
||||
|
||||
sum1 := 0.0
|
||||
sum2 := 0.0
|
||||
|
||||
for i := 0; i < l; i++ {
|
||||
xm := w.data[i] - mean
|
||||
sum1 += xm * xm
|
||||
sum2 += xm
|
||||
}
|
||||
|
||||
return (sum1 - (sum2*sum2)/n) / (n - 1)
|
||||
}
|
||||
|
||||
// Stddev returns the standard deviation of the values currently in the
// window.
func (w *Windowed) Stddev() float64 {
	return math.Sqrt(w.Var())
}
|
||||
|
||||
// Set sets the current windowed data. The length is reset, and Push() will start overwriting the first element of the array.
|
||||
func (w *Windowed) Set(data []float64) {
|
||||
if w.data != nil {
|
||||
w.data = w.data[:0]
|
||||
}
|
||||
|
||||
w.data = append(w.data, data...)
|
||||
|
||||
w.sum = 0
|
||||
for _, v := range w.data {
|
||||
w.sum += v
|
||||
}
|
||||
|
||||
w.head = 0
|
||||
w.length = len(data)
|
||||
}
|
Loading…
Reference in New Issue