148 lines
4.4 KiB
Go
148 lines
4.4 KiB
Go
// Copyright 2015 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package stats
|
|
|
|
import (
|
|
"errors"
|
|
"math"
|
|
)
|
|
|
|
// A TTestResult is the result of a t-test.
|
|
type TTestResult struct {
|
|
// N1 and N2 are the sizes of the input samples. For a
|
|
// one-sample t-test, N2 is 0.
|
|
N1, N2 int
|
|
|
|
// T is the value of the t-statistic for this t-test.
|
|
T float64
|
|
|
|
// DoF is the degrees of freedom for this t-test.
|
|
DoF float64
|
|
|
|
// AltHypothesis specifies the alternative hypothesis tested
|
|
// by this test against the null hypothesis that there is no
|
|
// difference in the means of the samples.
|
|
AltHypothesis LocationHypothesis
|
|
|
|
// P is p-value for this t-test for the given null hypothesis.
|
|
P float64
|
|
}
|
|
|
|
func newTTestResult(n1, n2 int, t, dof float64, alt LocationHypothesis) *TTestResult {
|
|
dist := TDist{dof}
|
|
var p float64
|
|
switch alt {
|
|
case LocationDiffers:
|
|
p = 2 * (1 - dist.CDF(math.Abs(t)))
|
|
case LocationLess:
|
|
p = dist.CDF(t)
|
|
case LocationGreater:
|
|
p = 1 - dist.CDF(t)
|
|
}
|
|
return &TTestResult{N1: n1, N2: n2, T: t, DoF: dof, AltHypothesis: alt, P: p}
|
|
}
|
|
|
|
// A TTestSample is a sample that can be used for a one or two sample
|
|
// t-test.
|
|
type TTestSample interface {
|
|
Weight() float64
|
|
Mean() float64
|
|
Variance() float64
|
|
}
|
|
|
|
var (
|
|
ErrSampleSize = errors.New("sample is too small")
|
|
ErrZeroVariance = errors.New("sample has zero variance")
|
|
ErrMismatchedSamples = errors.New("samples have different lengths")
|
|
)
|
|
|
|
// TwoSampleTTest performs a two-sample (unpaired) Student's t-test on
|
|
// samples x1 and x2. This is a test of the null hypothesis that x1
|
|
// and x2 are drawn from populations with equal means. It assumes x1
|
|
// and x2 are independent samples, that the distributions have equal
|
|
// variance, and that the populations are normally distributed.
|
|
func TwoSampleTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) {
|
|
n1, n2 := x1.Weight(), x2.Weight()
|
|
if n1 == 0 || n2 == 0 {
|
|
return nil, ErrSampleSize
|
|
}
|
|
v1, v2 := x1.Variance(), x2.Variance()
|
|
if v1 == 0 && v2 == 0 {
|
|
return nil, ErrZeroVariance
|
|
}
|
|
|
|
dof := n1 + n2 - 2
|
|
v12 := ((n1-1)*v1 + (n2-1)*v2) / dof
|
|
t := (x1.Mean() - x2.Mean()) / math.Sqrt(v12*(1/n1+1/n2))
|
|
return newTTestResult(int(n1), int(n2), t, dof, alt), nil
|
|
}
|
|
|
|
// TwoSampleWelchTTest performs a two-sample (unpaired) Welch's t-test
|
|
// on samples x1 and x2. This is like TwoSampleTTest, but does not
|
|
// assume the distributions have equal variance.
|
|
func TwoSampleWelchTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) {
|
|
n1, n2 := x1.Weight(), x2.Weight()
|
|
if n1 <= 1 || n2 <= 1 {
|
|
// TODO: Can we still do this with n == 1?
|
|
return nil, ErrSampleSize
|
|
}
|
|
v1, v2 := x1.Variance(), x2.Variance()
|
|
if v1 == 0 && v2 == 0 {
|
|
return nil, ErrZeroVariance
|
|
}
|
|
|
|
dof := math.Pow(v1/n1+v2/n2, 2) /
|
|
(math.Pow(v1/n1, 2)/(n1-1) + math.Pow(v2/n2, 2)/(n2-1))
|
|
s := math.Sqrt(v1/n1 + v2/n2)
|
|
t := (x1.Mean() - x2.Mean()) / s
|
|
return newTTestResult(int(n1), int(n2), t, dof, alt), nil
|
|
}
|
|
|
|
// PairedTTest performs a two-sample paired t-test on samples x1 and
|
|
// x2. If μ0 is non-zero, this tests if the average of the difference
|
|
// is significantly different from μ0. If x1 and x2 are identical,
|
|
// this returns nil.
|
|
func PairedTTest(x1, x2 []float64, μ0 float64, alt LocationHypothesis) (*TTestResult, error) {
|
|
if len(x1) != len(x2) {
|
|
return nil, ErrMismatchedSamples
|
|
}
|
|
if len(x1) <= 1 {
|
|
// TODO: Can we still do this with n == 1?
|
|
return nil, ErrSampleSize
|
|
}
|
|
|
|
dof := float64(len(x1) - 1)
|
|
|
|
diff := make([]float64, len(x1))
|
|
for i := range x1 {
|
|
diff[i] = x1[i] - x2[i]
|
|
}
|
|
sd := StdDev(diff)
|
|
if sd == 0 {
|
|
// TODO: Can we still do the test?
|
|
return nil, ErrZeroVariance
|
|
}
|
|
t := (Mean(diff) - μ0) * math.Sqrt(float64(len(x1))) / sd
|
|
return newTTestResult(len(x1), len(x2), t, dof, alt), nil
|
|
}
|
|
|
|
// OneSampleTTest performs a one-sample t-test on sample x. This tests
|
|
// the null hypothesis that the population mean is equal to μ0. This
|
|
// assumes the distribution of the population of sample means is
|
|
// normal.
|
|
func OneSampleTTest(x TTestSample, μ0 float64, alt LocationHypothesis) (*TTestResult, error) {
|
|
n, v := x.Weight(), x.Variance()
|
|
if n == 0 {
|
|
return nil, ErrSampleSize
|
|
}
|
|
if v == 0 {
|
|
// TODO: Can we still do the test?
|
|
return nil, ErrZeroVariance
|
|
}
|
|
dof := n - 1
|
|
t := (x.Mean() - μ0) * math.Sqrt(n) / math.Sqrt(v)
|
|
return newTTestResult(int(n), 0, t, dof, alt), nil
|
|
}
|