102 lines
2.5 KiB
Go
102 lines
2.5 KiB
Go
// Copyright 2015 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package stats
|
|
|
|
import (
|
|
"math"
|
|
|
|
"github.com/aclements/go-moremath/mathx"
|
|
)
|
|
|
|
// HypergeometicDist is a hypergeometric distribution.
|
|
type HypergeometicDist struct {
|
|
// N is the size of the population. N >= 0.
|
|
N int
|
|
|
|
// K is the number of successes in the population. 0 <= K <= N.
|
|
K int
|
|
|
|
// Draws is the number of draws from the population. This is
|
|
// usually written "n", but is called Draws here because of
|
|
// limitations on Go identifier naming. 0 <= Draws <= N.
|
|
Draws int
|
|
}
|
|
|
|
// PMF is the probability of getting exactly int(k) successes in
|
|
// d.Draws draws with replacement from a population of size d.N that
|
|
// contains exactly d.K successes.
|
|
func (d HypergeometicDist) PMF(k float64) float64 {
|
|
ki := int(math.Floor(k))
|
|
l, h := d.bounds()
|
|
if ki < l || ki > h {
|
|
return 0
|
|
}
|
|
return d.pmf(ki)
|
|
}
|
|
|
|
func (d HypergeometicDist) pmf(k int) float64 {
|
|
return math.Exp(mathx.Lchoose(d.K, k) + mathx.Lchoose(d.N-d.K, d.Draws-k) - mathx.Lchoose(d.N, d.Draws))
|
|
}
|
|
|
|
// CDF is the probability of getting int(k) or fewer successes in
|
|
// d.Draws draws with replacement from a population of size d.N that
|
|
// contains exactly d.K successes.
|
|
func (d HypergeometicDist) CDF(k float64) float64 {
|
|
// Based on Klotz, A Computational Approach to Statistics.
|
|
ki := int(math.Floor(k))
|
|
l, h := d.bounds()
|
|
if ki < l {
|
|
return 0
|
|
} else if ki >= h {
|
|
return 1
|
|
}
|
|
// Use symmetry to compute the smaller sum.
|
|
flip := false
|
|
if ki > (d.Draws+1)/(d.N+1)*(d.K+1) {
|
|
flip = true
|
|
ki = d.K - ki - 1
|
|
d.Draws = d.N - d.Draws
|
|
}
|
|
p := d.pmf(ki) * d.sum(ki)
|
|
if flip {
|
|
p = 1 - p
|
|
}
|
|
return p
|
|
}
|
|
|
|
func (d HypergeometicDist) sum(k int) float64 {
|
|
const epsilon = 1e-14
|
|
sum, ak := 1.0, 1.0
|
|
L := maxint(0, d.Draws+d.K-d.N)
|
|
for dk := 1; dk <= k-L && ak/sum > epsilon; dk++ {
|
|
ak *= float64(1+k-dk) / float64(d.Draws-k+dk)
|
|
ak *= float64(d.N-d.K-d.Draws+k+1-dk) / float64(d.K-k+dk)
|
|
sum += ak
|
|
}
|
|
return sum
|
|
}
|
|
|
|
func (d HypergeometicDist) bounds() (int, int) {
|
|
return maxint(0, d.Draws+d.K-d.N), minint(d.Draws, d.K)
|
|
}
|
|
|
|
func (d HypergeometicDist) Bounds() (float64, float64) {
|
|
l, h := d.bounds()
|
|
return float64(l), float64(h)
|
|
}
|
|
|
|
func (d HypergeometicDist) Step() float64 {
|
|
return 1
|
|
}
|
|
|
|
func (d HypergeometicDist) Mean() float64 {
|
|
return float64(d.Draws*d.K) / float64(d.N)
|
|
}
|
|
|
|
func (d HypergeometicDist) Variance() float64 {
|
|
return float64(d.Draws*d.K*(d.N-d.K)*(d.N-d.Draws)) /
|
|
float64(d.N*d.N*(d.N-1))
|
|
}
|