// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package stats import "math/rand" // A DistCommon is a statistical distribution. DistCommon is a base // interface provided by both continuous and discrete distributions. type DistCommon interface { // CDF returns the cumulative probability Pr[X <= x]. // // For continuous distributions, the CDF is the integral of // the PDF from -inf to x. // // For discrete distributions, the CDF is the sum of the PMF // at all defined points from -inf to x, inclusive. Note that // the CDF of a discrete distribution is defined for the whole // real line (unlike the PMF) but has discontinuities where // the PMF is non-zero. // // The CDF is a monotonically increasing function and has a // domain of all real numbers. If the distribution has bounded // support, it has a range of [0, 1]; otherwise it has a range // of (0, 1). Finally, CDF(-inf)==0 and CDF(inf)==1. CDF(x float64) float64 // Bounds returns reasonable bounds for this distribution's // PDF/PMF and CDF. The total weight outside of these bounds // should be approximately 0. // // For a discrete distribution, both bounds are integer // multiples of Step(). // // If this distribution has finite support, it returns exact // bounds l, h such that CDF(l')=0 for all l' < l and // CDF(h')=1 for all h' >= h. Bounds() (float64, float64) } // A Dist is a continuous statistical distribution. type Dist interface { DistCommon // PDF returns the value of the probability density function // of this distribution at x. PDF(x float64) float64 } // A DiscreteDist is a discrete statistical distribution. // // Most discrete distributions are defined only at integral values of // the random variable. However, some are defined at other intervals, // so this interface takes a float64 value for the random variable. // The probability mass function rounds down to the nearest defined // point. Note that float64 values can exactly represent integer // values between ±2**53, so this generally shouldn't be an issue for // integer-valued distributions (likewise, for half-integer-valued // distributions, float64 can exactly represent all values between // ±2**52). type DiscreteDist interface { DistCommon // PMF returns the value of the probability mass function // Pr[X = x'], where x' is x rounded down to the nearest // defined point on the distribution. // // Note for implementers: for integer-valued distributions, // round x using int(math.Floor(x)). Do not use int(x), since // that truncates toward zero (unless all x <= 0 are handled // the same). PMF(x float64) float64 // Step returns s, where the distribution is defined for sℕ. Step() float64 } // TODO: Add a Support method for finite support distributions? Or // maybe just another return value from Bounds indicating that the // bounds are exact? // TODO: Plot method to return a pre-configured Plot object with // reasonable bounds and an integral function? Have to distinguish // PDF/CDF/InvCDF. Three methods? Argument? // // Doesn't have to be a method of Dist. Could be just a function that // takes a Dist and uses Bounds. // InvCDF returns the inverse CDF function of the given distribution // (also known as the quantile function or the percent point // function). This is a function f such that f(dist.CDF(x)) == x. If // dist.CDF is only weakly monotonic (that it, there are intervals // over which it is constant) and y > 0, f returns the smallest x that // satisfies this condition. In general, the inverse CDF is not // well-defined for y==0, but for convenience if y==0, f returns the // largest x that satisfies this condition. For distributions with // infinite support both the largest and smallest x are -Inf; however, // for distributions with finite support, this is the lower bound of // the support. // // If y < 0 or y > 1, f returns NaN. // // If dist implements InvCDF(float64) float64, this returns that // method. Otherwise, it returns a function that uses a generic // numerical method to construct the inverse CDF at y by finding x // such that dist.CDF(x) == y. This may have poor precision around // points of discontinuity, including f(0) and f(1). func InvCDF(dist DistCommon) func(y float64) (x float64) { type invCDF interface { InvCDF(float64) float64 } if dist, ok := dist.(invCDF); ok { return dist.InvCDF } // Otherwise, use a numerical algorithm. // // TODO: For discrete distributions, use the step size to // inform this computation. return func(y float64) (x float64) { const almostInf = 1e100 const xtol = 1e-16 if y < 0 || y > 1 { return nan } else if y == 0 { l, _ := dist.Bounds() if dist.CDF(l) == 0 { // Finite support return l } else { // Infinite support return -inf } } else if y == 1 { _, h := dist.Bounds() if dist.CDF(h) == 1 { // Finite support return h } else { // Infinite support return inf } } // Find loX, hiX for which cdf(loX) < y <= cdf(hiX). var loX, loY, hiX, hiY float64 x1, y1 := 0.0, dist.CDF(0) xdelta := 1.0 if y1 < y { hiX, hiY = x1, y1 for hiY < y && hiX != inf { loX, loY, hiX = hiX, hiY, hiX+xdelta hiY = dist.CDF(hiX) xdelta *= 2 } } else { loX, loY = x1, y1 for y <= loY && loX != -inf { hiX, hiY, loX = loX, loY, loX-xdelta loY = dist.CDF(loX) xdelta *= 2 } } if loX == -inf { return loX } else if hiX == inf { return hiX } // Use bisection on the interval to find the smallest // x at which cdf(x) <= y. _, x = bisectBool(func(x float64) bool { return dist.CDF(x) < y }, loX, hiX, xtol) return } } // Rand returns a random number generator that draws from the given // distribution. The returned generator takes an optional source of // randomness; if this is nil, it uses the default global source. // // If dist implements Rand(*rand.Rand) float64, Rand returns that // method. Otherwise, it returns a generic generator based on dist's // inverse CDF (which may in turn use an efficient implementation or a // generic numerical implementation; see InvCDF). func Rand(dist DistCommon) func(*rand.Rand) float64 { type distRand interface { Rand(*rand.Rand) float64 } if dist, ok := dist.(distRand); ok { return dist.Rand } // Otherwise, use a generic algorithm. inv := InvCDF(dist) return func(r *rand.Rand) float64 { var y float64 for y == 0 { if r == nil { y = rand.Float64() } else { y = r.Float64() } } return inv(y) } }