route/vendor/github.com/aclements/go-moremath/fit/lsquares.go

175 lines
4.6 KiB
Go
Raw Normal View History

2017-10-06 15:29:20 +00:00
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fit
import (
"fmt"
"math"
"strings"
"github.com/gonum/matrix/mat64"
)
// LinearLeastSquares fits the function
//
//	f(x) = Β₀terms₀(x) + Β₁terms₁(x) + ...
//
// to the points (xs[i], ys[i]) in the least squares sense. It returns
// the parameters Β₀, Β₁, ..., one per term and in the order of terms,
// that minimize the sum of the squared residuals of f:
//
//	∑ (ys[i] - f(xs[i]))²
//
// When weights is non-nil, each squared residual is weighted:
//
//	∑ weights[i] × (ys[i] - f(xs[i]))²
//
// Every linear term of f is supplied as a vectorized Go function: it
// receives the x values in xs and must store the value of the term at
// xs[i] into termOut[i].
//
// The fit is "linear" in the sense that f is linear in the computed
// parameters; the terms themselves need not be linear in x.
//
// LinearLeastSquares panics if len(xs) != len(ys), or if weights is
// non-nil and len(xs) != len(weights).
func LinearLeastSquares(xs, ys, weights []float64, terms ...func(xs, termOut []float64)) (params []float64) {
	// Β̂ is obtained by solving the "normal equations"
	//
	//	(𝐗ᵀ𝐖𝐗)Β̂ = 𝐗ᵀ𝐖𝐲
	//
	// in which 𝐖 is the diagonal matrix of weights (the identity
	// matrix when no weights are given).

	// TODO: Consider using orthogonal decomposition.

	// TODO: Consider providing a multidimensional version of
	// this.

	n, k := len(xs), len(terms)
	if n != len(ys) {
		panic("len(xs) != len(ys)")
	}
	if weights != nil && n != len(weights) {
		panic("len(xs) != len(weights)")
	}

	// Build 𝐗ᵀ rather than 𝐗: with one row per term, every term
	// function can write straight into its own contiguous row.
	design := make([]float64, k*n)
	for i, term := range terms {
		row := design[i*n : (i+1)*n]
		term(xs, row)
	}
	xt := mat64.NewDense(k, n, design)
	x := xt.T()

	// Build 𝐗ᵀ𝐖. Unweighted, 𝐖 is the identity and 𝐗ᵀ is reused as is.
	xtw := xt
	if weights != nil {
		// 𝐖 is diagonal, so scale each row of a copy of 𝐗ᵀ
		// element-wise by the weights instead of multiplying
		// by a full matrix.
		xtw = mat64.DenseCopyOf(xt)
		w := mat64.NewVector(len(weights), weights)
		for r := 0; r < k; r++ {
			scaled := xtw.RowView(r)
			scaled.MulElemVec(scaled, w)
		}
	}

	// Wrap 𝐲.
	yVec := mat64.NewVector(len(ys), ys)

	// Assemble both sides of the normal equations and solve for Β̂.
	normal := mat64.NewDense(k, k, nil)
	normal.Mul(xtw, x)

	moment := mat64.NewVector(k, nil)
	moment.MulVec(xtw, yVec)

	// The solution vector is backed by params, so solving fills
	// the returned slice directly.
	params = make([]float64, k)
	mat64.NewVector(k, params).SolveVec(normal, moment)
	return params
}
// PolynomialRegressionResult describes the polynomial produced by a
// PolynomialRegression.
//
// TODO: Should this just be a least squares regression result? We
// have the terms functions, so we can construct F, though it won't be
// very efficient.
type PolynomialRegressionResult struct {
	// Coefficients holds the coefficients of the fitted
	// polynomial, indexed by power: Coefficients[i] multiplies
	// the x^i term.
	Coefficients []float64

	// F evaluates the fitted polynomial at x.
	F func(x float64) float64
}

// String renders the polynomial as its non-zero terms joined by "+",
// in order of increasing power: the constant term as its value, the
// linear term as "<c>x", and higher powers as "<c>x^<n>". A
// polynomial with no non-zero coefficients is rendered as "0".
func (r PolynomialRegressionResult) String() string {
	parts := make([]string, 0, len(r.Coefficients))
	for pow, factor := range r.Coefficients {
		if factor == 0 {
			// Zero terms contribute nothing; leave them out.
			continue
		}
		var part string
		switch pow {
		case 0:
			part = fmt.Sprintf("%v", factor)
		case 1:
			part = fmt.Sprintf("%vx", factor)
		default:
			part = fmt.Sprintf("%vx^%d", factor, pow)
		}
		parts = append(parts, part)
	}
	if len(parts) == 0 {
		return "0"
	}
	return strings.Join(parts, "+")
}
// PolynomialRegression performs a least squares regression with a
// polynomial of the given degree. If weights is non-nil, it is used
// to weight the residuals.
//
// The fit uses one term per power of x from x^0 through x^degree, so
// the result has degree+1 coefficients and Coefficients[i] is the
// coefficient of x^i. The result's F evaluates that polynomial.
//
// PolynomialRegression panics if degree is negative. Mismatched
// lengths of xs, ys and weights are checked by LinearLeastSquares,
// which panics on them.
func PolynomialRegression(xs, ys, weights []float64, degree int) PolynomialRegressionResult {
	if degree < 0 {
		panic("degree must be non-negative")
	}

	// terms[d] computes x^d for every x. The first few powers get
	// specialized implementations that avoid calling math.Pow.
	terms := make([]func(xs, termOut []float64), degree+1)
	terms[0] = func(xs, termsOut []float64) {
		for i := range termsOut {
			termsOut[i] = 1
		}
	}
	if degree >= 1 {
		terms[1] = func(xs, termOut []float64) {
			copy(termOut, xs)
		}
	}
	if degree >= 2 {
		terms[2] = func(xs, termOut []float64) {
			for i, x := range xs {
				termOut[i] = x * x
			}
		}
	}
	for d := 3; d < len(terms); d++ {
		// Capture this iteration's exponent for the closure.
		// The exponent must be d itself (not d+1) so that the
		// fitted coefficient at index d multiplies x^d, matching
		// both the Coefficients documentation and F below.
		exp := float64(d)
		terms[d] = func(xs, termOut []float64) {
			for i, x := range xs {
				termOut[i] = math.Pow(x, exp)
			}
		}
	}

	coeffs := LinearLeastSquares(xs, ys, weights, terms...)

	// f evaluates coeffs[0] + coeffs[1]*x + coeffs[2]*x^2 + ...,
	// carrying the running power of x in xp.
	f := func(x float64) float64 {
		y := coeffs[0]
		xp := x
		for _, c := range coeffs[1:] {
			y += xp * c
			xp *= x
		}
		return y
	}
	return PolynomialRegressionResult{coeffs, f}
}