// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package fit

import (
	"fmt"
	"math"
	"strings"

	"github.com/gonum/matrix/mat64"
)

// LinearLeastSquares computes the least squares fit for the function
//
//	f(x) = Β₀terms₀(x) + Β₁terms₁(x) + ...
//
// to the data (xs[i], ys[i]). It returns the parameters Β₀, Β₁, ...
// that minimize the sum of the squares of the residuals of f:
//
//	∑ (ys[i] - f(xs[i]))²
//
// If weights is non-nil, it is used to weight these residuals:
//
//	∑ weights[i] × (ys[i] - f(xs[i]))²
//
// The function f is specified by one Go function for each linear
// term. For efficiency, the Go function is vectorized: it will be
// passed a slice of x values in xs and must fill the slice termOut
// with the value of the term for each value in xs.
//
// The returned slice has one entry per term, in the same order as
// terms.
//
// LinearLeastSquares panics if len(xs) != len(ys), or if weights is
// non-nil and len(xs) != len(weights).
//
// Note that this is called a "linear" least squares fit because the
// fitted function is linear in the computed parameters. The function
// need not be linear in x.
func LinearLeastSquares(xs, ys, weights []float64, terms ...func(xs, termOut []float64)) (params []float64) {
	// The optimal parameters are found by solving for Β̂ in the
	// "normal equations":
	//
	//    (𝐗ᵀ𝐖𝐗)Β̂ = 𝐗ᵀ𝐖𝐲
	//
	// where 𝐖 is a diagonal weight matrix (or the identity matrix
	// for the unweighted case).

	// TODO: Consider using orthogonal decomposition.

	// TODO: Consider providing a multidimensional version of
	// this.

	// Validate the inputs up front; mismatched lengths are treated as
	// a programming error.
	if len(xs) != len(ys) {
		panic("len(xs) != len(ys)")
	}
	if weights != nil && len(xs) != len(weights) {
		panic("len(xs) != len(weights)")
	}

	// Construct 𝐗ᵀ. This is the more convenient representation
	// for efficiently calling the term functions: term i writes its
	// len(xs) values into the i'th consecutive chunk of xTVals, which
	// becomes row i of the len(terms) × len(xs) matrix below.
	// (Assumes mat64.NewDense interprets its data as row-major.)
	xTVals := make([]float64, len(terms)*len(xs))
	for i, term := range terms {
		term(xs, xTVals[i*len(xs):i*len(xs)+len(xs)])
	}
	XT := mat64.NewDense(len(terms), len(xs), xTVals)
	X := XT.T()

	// Construct 𝐗ᵀ𝐖.
	var XTW *mat64.Dense
	if weights == nil {
		// 𝐖 is the identity matrix.
		XTW = XT
	} else {
		// Since 𝐖 is a diagonal matrix, we do this directly: each
		// row of a copy of 𝐗ᵀ is multiplied element-wise by the
		// weights. The copy keeps XT (and therefore X) unweighted.
		XTW = mat64.DenseCopyOf(XT)
		WDiag := mat64.NewVector(len(weights), weights)
		for row := 0; row < len(terms); row++ {
			// NOTE(review): this relies on RowView returning a
			// view that aliases XTW's storage, so that the product
			// is written back into XTW — confirm against mat64.
			rowView := XTW.RowView(row)
			rowView.MulElemVec(rowView, WDiag)
		}
	}

	// Construct 𝐲.
	y := mat64.NewVector(len(ys), ys)

	// Compute Β̂.
	// lhs = 𝐗ᵀ𝐖𝐗, a len(terms) × len(terms) matrix.
	lhs := mat64.NewDense(len(terms), len(terms), nil)
	lhs.Mul(XTW, X)

	// rhs = 𝐗ᵀ𝐖𝐲, a vector of length len(terms).
	rhs := mat64.NewVector(len(terms), nil)
	rhs.MulVec(XTW, y)

	// B is constructed over BVals so that the solution can be
	// returned as a plain slice (presumably NewVector uses BVals as
	// backing storage rather than copying it; the return value below
	// depends on that).
	BVals := make([]float64, len(terms))
	B := mat64.NewVector(len(terms), BVals)
	// NOTE(review): whatever SolveVec returns is not inspected here,
	// so a singular or ill-conditioned system is not reported to the
	// caller — confirm against the mat64 API.
	B.SolveVec(lhs, rhs)
	return BVals
}

// PolynomialRegressionResult is the resulting polynomial from a
// PolynomialRegression.
//
// TODO: Should this just be a least squares regression result? We
// have the terms functions, so we can construct F, though it won't be
// very efficient.
type PolynomialRegressionResult struct {
	// Coefficients is the coefficients of the fitted polynomial.
	// Coefficients[i] is the coefficient of the x^i term.
	Coefficients []float64

	// F evaluates the fitted polynomial at x.
	F func(x float64) float64
}

// String formats the polynomial as its non-zero terms joined by "+",
// lowest power first, for example "1+2x+3x^2". Terms whose
// coefficient is exactly zero are omitted, and a polynomial with no
// non-zero coefficients is rendered as "0". Coefficients are
// formatted with %v, so a negative coefficient appears after the
// separator as "+-".
func (r PolynomialRegressionResult) String() string {
	var terms []string
	for pow, factor := range r.Coefficients {
		switch {
		case factor == 0:
			// Skip terms that contribute nothing.
			continue
		case pow == 0:
			// Constant term: no "x" suffix.
			terms = append(terms, fmt.Sprintf("%v", factor))
		case pow == 1:
			// Linear term: omit the "^1" exponent.
			terms = append(terms, fmt.Sprintf("%vx", factor))
		default:
			terms = append(terms, fmt.Sprintf("%vx^%d", factor, pow))
		}
	}
	if len(terms) == 0 {
		return "0"
	}
	return strings.Join(terms, "+")
}

// PolynomialRegression performs a least squares regression with a
// polynomial of the given degree. If weights is non-nil, it is used
// to weight the residuals.
func PolynomialRegression(xs, ys, weights []float64, degree int) PolynomialRegressionResult { terms := make([]func(xs, termOut []float64), degree+1) terms[0] = func(xs, termsOut []float64) { for i := range termsOut { termsOut[i] = 1 } } if degree >= 1 { terms[1] = func(xs, termOut []float64) { copy(termOut, xs) } } if degree >= 2 { terms[2] = func(xs, termOut []float64) { for i, x := range xs { termOut[i] = x * x } } } for d := 3; d < len(terms); d++ { d := d terms[d] = func(xs, termOut []float64) { for i, x := range xs { termOut[i] = math.Pow(x, float64(d+1)) } } } coeffs := LinearLeastSquares(xs, ys, weights, terms...) f := func(x float64) float64 { y := coeffs[0] xp := x for _, c := range coeffs[1:] { y += xp * c xp *= x } return y } return PolynomialRegressionResult{coeffs, f} }