diff --git a/notebooks/regressions.ipynb b/notebooks/regressions.ipynb index 0f36608..f889c55 100644 --- a/notebooks/regressions.ipynb +++ b/notebooks/regressions.ipynb @@ -148,6 +148,26 @@ "comparePredicted(df.x, df.y, predY);\n" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[33m0.9999999469410812\u001b[39m" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linregPoly.score(df.select('y').rows(), pl.DataFrame({\"py\":predY}).rows());" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -157,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -166,7 +186,7 @@ "![name]()" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -188,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -197,7 +217,7 @@ "![name]()" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -209,6 +229,26 @@ "\n", "comparePredicted(df.x, df.y, predLassoY);" ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[33m0.8637104981289901\u001b[39m" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lassoPoly.score(df.select('y').rows(), pl.DataFrame({\"py\":predLassoY}).rows());\n" + ] } ], "metadata": { diff --git a/regr/mod.wasm b/regr/mod.wasm index 2b297a2..f0a9636 100755 Binary files a/regr/mod.wasm and b/regr/mod.wasm differ diff --git a/regr/src/ElasticNet.go b/regr/src/ElasticNet.go index b0c3a6e..d5a9ffe 100644 --- a/regr/src/ElasticNet.go +++ b/regr/src/ElasticNet.go @@ -93,4 +93,9 @@ func setjsmethods(obj js.Value, regr *ElasticNet) { Y, _ := regr.Predict(X) return ToJSArray(Y) })) + obj.Set("score", js.FuncOf(func(this js.Value, args []js.Value) interface{} { + X := JSFloatArray2D(args[0]) + Y := JSFloatArray2D(args[1]) + return regr.Score(X, Y) + })) } diff --git a/regr/src/LinearRegression.go b/regr/src/LinearRegression.go index e9e1817..9d66994 100644 --- a/regr/src/LinearRegression.go +++ b/regr/src/LinearRegression.go @@ -9,13 +9,6 @@ import ( "gonum.org/v1/gonum/mat" ) -// ref: mat.Dense -// fit: solve least squares -// predict: predict y from x -// save: save model -// load: load model -// note: separate wasm/js glue - type LinearRegression struct { Coef *mat.Dense } @@ -34,6 +27,12 @@ func (reg *LinearRegression) Predict(X [][]float64) ([]float64, error) { return YDense.RawMatrix().Data, nil } +func (reg *LinearRegression) Score(YT, Y [][]float64) float64 { + TDense := Array2DToDense(YT) + YDense := Array2DToDense(Y) + return R2Score(TDense, YDense, nil, "raw_values").At(0, 0) +} + func (l *LinearRegression) Save() ([]byte, error) { return nil, nil } @@ -55,5 +54,10 @@ func NewLinearRegressionJS(this js.Value, args []js.Value) interface{} { Y, _ := reg.Predict(X) return ToJSArray(Y) })) + obj.Set("score", js.FuncOf(func(this js.Value, args []js.Value) interface{} { + X := JSFloatArray2D(args[0]) + Y := JSFloatArray2D(args[1]) + return reg.Score(X, Y) + })) return obj } diff --git a/regr/src/score.go b/regr/src/score.go new file mode 100644 index 0000000..b0f568b --- /dev/null +++ b/regr/src/score.go @@ -0,0 +1,87 @@ +package src + +import ( + "math" + + "gonum.org/v1/gonum/mat" +) + +type MatConst struct { + Rows, Columns int + Value float64 +} + +func (m MatConst) Dims() (int, int) { return m.Rows, m.Columns } + +func (m MatConst) At(i, j int) float64 { return m.Value } + +func (m MatConst) T() mat.Matrix { return MatConst{Rows: m.Columns, Columns: m.Rows, Value: m.Value} } + +func R2Score(yTrue, yPred mat.Matrix, sampleWeight *mat.Dense, multioutput string) *mat.Dense { + nSamples, nOutputs := yTrue.Dims() + if sampleWeight == nil { + sampleWeight = mat.DenseCopyOf(MatConst{Rows: nSamples, Columns: 1, Value: 1.}) + } + numerator := mat.NewDense(1, nOutputs, nil) + diff := mat.NewDense(nSamples, nOutputs, nil) + diff.Sub(yPred, yTrue) + diff2 := mat.NewDense(nSamples, nOutputs, nil) + diff2.MulElem(diff, diff) + numerator.Mul(sampleWeight.T(), diff2) + + sampleWeightSum := mat.Sum(sampleWeight) + + yTrueAvg := mat.NewDense(1, nOutputs, nil) + yTrueAvg.Mul(sampleWeight.T(), yTrue) + yTrueAvg.Scale(1./sampleWeightSum, yTrueAvg) + + diff2.Apply(func(i int, j int, _ float64) float64 { + v := yTrue.At(i, j) - yTrueAvg.At(0, j) + return v * v + }, diff2) + denominator := mat.NewDense(1, nOutputs, nil) + denominator.Mul(sampleWeight.T(), diff2) + + r2score := mat.NewDense(1, nOutputs, nil) + r2score.Apply(func(i int, j int, v float64) float64 { + d := math.Max(denominator.At(i, j), 1e-20) + return 1. - numerator.At(i, j)/d + }, r2score) + switch multioutput { + case "raw_values": + return r2score + case "variance_weighted": + r2 := mat.NewDense(1, 1, nil) + r2.Mul(denominator, r2score.T()) + sumden := mat.Sum(denominator) + r2.Scale(1./sumden, r2) + return r2 + default: // "uniform_average": + return mat.NewDense(1, 1, []float64{mat.Sum(r2score) / float64(nOutputs)}) + } + +} + +func MeanSquaredError(yTrue, yPred mat.Matrix, sampleWeight *mat.Dense, multioutput string) *mat.Dense { + nSamples, nOutputs := yTrue.Dims() + tmp := mat.NewDense(1, nOutputs, nil) + tmp.Apply(func(_ int, j int, v float64) float64 { + N, D := 0., 0. + for i := 0; i < nSamples; i++ { + ydiff := yPred.At(i, j) - yTrue.At(i, j) + w := 1. + if sampleWeight != nil { + w = sampleWeight.At(0, j) + } + N += w * (ydiff * ydiff) + D += w + } + return N / D + }, tmp) + switch multioutput { + case "raw_values": + return tmp + default: // "uniform_average": + return mat.NewDense(1, 1, []float64{mat.Sum(tmp) / float64(nOutputs)}) + } +} diff --git a/regr/src/utils.go b/regr/src/utils.go index 8e4ff57..e576adc 100644 --- a/regr/src/utils.go +++ b/regr/src/utils.go @@ -17,6 +17,14 @@ func Array2DToDense(X [][]float64) *mat.Dense { return dense } +func JSFloatArray(arg js.Value) []float64 { + arr := make([]float64, arg.Length()) + for i := 0; i < len(arr); i++ { + arr[i] = arg.Index(i).Float() + } + return arr +} + func JSFloatArray2D(arg js.Value) [][]float64 { arr := make([][]float64, arg.Length()) for i := 0; i < len(arr); i++ {