diff --git a/Makefile b/Makefile index 48fac9a..1ddb6c0 100644 --- a/Makefile +++ b/Makefile @@ -5,4 +5,5 @@ dev: prod: GOOS=js GOARCH=wasm tinygo build -o stat/mod.wasm -no-debug ./stat/main.go - GOOS=js GOARCH=wasm tinygo build -o plot/mod.wasm -no-debug ./plot/main.go \ No newline at end of file + GOOS=js GOARCH=wasm tinygo build -o regr/mod.wasm -no-debug ./regr/main.go + GOOS=js GOARCH=wasm go build -o plot/mod.wasm ./plot/main.go \ No newline at end of file diff --git a/notebooks/regressions.ipynb b/notebooks/regressions.ipynb index c3bfd59..0f36608 100644 --- a/notebooks/regressions.ipynb +++ b/notebooks/regressions.ipynb @@ -147,6 +147,68 @@ "\n", "comparePredicted(df.x, df.y, predY);\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ElasticNet" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "![name]()" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "const elasticNetPoly = regr.ElasticNet(1000, 0.0001);\n", + "elasticNetPoly.fit(polyX.rows(), polyY.rows());\n", + "const predEnetY = elasticNetPoly.predict(polyX.rows());\n", + "\n", + "comparePredicted(df.x, df.y, predEnetY);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Lasso" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "![name]()" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "const lassoPoly = regr.Lasso(1000, 0.0001);\n", + "lassoPoly.fit(polyX.rows(), polyY.rows());\n", + "const predLassoY = lassoPoly.predict(polyX.rows());\n", + "\n", + "comparePredicted(df.x, df.y, predLassoY);" + ] } ], "metadata": { diff --git a/regr/main.go b/regr/main.go index bee5480..c5fe412 100644 --- a/regr/main.go +++ b/regr/main.go @@ -12,6 +12,8 @@ import ( func InitRegrExports(this js.Value, args []js.Value) interface{} { exports := args[0] exports.Set("Linear", js.FuncOf(src.NewLinearRegressionJS)) + exports.Set("ElasticNet", js.FuncOf(src.NewElasticNetJS)) + exports.Set("Lasso", js.FuncOf(src.NewLassoJS)) return nil } diff --git a/regr/mod.wasm b/regr/mod.wasm index 1ece383..2b297a2 100755 Binary files a/regr/mod.wasm and b/regr/mod.wasm differ diff --git a/regr/src/CDescent.go b/regr/src/CDescent.go new file mode 100644 index 0000000..a65d6cf --- /dev/null +++ b/regr/src/CDescent.go @@ -0,0 +1,194 @@ +package src + +import ( + "math" + + "golang.org/x/exp/rand" + "gonum.org/v1/gonum/mat" +) + +type CDResult struct { + Gap float64 + Eps float64 + NIter int +} + +// Author: Pascal Masschelier, https://github.com/pa-m +// v https://github.com/pa-m/sklearn/blob/master/linear_model/cdfast.go +// +// Coordinate descent algorithm for Elastic-Net +// v https://github.com/scikit-learn/scikit-learn/blob/a24c8b464d094d2c468a16ea9f8bf8d42d949f84/sklearn/linear_model/cd_fast.pyx +func CoordinateDescent(w *mat.Dense, l1reg, l2reg float64, X *mat.Dense, Y *mat.Dense, maxIter int, tol float64, rng *rand.Rand, random, positive bool) *CDResult { + /* + coordinate descent algorithm + for Elastic-Net regression + We minimize + (1/2) * norm(y - X w, 2)^2 + alpha norm(w, 1) + (beta/2) norm(w, 2)^2 + */ + gap := tol + 1. + dwtol := tol + + NSamples, NFeatures := X.Dims() + _ = NSamples + _, NTasks := Y.Dims() + + var dwii, wmax, dwmax, dualNormXtA, cons, XtAaxis1norm, wiiabsmax, l21norm float64 + var ii, fIter, nIter int + tmp := mat.NewVecDense(NTasks, nil) + wii := mat.NewVecDense(NTasks, nil) + + R, Y2, XtA, RY, xw := &mat.Dense{}, &mat.Dense{}, &mat.Dense{}, &mat.Dense{}, &mat.Dense{} + RNorm, wNorm, ANorm, nn, tmpfactor := 0., 0., 0., 0., 0. + XtA = mat.NewDense(NFeatures, NTasks, nil) + + // # norm_cols_X = (np.asarray(X) ** 2).sum(axis=0) + normColsX := make([]float64, NFeatures) + Xmat := X.RawMatrix() + for jX := 0; jX < Xmat.Rows*Xmat.Stride; jX = jX + Xmat.Stride { + for i, v := range Xmat.Data[jX : jX+Xmat.Cols] { + normColsX[i] += v * v + } + } + // # R = Y - np.dot(X, W.T) + R.Mul(X, w) + R.Sub(Y, R) + + // # tol = tol * linalg.norm(Y, ord='fro') ** 2 + + Y2.MulElem(Y, Y) + tol *= mat.Sum(Y2) + + for nIter = 0; nIter < maxIter; nIter++ { + wmax, dwmax = 0., 0. + + for fIter = 0; fIter < NFeatures; fIter++ { + if random { + if rng != nil { + ii = rng.Intn(NFeatures) + } else { + ii = rand.Intn(NFeatures) + } + } else { + ii = fIter + } + if normColsX[ii] == 0. { + continue + } + // # w_ii = W[:, ii] # Store previous value + wii.CopyVec(w.RowView(ii)) // store previous value + + // if np.sum(w_ii ** 2) != 0.0: # can do better + if mat.Norm(wii, 2) != 0. { + // # R += X[:,ii] *wii # rank 1 update + + xw.Mul(X.ColView(ii), wii.T()) + R.Add(R, xw) + } + + // # tmp = np.dot(X[:, ii][None, :], R).ravel() + tmp.MulVec(R.T(), X.ColView(ii)) + + // # nn = sqrt(np.sum(tmp ** 2)) + nn = mat.Norm(tmp, 2) + // # W[:, ii] = tmp * fmax(1. - l1_reg / nn, 0) / (norm_cols_X[ii] + l2_reg) + if l1reg < nn { + tmpfactor = math.Max(1.-l1reg/nn, 0) / (normColsX[ii] + l2reg) + + } else { + tmpfactor = 0. + } + w.RowView(ii).(*mat.VecDense).ScaleVec(tmpfactor, tmp) + + // # if np.sum(W[:, ii] ** 2) != 0.0: # can do better + + if mat.Norm(w.RowView(ii), 2) != 0. { + // # R -= np.dot(X[:, ii][:, None], W[:, ii][None, :]) + // # Update residual : rank 1 update + xw.Mul(X.ColView(ii), w.RowView(ii).T()) + R.Sub(R, xw) + } + // # update the maximum absolute coefficient update + dwii = 0. + for o := 0; o < NTasks; o++ { + v := math.Abs(w.At(ii, o) - wii.AtVec(o)) + if v > dwii { + dwii = v + } + } + + if dwii > dwmax { + dwmax = dwii + } + wiiabsmax = 0. + wmat := w.RawMatrix() + for jw := 0; jw < wmat.Rows*wmat.Stride; jw = jw + wmat.Stride { + for _, v := range wmat.Data[jw : jw+wmat.Cols] { + v = math.Abs(v) + if v > wiiabsmax { + wiiabsmax = v + } + } + } + if wiiabsmax > wmax { + wmax = wiiabsmax + } + + } + + if wmax == 0. || dwmax/wmax < dwtol || nIter == maxIter-1 { + // # the biggest coordinate update of this iteration was smaller + // # than the tolerance: check the duality gap as ultimate + // # stopping criterion + + // # XtA = np.dot(X.T, R) - l2_reg * W.T + XtA.Mul(X.T(), R) + XtAmat := XtA.RawMatrix() + Wmat := w.RawMatrix() + for jXtA, jW := 0, 0; jXtA < XtAmat.Rows*XtAmat.Stride; jXtA, jW = jXtA+XtAmat.Stride, jW+Wmat.Stride { + for i := range XtAmat.Data[jXtA : jXtA+XtAmat.Cols] { + XtAmat.Data[jXtA+i] -= l2reg * Wmat.Data[jW+i] + } + } + + // # dual_norm_XtA = np.max(np.sqrt(np.sum(XtA ** 2, axis=1))) + dualNormXtA = 0. + for ii := 0; ii < NFeatures; ii++ { + XtAaxis1norm = mat.Norm(XtA.RowView(ii), 2) + if XtAaxis1norm > dualNormXtA { + dualNormXtA = XtAaxis1norm + } + } + // # R_norm = linalg.norm(R, ord='fro') + // # w_norm = linalg.norm(W, ord='fro') + RNorm = mat.Norm(R, 2) + wNorm = mat.Norm(w, 2) + if dualNormXtA > l1reg { + cons = l1reg / dualNormXtA + ANorm = RNorm * cons + gap = .5 * (RNorm*RNorm + ANorm*ANorm) + } else { + cons = 1. + gap = RNorm * RNorm + } + // # ry_sum = np.sum(R * y) + RY.MulElem(R, Y) + // # l21_norm = np.sqrt(np.sum(W ** 2, axis=0)).sum() + l21norm = 0. + for ii = 0; ii < NFeatures; ii++ { + l21norm += mat.Norm(w.RowView(ii), 2) + } + gap += l1reg*l21norm - cons*mat.Sum(RY) + .5*l2reg*(1.+cons*cons)*(wNorm*wNorm) + + // fmt.Printf("X\n%4f\nR:\n%4f\nW:\n%.4f\nXtA:\n%4f\n", mat.Formatted(X), mat.Formatted(R), mat.Formatted(w), mat.Formatted(XtA)) + // fmt.Println("dwmax", dwmax, "wmax", wmax, "dwtol", dwtol) + // fmt.Println(nIter, gap, "l1reg", l1reg, "l2reg", l2reg, "l21norm", l21norm, "sumRY", cons*mat.Sum(RY), "dualNormXtA", dualNormXtA, "RNorm", RNorm, "gap", gap) + + if gap < tol { + // return if we have reached the desired tolerance + break + } + } + + } + return &CDResult{Gap: gap, Eps: tol, NIter: nIter + 1} +} diff --git a/regr/src/ElasticNet.go b/regr/src/ElasticNet.go new file mode 100644 index 0000000..b0c3a6e --- /dev/null +++ b/regr/src/ElasticNet.go @@ -0,0 +1,96 @@ +//go:build js && wasm +// +build js,wasm + +package src + +import ( + "syscall/js" + + "gonum.org/v1/gonum/mat" +) + +type ElasticNet struct { + LinearRegression + Tol, Alpha, L1Ratio float64 + MaxIter int + WarmStart, Positive bool + CDResult CDResult +} + +func NewElasticNet(maxIter int, alpha float64) *ElasticNet { + regr := &ElasticNet{} + regr.Tol = 1e-4 + regr.Alpha = alpha + regr.L1Ratio = .5 + regr.MaxIter = maxIter + return regr +} + +func NewLasso(maxIter int, alpha float64) *ElasticNet { + regr := NewElasticNet(maxIter, alpha) + regr.L1Ratio = 1. + return regr +} + +func (regr *ElasticNet) Fit(Xs, Ys [][]float64, random bool) error { + X, Y := Array2DToDense(Xs), Array2DToDense(Ys) + + NSamples, NFeatures := X.Dims() + _, NOutputs := Y.Dims() + + l1reg := regr.Alpha * regr.L1Ratio * float64(NSamples) + l2reg := regr.Alpha * (1. - regr.L1Ratio) * float64(NSamples) + if !regr.WarmStart { + regr.Coef = mat.NewDense(NFeatures, NOutputs, nil) + } + regr.CDResult = *CoordinateDescent(regr.Coef, l1reg, l2reg, X, Y, regr.MaxIter, regr.Tol, nil, random, regr.Positive) + return nil +} + +func NewLassoJS(this js.Value, args []js.Value) interface{} { + obj := js.Global().Get("Object").New() + maxIter, alpha := getargs(args) + regr := NewLasso(maxIter, alpha) + setjsmethods(obj, regr) + return obj +} + +func NewElasticNetJS(this js.Value, args []js.Value) interface{} { + obj := js.Global().Get("Object").New() + maxIter, alpha := getargs(args) + regr := NewElasticNet(maxIter, alpha) + setjsmethods(obj, regr) + return obj +} + +func getargs(args []js.Value) (int, float64) { + maxIter := 100 + if !args[0].IsUndefined() { + maxIter = args[0].Int() + } + + alpha := 1e-4 + if len(args) > 1 && !args[1].IsUndefined() { + alpha = args[1].Float() + } + + return maxIter, alpha +} + +func setjsmethods(obj js.Value, regr *ElasticNet) { + obj.Set("fit", js.FuncOf(func(this js.Value, args []js.Value) interface{} { + X := JSFloatArray2D(args[0]) + Y := JSFloatArray2D(args[1]) + random := false + if len(args) == 3 && !args[2].IsUndefined() { + random = args[2].Bool() + } + return regr.Fit(X, Y, random) + })) + + obj.Set("predict", js.FuncOf(func(this js.Value, args []js.Value) interface{} { + X := JSFloatArray2D(args[0]) + Y, _ := regr.Predict(X) + return ToJSArray(Y) + })) +}