shortcuts/notebooks/regressions.ipynb

127 lines
65 KiB
Plaintext
Raw Normal View History

2024-09-30 23:58:32 +00:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Regressions\n",
"\n",
"$X \\in (0,1)$\n",
"\n",
"$Y = \\sin(2\\pi X)$"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"![name](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAkAAAAEgCAIAAADT5FumAABLrklEQVR4nOzdB3hT5foA8Dd7NknTNt1NW0YLpZVNywYFRIELCCIgQ1FwsQQHV0XkqlfF+5d78TpAryCyRAQEFBAQQfZqKdBS6N4zHdnr/J/00FBCWjqSnCR9f08enpyRc9720LznO+c778ckCAIQQgghT8OkOgCEnE6pVAqFwsZzTCYTg8Fwwa7t7ujgwYNHjx4VCASrVq1yQQwIeSs61QEg5CwlJSUzZswYP3780qVLx48fHxcXl5ycTC4aMmTIggULnB1Abm6uWCzev3+/zfxHH300KChox44dNvMPHz48depUHo83ePDgxx57LKnezJkzL1265OxQEfJEmMCQ15o1a1bnzp337du3YcOGffv2zZw5s7q6mlz0wgsvTJ061dkBBAUFLV++vFevXi1cf/To0S+//LJWq33//fd//fXXM2fOHDp0KDAwcMCAAe+++27znz1w4IDRaHRE1Ah5DLyEiLwTQRAnTpyYOXOmdc706dPZbDb5fvbs2U19ikajtXD7969pM5PD4dx/kZBch06n2739TH7cuhGRSPR///d/RqPx/ffff/TRR5OSkuwGo9PpnnvuuZycHCYT/6JRB4ItMOSdaDRaVFTUypUrT5w4Qc6JiooKDQ0FgCNHjowbN27x4sUAcOvWraeeemrixImvv/56jx49+Hw+mXLOnz8/YcKEl19+GQCys7OXLl0aHx+v0WgA4Mcffxw4cODcuXOHDx++b98+MietXbv20UcfJWd+/vnn5B4///zz3r177927l5zMyMgYNWrU448//uyzz/744492MyWZ1Wxy2+rVq81m8/fffw8ACoVi6NCh06dPHzly5Pjx41UqVWlp6dy5c0tKSmbNmvXJJ58AwPr160eNGjVr1qz4+PiNGzc6/5eNEEUIhLzUoUOHyL4bcXFx//rXv+rq6qyL5s2b97e//Y18v2rVqoSEBK1WazabV61aRafTlUolQRAvvPDCuHHjyHX++usvsjOIwWDg8/kXL14kCCIrK+uXX34hCGLr1q1hYWHkp8rLyyUSyU8//UR+0M/P77vvviMIQq/XR0ZGrlq1ipz/6aefxsbG3h/z8ePHAeD48eM288PCwkaMGEEQRFlZ2RdffEEQhMFgEAgE3377LUEQR44cAQCtVmvdOBnMF198IRKJnPCrRcgtYAsMea3Ro0enpaW98847arV62bJl8fHxhYWFdtdks9kcDodGo02cONFsNldUVDS1TbPZbDKZdu3aZTQao6Kixo8fDwDffvvtmDFjBAIBAPj7+48ZM+a7776z+eChQ4dycnLa3HNEKpUqFAoACAgIePHFFwGAyWRGRkaWlZXdv/KyZcvIYLp06VJbW6vVatu2U4TcHCYw5M3CwsJWr16dmZm5cePGoqKif/zjH82v/8B7SGw2+9NPP/3Pf/4jl8tXrFihVqsBICcnRyaTWdcJDg7Oycmx+WBWVhaXyw0MDGzbD1JUVNS5c2cAUKvVK1eunD9//vLly0tKSuxehzx79uzzzz+/ePHi9evXt213CHkEvOWLvBNBEOnp6d26dSPvh82ZM+f3339PT09v/5ZfeeWVGTNmbNmy5b333svNzd26datAINDpdNYVdDqdv7+/zacEAoHZbG5hDxEbWVlZFRUVffr0IbufSCSSDRs20Gi0Q4cO2V152LBhFy9ejI+PP3r06M6dO9u2U4TcH7bAkHcymUwffPBB4zl0Or1Tp04t3wKLxaqtrbWZqdFoUlJSpFLpwoULV69eff78eQB46KGHbt++bV0nLS0tPj7e5oOxsbF6vf7atWtt+FlWr14dERGxaNEiADh37lzv3r3t5iSz2QwAV65codPpcXFxTXUJQchrYAsMea09e/asW7duypQpfn5++/bt++23306dOnX/auTdYOt767/x8fFfffXVh
g0bZDLZn3/+Sc4nb6dt27aNzWafOnVq2LBhALBixYphw4YdOXJk+PDhu3fvzszM3Lx5s81eEhMTH3744eeff37NmjUmk+ns2bN284pKpQKA9PR0iURCo9EyMjI2bdpUVla2b98+Pp9PJsIffvihe/fuubm5CoWC3EjXrl0BYNu2bQkJCTExMTqdbs2aNQMGDLB2gETIKzGwmA3ySnQ6feDAgTdu3NizZ8/+/fvNZvOGDRvkcjkAbN68Wa/XBwYG3rp1SyQSpaamdurUqaysLCIi4ocffkhKSsrMzBw4cGCvXr04HE5KSgqfz588ebJUKr1x48aQIUMUCsX27dsPHjzYp0+fd955h8ViBQQETJkyZe/evdu3b+fz+R9//HFERER1dfW6desSEhLIZlx0dPSECRMYDMaBAwdqa2snTZokEAgUCkX37t2tMR8+fPivv/4id3H16tUbN25wOJwJEyasXr06KCiIXGfYsGH5+fnXr18n63SUlpYGBwdHR0f7+PhcuHCBTqc/9thjsbGx586dq6ure+GFF4RCYXp6+oABA6g7FAg5Cw0vLyCEEPJEeA8MIYSQR8IEhhBCyCNhAkMIIeSRMIGh5pSVwZUr0HRhCoQQogx2o0f2EQR8/jns22d5Q6PBxInw0ktUx4QQQo1gAvNsly7B2bPAYMDQodCoP7YD7NkDv/xy5z1BwO7dEBEB48Y5chcIIdQeeAnRg23cCG++ack0u3bB4sV3841D/PHHg+cghCiXn59/7NixEydO5ObmWgc11Wq1V65cccHeCwsL8/LybGYWFxe///778fHxZ8+edere3SWBqVSqV155xe6iW7duuTwcD1BUBFu33jNn/XpQKh22/fqyRPdw+Hi/eGS9FR5Z1zAajTNmzFi0aFFWVlZqauq8efOmT59OLtq4cWPv3r3Ly8sduLuqejYz582bN2PGDJuZwcHBr7322rVr18jKMs7jLpcQCYJoatAH8/1fpQjg5k2weQZdp4PMTHjoIcdsPynJsovGBg1yzJat8Mh6KzyyrrFhw4br16+npKSQk/Pnz583bx75fubMmTExMQEBAQ7cnd2qF+vWrTM6/Ny2xdwlgaHW8vVt6cy2mTYNbt+G+nEcLYYPhylTHLZxhFD73bp1S6VSmUwmBoNBlp/+5z//SVayNplMvXr1IgiCRqPV1NQQBCESiTQazfXr1yMjI2UyGUEQNTU1ACASieh0em1trdls5vF4HA6HHLv12rVrAoEgMjLSx8eH3J1Wq83IyJDJZF27dmWxWOQcmUxG7p1EEERubm5hYaFjc2dT3OUSImqthASwKa3erx9ERDhs+0wmvPsufPEFvPkmfPklvPUW0PE/C0LuZMCAAZmZmWPHjj1z5gw5JzQ0FABqamo+/fRTX1/foqIiANi6dauvr++rr746e/bsGTNmREdHp6am6nS6f//7376+vvn5+QBw8ODB7t27r127lhzQoF+/fidOnPjll1/GNXTc2rFjx6hRo06fPr19+/a4uLjTp08DQEZGxlNPPTVixAhynfLy8hEjRixevDglJWXHjh2u+BVQPST0HXV1dfPmzbO7KD093eXheIaqKuK994ixY4lx44g1awiViuqAWgmPrLfCI+syy5cvp9efWsbExHz55Zcmk4mcn5ycDAAFBQUEQZAjjJMDIBiNxoCAgNWrVxMEQQ7uk5OTQ36kZ8+eH330EUEQzz///Pz588mZv/32G0EQJSUlAoHgwIEDFRUVBEF8/PHHXbp0MRgMBEGsWrWqb9++5MoLFixITEw0m83WW0JHjhxx6o9P2SVEnU5nvVxL3o3U6/VUBeOhfH1h5co7z2khhDqgNWvWvPLKK5s3b/72229ffPHF8+fP/+9//2tmfQaD0b179/sHumssKCjoyy+/HDFixJQpUx599FEAOHbsmEaj6d+/P7nCww8//MYbb9y6dYscMNZq9+7dCxcudOUAqpQlMA6Hs3HjRuukSqVatmwZV
cF4NMxeCHVkcrn87bfffuONN5577rnvvvvuk08+uX9A8MYemGDefPPN0tLSZ555ZtGiRe+9996LL75YVFTk4+Nj/
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"// deno-lint-ignore-file\n",
"\n",
"import { display } from \"https://deno.land/x/display@v0.1.1/mod.ts\";\n",
"import pl from \"npm:nodejs-polars\";\n",
"import plot from \"../plot/mod.ts\";\n",
"\n",
"// Load the sample points from the CSV; the plots below read its x and y columns.\n",
"const data = await Deno.readTextFile(\"assets/X_Y_Sinusoid_Data.csv\");\n",
"const df = pl.readCSV(data, { sep: \",\" });\n",
"\n",
"// Reference curve y = sin(2*pi*x) sampled at x = 0.00, 0.01, ..., 0.99.\n",
"// Math.PI is used instead of a 3.14 approximation so the curve matches the\n",
"// sin(2*pi*X) definition stated in the notebook header.\n",
"const real = pl.DataFrame({ x: new Array(100).fill(0).map((_, i) => i / 100) }).select(\n",
"  pl.col('x'),\n",
"  pl.col('x').mul(2 * Math.PI).sin().alias('y'),\n",
");\n",
"\n",
"/**\n",
" * Plot one (x, y) series against the reference sinusoid.\n",
" *\n",
" * Passes three layers to plot.DrawPlot: the reference curve taken from\n",
" * real, the supplied points as a scatter layer, and a trend layer over the\n",
" * same points.\n",
" *\n",
" * @param x     x values of the series to plot\n",
" * @param y     y values of the series to plot\n",
" * @param title plot title, defaults to \"Sinusoid Data\"\n",
" * @returns the value returned by plot.DrawPlot\n",
" */\n",
"const draw = (x, y, title = \"Sinusoid Data\") => {\n",
"  return plot.DrawPlot(\n",
"    // Canvas options.\n",
"    { title, width: 6, height: 3, XLabel: \"X\", YLabel: \"Y\" },\n",
"    // Layer 1: reference sinusoid.\n",
"    {\n",
"      type: \"line\",\n",
"      data: [real.x, real.y],\n",
"      legend: \"Sinusoid\",\n",
"      lineDashes: [3, 4],\n",
"      lineColor: \"#ff8888\",\n",
"      lineWidth: 1,\n",
"    },\n",
"    // Layer 2: the supplied points.\n",
"    {\n",
"      type: \"scatter\",\n",
"      data: [x, y],\n",
"      legend: \"Data\",\n",
"      lineDashes: [3, 4],\n",
"      lineWidth: 2,\n",
"      glyphColor: \"#4444ff\",\n",
"      glyphShape: \"circle\",\n",
"    },\n",
"    // Layer 3: trend layer over the same points.\n",
"    {\n",
"      type: \"trend\",\n",
"      data: [x, y],\n",
"      legend: \"Trend\",\n",
"      lineDashes: [4, 2],\n",
"      lineColor: '#aacccc',\n",
"      lineWidth: 0.5,\n",
"    },\n",
"  );\n",
"};\n",
"\n",
"// Last expression of the cell: its value is what the notebook renders.\n",
"draw(df.x, df.y);"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"![name](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqAAAAGACAIAAAD9Gkc5AABrlklEQVR4nOzdB1hT1xcA8EPCCGHLkKGCMkW04La4xb0HuKqiuLXWvVt3tVZtrdbRPyqu4kDcW5HioooL2ciesgOB7OT/QZQiywRe8pJwfh9fv7x178GGnNz37lAXiUSAEEIIIdWiTnYACCHZevDgwb1796hU6s6dO8mOBSEkPxSyA0AIEeDNmzcLFy7U1tZ2dXUdPHiwh4eHj49PXl4eAHh4eNja2p45c4bsGBFCcoUJHiFV4ObmtnXrVjabPXv27Lt37964caOgoKBr1658Pl/yQq5evSrLGBFCcoUJHiEVRKPRFixYkJSU9OrVq7rOqdb/JjMzc/ny5XKJDiEkD5jgEVJNdDodAD5+/Fhtv0gk2rdv39ChQ729vfv06XPo0CEAiI2N9fb2zsrKmjx58qlTp0gKGSFEJOxkh5BqCgkJ0dLS6tSpU7X9Z86c+eOPP6Kiouh0em5urp2dnZWV1ejRo8ePHx8dHe3v709SvAghgmGCR0ilREZGXrx48cWLF35+fkePHrWysqp2wrFjx4YMGSJu35uamg4cOPDEiROjR48mKV6EkKzgLXqEVEphYWFRUVH//v2jo6NnzJhR84Tk5GQzM7PKTQsLi+TkZPnGiBCSB2zBI6RSevbsOWfOnHpO0NHR4XA4lZtsNtvExEQuoSGE5Apb8Ag1Ld98882HDx8qN2NiYtq3by9+LRQKyYsLIUQwTPAIqQgWiwUAZWVlNeefFlUQv16/fv3jx48fPXrE5/PPnTuXlpa2atUqAHBwcMjKyrp37154eDgZ4SOECKaGc9EjpALi4+NPnz4tboLTaLTly5eLu9GJp6p9/PixQCAwNDScN2+enp5eQkKCn59fdnZ2hw4dRo8e3apVKwDg8/mbNm3Kzs4eOnTohAkTyP6FEEKNhQkeIYQQUkF4ix4hhBBSQZjgEUIIIRWECR7JC/bQRgghOcIEj2ROKISMw9eyNh3h8cgOBSGEmgxM8Ei2kpNhjjcPVqzU+eWn+ROL3r8nOyCEEGoaMME3Vffvy+GeuVAIW7ZAl9ADVqx4fV7+sFdbt2yBsjJZV4sQQggTfNOTlwcHtjFKRk69ONj3yRPZ1pWQAMzE3GlJW8WbY9IO6mfGvn4t20oRQqhSWlpaUFBQSEhISkoKn88X72Sz2W/evJFD7RkZGampqdV2ZmVlbd++vX379qGhoTKtXVESfGlp6eLFi2s9FB8fL/dwVBaTCUuXgqXvFj1O7qB/Nu75kXH/vgyrEwhgZsIGHT5DvEkV8ebHLfv8JyYr+IZBUsE3jKri8/lTpkxZsmRJYmLi+/fvfXx8Jk+eLD7k5+fXsWPH3NzcBhQr1RvGx8dnypQp1XZaWFisWrUqIiKitLS0AQFITlEWmxGJRGw2u9ZDOD82gW7fBs3E2DFpBwHAgJc7LXGLn9++gQNlVZ09861jxrGqe7rm32bl3QIYJqsq8Q2DpIRvGFX1v//9LzIy8t27d+LNuXPn+vj4iF9PnTrV0dHR1NS0AcVK9YY5cOAAX9ZtmropSoJH8pGeDvPjllFFn7qzj0k7eDNpnkDgSKXKpLpsLerKzn21tbcIBOZJSdtsbecuWdLcrbe+TCpDCKEq4uPjS0tLBQIBteIDTkNDY+fOnRV3Fsu5ubmJRCI1NTUGgyESifT19VksVmRkpI2NjZmZmUgkYjAYAKCvr0+hUIqLi4VCoba2tpaWVsWtUGZERISOjo6NjY2enp64OhaLFR0draWl5eDgoKGhIX4QYGZmRq3y8SoSiVJSUjIyMhr23UJainKLHslH94JbXfNvV25SRbylqctklN0TEhIWbty4/6b/oXs95++xG7NyakevSDdPO6iyGDlCCMlIt27dEhIShg4d+vz5c/EeKysrAGAwGHv27DEyMsrMzASAv//+28jIaPny5dOnT
58yZUqbNm3ev3/P4XD2799vZGSUlpYGAHfu3HF2dv79998BIDo6ukuXLiEhIdeuXRsxYoS45PPnz3fr1i0kJOTcuXPt2rV79uwZAMTFxU2aNKlfv37ic3Jzc/v16/fDDz+8e/fu/Pnz8vgnECmGkpISHx+fWg/FxMTIPRyVxTt49G77FSeMhx6ktTpjPu2i9YqMyStE2dmEVxQVFTVmzJiCgoLKPRwOZ+rUqYRXVBO+YZBU8A2jwlauXEmhlLdjHR0dDx8+LBAIxPvfvn1bcUczXSQS5eXlAUBoaKhIJOLz+aamplu3bhWJRBERERUDfZPFl7i6uu7atUskEnl6es6dO1e88/bt2yKRKDs7W0dH5/Hjx+Kdv/zyi729PY/HE4lEmzdv7ty5s3j/vHnzunfvLhQKKx9JP3jwQKa/Prbgmxb1RXO/fbbniJ2hf6ft5zpPcbqxx/LvPdC8ObG1XL/+dsOGDSdPnjQyMqrcqampCaBWWFh7TwuEECLcr7/+mpiYuG3bNg6Hs2DBgtmzZ9d/PpVKdXZ2Li4uruccU1PTwMDAc+fO8fn8IUOGAEBQUBCLxfr222/FJwwYMCC+QrULL1++PHz4cDU1tUb/WpLCBN/kvHiRbm/ffORIKxubHBcXggsXiWDv3tAff/z50KEz+vrVn7UbGvY7eDCY4CoRQqhu1tbWGzdujIuLmz59+okTJ8Tt9Xp8NQHPnTt33LhxM2fOtLS0PHz4MABkZmbq6emJbxUAQLNmzcRj4apexeVyc3JyWrRoUV/R//4LgYGffioeH/wnPf2/Qy9fVr+w8lBgYNXdZCb4vCry8/Nx4Vr5OHTojLv71A4dTLS0cogtWSiEn3/+5++/DwYGnjY3p9c8YeTIYUFBd4itFCGEanX9+vXK1xoaGitXrgSAnJzGfu7RaLSjR4/m5OT4+PgsXLgwMjLS1NSUyWRWpjBx7zwbG5uqV2lqahoZGX1lXJyGBtBon34oX2ZnKvW/Q+o1esdXHqLRqu4mrRc9l8vdvHlz5SaPxxP/oyCZ+vgR4uJeHTq0VijMfvSI4AQfEHD35s3zV674tWxZ+/uqe3fzvLz0rCywsCC2ZoQQqu7mzZsdOnSwtrYWb6alpRkYGNjZ2Ul4uY6Ojrh1XlmC2LVr11atWqWnp7djx44//vgjKyvL3d1dTU0tPDz8m2++qWiH/2tiYtK6detqBbZv3/7BgweLFi2qs8qOHes8ZGFR3+fmsNoHHpOW4DU1NQ8ePFi5yWQyly5dSlYwTcfly2EODp3MzEAgMC0oKCC05MuPHt0LCfFVV6/ztpCBATg4OL99G2Vh4Uxg1QghVBONRuvbt++KFSvat2+flJS0e/fu//3vf5qamkwmMzIyEgDCw8ObNWsWGxsLALGxsR07dszJySkqKsrMzCwpKbG2tra1tZ07d+6oUaPKysry8/PT0tKYTGZwcDCbze7Xr9+9e/datmzZo0cPHR2dLVu2LFiwYOvWrXl5eTt27AgICFBTU8vKykpNTS0uLo6JiXFycjpw4ECPHj28vb07deqUmppKoVA+fPjQp08f9ZotcoKoKciNcXGC9/X1rXkoNjbW0dGRjKBU0OLFy2bPXuHq2gIAvL29/fz8CCn27NmzYWFh+/bt++rjqxcvXgQHB69evZqQemuFbxgkFXzDqLD3799HRkYWFhaam5u7u7ubVYzRTUlJiYmJEZ/QoUOHyMhIgUBQcYuxe3x8fH5+PgA4OTlZW1tnZWXdu3fPxMSkX79+ERERhYWFzs7Oqamp2dnZOTk55ubmQ4cOpX2+Kx4VFfXy5UtTU9Pu3buLH8O/fPmysh3l4eFBpVLz8vJCQkI4HM6gQYPCw8M5HE7Pnj11dXVl9OvjRDdNCI/Hy83NFGd3oohE4Ov7v+Tk5N9++02S87t06bJv3z4CA0AIobq0r1Btp3WFyk2LKre+O3fuXPVMCwuLGTNmiF937dpV/KKsrMzd3b1mXc4Vqu7p0qVLt
XNMTEzGjRsnfl05Pl52sBd9E3L79u2hQ4eKX4tEwGYDl9uoAktLYerU3+Picnbs2CHhJWpqavr6+tjfAiGEZA1b8
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import { polynomialTransform } from '../encoding.ts';\n",
"import { trainTestSplit } from \"../split.ts\";\n",
"import regr from '../regr/mod.ts';\n",
"\n",
"// Feature frame: the x column, then the output of polynomialTransform applied\n",
"// to the non-target columns, then the y target column appended last.\n",
"const polyDf = df\n",
"  .select('x')\n",
"  .hstack(polynomialTransform(df.drop('y'), 36, false, false))\n",
"  .hstack(df.select('y'));\n",
"\n",
"// No hold-out split is made: the model is fitted on every row and then\n",
"// asked to predict those same rows.\n",
"const trainX = polyDf.drop('y');\n",
"const trainY = polyDf.select('y');\n",
"\n",
"const linreg = regr.Linear();\n",
"linreg.fit(trainX.rows(), trainY.rows());\n",
"const predY = linreg.predict(trainX.rows());\n",
"\n",
"\n",
"// Overlay the model's predictions on the data it was fitted on, with the\n",
"// reference sinusoid drawn for comparison.\n",
"plot.DrawPlot(\n",
"  {\n",
"    title: \"Polynomial Regression Fit\",\n",
"    width: 7,\n",
"    height: 4,\n",
"    XLabel: \"X\",\n",
"    YLabel: \"Y\",\n",
"  },\n",
"  { type: \"line\", data: [real.x, real.y], legend: \"Sinusoid\", lineDashes: [3, 4], lineColor: \"#ff8888\", lineWidth: 1 },\n",
"  // These are the same rows the model was fitted on (no train/test split is\n",
"  // made above), so the series is labelled as training data.\n",
"  { type: \"linePoints\", data: [trainX.x, df.y], legend: \"Training Data\", lineDashes: [3, 4], lineColor: \"#8888ff\", glyphColor: \"#4444ff\", glyphShape: \"circle\" },\n",
"  { type: \"linePoints\", data: [trainX.x, predY], lineWidth: 0.5, legend: \"Predicted\", glyphColor: '#f00', glyphShape: \"pyramid\" },\n",
");\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Deno",
"language": "typescript",
"name": "deno"
},
"language_info": {
"codemirror_mode": "typescript",
"file_extension": ".ts",
"mimetype": "text/x.typescript",
"name": "typescript",
"nbconvert_exporter": "script",
"pygments_lexer": "typescript",
"version": "5.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}