2024-09-25 19:52:53 +00:00
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "b9db28d4-ea49-443c-a5d6-e1c44bfe4942",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"# Data Analytics with JS\n",
|
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"The dataset contains information about cars: the name of the manufacturer,\n",
|
|
|
|
|
"the cars' technical parameters, and the sale price of each car.\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"Libraries:\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"- nodejs-polars\n",
|
|
|
|
|
"- @observablehq/plot"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "d163c580-6aa0-4e8c-a780-5cc931003dc8",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Exploring Data\n",
|
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"Use [pola-rs](https://pola-rs.github.io/nodejs-polars/modules.html) dataframes\n",
|
|
|
|
|
"to read and manipulate data.\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"- use `df.head(n)` to get the first n rows\n",
|
|
|
|
|
"- use `df.sample(n)` to get n random rows from the dataset\n",
|
|
|
|
|
"- use `df.describe()` to get mean, std, min, max\n",
|
|
|
|
|
"- use `df.select(...cols)` to get a new dataframe containing only the given columns\n",
|
|
|
|
|
"- use `df.groupBy(...cols).agg()` to group data and get aggregates"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 1,
|
|
|
|
|
"id": "1cb3589a-bffe-422a-8e32-a853161b93c4",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"CarName": "alfa-romero giulia",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"car_ID": 1,
|
|
|
|
|
"carbody": "convertible",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 13495,
|
|
|
|
|
"stroke": 2.68,
|
|
|
|
|
"symboling": 3,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"CarName": "alfa-romero stelvio",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"car_ID": 2,
|
|
|
|
|
"carbody": "convertible",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"stroke": 2.68,
|
|
|
|
|
"symboling": 3,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"CarName": "alfa-romero Quadrifoglio",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 2.68,
|
|
|
|
|
"car_ID": 3,
|
|
|
|
|
"carbody": "hatchback",
|
|
|
|
|
"carheight": 52.4,
|
|
|
|
|
"carlength": 171.2,
|
|
|
|
|
"carwidth": 65.5,
|
|
|
|
|
"citympg": 19,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2823,
|
|
|
|
|
"cylindernumber": "six",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 152,
|
|
|
|
|
"enginetype": "ohcv",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 26,
|
|
|
|
|
"horsepower": 154,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"stroke": 3.47,
|
|
|
|
|
"symboling": 1,
|
|
|
|
|
"wheelbase": 94.5
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"CarName": "audi 100 ls",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"car_ID": 4,
|
|
|
|
|
"carbody": "sedan",
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.2,
|
|
|
|
|
"citympg": 24,
|
|
|
|
|
"compressionratio": 10,
|
|
|
|
|
"curbweight": 2337,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "four",
|
|
|
|
|
"drivewheel": "fwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 109,
|
|
|
|
|
"enginetype": "ohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 30,
|
|
|
|
|
"horsepower": 102,
|
|
|
|
|
"peakrpm": 5500,
|
|
|
|
|
"price": 13950,
|
|
|
|
|
"stroke": 3.4,
|
|
|
|
|
"symboling": 2,
|
|
|
|
|
"wheelbase": 99.8
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"CarName": "audi 100ls",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"car_ID": 5,
|
|
|
|
|
"carbody": "sedan",
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.4,
|
|
|
|
|
"citympg": 18,
|
|
|
|
|
"compressionratio": 8,
|
|
|
|
|
"curbweight": 2824,
|
|
|
|
|
"cylindernumber": "five",
|
|
|
|
|
"doornumber": "four",
|
|
|
|
|
"drivewheel": "4wd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 136,
|
|
|
|
|
"enginetype": "ohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 22,
|
|
|
|
|
"horsepower": 115,
|
|
|
|
|
"peakrpm": 5500,
|
|
|
|
|
"price": 17450,
|
|
|
|
|
"stroke": 3.4,
|
|
|
|
|
"symboling": 2,
|
|
|
|
|
"wheelbase": 99.4
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "car_ID",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "symboling",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "CarName",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fueltype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "aspiration",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "doornumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carbody",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "drivewheel",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginelocation",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "wheelbase",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carlength",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carwidth",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carheight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "curbweight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginesize",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "boreratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "stroke",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "compressionratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "horsepower",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "peakrpm",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "citympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "highwaympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "price",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<table><thead><tr><th>car_ID</th><th>symboling</th><th>CarName</th><th>fueltype</th><th>aspiration</th><th>doornumber</th><th>carbody</th><th>drivewheel</th><th>enginelocation</th><th>wheelbase</th><th>carlength</th><th>carwidth</th><th>carheight</th><th>curbweight</th><th>enginetype</th><th>cylindernumber</th><th>enginesize</th><th>fuelsystem</th><th>boreratio</th><th>stroke</th><th>compressionratio</th><th>horsepower</th><th>peakrpm</th><th>citympg</th><th>highwaympg</th><th>price</th></tr></thead><tbody><tr><td>1</td><td>3</td><td>alfa-romero giulia</td><td>gas</td><td>std</td><td>two</td><td>convertible</td><td>rwd</td><td>front</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>2.68</td><td>9</td><td>111</td><td>5000</td><td>21</td><td>27</td><td>13495</td></tr><tr><td>2</td><td>3</td><td>alfa-romero stelvio</td><td>gas</td><td>std</td><td>two</td><td>convertible</td><td>rwd</td><td>front</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>2.68</td><td>9</td><td>111</td><td>5000</td><td>21</td><td>27</td><td>16500</td></tr><tr><td>3</td><td>1</td><td>alfa-romero Quadrifoglio</td><td>gas</td><td>std</td><td>two</td><td>hatchback</td><td>rwd</td><td>front</td><td>94.5</td><td>171.2</td><td>65.5</td><td>52.4</td><td>2823</td><td>ohcv</td><td>six</td><td>152</td><td>mpfi</td><td>2.68</td><td>3.47</td><td>9</td><td>154</td><td>5000</td><td>19</td><td>26</td><td>16500</td></tr><tr><td>4</td><td>2</td><td>audi 100 ls</td><td>gas</td><td>std</td><td>four</td><td>sedan</td><td>fwd</td><td>front</td><td>99.8</td><td>176.6</td><td>66.2</td><td>54.3</td><td>2337</td><td>ohc</td><td>four</td><td>109</td><td>mpfi</td><td>3.19</td><td>3.4</td><td>10</td><td>102</td><td>5500</td><td>24</td><td>30</td><td>13950</td></tr><tr><td>5</td><td>2</td><td>audi 
100ls</td><td>gas</td><td>std</td><td>four</td><td>sedan</td><td>4wd</td><td>front</td><td>99.4</td><td>176.6</td><td>66.4</td><td>54.3</td><td>2824</td><td>ohc</td><td>five</td><td>136</td><td>mpfi</td><td>3.19</td><td>3.4</td><td>8</td><td>115</td><td>5500</td><td>18</td><td>22</td><td>17450</td></tr></tbody></table>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 1,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"import { display } from \"https://deno.land/x/display@v0.1.1/mod.ts\";\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"import { Plot } from \"https://l12.xyz/x/shortcuts/raw/plots.ts\";\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"import pl from \"npm:nodejs-polars\";\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"let data = await Deno.readTextFile(\"assets/CarPrice_Assignment.csv\");\n",
|
|
|
|
|
"let df = pl.readCSV(data, { sep: \",\" });\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"await display(df.head(5));"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|
"id": "a82e43fd-04a6-4bc8-a752-3b03abd6e983",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"describe": "mean",
|
|
|
|
|
"enginesize": 126.90731707317073,
|
|
|
|
|
"horsepower": 104.1170731707317,
|
|
|
|
|
"price": 13276.710570731706
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"describe": "std",
|
|
|
|
|
"enginesize": 41.642693438179855,
|
|
|
|
|
"horsepower": 39.54416680936116,
|
|
|
|
|
"price": 7988.852331743148
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"describe": "min",
|
|
|
|
|
"enginesize": 61,
|
|
|
|
|
"horsepower": 48,
|
|
|
|
|
"price": 5118
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"describe": "max",
|
|
|
|
|
"enginesize": 326,
|
|
|
|
|
"horsepower": 288,
|
|
|
|
|
"price": 45400
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"describe": "median",
|
|
|
|
|
"enginesize": 120,
|
|
|
|
|
"horsepower": 95,
|
|
|
|
|
"price": 10295
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "describe",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginesize",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "horsepower",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "price",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<table><thead><tr><th>describe</th><th>enginesize</th><th>horsepower</th><th>price</th></tr></thead><tbody><tr><td>mean</td><td>126.90731707317073</td><td>104.1170731707317</td><td>13276.710570731706</td></tr><tr><td>std</td><td>41.642693438179855</td><td>39.54416680936116</td><td>7988.852331743148</td></tr><tr><td>min</td><td>61</td><td>48</td><td>5118</td></tr><tr><td>max</td><td>326</td><td>288</td><td>45400</td></tr><tr><td>median</td><td>120</td><td>95</td><td>10295</td></tr></tbody></table>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"await display(\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" df.select(\n",
|
|
|
|
|
" \"enginesize\",\n",
|
|
|
|
|
" \"horsepower\",\n",
|
|
|
|
|
" \"price\",\n",
|
|
|
|
|
" ).describe(),\n",
|
|
|
|
|
");"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "5c931435-0e19-4d32-8ca8-ae81a9cb43c8",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Data Cleaning"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"id": "e70e5f22-bdde-4140-9015-a6281b8478bf",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"\u001b[33mfalse\u001b[39m"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"// check for duplicates\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"const hasDups = df.select(\"car_ID\").isDuplicated().toArray().includes(true);\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"// if there are duplicates, use df.filter()\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"hasDups;"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "7fb89a88-f3ad-457d-9211-2b75eef6096f",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Resolve categorical values. For this dataset it is a brand name:"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 4,
|
|
|
|
|
"id": "ffb4509e-1930-4e2e-954c-51e919084514",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"scrolled": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "toyota"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "mercury"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "mitsubishi"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "honda"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "mazda"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "peugeot"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "porcshce"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "renault"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "isuzu"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "dodge"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "bmw"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "vw"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "maxda"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "volkswagen"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "alfa-romero"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "chevrolet"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "toyouta"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "jaguar"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "saab"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "porsche"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "audi"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "vokswagen"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "subaru"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "nissan"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "plymouth"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"brand": "volvo"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "buick"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"<table><thead><tr><th>brand</th></tr></thead><tbody><tr><td>toyota</td></tr><tr><td>mercury</td></tr><tr><td>mitsubishi</td></tr><tr><td>honda</td></tr><tr><td>mazda</td></tr><tr><td>peugeot</td></tr><tr><td>porcshce</td></tr><tr><td>renault</td></tr><tr><td>isuzu</td></tr><tr><td>dodge</td></tr><tr><td>bmw</td></tr><tr><td>vw</td></tr><tr><td>maxda</td></tr><tr><td>volkswagen</td></tr><tr><td>alfa-romero</td></tr><tr><td>chevrolet</td></tr><tr><td>toyouta</td></tr><tr><td>jaguar</td></tr><tr><td>saab</td></tr><tr><td>porsche</td></tr><tr><td>audi</td></tr><tr><td>vokswagen</td></tr><tr><td>subaru</td></tr><tr><td>nissan</td></tr><tr><td>plymouth</td></tr><tr><td>volvo</td></tr><tr><td>buick</td></tr></tbody></table>"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 4,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"// get brand names from `CarName`\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"let brandNameTable = df.select(\"CarName\").map((row) => {\n",
|
|
|
|
|
" const [carName] = row;\n",
|
|
|
|
|
" const brand = carName.split(\" \")[0].toLowerCase();\n",
|
|
|
|
|
" return brand;\n",
|
|
|
|
|
"});\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"// create a dataframe from brand names\n",
|
|
|
|
|
"let brandDf = pl.DataFrame({\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" \"brand\": brandNameTable,\n",
|
|
|
|
|
"});\n",
|
|
|
|
|
"await display(brandDf.unique());"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
"id": "7f91cbc3-364c-49c9-97e3-370f78b708f8",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"scrolled": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "nissan"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "toyota"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "isuzu"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "plymouth"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "subaru"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "audi"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "renault"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "honda"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "dodge"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "bmw"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "volvo"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "peugeot"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "volkswagen"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "chevrolet"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "mitsubishi"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "buick"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "alfa-romero"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "porsche"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "mazda"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "saab"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "jaguar"
|
2024-09-25 20:34:25 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "mercury"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"<table><thead><tr><th>brand</th></tr></thead><tbody><tr><td>nissan</td></tr><tr><td>toyota</td></tr><tr><td>isuzu</td></tr><tr><td>plymouth</td></tr><tr><td>subaru</td></tr><tr><td>audi</td></tr><tr><td>renault</td></tr><tr><td>honda</td></tr><tr><td>dodge</td></tr><tr><td>bmw</td></tr><tr><td>volvo</td></tr><tr><td>peugeot</td></tr><tr><td>volkswagen</td></tr><tr><td>chevrolet</td></tr><tr><td>mitsubishi</td></tr><tr><td>buick</td></tr><tr><td>alfa-romero</td></tr><tr><td>porsche</td></tr><tr><td>mazda</td></tr><tr><td>saab</td></tr><tr><td>jaguar</td></tr><tr><td>mercury</td></tr></tbody></table>"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"// transform to remove duplicates\n",
|
|
|
|
|
"brandNameTable = brandNameTable.map((name) => {\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" name = name\n",
|
|
|
|
|
" .replace(\"maxda\", \"mazda\")\n",
|
|
|
|
|
" .replace(\"porcshce\", \"porsche\")\n",
|
|
|
|
|
" .replace(\"toyouta\", \"toyota\")\n",
|
|
|
|
|
" .replace(/(vw|vokswagen)/ig, \"volkswagen\");\n",
|
|
|
|
|
" return name;\n",
|
|
|
|
|
"});\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"brandDf = pl.DataFrame({\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" brand: brandNameTable,\n",
|
|
|
|
|
"});\n",
|
|
|
|
|
"await display(brandDf.unique());"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"id": "eb19de54-11bb-40af-8e27-e5e3a6bd11f0",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"CarName": "alfa-romero giulia",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"car_ID": 1,
|
|
|
|
|
"carbody": "convertible",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 13495,
|
|
|
|
|
"stroke": 2.68,
|
|
|
|
|
"symboling": 3,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"CarName": "alfa-romero stelvio",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"car_ID": 2,
|
|
|
|
|
"carbody": "convertible",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"stroke": 2.68,
|
|
|
|
|
"symboling": 3,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"CarName": "alfa-romero Quadrifoglio",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 2.68,
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"car_ID": 3,
|
|
|
|
|
"carbody": "hatchback",
|
|
|
|
|
"carheight": 52.4,
|
|
|
|
|
"carlength": 171.2,
|
|
|
|
|
"carwidth": 65.5,
|
|
|
|
|
"citympg": 19,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2823,
|
|
|
|
|
"cylindernumber": "six",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 152,
|
|
|
|
|
"enginetype": "ohcv",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 26,
|
|
|
|
|
"horsepower": 154,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"stroke": 3.47,
|
|
|
|
|
"symboling": 1,
|
|
|
|
|
"wheelbase": 94.5
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"CarName": "audi 100 ls",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"brand": "audi",
|
|
|
|
|
"car_ID": 4,
|
|
|
|
|
"carbody": "sedan",
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.2,
|
|
|
|
|
"citympg": 24,
|
|
|
|
|
"compressionratio": 10,
|
|
|
|
|
"curbweight": 2337,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "four",
|
|
|
|
|
"drivewheel": "fwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 109,
|
|
|
|
|
"enginetype": "ohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 30,
|
|
|
|
|
"horsepower": 102,
|
|
|
|
|
"peakrpm": 5500,
|
|
|
|
|
"price": 13950,
|
|
|
|
|
"stroke": 3.4,
|
|
|
|
|
"symboling": 2,
|
|
|
|
|
"wheelbase": 99.8
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"CarName": "audi 100ls",
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"brand": "audi",
|
|
|
|
|
"car_ID": 5,
|
|
|
|
|
"carbody": "sedan",
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.4,
|
|
|
|
|
"citympg": 18,
|
|
|
|
|
"compressionratio": 8,
|
|
|
|
|
"curbweight": 2824,
|
|
|
|
|
"cylindernumber": "five",
|
|
|
|
|
"doornumber": "four",
|
|
|
|
|
"drivewheel": "4wd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 136,
|
|
|
|
|
"enginetype": "ohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 22,
|
|
|
|
|
"horsepower": 115,
|
|
|
|
|
"peakrpm": 5500,
|
|
|
|
|
"price": 17450,
|
|
|
|
|
"stroke": 3.4,
|
|
|
|
|
"symboling": 2,
|
|
|
|
|
"wheelbase": 99.4
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "car_ID",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "symboling",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "CarName",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fueltype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "aspiration",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "doornumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carbody",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "drivewheel",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginelocation",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "wheelbase",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carlength",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carwidth",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carheight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "curbweight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginesize",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "boreratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "stroke",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "compressionratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "horsepower",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "peakrpm",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "citympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "highwaympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "price",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<table><thead><tr><th>brand</th><th>car_ID</th><th>symboling</th><th>CarName</th><th>fueltype</th><th>aspiration</th><th>doornumber</th><th>carbody</th><th>drivewheel</th><th>enginelocation</th><th>wheelbase</th><th>carlength</th><th>carwidth</th><th>carheight</th><th>curbweight</th><th>enginetype</th><th>cylindernumber</th><th>enginesize</th><th>fuelsystem</th><th>boreratio</th><th>stroke</th><th>compressionratio</th><th>horsepower</th><th>peakrpm</th><th>citympg</th><th>highwaympg</th><th>price</th></tr></thead><tbody><tr><td>alfa-romero</td><td>1</td><td>3</td><td>alfa-romero giulia</td><td>gas</td><td>std</td><td>two</td><td>convertible</td><td>rwd</td><td>front</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>2.68</td><td>9</td><td>111</td><td>5000</td><td>21</td><td>27</td><td>13495</td></tr><tr><td>alfa-romero</td><td>2</td><td>3</td><td>alfa-romero stelvio</td><td>gas</td><td>std</td><td>two</td><td>convertible</td><td>rwd</td><td>front</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>2.68</td><td>9</td><td>111</td><td>5000</td><td>21</td><td>27</td><td>16500</td></tr><tr><td>alfa-romero</td><td>3</td><td>1</td><td>alfa-romero Quadrifoglio</td><td>gas</td><td>std</td><td>two</td><td>hatchback</td><td>rwd</td><td>front</td><td>94.5</td><td>171.2</td><td>65.5</td><td>52.4</td><td>2823</td><td>ohcv</td><td>six</td><td>152</td><td>mpfi</td><td>2.68</td><td>3.47</td><td>9</td><td>154</td><td>5000</td><td>19</td><td>26</td><td>16500</td></tr><tr><td>audi</td><td>4</td><td>2</td><td>audi 100 
ls</td><td>gas</td><td>std</td><td>four</td><td>sedan</td><td>fwd</td><td>front</td><td>99.8</td><td>176.6</td><td>66.2</td><td>54.3</td><td>2337</td><td>ohc</td><td>four</td><td>109</td><td>mpfi</td><td>3.19</td><td>3.4</td><td>10</td><td>102</td><td>5500</td><td>24</td><td>30</td><td>13950</td></tr><tr><td>audi</td><td>5</td><td>2</td><td>audi 100ls</td><td>gas</td><td>std</td><td>four</td><td>sedan</td><td>4wd</td><td>front</td><td>99.4</td><td>176.6</td><td>66.4</td><td>54.3</td><td>2824</td><td>ohc</td><td>five</td><td>136</td><td>mpfi</td><td>3.19</td><td>3.4</td><td>8</td><td>115</td><td>5500</td><td>18</td><td>22</td><td>17450</td></tr></tbody></table>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"// add new column `brand` to our dataframe\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"df = brandDf.hstack(df);\n",
|
|
|
|
|
"await display(df.head(5));"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "81abdb0d-41ce-445c-9a75-f9494eb1f9d3",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Drop unnecessary columns, and write to the new file:"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 7,
|
|
|
|
|
"id": "6da96c06-b0b6-46b7-be2d-824af909aec4",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"carbody": "convertible",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 13495,
|
|
|
|
|
"stroke": 2.68,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"carbody": "convertible",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"stroke": 2.68,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 2.68,
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"carbody": "hatchback",
|
|
|
|
|
"carheight": 52.4,
|
|
|
|
|
"carlength": 171.2,
|
|
|
|
|
"carwidth": 65.5,
|
|
|
|
|
"citympg": 19,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2823,
|
|
|
|
|
"cylindernumber": "six",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 152,
|
|
|
|
|
"enginetype": "ohcv",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 26,
|
|
|
|
|
"horsepower": 154,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"stroke": 3.47,
|
|
|
|
|
"wheelbase": 94.5
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fueltype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "aspiration",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "doornumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carbody",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "drivewheel",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginelocation",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "wheelbase",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carlength",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carwidth",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carheight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "curbweight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginesize",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "boreratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "stroke",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "compressionratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "horsepower",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "peakrpm",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "citympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "highwaympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "price",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<table><thead><tr><th>brand</th><th>fueltype</th><th>aspiration</th><th>doornumber</th><th>carbody</th><th>drivewheel</th><th>enginelocation</th><th>wheelbase</th><th>carlength</th><th>carwidth</th><th>carheight</th><th>curbweight</th><th>enginetype</th><th>cylindernumber</th><th>enginesize</th><th>fuelsystem</th><th>boreratio</th><th>stroke</th><th>compressionratio</th><th>horsepower</th><th>peakrpm</th><th>citympg</th><th>highwaympg</th><th>price</th></tr></thead><tbody><tr><td>alfa-romero</td><td>gas</td><td>std</td><td>two</td><td>convertible</td><td>rwd</td><td>front</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>2.68</td><td>9</td><td>111</td><td>5000</td><td>21</td><td>27</td><td>13495</td></tr><tr><td>alfa-romero</td><td>gas</td><td>std</td><td>two</td><td>convertible</td><td>rwd</td><td>front</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>2.68</td><td>9</td><td>111</td><td>5000</td><td>21</td><td>27</td><td>16500</td></tr><tr><td>alfa-romero</td><td>gas</td><td>std</td><td>two</td><td>hatchback</td><td>rwd</td><td>front</td><td>94.5</td><td>171.2</td><td>65.5</td><td>52.4</td><td>2823</td><td>ohcv</td><td>six</td><td>152</td><td>mpfi</td><td>2.68</td><td>3.47</td><td>9</td><td>154</td><td>5000</td><td>19</td><td>26</td><td>16500</td></tr></tbody></table>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 7,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"df = df.drop(\"car_ID\", \"symboling\", \"CarName\");\n",
|
|
|
|
|
"df.head(3);"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "0f827b40-99b5-445d-b339-2bb55b9d6686",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"- Use `df.groupBy(...cols)` to get aggregates from the dataset:\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Count cars by brand:"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
"id": "7512602c-6ce4-4167-9f74-78bc5e8c0332",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"scrolled": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"brand": "mercury",
|
|
|
|
|
"brand_count": 1
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "renault",
|
|
|
|
|
"brand_count": 2
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"brand": "jaguar",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 3
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "alfa-romero",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 3
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "chevrolet",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 3
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "isuzu",
|
|
|
|
|
"brand_count": 4
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "porsche",
|
|
|
|
|
"brand_count": 5
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "saab",
|
|
|
|
|
"brand_count": 6
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "audi",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 7
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "plymouth",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 7
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"brand": "buick",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 8
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"brand": "bmw",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 8
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "dodge",
|
|
|
|
|
"brand_count": 9
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"brand": "peugeot",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 11
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"brand": "volvo",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 11
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "volkswagen",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 12
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "subaru",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 12
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "honda",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 13
|
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand": "mitsubishi",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"brand_count": 13
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "mazda",
|
|
|
|
|
"brand_count": 17
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "nissan",
|
|
|
|
|
"brand_count": 18
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "toyota",
|
|
|
|
|
"brand_count": 32
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand_count",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"<table><thead><tr><th>brand</th><th>brand_count</th></tr></thead><tbody><tr><td>mercury</td><td>1</td></tr><tr><td>renault</td><td>2</td></tr><tr><td>jaguar</td><td>3</td></tr><tr><td>alfa-romero</td><td>3</td></tr><tr><td>chevrolet</td><td>3</td></tr><tr><td>isuzu</td><td>4</td></tr><tr><td>porsche</td><td>5</td></tr><tr><td>saab</td><td>6</td></tr><tr><td>audi</td><td>7</td></tr><tr><td>plymouth</td><td>7</td></tr><tr><td>buick</td><td>8</td></tr><tr><td>bmw</td><td>8</td></tr><tr><td>dodge</td><td>9</td></tr><tr><td>peugeot</td><td>11</td></tr><tr><td>volvo</td><td>11</td></tr><tr><td>volkswagen</td><td>12</td></tr><tr><td>subaru</td><td>12</td></tr><tr><td>honda</td><td>13</td></tr><tr><td>mitsubishi</td><td>13</td></tr><tr><td>mazda</td><td>17</td></tr><tr><td>nissan</td><td>18</td></tr><tr><td>toyota</td><td>32</td></tr></tbody></table>"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"let brandCount = df.groupBy(\"brand\").len().sort(\"brand_count\");\n",
|
|
|
|
|
"brandCount;"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"id": "44b5baf0-cbd2-48d7-a28b-e871682bbaf0",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"brand": null,
|
|
|
|
|
"describe": "mean",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"price": 15079.877371916704
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": null,
|
|
|
|
|
"describe": "std",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"price": 8738.804703803093
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"describe": "min",
|
|
|
|
|
"price": 6007
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": "volvo",
|
|
|
|
|
"describe": "max",
|
|
|
|
|
"price": 34600
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"brand": null,
|
|
|
|
|
"describe": "median",
|
|
|
|
|
"price": 10534.274509803921
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "describe",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "price",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"<table><thead><tr><th>describe</th><th>brand</th><th>price</th></tr></thead><tbody><tr><td>mean</td><td>null</td><td>15079.877371916704</td></tr><tr><td>std</td><td>null</td><td>8738.804703803093</td></tr><tr><td>min</td><td>alfa-romero</td><td>6007</td></tr><tr><td>max</td><td>volvo</td><td>34600</td></tr><tr><td>median</td><td>null</td><td>10534.274509803921</td></tr></tbody></table>"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"let avgPricePerBrand = df.groupBy(\"brand\").agg({ \"price\": [\"mean\"] });\n",
|
|
|
|
|
"avgPricePerBrand.describe();"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 10,
|
|
|
|
|
"id": "98a18f05-665f-4143-b5d5-6ecda68ca2df",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"{\n",
|
|
|
|
|
" renault: \u001b[33m9595\u001b[39m,\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" jaguar: \u001b[33m34600\u001b[39m,\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
" saab: \u001b[33m15223.333333333334\u001b[39m,\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
" nissan: \u001b[33m10415.666666666666\u001b[39m,\n",
|
|
|
|
|
" toyota: \u001b[33m9885.8125\u001b[39m,\n",
|
|
|
|
|
" mitsubishi: \u001b[33m9239.76923076923\u001b[39m,\n",
|
|
|
|
|
" porsche: \u001b[33m31400.5\u001b[39m,\n",
|
|
|
|
|
" plymouth: \u001b[33m7963.428571428572\u001b[39m,\n",
|
|
|
|
|
" dodge: \u001b[33m7875.444444444444\u001b[39m,\n",
|
|
|
|
|
" mazda: \u001b[33m10652.882352941177\u001b[39m,\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" honda: \u001b[33m8184.692307692308\u001b[39m,\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
" peugeot: \u001b[33m15489.09090909091\u001b[39m,\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" audi: \u001b[33m17859.166714285715\u001b[39m,\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
" volvo: \u001b[33m18063.18181818182\u001b[39m,\n",
|
|
|
|
|
" buick: \u001b[33m33647\u001b[39m,\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" chevrolet: \u001b[33m6007\u001b[39m,\n",
|
|
|
|
|
" isuzu: \u001b[33m8916.5\u001b[39m,\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
" bmw: \u001b[33m26118.75\u001b[39m,\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" \u001b[32m\"alfa-romero\"\u001b[39m: \u001b[33m15498.333333333334\u001b[39m,\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
" volkswagen: \u001b[33m10077.5\u001b[39m,\n",
|
|
|
|
|
" mercury: \u001b[33m16503\u001b[39m,\n",
|
|
|
|
|
" subaru: \u001b[33m8541.25\u001b[39m\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"}"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 10,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"// map brand name to price\n",
|
|
|
|
|
"avgPricePerBrand = avgPricePerBrand\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" .toRecords()\n",
|
|
|
|
|
" .reduce((acc, rec) => ({ ...acc, [rec.brand]: rec.price }), {});"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 11,
|
|
|
|
|
"id": "9e7dd2b4-4356-41ad-bd1e-20b7938f55f0",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
2024-09-25 20:34:25 +00:00
|
|
|
|
"brand_category": "Mid_Range"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand_category": "Budget"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand_category": "Mid_Range"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand_category": "Mid_Range"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
},
|
|
|
|
|
{
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"brand_category": "Budget"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand_category",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"<table><thead><tr><th>brand_category</th></tr></thead><tbody><tr><td>Mid_Range</td></tr><tr><td>Budget</td></tr><tr><td>Mid_Range</td></tr><tr><td>Mid_Range</td></tr><tr><td>Budget</td></tr></tbody></table>"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 11,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"// create brand categories by budget\n",
|
|
|
|
|
"let brandCategory = df.brand.toArray().map((brand) => {\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" const avgPrice = avgPricePerBrand[brand];\n",
|
|
|
|
|
" return avgPrice < 10000\n",
|
|
|
|
|
" ? \"Budget\"\n",
|
|
|
|
|
" : avgPrice > 20000\n",
|
|
|
|
|
" ? \"Luxury\"\n",
|
|
|
|
|
" : \"Mid_Range\";\n",
|
|
|
|
|
"});\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"let catDf = pl.DataFrame({\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" \"brand_category\": brandCategory,\n",
|
|
|
|
|
"});\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"catDf.sample(5);"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "14930aef-1fc0-4baf-92bf-8a093c3d2e86",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Write the cleaned dataset to a new file:"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 12,
|
|
|
|
|
"id": "350c57c7-ec9e-400d-a8e7-19301c720c56",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"df = catDf.hstack(df);\n",
|
|
|
|
|
"df.writeCSV(\"assets/cleaned_car_prices.csv\");"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "0ea22d38-cb20-47a0-8f92-422bc5925abf",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Exploratory Data Analysis\n",
|
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"For plotting we use [@observable/plot](https://observablehq.com/plot) and\n",
|
|
|
|
|
"configured shortcuts for Jupyter notebooks imported from\n",
|
|
|
|
|
"[l12.xyz/x/shortcuts](https://l12.xyz/x/shortcuts)."
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 13,
|
|
|
|
|
"id": "2fed060e-1785-4dc1-ba44-4bcd5cee92f6",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
2024-09-25 20:34:25 +00:00
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"[\n",
|
|
|
|
|
" { brand: \"mercury\", brand_count: 1 },\n",
|
|
|
|
|
" { brand: \"renault\", brand_count: 2 },\n",
|
|
|
|
|
" { brand: \"jaguar\", brand_count: 3 },\n",
|
|
|
|
|
" { brand: \"alfa-romero\", brand_count: 3 },\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
" { brand: \"chevrolet\", brand_count: 3 },\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" { brand: \"isuzu\", brand_count: 4 },\n",
|
|
|
|
|
" { brand: \"porsche\", brand_count: 5 },\n",
|
|
|
|
|
" { brand: \"saab\", brand_count: 6 },\n",
|
|
|
|
|
" { brand: \"audi\", brand_count: 7 },\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
" { brand: \"plymouth\", brand_count: 7 },\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" { brand: \"buick\", brand_count: 8 },\n",
|
|
|
|
|
" { brand: \"bmw\", brand_count: 8 },\n",
|
|
|
|
|
" { brand: \"dodge\", brand_count: 9 },\n",
|
|
|
|
|
" { brand: \"peugeot\", brand_count: 11 },\n",
|
|
|
|
|
" { brand: \"volvo\", brand_count: 11 },\n",
|
|
|
|
|
" { brand: \"volkswagen\", brand_count: 12 },\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
" { brand: \"subaru\", brand_count: 12 },\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" { brand: \"honda\", brand_count: 13 },\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
" { brand: \"mitsubishi\", brand_count: 13 },\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" { brand: \"mazda\", brand_count: 17 },\n",
|
|
|
|
|
" { brand: \"nissan\", brand_count: 18 },\n",
|
|
|
|
|
" { brand: \"toyota\", brand_count: 32 }\n",
|
|
|
|
|
"]\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-25 19:52:53 +00:00
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/svg+xml": [
|
|
|
|
|
"<svg style=\"background-color:#fff\" viewBox=\"0 0 640 500\" height=\"500\" width=\"640\" text-anchor=\"middle\" font-size=\"10\" font-family=\"system-ui, sans-serif\" fill=\"currentColor\" class=\"plot-d6a7b5\"><style>:where(.plot-d6a7b5) {\n",
|
|
|
|
|
" --plot-background: white;\n",
|
|
|
|
|
" display: block;\n",
|
|
|
|
|
" height: auto;\n",
|
|
|
|
|
" height: intrinsic;\n",
|
|
|
|
|
" max-width: 100%;\n",
|
|
|
|
|
"}\n",
|
|
|
|
|
":where(.plot-d6a7b5 text),\n",
|
|
|
|
|
":where(.plot-d6a7b5 tspan) {\n",
|
|
|
|
|
" white-space: pre;\n",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"}</style><g transform=\"translate(0,9)\" stroke=\"currentColor\" fill=\"none\" aria-hidden=\"true\" aria-label=\"y-axis tick\"><path d=\"M0,0L-6,0\" transform=\"translate(80,26)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,46)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,66)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,86)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,106)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,126)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,146)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,166)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,186)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,206)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,226)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,246)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,266)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,286)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,306)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,326)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,346)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,366)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,386)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,406)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,426)\" /><path d=\"M0,0L-6,0\" transform=\"translate(80,446)\" /></g><g transform=\"translate(-9,9)\" text-anchor=\"end\" aria-label=\"y-axis tick label\"><text transform=\"translate(80,26)\" y=\"0.32em\">toyota</text><text transform=\"translate(80,46)\" y=\"0.32em\">nissan</text><text transform=\"translate(80,66)\" y=\"0.32em\">mazda</text><text transform=\"translate(80,86)\" y=\"0.32em\">honda</text><text transform=\"translate(80,106)\" y=\"0.32em\">mitsubishi</text><text transform=\"translate(80,126)\" y=\"0.32em\">subaru</text><text transform=\"translate(80,146)\" y=\"0.32em\">volkswagen</text><text transform=\"translate(80,166)\" y=\"0.32em\">peugeot</text><text transform=\"translate(80,186)\" 
y=\"0.32em\">volvo</text><text transform=\"translate(80,206)\" y=\"0.32em\">dodge</text><text transform=\"translate(80,226)\" y=\"0.32em\">bmw</text><text transform=\"translate(80,246)\" y=\"0.32em\">buick</text><text transform=\"translate(80,266)\" y=\"0.32em\">audi</text><text transform=\"translate(80,286)\" y=\"0.32em\">plymouth</text><text transform=\"translate(80,306)\" y=\"0.32em\">saab</text><text transform=\"translate(80,326)\" y=\"0.32em\">porsche</text><text transform=\"translate(80,346)\" y=\"0.32em\">isuzu</text><text transform=\"translate(80,366)\" y=\"0.32em\">alfa-romero</text><text transform=\"translate(80,386)\" y=\"0.32em\">chevrolet</text><text transform=\"translate(80,406)\" y=\"0.32em\">jaguar</text><text transform=\"translate(80,426)\" y=\"0.32em\">renault</text><text transform=\"translate(80,446)\" y=\"0.32em\">mercury</text></g><g transform=\"translate(-77,0)\" aria-label=\"y-axis label\"><text transform=\"translate(80,245) rotate(-90)\" y=\"0.71em\">brand</text></g><g stroke=\"currentColor\" fill=\"none\" aria-hidden=\"true\" aria-label=\"x-axis tick\"><path d=\"M0,0L0,6\" transform=\"translate(80,470)\" /><path d=\"M0,0L0,6\" transform=\"translate(164.375,470)\" /><path d=\"M0,0L0,6\" transform=\"translate(248.75,470)\" /><path d=\"M0,0L0,6\" transform=\"translate(333.125,470)\" /><path d=\"M0,0L0,6\" transform=\"translate(417.5,470)\" /><path d=\"M0,0L0,6\" transform=\"translate(501.875,470)\" /><path d=\"M0,0L0,6\" transform=\"translate(586.25,470)\" /></g><g transform=\"translate(0,9)\" font-variant=\"tabular-nums\" aria-label=\"x-axis tick label\"><text transform=\"translate(80,470)\" y=\"0.71em\">0</text><text transform=\"translate(164.375,470)\" y=\"0.71em\">5</text><text transform=\"translate(248.75,470)\" y=\"0.71em\">10</text><text transform=\"translate(333.125,470)\" y=\"0.71em\">15</text><text transform=\"translate(417.5,470)\" y=\"0.71em\">20</text><text transform=\"translate(501.875,470)\" y=\"0.71em\">25</text><text 
transform=\"translate(586.25,470)\" y=\"0.71em\">30</text></g><g transform=\"translate(17,27)\" tex
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 13,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"import { document } from \"https://l12.xyz/x/shortcuts/raw/plots.ts\";\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"const brandCountRecords = brandCount.toRecords();\n",
|
|
|
|
|
"console.log(brandCountRecords);\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"const brandCountPlot = Plot.plot({\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" marginLeft: 80,\n",
|
|
|
|
|
" style: {\n",
|
|
|
|
|
" backgroundColor: \"#fff\",\n",
|
|
|
|
|
" },\n",
|
|
|
|
|
" x: { padding: 0.4 },\n",
|
|
|
|
|
" marks: [\n",
|
|
|
|
|
" Plot.barX(brandCountRecords, {\n",
|
|
|
|
|
" x: \"brand_count\",\n",
|
|
|
|
|
" y: \"brand\",\n",
|
|
|
|
|
" sort: { y: \"x\", order: \"descending\" },\n",
|
|
|
|
|
" }),\n",
|
|
|
|
|
" ],\n",
|
|
|
|
|
" document,\n",
|
|
|
|
|
"});\n",
|
|
|
|
|
"await display(brandCountPlot);"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 14,
|
|
|
|
|
"id": "e21f5bdf-a5a8-43b2-855b-0eb079656da7",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"[\n",
|
|
|
|
|
" \u001b[32m\"wheelbase\"\u001b[39m, \u001b[32m\"carlength\"\u001b[39m,\n",
|
|
|
|
|
" \u001b[32m\"carwidth\"\u001b[39m, \u001b[32m\"carheight\"\u001b[39m,\n",
|
|
|
|
|
" \u001b[32m\"curbweight\"\u001b[39m, \u001b[32m\"enginesize\"\u001b[39m,\n",
|
|
|
|
|
" \u001b[32m\"boreratio\"\u001b[39m, \u001b[32m\"stroke\"\u001b[39m,\n",
|
|
|
|
|
" \u001b[32m\"compressionratio\"\u001b[39m, \u001b[32m\"horsepower\"\u001b[39m,\n",
|
|
|
|
|
" \u001b[32m\"peakrpm\"\u001b[39m, \u001b[32m\"citympg\"\u001b[39m,\n",
|
|
|
|
|
" \u001b[32m\"highwaympg\"\u001b[39m\n",
|
|
|
|
|
"]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 14,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"let numericColumns = df.columns.filter((col) =>\n",
|
|
|
|
|
" df[col].isNumeric() && col !== \"price\"\n",
|
|
|
|
|
");\n",
|
|
|
|
|
"numericColumns;"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "1c8d3b90-b4b1-4da6-9e7c-f8fd7cffb77d",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"Sometimes we can get some intuitive insight seeing the data plotted from\n",
|
|
|
|
|
"different dimensions. It is an optional step, but it might help to get some\n",
|
|
|
|
|
"assumptions about the relationships in the dataset. Below is an example for\n",
|
|
|
|
|
"drawing plots side-by-side."
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 15,
|
|
|
|
|
"id": "9a82f623-b39a-473a-91b9-0a0347c64c63",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"\n",
|
|
|
|
|
" <section style=\"display:grid;grid-template-columns: repeat(3, 1fr);\">\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" <img title=\"wheelbase / price\" src='
|
2024-09-25 19:52:53 +00:00
|
|
|
|
" </section>\n",
|
|
|
|
|
" "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 15,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"import { sideBySidePlot } from \"https://l12.xyz/x/shortcuts/raw/plots.ts\";\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"let records = df.toRecords();\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"const plt = sideBySidePlot({\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" x: numericColumns,\n",
|
|
|
|
|
" y: [\"price\"],\n",
|
|
|
|
|
" marks: [\n",
|
|
|
|
|
" (x, y) => Plot.dot(records, { x, y }),\n",
|
|
|
|
|
" (x, y) => Plot.linearRegressionY(records, { x, y, stroke: \"red\" }),\n",
|
|
|
|
|
" ],\n",
|
|
|
|
|
" cols: 3,\n",
|
|
|
|
|
"});\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"await display(\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" plt,\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
");"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "53d04850-df05-474c-8568-a5f7c97146f3",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"Let's view the list of top features that have a high correlation coefficient. The\n",
|
|
|
|
|
"pearsonCorr() function calculates Pearson's r correlation coefficients with\n",
|
|
|
|
|
"respect to the 'price'."
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"id": "924838be-1b4d-4edf-abe5-8bf02e02cf11",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"Variable": "enginesize",
|
|
|
|
|
"idx (price)": 0.8741448025245117
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "curbweight",
|
|
|
|
|
"idx (price)": 0.8353048793372955
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "horsepower",
|
|
|
|
|
"idx (price)": 0.8081388225362217
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "carwidth",
|
|
|
|
|
"idx (price)": 0.7593252997414263
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "carlength",
|
|
|
|
|
"idx (price)": 0.6829200156779843
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "wheelbase",
|
|
|
|
|
"idx (price)": 0.5778155982921477
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "boreratio",
|
|
|
|
|
"idx (price)": 0.5531732367984261
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "carheight",
|
|
|
|
|
"idx (price)": 0.11933622657047727
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "stroke",
|
|
|
|
|
"idx (price)": 0.07944308388192935
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "compressionratio",
|
|
|
|
|
"idx (price)": 0.06798350579944248
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "peakrpm",
|
|
|
|
|
"idx (price)": -0.0852671502778569
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "citympg",
|
|
|
|
|
"idx (price)": -0.6857513360270401
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"Variable": "highwaympg",
|
|
|
|
|
"idx (price)": -0.6975990916465564
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "Variable",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "idx (price)",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<table><thead><tr><th>Variable</th><th>idx (price)</th></tr></thead><tbody><tr><td>enginesize</td><td>0.8741448025245117</td></tr><tr><td>curbweight</td><td>0.8353048793372955</td></tr><tr><td>horsepower</td><td>0.8081388225362217</td></tr><tr><td>carwidth</td><td>0.7593252997414263</td></tr><tr><td>carlength</td><td>0.6829200156779843</td></tr><tr><td>wheelbase</td><td>0.5778155982921477</td></tr><tr><td>boreratio</td><td>0.5531732367984261</td></tr><tr><td>carheight</td><td>0.11933622657047727</td></tr><tr><td>stroke</td><td>0.07944308388192935</td></tr><tr><td>compressionratio</td><td>0.06798350579944248</td></tr><tr><td>peakrpm</td><td>-0.0852671502778569</td></tr><tr><td>citympg</td><td>-0.6857513360270401</td></tr><tr><td>highwaympg</td><td>-0.6975990916465564</td></tr></tbody></table>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"// we select Pearson's idx from dataframe for all numeric cols,\n",
|
|
|
|
|
"// then we transpose result so that columns become rows,\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"// then we sort by the idx column\n",
|
|
|
|
|
"df.select(\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" ...numericColumns.map((col) => pl.pearsonCorr(col, \"price\")),\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
")\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" .transpose({\n",
|
|
|
|
|
" columnNames: [\"idx (price)\"],\n",
|
|
|
|
|
" headerName: \"Variable\",\n",
|
|
|
|
|
" includeHeader: true,\n",
|
|
|
|
|
" })\n",
|
|
|
|
|
" .sort(\"idx (price)\", true);"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "63bc6b92-5ab9-43e0-a957-57d895bc60f9",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"### Linearity Assumption\n",
|
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"Linear regression needs the relationship between independent variable and the\n",
|
|
|
|
|
"dependent variable to be linear. We can test this assumption with some scatter\n",
|
|
|
|
|
"plots and regression lines.\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"** Here we use the same side-by-side plot shortcut, but for selected variables\n",
|
|
|
|
|
"with a high correlation coefficient."
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 17,
|
|
|
|
|
"id": "2d5a635a-c819-4989-9818-a1927363993d",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"\n",
|
|
|
|
|
" <section style=\"display:grid;grid-template-columns: repeat(2, 1fr);\">\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" <img title=\"enginesize / price\" src='
|
2024-09-25 19:52:53 +00:00
|
|
|
|
" </section>\n",
|
|
|
|
|
" "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 17,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"const plt = sideBySidePlot({\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" x: [\"enginesize\", \"curbweight\", \"horsepower\", \"carwidth\"],\n",
|
|
|
|
|
" y: [\"price\"],\n",
|
|
|
|
|
" marks: [\n",
|
|
|
|
|
" (x, y) => Plot.dot(records, { x, y }),\n",
|
|
|
|
|
" (x, y) => Plot.linearRegressionY(records, { x, y, stroke: \"red\" }),\n",
|
|
|
|
|
" ],\n",
|
|
|
|
|
" cols: 2,\n",
|
|
|
|
|
"});\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"await display(\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" plt,\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
");"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "0a351f17-309b-444b-b4a8-26500b077e47",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"### Homoscedasticity\n",
|
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"The assumption of homoscedasticity (constant variance), is crucial to linear\n",
|
|
|
|
|
"regression models. Homoscedasticity describes a situation in which the error\n",
|
|
|
|
|
"term or variance or the \"noise\" or random disturbance in the relationship\n",
|
|
|
|
|
"between the independent variables and the dependent variable is the same across\n",
|
|
|
|
|
"all values of the independent variable. In other words, there is a constant\n",
|
|
|
|
|
"variance present in the response variable as the predictor variable increases.\n",
|
|
|
|
|
"If the \"noise\" is not the same across the values of an independent variable, we\n",
|
|
|
|
|
"call it heteroscedasticity, opposite of homoscedasticity.\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"#### Residuals\n",
|
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"Next we apply the residual expression to the 'price' and 'enginesize' variables in order\n",
|
|
|
|
|
"to check this assumption.\n",
|
|
|
|
|
"[The residuals function](https://l12.xyz/x/shortcuts/src/branch/main/expr.ts)\n",
|
|
|
|
|
"uses mean squared."
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 18,
|
|
|
|
|
"id": "fc9a31c3-6ac0-4058-b5bb-341eb1c40dd1",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/svg+xml": [
|
|
|
|
|
"<svg viewBox=\"0 0 640 400\" height=\"400\" width=\"640\" text-anchor=\"middle\" font-size=\"10\" font-family=\"system-ui, sans-serif\" fill=\"currentColor\" class=\"plot-d6a7b5\"><style>:where(.plot-d6a7b5) {\n",
|
|
|
|
|
" --plot-background: white;\n",
|
|
|
|
|
" display: block;\n",
|
|
|
|
|
" height: auto;\n",
|
|
|
|
|
" height: intrinsic;\n",
|
|
|
|
|
" max-width: 100%;\n",
|
|
|
|
|
"}\n",
|
|
|
|
|
":where(.plot-d6a7b5 text),\n",
|
|
|
|
|
":where(.plot-d6a7b5 tspan) {\n",
|
|
|
|
|
" white-space: pre;\n",
|
|
|
|
|
"}</style><g stroke=\"currentColor\" fill=\"none\" aria-hidden=\"true\" aria-label=\"y-axis tick\"><path d=\"M0,0L-6,0\" transform=\"translate(40,360.676690955552)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,332.6045058220467)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,304.5323206885414)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,276.4601355550362)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,248.3879504215309)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,220.31576528802563)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,192.24358015452034)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,164.17139502101506)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,136.0992098875098)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,108.02702475400451)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,79.95483962049926)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,51.882654486993985)\" /><path d=\"M0,0L-6,0\" transform=\"translate(40,23.810469353488706)\" /></g><g transform=\"translate(-9,0)\" font-variant=\"tabular-nums\" text-anchor=\"end\" aria-label=\"y-axis tick label\"><text transform=\"translate(40,360.676690955552)\" y=\"0.32em\">−10,000</text><text transform=\"translate(40,332.6045058220467)\" y=\"0.32em\">−8,000</text><text transform=\"translate(40,304.5323206885414)\" y=\"0.32em\">−6,000</text><text transform=\"translate(40,276.4601355550362)\" y=\"0.32em\">−4,000</text><text transform=\"translate(40,248.3879504215309)\" y=\"0.32em\">−2,000</text><text transform=\"translate(40,220.31576528802563)\" y=\"0.32em\">0</text><text transform=\"translate(40,192.24358015452034)\" y=\"0.32em\">2,000</text><text transform=\"translate(40,164.17139502101506)\" y=\"0.32em\">4,000</text><text transform=\"translate(40,136.0992098875098)\" y=\"0.32em\">6,000</text><text transform=\"translate(40,108.02702475400451)\" y=\"0.32em\">8,000</text><text transform=\"translate(40,79.95483962049926)\" y=\"0.32em\">10,000</text><text 
transform=\"translate(40,51.882654486993985)\" y=\"0.32em\">12,000</text><text transform=\"translate(40,23.810469353488706)\" y=\"0.32em\">14,000</text></g><g transform=\"translate(-37,-17)\" text-anchor=\"start\" aria-label=\"y-axis label\"><text transform=\"translate(40,20)\" y=\"0.71em\">↑ price</text></g><g stroke=\"currentColor\" fill=\"none\" aria-hidden=\"true\" aria-label=\"x-axis tick\"><path d=\"M0,0L0,6\" transform=\"translate(125.35849056603774,370)\" /><path d=\"M0,0L0,6\" transform=\"translate(234.79245283018867,370)\" /><path d=\"M0,0L0,6\" transform=\"translate(344.22641509433964,370)\" /><path d=\"M0,0L0,6\" transform=\"translate(453.6603773584905,370)\" /><path d=\"M0,0L0,6\" transform=\"translate(563.0943396226415,370)\" /></g><g transform=\"translate(0,9)\" font-variant=\"tabular-nums\" aria-label=\"x-axis tick label\"><text transform=\"translate(125.35849056603774,370)\" y=\"0.71em\">100</text><text transform=\"translate(234.79245283018867,370)\" y=\"0.71em\">150</text><text transform=\"translate(344.22641509433964,370)\" y=\"0.71em\">200</text><text transform=\"translate(453.6603773584905,370)\" y=\"0.71em\">250</text><text transform=\"translate(563.0943396226415,370)\" y=\"0.71em\">300</text></g><g transform=\"translate(17,27)\" text-anchor=\"end\" aria-label=\"x-axis label\"><text transform=\"translate(620,370)\">enginesize →</text></g><g stroke-width=\"1.5\" stroke=\"currentColor\" fill=\"none\" aria-label=\"dot\"><circle r=\"3\" cy=\"224.53148603945945\" cx=\"191.0188679245283\" /><circle r=\"3\" cy=\"182.35302787636778\" cx=\"191.0188679245283\" /><circle r=\"3\" cy=\"234.13729878380033\" cx=\"239.16981132075472\" /><circle r=\"3\" cy=\"168.7146235099468\" cx=\"145.0566037735849\" /><circle r=\"3\" cy=\"183.14172291270714\" cx=\"204.1509433962264\" /><circle r=\"3\" cy=\"214.0211265595629\" cx=\"204.1509433962264\" /><circle r=\"3\" cy=\"179.49233884535147\" cx=\"204.1509433962264\" /><circle r=\"3\" cy=\"162.50866683958074\" 
cx=\"204.1509433962264\" /><circle r=\"3\" cy=\"81.19067569235949\" cx=\"193.20754716981133\"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 18,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"import { residuals } from \"https://l12.xyz/x/shortcuts/raw/expr.ts\";\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"let residualDf = df.select(\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" \"enginesize\",\n",
|
|
|
|
|
" residuals(pl.col(\"enginesize\"), pl.col(\"price\")),\n",
|
|
|
|
|
");\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"let residPlot = Plot.plot({\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" x: \"enginesize\",\n",
|
|
|
|
|
" y: \"price\",\n",
|
|
|
|
|
" marks: [\n",
|
|
|
|
|
" Plot.dot(residualDf.toRecords(), { x: \"enginesize\", y: \"price\" }),\n",
|
|
|
|
|
" Plot.ruleY([0], { stroke: \"#ccc\" }),\n",
|
|
|
|
|
" ],\n",
|
|
|
|
|
" document,\n",
|
|
|
|
|
"});\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"await display(residPlot);"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "29a0ec4e-084c-4af6-9af6-a14ae5cee4e7",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"From the above plot, we can tell the error variance across the true line is\n",
|
|
|
|
|
"dispersed non-uniformly, in a funnel-like shape. So, the assumption\n",
|
|
|
|
|
"of _homoscedasticity_ is most likely not met."
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "33d787f3-2a7b-421c-9d9b-a86719a92f9a",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Normality\n",
|
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"The linear regression analysis requires the dependent variable, 'price', to be\n",
|
|
|
|
|
"normally distributed. A histogram, box plot, or Q-Q plot can check if the\n",
|
|
|
|
|
"target variable is normally distributed. A goodness-of-fit test, e.g., the\n",
|
|
|
|
|
"Kolmogorov-Smirnov test, can check for normality in the dependent variable.\n",
|
|
|
|
|
"[This documentation](https://towardsdatascience.com/normality-tests-in-python-31e04aa4f411)\n",
|
|
|
|
|
"contains more information on the normality assumption.\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"Let's display all three charts to show how our target variable, 'price', behaves."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 19,
|
|
|
|
|
"id": "b94d0d8d-ae95-41f5-b17f-0ab9b3aa27ec",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" <section style=\"display:flex;flex-direction: column; gap: 1em;\">\n",
|
|
|
|
|
" <div style=\"gap:0.5em; display:flex; flex-direction: row; border: 1px solid black;\">\n",
|
|
|
|
|
" <img src=\"
|
|
|
|
|
" <img src=\"
|
|
|
|
|
" </div>\n",
|
|
|
|
|
" <div style=\"padding:1em; border: 1px solid black;\">\n",
|
|
|
|
|
" <img src=\"
|
|
|
|
|
" </div>\n",
|
|
|
|
|
" </section>\n",
|
|
|
|
|
" "
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 19,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"import { threeChart } from \"https://l12.xyz/x/shortcuts/raw/plots.ts\";\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"await display(threeChart(records, \"price\"));"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "81fe890e-56c4-4076-b57c-38fe438c0ead",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"These three charts above can tell us a lot about our target variable:\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"- Our target variable, 'price', is not normally distributed\n",
|
|
|
|
|
"- Our target variable is right-skewed\n",
|
|
|
|
|
"- There are some outliers in the variable\n",
|
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"The right-skewed plot means that most prices in the dataset are on the lower end\n",
|
|
|
|
|
"(below 15,000). The 'max' value is very far from the '75%' quantile statistic.\n",
|
|
|
|
|
"All these plots show that the assumption for accurate linear regression modeling\n",
|
|
|
|
|
"is not met.\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"Next, we will perform the log transformation to correct our target variable and\n",
|
|
|
|
|
"to make it more normally distributed."
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"id": "8f8c6044-d3a1-4f8e-89ba-365fea3fbd8b",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"\n",
|
2024-09-25 20:34:25 +00:00
|
|
|
|
" <section style=\"display:flex;flex-direction: column; gap: 1em;\">\n",
|
|
|
|
|
" <div style=\"gap:0.5em; display:flex; flex-direction: row; border: 1px solid black;\">\n",
|
|
|
|
|
" <img src=\"
|
|
|
|
|
" <img src=\"
|
|
|
|
|
" </div>\n",
|
|
|
|
|
" <div style=\"padding:1em; border: 1px solid black;\">\n",
|
|
|
|
|
" <img src=\"
|
|
|
|
|
" </div>\n",
|
|
|
|
|
" </section>\n",
|
|
|
|
|
" "
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"execution_count": 20,
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"let log2df = df.select(pl.col(\"price\").log());\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"await display(threeChart(log2df.toRecords(), \"price\"));"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "09a9d9c0-7fb6-4353-91f8-4899520b36d7",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Data Encoding"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"execution_count": 21,
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"id": "79f05578-b839-4278-afb1-0123c1d32d17",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carbody": "convertible",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 13495,
|
|
|
|
|
"stroke": 2.68,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carbody": "convertible",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"stroke": 2.68,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 2.68,
|
|
|
|
|
"brand": "alfa-romero",
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carbody": "hatchback",
|
|
|
|
|
"carheight": 52.4,
|
|
|
|
|
"carlength": 171.2,
|
|
|
|
|
"carwidth": 65.5,
|
|
|
|
|
"citympg": 19,
|
|
|
|
|
"compressionratio": 9,
|
|
|
|
|
"curbweight": 2823,
|
|
|
|
|
"cylindernumber": "six",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 152,
|
|
|
|
|
"enginetype": "ohcv",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 26,
|
|
|
|
|
"horsepower": 154,
|
|
|
|
|
"peakrpm": 5000,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"stroke": 3.47,
|
|
|
|
|
"wheelbase": 94.5
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"brand": "audi",
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carbody": "sedan",
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.2,
|
|
|
|
|
"citympg": 24,
|
|
|
|
|
"compressionratio": 10,
|
|
|
|
|
"curbweight": 2337,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "four",
|
|
|
|
|
"drivewheel": "fwd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 109,
|
|
|
|
|
"enginetype": "ohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 30,
|
|
|
|
|
"horsepower": 102,
|
|
|
|
|
"peakrpm": 5500,
|
|
|
|
|
"price": 13950,
|
|
|
|
|
"stroke": 3.4,
|
|
|
|
|
"wheelbase": 99.8
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"brand": "audi",
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carbody": "sedan",
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.4,
|
|
|
|
|
"citympg": 18,
|
|
|
|
|
"compressionratio": 8,
|
|
|
|
|
"curbweight": 2824,
|
|
|
|
|
"cylindernumber": "five",
|
|
|
|
|
"doornumber": "four",
|
|
|
|
|
"drivewheel": "4wd",
|
|
|
|
|
"enginelocation": "front",
|
|
|
|
|
"enginesize": 136,
|
|
|
|
|
"enginetype": "ohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 22,
|
|
|
|
|
"horsepower": 115,
|
|
|
|
|
"peakrpm": 5500,
|
|
|
|
|
"price": 17450,
|
|
|
|
|
"stroke": 3.4,
|
|
|
|
|
"wheelbase": 99.4
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand_category",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fueltype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "aspiration",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "doornumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carbody",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "drivewheel",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginelocation",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "wheelbase",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carlength",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carwidth",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carheight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "curbweight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginesize",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "boreratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "stroke",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "compressionratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "horsepower",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "peakrpm",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "citympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "highwaympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "price",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<table><thead><tr><th>brand_category</th><th>brand</th><th>fueltype</th><th>aspiration</th><th>doornumber</th><th>carbody</th><th>drivewheel</th><th>enginelocation</th><th>wheelbase</th><th>carlength</th><th>carwidth</th><th>carheight</th><th>curbweight</th><th>enginetype</th><th>cylindernumber</th><th>enginesize</th><th>fuelsystem</th><th>boreratio</th><th>stroke</th><th>compressionratio</th><th>horsepower</th><th>peakrpm</th><th>citympg</th><th>highwaympg</th><th>price</th></tr></thead><tbody><tr><td>Mid_Range</td><td>alfa-romero</td><td>gas</td><td>std</td><td>two</td><td>convertible</td><td>rwd</td><td>front</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>2.68</td><td>9</td><td>111</td><td>5000</td><td>21</td><td>27</td><td>13495</td></tr><tr><td>Mid_Range</td><td>alfa-romero</td><td>gas</td><td>std</td><td>two</td><td>convertible</td><td>rwd</td><td>front</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>2.68</td><td>9</td><td>111</td><td>5000</td><td>21</td><td>27</td><td>16500</td></tr><tr><td>Mid_Range</td><td>alfa-romero</td><td>gas</td><td>std</td><td>two</td><td>hatchback</td><td>rwd</td><td>front</td><td>94.5</td><td>171.2</td><td>65.5</td><td>52.4</td><td>2823</td><td>ohcv</td><td>six</td><td>152</td><td>mpfi</td><td>2.68</td><td>3.47</td><td>9</td><td>154</td><td>5000</td><td>19</td><td>26</td><td>16500</td></tr><tr><td>Mid_Range</td><td>audi</td><td>gas</td><td>std</td><td>four</td><td>sedan</td><td>fwd</td><td>front</td><td>99.8</td><td>176.6</td><td>66.2</td><td>54.3</td><td>2337</td><td>ohc</td><td>four</td><td>109</td><td>mpfi</td><td>3.19</td><td>3.4</td><td>10</td><td>102</td><td>5500</td><td>24</td><td>30</td><td>13950</td></tr><tr><td>Mid_Range</td><td>audi</td><td>gas</td><td>std</td><td>four</td><td>sedan</td><td>4wd</td><td>front</td><td>99.4</td><td>176.6</td><td>
66.4</td><td>54.3</td><td>2824</td><td>ohc</td><td>five</td><td>136</td><td>mpfi</td><td>3.19</td><td>3.4</td><td>8</td><td>115</td><td>5500</td><td>18</td><td>22</td><td>17450</td></tr></tbody></table>"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"execution_count": 21,
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"let carData = pl.readCSV(\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
" await Deno.readTextFile(\"assets/cleaned_car_prices.csv\"),\n",
|
|
|
|
|
" { sep: \",\" },\n",
|
|
|
|
|
");\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"carData.head(5);"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "af6eb67c-574a-430a-a04c-24d0ef9f35fa",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"We'll drop some unnecessary columns:"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"execution_count": 22,
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"id": "e2df0c3f-dbc0-4820-b807-373ce3787645",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"price": 13495,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype": "dohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 2.68,
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carheight": 52.4,
|
|
|
|
|
"carlength": 171.2,
|
|
|
|
|
"carwidth": 65.5,
|
|
|
|
|
"citympg": 19,
|
|
|
|
|
"curbweight": 2823,
|
|
|
|
|
"cylindernumber": "six",
|
|
|
|
|
"doornumber": "two",
|
|
|
|
|
"drivewheel": "rwd",
|
|
|
|
|
"enginesize": 152,
|
|
|
|
|
"enginetype": "ohcv",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 26,
|
|
|
|
|
"horsepower": 154,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"wheelbase": 94.5
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.2,
|
|
|
|
|
"citympg": 24,
|
|
|
|
|
"curbweight": 2337,
|
|
|
|
|
"cylindernumber": "four",
|
|
|
|
|
"doornumber": "four",
|
|
|
|
|
"drivewheel": "fwd",
|
|
|
|
|
"enginesize": 109,
|
|
|
|
|
"enginetype": "ohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 30,
|
|
|
|
|
"horsepower": 102,
|
|
|
|
|
"price": 13950,
|
|
|
|
|
"wheelbase": 99.8
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration": "std",
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"brand_category": "Mid_Range",
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.4,
|
|
|
|
|
"citympg": 18,
|
|
|
|
|
"curbweight": 2824,
|
|
|
|
|
"cylindernumber": "five",
|
|
|
|
|
"doornumber": "four",
|
|
|
|
|
"drivewheel": "4wd",
|
|
|
|
|
"enginesize": 136,
|
|
|
|
|
"enginetype": "ohc",
|
|
|
|
|
"fuelsystem": "mpfi",
|
|
|
|
|
"fueltype": "gas",
|
|
|
|
|
"highwaympg": 22,
|
|
|
|
|
"horsepower": 115,
|
|
|
|
|
"price": 17450,
|
|
|
|
|
"wheelbase": 99.4
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand_category",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fueltype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "aspiration",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "doornumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "drivewheel",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "wheelbase",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carlength",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carwidth",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carheight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "curbweight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginesize",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "string"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "boreratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "horsepower",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "citympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "highwaympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "price",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<table><thead><tr><th>brand_category</th><th>fueltype</th><th>aspiration</th><th>doornumber</th><th>drivewheel</th><th>wheelbase</th><th>carlength</th><th>carwidth</th><th>carheight</th><th>curbweight</th><th>enginetype</th><th>cylindernumber</th><th>enginesize</th><th>fuelsystem</th><th>boreratio</th><th>horsepower</th><th>citympg</th><th>highwaympg</th><th>price</th></tr></thead><tbody><tr><td>Mid_Range</td><td>gas</td><td>std</td><td>two</td><td>rwd</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>111</td><td>21</td><td>27</td><td>13495</td></tr><tr><td>Mid_Range</td><td>gas</td><td>std</td><td>two</td><td>rwd</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>dohc</td><td>four</td><td>130</td><td>mpfi</td><td>3.47</td><td>111</td><td>21</td><td>27</td><td>16500</td></tr><tr><td>Mid_Range</td><td>gas</td><td>std</td><td>two</td><td>rwd</td><td>94.5</td><td>171.2</td><td>65.5</td><td>52.4</td><td>2823</td><td>ohcv</td><td>six</td><td>152</td><td>mpfi</td><td>2.68</td><td>154</td><td>19</td><td>26</td><td>16500</td></tr><tr><td>Mid_Range</td><td>gas</td><td>std</td><td>four</td><td>fwd</td><td>99.8</td><td>176.6</td><td>66.2</td><td>54.3</td><td>2337</td><td>ohc</td><td>four</td><td>109</td><td>mpfi</td><td>3.19</td><td>102</td><td>24</td><td>30</td><td>13950</td></tr><tr><td>Mid_Range</td><td>gas</td><td>std</td><td>four</td><td>4wd</td><td>99.4</td><td>176.6</td><td>66.4</td><td>54.3</td><td>2824</td><td>ohc</td><td>five</td><td>136</td><td>mpfi</td><td>3.19</td><td>115</td><td>18</td><td>22</td><td>17450</td></tr></tbody></table>"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"execution_count": 22,
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"let carDataGeneralized = carData.drop(\n",
|
|
|
|
|
" \"brand\",\n",
|
|
|
|
|
" \"carbody\",\n",
|
|
|
|
|
" \"enginelocation\",\n",
|
|
|
|
|
" \"stroke\",\n",
|
|
|
|
|
" \"compressionratio\",\n",
|
|
|
|
|
" \"peakrpm\",\n",
|
|
|
|
|
");\n",
|
|
|
|
|
"carDataGeneralized.head(5);"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "826ab213-65a2-43d0-b9ce-fecdebd79eb8",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"Next we use one-hot (binary) encoding. We assume that all non-numeric columns are\n",
|
|
|
|
|
"categorical."
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"execution_count": 23,
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"id": "9bc95dd8-0240-45c6-a0f9-21b1873ae04b",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"application/vnd.dataresource+json": {
|
|
|
|
|
"bytes": null,
|
|
|
|
|
"data": [
|
|
|
|
|
{
|
|
|
|
|
"aspiration_std": 1,
|
|
|
|
|
"aspiration_turbo": 0,
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand_category_Budget": 0,
|
|
|
|
|
"brand_category_Luxury": 0,
|
|
|
|
|
"brand_category_Mid_Range": 1,
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber_eight": 0,
|
|
|
|
|
"cylindernumber_five": 0,
|
|
|
|
|
"cylindernumber_four": 1,
|
|
|
|
|
"cylindernumber_six": 0,
|
|
|
|
|
"cylindernumber_three": 0,
|
|
|
|
|
"cylindernumber_twelve": 0,
|
|
|
|
|
"cylindernumber_two": 0,
|
|
|
|
|
"doornumber_four": 0,
|
|
|
|
|
"doornumber_two": 1,
|
|
|
|
|
"drivewheel_4wd": 0,
|
|
|
|
|
"drivewheel_fwd": 0,
|
|
|
|
|
"drivewheel_rwd": 1,
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype_dohc": 1,
|
|
|
|
|
"enginetype_dohcv": 0,
|
|
|
|
|
"enginetype_l": 0,
|
|
|
|
|
"enginetype_ohc": 0,
|
|
|
|
|
"enginetype_ohcf": 0,
|
|
|
|
|
"enginetype_ohcv": 0,
|
|
|
|
|
"enginetype_rotor": 0,
|
|
|
|
|
"fuelsystem_1bbl": 0,
|
|
|
|
|
"fuelsystem_2bbl": 0,
|
|
|
|
|
"fuelsystem_4bbl": 0,
|
|
|
|
|
"fuelsystem_idi": 0,
|
|
|
|
|
"fuelsystem_mfi": 0,
|
|
|
|
|
"fuelsystem_mpfi": 1,
|
|
|
|
|
"fuelsystem_spdi": 0,
|
|
|
|
|
"fuelsystem_spfi": 0,
|
|
|
|
|
"fueltype_diesel": 0,
|
|
|
|
|
"fueltype_gas": 1,
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"price": 13495,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration_std": 1,
|
|
|
|
|
"aspiration_turbo": 0,
|
|
|
|
|
"boreratio": 3.47,
|
|
|
|
|
"brand_category_Budget": 0,
|
|
|
|
|
"brand_category_Luxury": 0,
|
|
|
|
|
"brand_category_Mid_Range": 1,
|
|
|
|
|
"carheight": 48.8,
|
|
|
|
|
"carlength": 168.8,
|
|
|
|
|
"carwidth": 64.1,
|
|
|
|
|
"citympg": 21,
|
|
|
|
|
"curbweight": 2548,
|
|
|
|
|
"cylindernumber_eight": 0,
|
|
|
|
|
"cylindernumber_five": 0,
|
|
|
|
|
"cylindernumber_four": 1,
|
|
|
|
|
"cylindernumber_six": 0,
|
|
|
|
|
"cylindernumber_three": 0,
|
|
|
|
|
"cylindernumber_twelve": 0,
|
|
|
|
|
"cylindernumber_two": 0,
|
|
|
|
|
"doornumber_four": 0,
|
|
|
|
|
"doornumber_two": 1,
|
|
|
|
|
"drivewheel_4wd": 0,
|
|
|
|
|
"drivewheel_fwd": 0,
|
|
|
|
|
"drivewheel_rwd": 1,
|
|
|
|
|
"enginesize": 130,
|
|
|
|
|
"enginetype_dohc": 1,
|
|
|
|
|
"enginetype_dohcv": 0,
|
|
|
|
|
"enginetype_l": 0,
|
|
|
|
|
"enginetype_ohc": 0,
|
|
|
|
|
"enginetype_ohcf": 0,
|
|
|
|
|
"enginetype_ohcv": 0,
|
|
|
|
|
"enginetype_rotor": 0,
|
|
|
|
|
"fuelsystem_1bbl": 0,
|
|
|
|
|
"fuelsystem_2bbl": 0,
|
|
|
|
|
"fuelsystem_4bbl": 0,
|
|
|
|
|
"fuelsystem_idi": 0,
|
|
|
|
|
"fuelsystem_mfi": 0,
|
|
|
|
|
"fuelsystem_mpfi": 1,
|
|
|
|
|
"fuelsystem_spdi": 0,
|
|
|
|
|
"fuelsystem_spfi": 0,
|
|
|
|
|
"fueltype_diesel": 0,
|
|
|
|
|
"fueltype_gas": 1,
|
|
|
|
|
"highwaympg": 27,
|
|
|
|
|
"horsepower": 111,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"wheelbase": 88.6
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration_std": 1,
|
|
|
|
|
"aspiration_turbo": 0,
|
|
|
|
|
"boreratio": 2.68,
|
|
|
|
|
"brand_category_Budget": 0,
|
|
|
|
|
"brand_category_Luxury": 0,
|
|
|
|
|
"brand_category_Mid_Range": 1,
|
|
|
|
|
"carheight": 52.4,
|
|
|
|
|
"carlength": 171.2,
|
|
|
|
|
"carwidth": 65.5,
|
|
|
|
|
"citympg": 19,
|
|
|
|
|
"curbweight": 2823,
|
|
|
|
|
"cylindernumber_eight": 0,
|
|
|
|
|
"cylindernumber_five": 0,
|
|
|
|
|
"cylindernumber_four": 0,
|
|
|
|
|
"cylindernumber_six": 1,
|
|
|
|
|
"cylindernumber_three": 0,
|
|
|
|
|
"cylindernumber_twelve": 0,
|
|
|
|
|
"cylindernumber_two": 0,
|
|
|
|
|
"doornumber_four": 0,
|
|
|
|
|
"doornumber_two": 1,
|
|
|
|
|
"drivewheel_4wd": 0,
|
|
|
|
|
"drivewheel_fwd": 0,
|
|
|
|
|
"drivewheel_rwd": 1,
|
|
|
|
|
"enginesize": 152,
|
|
|
|
|
"enginetype_dohc": 0,
|
|
|
|
|
"enginetype_dohcv": 0,
|
|
|
|
|
"enginetype_l": 0,
|
|
|
|
|
"enginetype_ohc": 0,
|
|
|
|
|
"enginetype_ohcf": 0,
|
|
|
|
|
"enginetype_ohcv": 1,
|
|
|
|
|
"enginetype_rotor": 0,
|
|
|
|
|
"fuelsystem_1bbl": 0,
|
|
|
|
|
"fuelsystem_2bbl": 0,
|
|
|
|
|
"fuelsystem_4bbl": 0,
|
|
|
|
|
"fuelsystem_idi": 0,
|
|
|
|
|
"fuelsystem_mfi": 0,
|
|
|
|
|
"fuelsystem_mpfi": 1,
|
|
|
|
|
"fuelsystem_spdi": 0,
|
|
|
|
|
"fuelsystem_spfi": 0,
|
|
|
|
|
"fueltype_diesel": 0,
|
|
|
|
|
"fueltype_gas": 1,
|
|
|
|
|
"highwaympg": 26,
|
|
|
|
|
"horsepower": 154,
|
|
|
|
|
"price": 16500,
|
|
|
|
|
"wheelbase": 94.5
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration_std": 1,
|
|
|
|
|
"aspiration_turbo": 0,
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"brand_category_Budget": 0,
|
|
|
|
|
"brand_category_Luxury": 0,
|
|
|
|
|
"brand_category_Mid_Range": 1,
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.2,
|
|
|
|
|
"citympg": 24,
|
|
|
|
|
"curbweight": 2337,
|
|
|
|
|
"cylindernumber_eight": 0,
|
|
|
|
|
"cylindernumber_five": 0,
|
|
|
|
|
"cylindernumber_four": 1,
|
|
|
|
|
"cylindernumber_six": 0,
|
|
|
|
|
"cylindernumber_three": 0,
|
|
|
|
|
"cylindernumber_twelve": 0,
|
|
|
|
|
"cylindernumber_two": 0,
|
|
|
|
|
"doornumber_four": 1,
|
|
|
|
|
"doornumber_two": 0,
|
|
|
|
|
"drivewheel_4wd": 0,
|
|
|
|
|
"drivewheel_fwd": 1,
|
|
|
|
|
"drivewheel_rwd": 0,
|
|
|
|
|
"enginesize": 109,
|
|
|
|
|
"enginetype_dohc": 0,
|
|
|
|
|
"enginetype_dohcv": 0,
|
|
|
|
|
"enginetype_l": 0,
|
|
|
|
|
"enginetype_ohc": 1,
|
|
|
|
|
"enginetype_ohcf": 0,
|
|
|
|
|
"enginetype_ohcv": 0,
|
|
|
|
|
"enginetype_rotor": 0,
|
|
|
|
|
"fuelsystem_1bbl": 0,
|
|
|
|
|
"fuelsystem_2bbl": 0,
|
|
|
|
|
"fuelsystem_4bbl": 0,
|
|
|
|
|
"fuelsystem_idi": 0,
|
|
|
|
|
"fuelsystem_mfi": 0,
|
|
|
|
|
"fuelsystem_mpfi": 1,
|
|
|
|
|
"fuelsystem_spdi": 0,
|
|
|
|
|
"fuelsystem_spfi": 0,
|
|
|
|
|
"fueltype_diesel": 0,
|
|
|
|
|
"fueltype_gas": 1,
|
|
|
|
|
"highwaympg": 30,
|
|
|
|
|
"horsepower": 102,
|
|
|
|
|
"price": 13950,
|
|
|
|
|
"wheelbase": 99.8
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"aspiration_std": 1,
|
|
|
|
|
"aspiration_turbo": 0,
|
|
|
|
|
"boreratio": 3.19,
|
|
|
|
|
"brand_category_Budget": 0,
|
|
|
|
|
"brand_category_Luxury": 0,
|
|
|
|
|
"brand_category_Mid_Range": 1,
|
|
|
|
|
"carheight": 54.3,
|
|
|
|
|
"carlength": 176.6,
|
|
|
|
|
"carwidth": 66.4,
|
|
|
|
|
"citympg": 18,
|
|
|
|
|
"curbweight": 2824,
|
|
|
|
|
"cylindernumber_eight": 0,
|
|
|
|
|
"cylindernumber_five": 1,
|
|
|
|
|
"cylindernumber_four": 0,
|
|
|
|
|
"cylindernumber_six": 0,
|
|
|
|
|
"cylindernumber_three": 0,
|
|
|
|
|
"cylindernumber_twelve": 0,
|
|
|
|
|
"cylindernumber_two": 0,
|
|
|
|
|
"doornumber_four": 1,
|
|
|
|
|
"doornumber_two": 0,
|
|
|
|
|
"drivewheel_4wd": 1,
|
|
|
|
|
"drivewheel_fwd": 0,
|
|
|
|
|
"drivewheel_rwd": 0,
|
|
|
|
|
"enginesize": 136,
|
|
|
|
|
"enginetype_dohc": 0,
|
|
|
|
|
"enginetype_dohcv": 0,
|
|
|
|
|
"enginetype_l": 0,
|
|
|
|
|
"enginetype_ohc": 1,
|
|
|
|
|
"enginetype_ohcf": 0,
|
|
|
|
|
"enginetype_ohcv": 0,
|
|
|
|
|
"enginetype_rotor": 0,
|
|
|
|
|
"fuelsystem_1bbl": 0,
|
|
|
|
|
"fuelsystem_2bbl": 0,
|
|
|
|
|
"fuelsystem_4bbl": 0,
|
|
|
|
|
"fuelsystem_idi": 0,
|
|
|
|
|
"fuelsystem_mfi": 0,
|
|
|
|
|
"fuelsystem_mpfi": 1,
|
|
|
|
|
"fuelsystem_spdi": 0,
|
|
|
|
|
"fuelsystem_spfi": 0,
|
|
|
|
|
"fueltype_diesel": 0,
|
|
|
|
|
"fueltype_gas": 1,
|
|
|
|
|
"highwaympg": 22,
|
|
|
|
|
"horsepower": 115,
|
|
|
|
|
"price": 17450,
|
|
|
|
|
"wheelbase": 99.4
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"description": null,
|
|
|
|
|
"dialect": null,
|
|
|
|
|
"encoding": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"hash": null,
|
|
|
|
|
"homepage": null,
|
|
|
|
|
"licenses": null,
|
|
|
|
|
"mediatype": null,
|
|
|
|
|
"path": null,
|
|
|
|
|
"schema": {
|
|
|
|
|
"fields": [
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand_category_Budget",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand_category_Luxury",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "brand_category_Mid_Range",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fueltype_diesel",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fueltype_gas",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "aspiration_std",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "aspiration_turbo",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "doornumber_four",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "doornumber_two",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "drivewheel_4wd",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "drivewheel_fwd",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "drivewheel_rwd",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "wheelbase",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carlength",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carwidth",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "carheight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "curbweight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype_dohc",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype_dohcv",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype_l",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype_ohc",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype_ohcf",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype_ohcv",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginetype_rotor",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber_eight",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber_five",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber_four",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber_six",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber_three",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber_twelve",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "cylindernumber_two",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "enginesize",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem_1bbl",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem_2bbl",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem_4bbl",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem_idi",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem_mfi",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem_mpfi",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem_spdi",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "fuelsystem_spfi",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "boreratio",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "horsepower",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "citympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "highwaympg",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "integer"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"constraints": null,
|
|
|
|
|
"description": null,
|
|
|
|
|
"example": null,
|
|
|
|
|
"format": null,
|
|
|
|
|
"name": "price",
|
|
|
|
|
"rdfType": null,
|
|
|
|
|
"title": null,
|
|
|
|
|
"type": "number"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"foreignKeys": null,
|
|
|
|
|
"missingValues": null,
|
|
|
|
|
"primaryKey": null
|
|
|
|
|
},
|
|
|
|
|
"sources": null,
|
|
|
|
|
"title": null
|
|
|
|
|
},
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<table><thead><tr><th>brand_category_Budget</th><th>brand_category_Luxury</th><th>brand_category_Mid_Range</th><th>fueltype_diesel</th><th>fueltype_gas</th><th>aspiration_std</th><th>aspiration_turbo</th><th>doornumber_four</th><th>doornumber_two</th><th>drivewheel_4wd</th><th>drivewheel_fwd</th><th>drivewheel_rwd</th><th>wheelbase</th><th>carlength</th><th>carwidth</th><th>carheight</th><th>curbweight</th><th>enginetype_dohc</th><th>enginetype_dohcv</th><th>enginetype_l</th><th>enginetype_ohc</th><th>enginetype_ohcf</th><th>enginetype_ohcv</th><th>enginetype_rotor</th><th>cylindernumber_eight</th><th>cylindernumber_five</th><th>cylindernumber_four</th><th>cylindernumber_six</th><th>cylindernumber_three</th><th>cylindernumber_twelve</th><th>cylindernumber_two</th><th>enginesize</th><th>fuelsystem_1bbl</th><th>fuelsystem_2bbl</th><th>fuelsystem_4bbl</th><th>fuelsystem_idi</th><th>fuelsystem_mfi</th><th>fuelsystem_mpfi</th><th>fuelsystem_spdi</th><th>fuelsystem_spfi</th><th>boreratio</th><th>horsepower</th><th>citympg</th><th>highwaympg</th><th>price</th></tr></thead><tbody><tr><td>0</td><td>0</td><td>1</td><td>0</td><td>1</td><td>1</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>1</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>130</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>3.47</td><td>111</td><td>21</td><td>27</td><td>13495</td></tr><tr><td>0</td><td>0</td><td>1</td><td>0</td><td>1</td><td>1</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>1</td><td>88.6</td><td>168.8</td><td>64.1</td><td>48.8</td><td>2548</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>130</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td>
<td>3.47</td><td>111</td><td>21</td><td>27</td><td>16500</td></tr><tr><td>0</td><td>0</td><td>1</td><td>0</td><td>1</td><td>1</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>1</td><td>94.5</td><td>171.2</td><td>65.5</td><td>52.4</td><td>2823</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>152</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>2.68</td><td>154</td><td>19</td><td>26</td><td>16500</td></tr><tr><td>0</td><td>0</td><td>1</td><td>0</td><td>1</td><td>1</td><td>0</td><td>1</td><td>0</td><td>0</td><td>1</td><td>0</td><td>99.8</td><td>176.6</td><td>66.2</td><td>54.3</td><td>2337</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>109</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>3.19</td><td>102</td><td>24</td><td>30</td><td>13950</td></tr><tr><td>0</td><td>0</td><td>1</td><td>0</td><td>1</td><td>1</td><td>0</td><td>1</td><td>0</td><td>1</td><td>0</td><td>0</td><td>99.4</td><td>176.6</td><td>66.4</td><td>54.3</td><td>2824</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>136</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>0</td><td>0</td><td>3.19</td><td>115</td><td>18</td><td>22</td><td>17450</td></tr></tbody></table>"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"execution_count": 23,
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"import { oneHotEncoding } from \"https://l12.xyz/x/shortcuts/raw/encoding.ts\";\n",
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"\n",
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"let encodedCarData = oneHotEncoding(carDataGeneralized);\n",
|
|
|
|
|
"encodedCarData.head(5);"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"execution_count": 24,
|
2024-09-25 19:52:53 +00:00
|
|
|
|
"id": "aff549ce-6736-43c7-8d40-b09b9ca7fa59",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2024-09-26 01:41:33 +00:00
|
|
|
|
"encodedCarData.writeCSV(\"assets/encoded_car_data.csv\");"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "ec799133-584f-450d-bd8b-6b443fbf5fb5",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"editable": true,
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": ""
|
|
|
|
|
},
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Deno",
|
|
|
|
|
"language": "typescript",
|
|
|
|
|
"name": "deno"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": "typescript",
|
|
|
|
|
"file_extension": ".ts",
|
|
|
|
|
"mimetype": "text/x.typescript",
|
|
|
|
|
"name": "typescript",
|
|
|
|
|
"nbconvert_exporter": "script",
|
|
|
|
|
"pygments_lexer": "typescript",
|
2024-10-04 05:24:07 +00:00
|
|
|
|
"version": "5.6.2"
|
2024-09-25 19:52:53 +00:00
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
}
|