+ plots, + enc
This commit is contained in:
parent
43e21e87a7
commit
5d38fb02f9
|
@ -27,7 +27,7 @@
|
|||
],
|
||||
"source": [
|
||||
"const document = null;\n",
|
||||
"import * as p from \"https://l12.xyz/x/shortcuts/raw/plots.ts\"\n"
|
||||
"import * as p from \"https://l12.xyz/x/shortcuts/raw/plots.ts\";"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
84
encoding.ts
84
encoding.ts
|
@ -1,6 +1,6 @@
|
|||
import pl from "npm:nodejs-polars";
|
||||
|
||||
export function oneHotEncoding(dataframe) {
|
||||
export function oneHotEncoding(dataframe: pl.DataFrame): pl.DataFrame {
|
||||
let df = pl.DataFrame();
|
||||
for (const columnName of dataframe.columns) {
|
||||
const column = dataframe[columnName];
|
||||
|
@ -12,3 +12,85 @@ export function oneHotEncoding(dataframe) {
|
|||
}
|
||||
return df;
|
||||
}
|
||||
|
||||
export function polynomialTransform(
|
||||
dataframe: pl.DataFrame,
|
||||
degree = 2,
|
||||
interaction_only = false,
|
||||
include_bias = true,
|
||||
): pl.DataFrame {
|
||||
let polyRecords: number[][] = [];
|
||||
dataframe.map((X: number[]) => {
|
||||
polyRecords.push(
|
||||
polynomialFeatures(X, degree, interaction_only, include_bias),
|
||||
);
|
||||
});
|
||||
return pl.readRecords(polyRecords);
|
||||
}
|
||||
|
||||
export function polynomialFeatures(
|
||||
X: number[],
|
||||
degree = 2,
|
||||
interaction_only = false,
|
||||
include_bias = true,
|
||||
): number[] {
|
||||
let features = [...X];
|
||||
let prev_chunk = [...X];
|
||||
const indices = Array.from({ length: X.length }, (_, i) => i);
|
||||
for (let d = 1; d < degree; d++) {
|
||||
const new_chunk: any[] = [];
|
||||
for (let i = 0; i < (interaction_only ? X.length - d : X.length); i++) {
|
||||
const v = X[i];
|
||||
const next_index = new_chunk.length;
|
||||
for (let j = i + (interaction_only ? 1 : 0); j < prev_chunk.length; j++) {
|
||||
new_chunk.push(v * prev_chunk[j]);
|
||||
}
|
||||
indices[i] = next_index;
|
||||
}
|
||||
features = features.concat(new_chunk);
|
||||
prev_chunk = new_chunk;
|
||||
}
|
||||
if (include_bias) {
|
||||
features.unshift(1);
|
||||
}
|
||||
return features;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds missing rows at given interval, uses mean of previous and next value.
|
||||
* Example for one feature: [1, 2, 4, 5] -> [1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]
|
||||
* @param feature
|
||||
* @param df
|
||||
* @param bin
|
||||
*/
|
||||
export function augmentMeanForward(
|
||||
feature: string,
|
||||
df: pl.DataFrame,
|
||||
interval = 100,
|
||||
) {
|
||||
let sorted = df.sort(feature);
|
||||
let result = sorted.head(0);
|
||||
let n: null | number = null;
|
||||
for (let i = 0; i < sorted.height - 1; i++) {
|
||||
let p1 = n ?? sorted.row(i).at(-1);
|
||||
let p2 = sorted.row(i + 1).at(-1);
|
||||
if (p2 - p1 > interval) {
|
||||
let avg = (p1 + p2) / 2;
|
||||
result = pl.concat([
|
||||
result,
|
||||
pl.concat([result.tail(2), sorted.slice({ offset: i + 1, length: 2 })])
|
||||
.shift(-1)
|
||||
.fillNull("mean")
|
||||
.tail(1),
|
||||
]);
|
||||
if (p2 - avg > interval) {
|
||||
i--;
|
||||
n = avg;
|
||||
continue;
|
||||
}
|
||||
result = pl.concat([result, sorted.slice(1, i)]);
|
||||
n = null;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
11
expr.ts
11
expr.ts
|
@ -12,9 +12,8 @@ export const fillzero = (
|
|||
value = 0.0001,
|
||||
) => (pl.all().replaceStrict(0, value, pl.all()));
|
||||
|
||||
export const ScaleExpr: pl.Expr = (pl.all().minus(pl.all().min())).div(
|
||||
pl.all().max().minus(pl.all().min()),
|
||||
);
|
||||
export const StdNormExpr: pl.Expr = pl.all().minus(pl.all().mean()).div(
|
||||
pl.all().std(),
|
||||
);
|
||||
export const minmaxScale = (col: pl.Expr) =>
|
||||
(col.minus(col.min())).div(col.max().minus(col.min()));
|
||||
|
||||
export const standardScale = (col: pl.Expr) =>
|
||||
(col.minus(col.mean())).div(col.std());
|
||||
|
|
47
plots.ts
47
plots.ts
|
@ -208,3 +208,50 @@ export function threeChart(data: any[], x = "column", opts = { width: 800 }) {
|
|||
}),
|
||||
};
|
||||
}
|
||||
|
||||
export function distPlot(...data: number[][]) {
|
||||
const colors = [
|
||||
"red",
|
||||
"blue",
|
||||
"green",
|
||||
"orange",
|
||||
"purple",
|
||||
"brown",
|
||||
"pink",
|
||||
"gray",
|
||||
"black",
|
||||
"cyan",
|
||||
"magenta",
|
||||
"yellow",
|
||||
"lightblue",
|
||||
"lightgreen",
|
||||
"lightgray",
|
||||
];
|
||||
const plt = Plot.plot({
|
||||
margin: 50,
|
||||
width: 1200,
|
||||
marks: [
|
||||
Plot.hexgrid(),
|
||||
data.map((a, i) =>
|
||||
Plot.areaY(
|
||||
a,
|
||||
Plot.binX({ y: "count" }, {
|
||||
x: (x: number) => x,
|
||||
fill: colors[i % colors.length],
|
||||
curve: "catmull-rom",
|
||||
fillOpacity: .5,
|
||||
}),
|
||||
)
|
||||
),
|
||||
],
|
||||
document,
|
||||
});
|
||||
plt.setAttribute("xmlns", "http://www.w3.org/2000/svg");
|
||||
return {
|
||||
[Symbol.for("Jupyter.display")]: () => ({
|
||||
"text/html": `<img src='data:image/svg+xml;base64,${
|
||||
btoa(unescape(encodeURIComponent(plt)))
|
||||
}'>`,
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue