From a4946c2c1e1ab9d14f84aabc1b712818d2144eb3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 14:41:14 +0000 Subject: [PATCH 1/4] Initial plan From 06702b7f63453de0f7611c9157d150e0174a22a0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 14:46:09 +0000 Subject: [PATCH 2/4] Recover autoloop tsikit-learn implementation Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- .github/workflows/ci.yml | 105 ++++++++++++ .gitignore | 3 + AGENTS.md | 79 +++++++++ biome.json | 15 ++ bunfig.toml | 2 + package.json | 34 ++++ playground/index.html | 225 ++++++++++++++++++++++++++ src/base.ts | 136 ++++++++++++++++ src/exceptions.ts | 37 +++++ src/index.ts | 31 ++++ src/linear_model/index.ts | 2 + src/linear_model/linear_regression.ts | 152 +++++++++++++++++ src/linear_model/ridge.ts | 156 ++++++++++++++++++ src/metrics/classification.ts | 173 ++++++++++++++++++++ src/metrics/index.ts | 2 + src/metrics/regression.ts | 120 ++++++++++++++ src/model_selection/index.ts | 1 + src/model_selection/split.ts | 204 +++++++++++++++++++++++ src/preprocessing/index.ts | 4 + src/preprocessing/label_encoder.ts | 54 +++++++ src/preprocessing/minmax_scaler.ts | 105 ++++++++++++ src/preprocessing/normalizer.ts | 68 ++++++++ src/preprocessing/standard_scaler.ts | 96 +++++++++++ src/utils/class_weight.ts | 69 ++++++++ src/utils/extmath.ts | 196 ++++++++++++++++++++++ src/utils/index.ts | 4 + src/utils/multiclass.ts | 68 ++++++++ src/utils/validation.ts | 104 ++++++++++++ tests/base.test.ts | 63 ++++++++ tests/linear_model.test.ts | 174 ++++++++++++++++++++ tests/metrics_model_selection.test.ts | 109 +++++++++++++ tests/preprocessing.test.ts | 119 ++++++++++++++ tsconfig.json | 24 +++ 33 files changed, 2734 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 AGENTS.md create mode 100644 
biome.json create mode 100644 bunfig.toml create mode 100644 package.json create mode 100644 playground/index.html create mode 100644 src/base.ts create mode 100644 src/exceptions.ts create mode 100644 src/index.ts create mode 100644 src/linear_model/index.ts create mode 100644 src/linear_model/linear_regression.ts create mode 100644 src/linear_model/ridge.ts create mode 100644 src/metrics/classification.ts create mode 100644 src/metrics/index.ts create mode 100644 src/metrics/regression.ts create mode 100644 src/model_selection/index.ts create mode 100644 src/model_selection/split.ts create mode 100644 src/preprocessing/index.ts create mode 100644 src/preprocessing/label_encoder.ts create mode 100644 src/preprocessing/minmax_scaler.ts create mode 100644 src/preprocessing/normalizer.ts create mode 100644 src/preprocessing/standard_scaler.ts create mode 100644 src/utils/class_weight.ts create mode 100644 src/utils/extmath.ts create mode 100644 src/utils/index.ts create mode 100644 src/utils/multiclass.ts create mode 100644 src/utils/validation.ts create mode 100644 tests/base.test.ts create mode 100644 tests/linear_model.test.ts create mode 100644 tests/metrics_model_selection.test.ts create mode 100644 tests/preprocessing.test.ts create mode 100644 tsconfig.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..2bd1ead --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,105 @@ +name: CI + +on: + push: + branches: [main, "autoloop/**"] + pull_request: + branches: [main] + +jobs: + test: + name: Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install + + - name: Type check + run: bunx tsc --noEmit + + - name: Run tests + run: bun test + + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 
+ with: + bun-version: latest + + - name: Install dependencies + run: bun install + + - name: Lint + run: bunx biome check src tests + + playground: + name: Build Playground + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install + + - name: Build library bundle + run: bun build src/index.ts --outfile playground/tsikit-learn.js --target browser --minify + + - name: Upload playground artifact + uses: actions/upload-artifact@v4 + with: + name: playground + path: playground/ + + pages: + name: Deploy to GitHub Pages + runs-on: ubuntu-latest + needs: [test, playground] + if: github.ref == 'refs/heads/main' + permissions: + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install + + - name: Build library bundle + run: bun build src/index.ts --outfile playground/tsikit-learn.js --target browser --minify + + - name: Setup Pages + uses: actions/configure-pages@v5 + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: playground/ + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9ebfc2d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +node_modules/ +dist/ +coverage/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..adbb06d --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,79 @@ +# Agent Instructions for tsikit-learn + +## Overview + +`tsikit-learn` is a TypeScript port of [scikit-learn](https://scikit-learn.org/). The project is being built one feature at a time by the Autoloop agent. 
+ +## Stack + +- **Runtime & bundler**: Bun +- **Language**: TypeScript (strictest settings — `strict: true`, `noUncheckedIndexedAccess: true`, `exactOptionalPropertyTypes: true`) +- **Linting**: Biome +- **Testing**: Bun test runner with fast-check for property-based tests +- **Data layer**: `tsb` (TypeScript pandas port) as a peer dependency; typed arrays for numeric computation + +## Directory Structure + +``` +src/ + index.ts — public entry point, re-exports everything + exceptions.ts — NotFittedError, ConvergenceWarning, etc. + base.ts — BaseEstimator, mixins, clone, check_is_fitted + utils/ + extmath.ts — math utilities (safeDot, gramMatrix, cholesky, etc.) + validation.ts — input validation + multiclass.ts — multiclass helpers + class_weight.ts — class weight utilities + index.ts — re-exports all utils + preprocessing/ + standard_scaler.ts — StandardScaler + minmax_scaler.ts — MinMaxScaler + label_encoder.ts — LabelEncoder + normalizer.ts — Normalizer + index.ts + metrics/ + regression.ts — MSE, MAE, R², MAPE, explained_variance + classification.ts — accuracy, confusion_matrix, precision, recall, F1, log_loss + index.ts + model_selection/ + split.ts — train_test_split, KFold, StratifiedKFold + index.ts + linear_model/ + linear_regression.ts — LinearRegression (OLS via Cholesky) + ridge.ts — Ridge (L2 regularization) + index.ts +tests/ + base.test.ts + preprocessing.test.ts + metrics_model_selection.test.ts + linear_model.test.ts +playground/ + index.html — interactive demos, deployed to GitHub Pages +``` + +## TypeScript Conventions + +- No `any`, no `@ts-ignore`, no `as` casts (unless provably safe) +- Use `Float64Array` for continuous numeric data, `Int32Array` for integer labels +- Use `?? 0` or null checks for `noUncheckedIndexedAccess` compliance +- Export everything from module `index.ts` files + +## Evaluation Metric + +The CI evaluation script counts TypeScript source files in `src/` (excluding `index.ts`) that contain `export`. 
Currently: **15 files**. + +## Adding a New Module + +1. Create `src/{module}/{feature}.ts` — implement the class with `fit`, `predict`/`transform`, `score` +2. Create or update `src/{module}/index.ts` — re-export from the new file +3. Update `src/index.ts` — add `export * from "./{module}/index.js"` +4. Add tests in `tests/{module}.test.ts` +5. Add a card to `playground/index.html` + +## Running Locally + +```bash +bun install +bun test +bunx tsc --noEmit +``` diff --git a/biome.json b/biome.json new file mode 100644 index 0000000..600b130 --- /dev/null +++ b/biome.json @@ -0,0 +1,15 @@ +{ + "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", + "organizeImports": { "enabled": true }, + "linter": { + "enabled": true, + "rules": { + "recommended": true + } + }, + "formatter": { + "enabled": true, + "indentStyle": "space", + "indentWidth": 2 + } +} diff --git a/bunfig.toml b/bunfig.toml new file mode 100644 index 0000000..0c9079a --- /dev/null +++ b/bunfig.toml @@ -0,0 +1,2 @@ +[test] +coverage = true diff --git a/package.json b/package.json new file mode 100644 index 0000000..e6bd00a --- /dev/null +++ b/package.json @@ -0,0 +1,34 @@ +{ + "name": "tsikit-learn", + "version": "0.1.0", + "description": "A complete TypeScript port of scikit-learn", + "type": "module", + "main": "./dist/index.js", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "types": "./dist/index.d.ts" + } + }, + "scripts": { + "build": "bun build src/index.ts --outdir dist --target browser", + "test": "bun test", + "typecheck": "bunx tsc --noEmit", + "lint": "bunx biome check src tests" + }, + "devDependencies": { + "@biomejs/biome": "^1.9.4", + "fast-check": "^3.22.0", + "typescript": "^5.7.2" + }, + "peerDependencies": { + "tsb": "^0.1.0" + }, + "peerDependenciesMeta": { + "tsb": { + "optional": true + } + } +} diff --git a/playground/index.html b/playground/index.html new file mode 100644 index 0000000..2004305 --- 
/dev/null +++ b/playground/index.html @@ -0,0 +1,225 @@ + + + + + + tsikit-learn — TypeScript scikit-learn + + + +
+

tsikit-learn 🤖

+

A complete TypeScript port of scikit-learn — one feature at a time.

+
+ +
+
+

exceptions

+

NotFittedError, ConvergenceWarning, ValueError

+ ✅ Ported +
+
+

base

+

BaseEstimator, ClassifierMixin, RegressorMixin, TransformerMixin

+ ✅ Ported +
+
+

utils

+

extmath, validation, multiclass, class_weight

+ ✅ Ported +
+
+

preprocessing

+

StandardScaler, MinMaxScaler, LabelEncoder, Normalizer

+ ✅ Ported +
+
+

metrics

+

MSE, MAE, R², accuracy, precision, recall, F1, log_loss

+ ✅ Ported +
+
+

model_selection

+

train_test_split, KFold, StratifiedKFold

+ ✅ Ported +
+
+

linear_model.LinearRegression

+

OLS via Cholesky decomposition (normal equations)

+ ✅ Ported   + ▶ Demo +
+
+

linear_model.Ridge

+

L2-regularized least squares

+ ✅ Ported +
+
+

linear_model.Lasso

+

L1-regularized least squares

+ 🕐 Pending +
+
+

linear_model.LogisticRegression

+

Logistic regression with SGD/L-BFGS solver

+ 🕐 Pending +
+
+

tree

+

DecisionTreeClassifier, DecisionTreeRegressor

+ 🕐 Pending +
+
+

neighbors

+

KNeighborsClassifier, KNeighborsRegressor, NearestNeighbors

+ 🕐 Pending +
+
+

naive_bayes

+

GaussianNB, MultinomialNB, BernoulliNB

+ 🕐 Pending +
+
+

svm

+

SVC, SVR, LinearSVC, LinearSVR

+ 🕐 Pending +
+
+

cluster

+

KMeans, DBSCAN, AgglomerativeClustering

+ 🕐 Pending +
+
+

ensemble

+

RandomForest, GradientBoosting, AdaBoost

+ 🕐 Pending +
+
+ +
+
+

LinearRegression Demo

+

Click "Generate" to create a noisy linear dataset, then "Fit" to train a LinearRegression model.

+ + + +
// Click "Generate Data" to start
+
+
+ + + + diff --git a/src/base.ts b/src/base.ts new file mode 100644 index 0000000..d7af2e1 --- /dev/null +++ b/src/base.ts @@ -0,0 +1,136 @@ +/** + * Base classes for all estimators. + * Mirrors sklearn.base. + */ + +import { NotFittedError } from "./exceptions.js"; + +export type Params = Record; + +/** + * Base class for all scikit-learn estimators. + * Provides get_params / set_params following sklearn conventions. + */ +export abstract class BaseEstimator { + /** + * Get parameters for this estimator. + * Returns own enumerable string-keyed properties that are not functions. + */ + get_params(deep = true): Params { + const out: Params = {}; + for (const key of Object.keys(this)) { + const val = (this as Record)[key]; + if (typeof val !== "function") { + out[key] = deep && val instanceof BaseEstimator ? val.get_params(deep) : val; + } + } + return out; + } + + /** Set the parameters of this estimator. */ + set_params(params: Params): this { + for (const [key, val] of Object.entries(params)) { + (this as Record)[key] = val; + } + return this; + } + + /** Assert the estimator is fitted. */ + protected _check_is_fitted(attributes: string[]): void { + const missing = attributes.filter((a) => (this as Record)[a] === undefined); + if (missing.length > 0) { + throw new NotFittedError( + `This ${this.constructor.name} instance is not fitted yet. Call 'fit' first.`, + ); + } + } +} + +/** Mixin class for all classifiers. */ +export abstract class ClassifierMixin { + readonly _estimator_type = "classifier" as const; + + /** Return the mean accuracy on the given test data and labels. */ + score(X: Float64Array[], y: Float64Array | Int32Array): number { + const yPred = this.predict(X); + let correct = 0; + for (let i = 0; i < y.length; i++) { + if ((yPred[i] ?? 0) === (y[i] ?? 0)) correct++; + } + return y.length > 0 ? correct / y.length : 0; + } + + abstract predict(X: Float64Array[]): Int32Array | Float64Array; +} + +/** Mixin class for all regressors. 
*/ +export abstract class RegressorMixin { + readonly _estimator_type = "regressor" as const; + + /** Return the coefficient of determination R² of the prediction. */ + score(X: Float64Array[], y: Float64Array): number { + const yPred = this.predict(X); + const yMean = Array.from(y).reduce((a, b) => a + b, 0) / y.length; + let ssTot = 0; + let ssRes = 0; + for (let i = 0; i < y.length; i++) { + const yi = y[i] ?? 0; + const pi = yPred[i] ?? 0; + ssTot += (yi - yMean) ** 2; + ssRes += (yi - pi) ** 2; + } + return ssTot === 0 ? 1 : 1 - ssRes / ssTot; + } + + abstract predict(X: Float64Array[]): Float64Array; +} + +/** Mixin class for all transformers. */ +export abstract class TransformerMixin { + readonly _estimator_type = "transformer" as const; + + /** Fit and transform in one step. */ + fit_transform(X: Float64Array[], y?: Float64Array | Int32Array): Float64Array[] { + return this.fit(X, y).transform(X); + } + + abstract fit(X: Float64Array[], y?: Float64Array | Int32Array): this; + abstract transform(X: Float64Array[]): Float64Array[]; +} + +/** Mixin class for all clusterers. */ +export abstract class ClusterMixin { + readonly _estimator_type = "clusterer" as const; + + /** Perform clustering on X and return cluster labels. */ + fit_predict(X: Float64Array[], y?: Float64Array | Int32Array): Int32Array { + return this.fit(X, y).labels_ ?? new Int32Array(X.length); + } + + abstract fit(X: Float64Array[], y?: Float64Array | Int32Array): this; + labels_?: Int32Array; +} + +/** Clone an estimator with the same parameters. */ +export function clone(estimator: T): T { + const Cls = estimator.constructor as new () => T; + const newEst = new Cls(); + newEst.set_params(estimator.get_params(false)); + return newEst; +} + +/** Check if an estimator is fitted by looking for a trailing underscore attribute. */ +export function check_is_fitted(estimator: BaseEstimator, attributes?: string[]): void { + const attrs = attributes ?? 
Object.keys(estimator).filter((k) => k.endsWith("_") && !k.startsWith("_")); + if (attrs.length === 0) { + throw new NotFittedError( + `This ${estimator.constructor.name} instance is not fitted yet.`, + ); + } + const missing = attrs.filter((a) => (estimator as unknown as Record)[a] === undefined); + if (missing.length > 0) { + throw new NotFittedError( + `This ${estimator.constructor.name} instance is not fitted yet. Missing attributes: ${missing.join(", ")}.`, + ); + } +} diff --git a/src/exceptions.ts b/src/exceptions.ts new file mode 100644 index 0000000..5314a48 --- /dev/null +++ b/src/exceptions.ts @@ -0,0 +1,37 @@ +/** + * Exceptions used throughout tsikit-learn. + * Mirrors sklearn.exceptions. + */ + +/** Raised when an estimator is used before being fitted. */ +export class NotFittedError extends Error { + override readonly name = "NotFittedError"; + constructor(message = "This estimator is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.") { + super(message); + } +} + +/** Warning raised when convergence is not reached. */ +export class ConvergenceWarning extends Error { + override readonly name = "ConvergenceWarning"; +} + +/** Raised when an invalid value is encountered. */ +export class ValueError extends Error { + override readonly name = "ValueError"; +} + +/** Raised when feature dimensions don't match. */ +export class DataDimensionalityWarning extends Error { + override readonly name = "DataDimensionalityWarning"; +} + +/** Warning raised when a metric is undefined, e.g. a zero denominator in precision/recall. */ +export class UndefinedMetricWarning extends Error { + override readonly name = "UndefinedMetricWarning"; +} + +/** Warning raised to notify the user of inefficient computation. 
*/ +export class EfficiencyWarning extends Error { + override readonly name = "EfficiencyWarning"; +} diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..0d022c2 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,31 @@ +/** + * tsikit-learn — A complete TypeScript port of scikit-learn. + * + * Ported modules (Phase 1 + Phase 2 + linear_model): + * - exceptions: NotFittedError, ConvergenceWarning, ValueError + * - base: BaseEstimator, ClassifierMixin, RegressorMixin, TransformerMixin, ClusterMixin + * - utils: extmath, validation, multiclass, class_weight + * - preprocessing: StandardScaler, MinMaxScaler, LabelEncoder, Normalizer + * - metrics: regression (mse, mae, r2), classification (accuracy, precision, recall, f1) + * - model_selection: train_test_split, KFold, StratifiedKFold + * - linear_model: LinearRegression, Ridge + */ + +// Core +export * from "./exceptions.js"; +export * from "./base.js"; + +// Utils +export * from "./utils/index.js"; + +// Preprocessing +export * from "./preprocessing/index.js"; + +// Metrics +export * from "./metrics/index.js"; + +// Model selection +export * from "./model_selection/index.js"; + +// Linear models +export * from "./linear_model/index.js"; diff --git a/src/linear_model/index.ts b/src/linear_model/index.ts new file mode 100644 index 0000000..1875ef5 --- /dev/null +++ b/src/linear_model/index.ts @@ -0,0 +1,2 @@ +export * from "./linear_regression.js"; +export * from "./ridge.js"; diff --git a/src/linear_model/linear_regression.ts b/src/linear_model/linear_regression.ts new file mode 100644 index 0000000..ff22f57 --- /dev/null +++ b/src/linear_model/linear_regression.ts @@ -0,0 +1,152 @@ +/** + * Linear Regression — Ordinary Least Squares. + * Mirrors sklearn.linear_model.LinearRegression. + * + * Uses the normal equations: β = (X.T X)⁻¹ X.T y + * Solved via Cholesky decomposition for numerical stability. 
+ */ + +import { BaseEstimator, RegressorMixin } from "../base.js"; +import { checkArray, checkXy } from "../utils/validation.js"; +import { + gramMatrix, + xtDotY, + choleskyLinsolve, + safeDot, + addDiagonal, +} from "../utils/extmath.js"; + +export interface LinearRegressionParams { + fit_intercept?: boolean; + copy_X?: boolean; + positive?: boolean; +} + +/** + * Ordinary least squares Linear Regression. + * + * Minimizes the residual sum of squares between observed and predicted values. + * Equivalent to sklearn.linear_model.LinearRegression. + * + * @example + * ```ts + * import { LinearRegression } from 'tsikit-learn'; + * + * const X = [new Float64Array([1]), new Float64Array([2]), new Float64Array([3])]; + * const y = new Float64Array([2, 4, 6]); + * + * const reg = new LinearRegression(); + * reg.fit(X, y); + * console.log(reg.coef_); // Float64Array [2] + * console.log(reg.intercept_); // ~0 + * console.log(reg.predict([new Float64Array([4])])); // Float64Array [8] + * ``` + */ +export class LinearRegression extends BaseEstimator { + fit_intercept: boolean; + copy_X: boolean; + positive: boolean; + + coef_?: Float64Array; + intercept_?: number; + n_features_in_?: number; + rank_?: number; + + constructor(params: LinearRegressionParams = {}) { + super(); + this.fit_intercept = params.fit_intercept ?? true; + this.copy_X = params.copy_X ?? true; + this.positive = params.positive ?? false; + } + + fit(X: Float64Array[], y: Float64Array): this { + checkXy(X, y); + checkArray(X); + + const n = X.length; + const nFeatures = (X[0] ?? new Float64Array(0)).length; + this.n_features_in_ = nFeatures; + + let XCenter = X; + let yCenter = y; + let xMean: Float64Array | undefined; + let yMean = 0; + + if (this.fit_intercept) { + // Center X and y + xMean = new Float64Array(nFeatures); + for (let i = 0; i < n; i++) { + const row = X[i] ?? new Float64Array(nFeatures); + for (let j = 0; j < nFeatures; j++) { + xMean[j] = (xMean[j] ?? 0) + (row[j] ?? 
0); + } + } + for (let j = 0; j < nFeatures; j++) { + xMean[j] = (xMean[j] ?? 0) / n; + } + yMean = 0; + for (const v of y) yMean += v; + yMean /= n; + + XCenter = X.map((row) => { + const centered = new Float64Array(row); + for (let j = 0; j < centered.length; j++) { + centered[j] = (centered[j] ?? 0) - (xMean as Float64Array)[j]!; + } + return centered; + }); + yCenter = new Float64Array(y.length); + for (let i = 0; i < y.length; i++) { + yCenter[i] = (y[i] ?? 0) - yMean; + } + } + + // Solve normal equations: (X.T @ X) @ β = X.T @ y + const XtX = gramMatrix(XCenter); + const Xty = xtDotY(XCenter, yCenter); + + // Add tiny ridge to handle near-singular matrices + addDiagonal(XtX, 1e-12); + + const coef = choleskyLinsolve(XtX, Xty); + this.coef_ = coef; + this.rank_ = nFeatures; + + if (this.fit_intercept && xMean !== undefined) { + let intercept = yMean; + for (let j = 0; j < nFeatures; j++) { + intercept -= (coef[j] ?? 0) * (xMean[j] ?? 0); + } + this.intercept_ = intercept; + } else { + this.intercept_ = 0; + } + + return this; + } + + predict(X: Float64Array[]): Float64Array { + this._check_is_fitted(["coef_", "intercept_"]); + const coef = this.coef_ as Float64Array; + const intercept = this.intercept_ as number; + const yPred = safeDot(X, coef); + for (let i = 0; i < yPred.length; i++) { + yPred[i] = (yPred[i] ?? 0) + intercept; + } + return yPred; + } + + /** R² score on test data. */ + score(X: Float64Array[], y: Float64Array): number { + const yPred = this.predict(X); + const yMean = Array.from(y).reduce((a, b) => a + b, 0) / y.length; + let ssTot = 0; + let ssRes = 0; + for (let i = 0; i < y.length; i++) { + const yi = y[i] ?? 0; + ssTot += (yi - yMean) ** 2; + ssRes += (yi - (yPred[i] ?? 0)) ** 2; + } + return ssTot === 0 ? 
1 : 1 - ssRes / ssTot; + } +} diff --git a/src/linear_model/ridge.ts b/src/linear_model/ridge.ts new file mode 100644 index 0000000..f431b40 --- /dev/null +++ b/src/linear_model/ridge.ts @@ -0,0 +1,156 @@ +/** + * Ridge Regression — L2-regularized Linear Regression. + * Mirrors sklearn.linear_model.Ridge. + * + * Minimizes: ||y - Xw||² + alpha * ||w||² + * Solved as: β = (X.T X + alpha * I)⁻¹ X.T y + */ + +import { BaseEstimator } from "../base.js"; +import { checkArray, checkXy } from "../utils/validation.js"; +import { + gramMatrix, + xtDotY, + choleskyLinsolve, + safeDot, + addDiagonal, +} from "../utils/extmath.js"; + +export interface RidgeParams { + alpha?: number; + fit_intercept?: boolean; + copy_X?: boolean; + max_iter?: number; + tol?: number; + solver?: "auto" | "cholesky"; +} + +/** + * Linear least squares with L2 regularization. + * + * Equivalent to sklearn.linear_model.Ridge. + * + * @example + * ```ts + * import { Ridge } from 'tsikit-learn'; + * + * const X = [new Float64Array([1, 0]), new Float64Array([0, 1]), new Float64Array([1, 1])]; + * const y = new Float64Array([1, 2, 3]); + * + * const reg = new Ridge({ alpha: 1.0 }); + * reg.fit(X, y); + * console.log(reg.coef_); + * ``` + */ +export class Ridge extends BaseEstimator { + alpha: number; + fit_intercept: boolean; + copy_X: boolean; + max_iter: number; + tol: number; + solver: "auto" | "cholesky"; + + coef_?: Float64Array; + intercept_?: number; + n_features_in_?: number; + n_iter_?: number; + + constructor(params: RidgeParams = {}) { + super(); + this.alpha = params.alpha ?? 1.0; + this.fit_intercept = params.fit_intercept ?? true; + this.copy_X = params.copy_X ?? true; + this.max_iter = params.max_iter ?? 1000; + this.tol = params.tol ?? 1e-4; + this.solver = params.solver ?? "auto"; + } + + fit(X: Float64Array[], y: Float64Array): this { + checkXy(X, y); + checkArray(X); + + const n = X.length; + const nFeatures = (X[0] ?? 
new Float64Array(0)).length; + this.n_features_in_ = nFeatures; + + let XCenter = X; + let yCenter = y; + let xMean: Float64Array | undefined; + let yMean = 0; + + if (this.fit_intercept) { + xMean = new Float64Array(nFeatures); + for (let i = 0; i < n; i++) { + const row = X[i] ?? new Float64Array(nFeatures); + for (let j = 0; j < nFeatures; j++) { + xMean[j] = (xMean[j] ?? 0) + (row[j] ?? 0); + } + } + for (let j = 0; j < nFeatures; j++) { + xMean[j] = (xMean[j] ?? 0) / n; + } + for (const v of y) yMean += v; + yMean /= n; + + XCenter = X.map((row) => { + const centered = new Float64Array(row); + for (let j = 0; j < centered.length; j++) { + centered[j] = (centered[j] ?? 0) - (xMean as Float64Array)[j]!; + } + return centered; + }); + yCenter = new Float64Array(y.length); + for (let i = 0; i < y.length; i++) { + yCenter[i] = (y[i] ?? 0) - yMean; + } + } + + // Solve (X.T @ X + alpha * I) @ β = X.T @ y + const XtX = gramMatrix(XCenter); + const Xty = xtDotY(XCenter, yCenter); + + // Add alpha * I (ridge regularization) + addDiagonal(XtX, this.alpha); + + const coef = choleskyLinsolve(XtX, Xty); + this.coef_ = coef; + this.n_iter_ = 1; + + if (this.fit_intercept && xMean !== undefined) { + let intercept = yMean; + for (let j = 0; j < nFeatures; j++) { + intercept -= (coef[j] ?? 0) * (xMean[j] ?? 0); + } + this.intercept_ = intercept; + } else { + this.intercept_ = 0; + } + + return this; + } + + predict(X: Float64Array[]): Float64Array { + this._check_is_fitted(["coef_", "intercept_"]); + const coef = this.coef_ as Float64Array; + const intercept = this.intercept_ as number; + const yPred = safeDot(X, coef); + for (let i = 0; i < yPred.length; i++) { + yPred[i] = (yPred[i] ?? 0) + intercept; + } + return yPred; + } + + /** R² score on test data. 
*/ + score(X: Float64Array[], y: Float64Array): number { + const yPred = this.predict(X); + const yMean = Array.from(y).reduce((a, b) => a + b, 0) / y.length; + let ssTot = 0; + let ssRes = 0; + for (let i = 0; i < y.length; i++) { + const yi = y[i] ?? 0; + ssTot += (yi - yMean) ** 2; + ssRes += (yi - (yPred[i] ?? 0)) ** 2; + } + return ssTot === 0 ? 1 : 1 - ssRes / ssTot; + } +} diff --git a/src/metrics/classification.ts b/src/metrics/classification.ts new file mode 100644 index 0000000..34fbdc9 --- /dev/null +++ b/src/metrics/classification.ts @@ -0,0 +1,173 @@ +/** + * Classification metrics. + * Mirrors sklearn.metrics (classification subset). + */ + +import { ValueError } from "../exceptions.js"; + +/** Accuracy score. */ +export function accuracy_score( + yTrue: Float64Array | Int32Array, + yPred: Float64Array | Int32Array, + normalize = true, +): number { + if (yTrue.length !== yPred.length) { + throw new ValueError("yTrue and yPred must have the same length"); + } + let correct = 0; + for (let i = 0; i < yTrue.length; i++) { + if ((yTrue[i] ?? 0) === (yPred[i] ?? 0)) correct++; + } + return normalize ? (yTrue.length > 0 ? correct / yTrue.length : 0) : correct; +} + +/** Confusion matrix. Returns a 2D array [actual][predicted]. */ +export function confusion_matrix( + yTrue: Float64Array | Int32Array, + yPred: Float64Array | Int32Array, + labels?: Int32Array, +): number[][] { + const labelSet = labels ?? (() => { + const s = new Set(); + for (const v of yTrue) s.add(v); + for (const v of yPred) s.add(v); + return new Int32Array([...s].sort((a, b) => a - b)); + })(); + + const n = labelSet.length; + const labelIdx = new Map(); + for (let i = 0; i < n; i++) labelIdx.set(labelSet[i] ?? 0, i); + + const matrix: number[][] = Array.from({ length: n }, () => new Array(n).fill(0)); + for (let i = 0; i < yTrue.length; i++) { + const ti = labelIdx.get(yTrue[i] ?? 0); + const pi = labelIdx.get(yPred[i] ?? 
0); + if (ti !== undefined && pi !== undefined) { + (matrix[ti] as number[])[pi] = ((matrix[ti] as number[])[pi] ?? 0) + 1; + } + } + return matrix; +} + +/** Precision score for binary or multiclass (macro average). */ +export function precision_score( + yTrue: Float64Array | Int32Array, + yPred: Float64Array | Int32Array, + options: { average?: "binary" | "macro" | "micro"; posLabel?: number } = {}, +): number { + const { average = "binary", posLabel = 1 } = options; + const classes = (() => { + const s = new Set(); + for (const v of yTrue) s.add(v); + return new Int32Array([...s].sort((a, b) => a - b)); + })(); + + if (average === "binary") { + let tp = 0; + let fp = 0; + for (let i = 0; i < yTrue.length; i++) { + if ((yPred[i] ?? 0) === posLabel) { + if ((yTrue[i] ?? 0) === posLabel) tp++; + else fp++; + } + } + return tp + fp === 0 ? 0 : tp / (tp + fp); + } + + if (average === "macro") { + let total = 0; + for (const c of classes) { + let tp = 0; + let fp = 0; + for (let i = 0; i < yTrue.length; i++) { + if ((yPred[i] ?? 0) === c) { + if ((yTrue[i] ?? 0) === c) tp++; + else fp++; + } + } + total += tp + fp === 0 ? 0 : tp / (tp + fp); + } + return classes.length > 0 ? total / classes.length : 0; + } + + // micro + let tp = 0; + let fp = 0; + for (let i = 0; i < yTrue.length; i++) { + if ((yPred[i] ?? 0) === (yTrue[i] ?? 0)) tp++; + else fp++; + } + return tp + fp === 0 ? 0 : tp / (tp + fp); +} + +/** Recall score. */ +export function recall_score( + yTrue: Float64Array | Int32Array, + yPred: Float64Array | Int32Array, + options: { average?: "binary" | "macro" | "micro"; posLabel?: number } = {}, +): number { + const { average = "binary", posLabel = 1 } = options; + const classes = (() => { + const s = new Set(); + for (const v of yTrue) s.add(v); + return new Int32Array([...s].sort((a, b) => a - b)); + })(); + + if (average === "binary") { + let tp = 0; + let fn = 0; + for (let i = 0; i < yTrue.length; i++) { + if ((yTrue[i] ?? 
0) === posLabel) { + if ((yPred[i] ?? 0) === posLabel) tp++; + else fn++; + } + } + return tp + fn === 0 ? 0 : tp / (tp + fn); + } + + if (average === "macro") { + let total = 0; + for (const c of classes) { + let tp = 0; + let fn = 0; + for (let i = 0; i < yTrue.length; i++) { + if ((yTrue[i] ?? 0) === c) { + if ((yPred[i] ?? 0) === c) tp++; + else fn++; + } + } + total += tp + fn === 0 ? 0 : tp / (tp + fn); + } + return classes.length > 0 ? total / classes.length : 0; + } + + return accuracy_score(yTrue, yPred); +} + +/** F1 score. */ +export function f1_score( + yTrue: Float64Array | Int32Array, + yPred: Float64Array | Int32Array, + options: { average?: "binary" | "macro" | "micro"; posLabel?: number } = {}, +): number { + const p = precision_score(yTrue, yPred, options); + const r = recall_score(yTrue, yPred, options); + return p + r === 0 ? 0 : (2 * p * r) / (p + r); +} + +/** Log loss (cross-entropy). */ +export function log_loss( + yTrue: Float64Array | Int32Array, + yProba: Float64Array[], + eps = 1e-15, +): number { + let total = 0; + for (let i = 0; i < yTrue.length; i++) { + const row = yProba[i] ?? new Float64Array(0); + const label = yTrue[i] ?? 0; + // For binary: row[1] is P(class=1) + const p = Math.min(1 - eps, Math.max(eps, row[label] ?? eps)); + total += -Math.log(p); + } + return yTrue.length > 0 ? total / yTrue.length : 0; +} diff --git a/src/metrics/index.ts b/src/metrics/index.ts new file mode 100644 index 0000000..96b3cab --- /dev/null +++ b/src/metrics/index.ts @@ -0,0 +1,2 @@ +export * from "./regression.js"; +export * from "./classification.js"; diff --git a/src/metrics/regression.ts b/src/metrics/regression.ts new file mode 100644 index 0000000..c42b5d3 --- /dev/null +++ b/src/metrics/regression.ts @@ -0,0 +1,120 @@ +/** + * Regression metrics. + * Mirrors sklearn.metrics (regression subset). + */ + +import { ValueError } from "../exceptions.js"; + +/** Mean squared error. 
*/ +export function mean_squared_error( + yTrue: Float64Array, + yPred: Float64Array, + options: { sampleWeight?: Float64Array; squared?: boolean } = {}, +): number { + const { sampleWeight, squared = true } = options; + if (yTrue.length !== yPred.length) { + throw new ValueError("yTrue and yPred must have the same length"); + } + let total = 0; + let wSum = 0; + for (let i = 0; i < yTrue.length; i++) { + const diff = (yTrue[i] ?? 0) - (yPred[i] ?? 0); + const w = sampleWeight ? (sampleWeight[i] ?? 1) : 1; + total += w * diff * diff; + wSum += w; + } + const mse = wSum > 0 ? total / wSum : 0; + return squared ? mse : Math.sqrt(mse); +} + +/** Mean absolute error. */ +export function mean_absolute_error( + yTrue: Float64Array, + yPred: Float64Array, + sampleWeight?: Float64Array, +): number { + if (yTrue.length !== yPred.length) { + throw new ValueError("yTrue and yPred must have the same length"); + } + let total = 0; + let wSum = 0; + for (let i = 0; i < yTrue.length; i++) { + const w = sampleWeight ? (sampleWeight[i] ?? 1) : 1; + total += w * Math.abs((yTrue[i] ?? 0) - (yPred[i] ?? 0)); + wSum += w; + } + return wSum > 0 ? total / wSum : 0; +} + +/** R² score (coefficient of determination). */ +export function r2_score( + yTrue: Float64Array, + yPred: Float64Array, + sampleWeight?: Float64Array, +): number { + if (yTrue.length !== yPred.length) { + throw new ValueError("yTrue and yPred must have the same length"); + } + let wSum = 0; + let yMeanNum = 0; + for (let i = 0; i < yTrue.length; i++) { + const w = sampleWeight ? (sampleWeight[i] ?? 1) : 1; + yMeanNum += w * (yTrue[i] ?? 0); + wSum += w; + } + const yMean = wSum > 0 ? yMeanNum / wSum : 0; + + let ssTot = 0; + let ssRes = 0; + for (let i = 0; i < yTrue.length; i++) { + const w = sampleWeight ? (sampleWeight[i] ?? 1) : 1; + const diff = (yTrue[i] ?? 0) - yMean; + ssTot += w * diff * diff; + ssRes += w * ((yTrue[i] ?? 0) - (yPred[i] ?? 0)) ** 2; + } + return ssTot === 0 ? 
1 : 1 - ssRes / ssTot; +} + +/** Mean absolute percentage error. */ +export function mean_absolute_percentage_error( + yTrue: Float64Array, + yPred: Float64Array, +): number { + if (yTrue.length !== yPred.length) { + throw new ValueError("yTrue and yPred must have the same length"); + } + let total = 0; + for (let i = 0; i < yTrue.length; i++) { + const yt = yTrue[i] ?? 0; + if (yt === 0) continue; + total += Math.abs((yt - (yPred[i] ?? 0)) / yt); + } + return total / yTrue.length; +} + +/** Explained variance score. */ +export function explained_variance_score( + yTrue: Float64Array, + yPred: Float64Array, +): number { + const n = yTrue.length; + let meanTrue = 0; + let meanErr = 0; + for (let i = 0; i < n; i++) { + meanTrue += yTrue[i] ?? 0; + meanErr += (yTrue[i] ?? 0) - (yPred[i] ?? 0); + } + meanTrue /= n; + meanErr /= n; + + let varTrue = 0; + let varErr = 0; + for (let i = 0; i < n; i++) { + varTrue += ((yTrue[i] ?? 0) - meanTrue) ** 2; + varErr += ((yTrue[i] ?? 0) - (yPred[i] ?? 0) - meanErr) ** 2; + } + varTrue /= n; + varErr /= n; + + return varTrue === 0 ? 0 : 1 - varErr / varTrue; +} diff --git a/src/model_selection/index.ts b/src/model_selection/index.ts new file mode 100644 index 0000000..35a025e --- /dev/null +++ b/src/model_selection/index.ts @@ -0,0 +1 @@ +export * from "./split.js"; diff --git a/src/model_selection/split.ts b/src/model_selection/split.ts new file mode 100644 index 0000000..781928e --- /dev/null +++ b/src/model_selection/split.ts @@ -0,0 +1,204 @@ +/** + * Model selection utilities: train/test split and cross-validation. + * Mirrors sklearn.model_selection. 
+ */ + +import { ValueError } from "../exceptions.js"; + +export interface TrainTestSplitOptions { + testSize?: number; + trainSize?: number; + randomState?: number; + shuffle?: boolean; + stratify?: Float64Array | Int32Array; +} + +export interface TrainTestSplitResult { + XTrain: Float64Array[]; + XTest: Float64Array[]; + yTrain: Float64Array | Int32Array; + yTest: Float64Array | Int32Array; +} + +/** Simple linear congruential generator for reproducible shuffles. */ +function lcg(seed: number): () => number { + let s = seed; + return () => { + s = (s * 1664525 + 1013904223) & 0xffffffff; + return (s >>> 0) / 0x100000000; + }; +} + +/** Fisher-Yates shuffle with optional seed. */ +function shuffleIndices(n: number, rng: () => number): Int32Array { + const idx = new Int32Array(n); + for (let i = 0; i < n; i++) idx[i] = i; + for (let i = n - 1; i > 0; i--) { + const j = Math.floor(rng() * (i + 1)); + const tmp = idx[i] ?? 0; + idx[i] = idx[j] ?? 0; + idx[j] = tmp; + } + return idx; +} + +/** + * Split arrays or matrices into random train and test subsets. + * Mirrors sklearn.model_selection.train_test_split. + */ +export function train_test_split( + X: Float64Array[], + y: Float64Array | Int32Array, + options: TrainTestSplitOptions = {}, +): TrainTestSplitResult { + const { testSize = 0.25, randomState = 42, shuffle = true } = options; + const n = X.length; + const nTest = Math.max(1, Math.round(n * testSize)); + const nTrain = n - nTest; + + if (nTrain <= 0) { + throw new ValueError(`With n_samples=${n} and test_size=${testSize}, the resulting train set would be empty.`); + } + + const rng = lcg(randomState); + const indices = shuffle ? shuffleIndices(n, rng) : (() => { + const idx = new Int32Array(n); + for (let i = 0; i < n; i++) idx[i] = i; + return idx; + })(); + + const trainIdx = indices.slice(0, nTrain); + const testIdx = indices.slice(nTrain); + + const XTrain = Array.from(trainIdx, (i) => X[i] ?? 
new Float64Array(0)); + const XTest = Array.from(testIdx, (i) => X[i] ?? new Float64Array(0)); + + const isInt = y instanceof Int32Array; + const yTrain = isInt + ? new Int32Array(Array.from(trainIdx, (i) => (y as Int32Array)[i] ?? 0)) + : new Float64Array(Array.from(trainIdx, (i) => (y as Float64Array)[i] ?? 0)); + const yTest = isInt + ? new Int32Array(Array.from(testIdx, (i) => (y as Int32Array)[i] ?? 0)) + : new Float64Array(Array.from(testIdx, (i) => (y as Float64Array)[i] ?? 0)); + + return { XTrain, XTest, yTrain, yTest }; +} + +export interface KFoldOptions { + nSplits?: number; + shuffle?: boolean; + randomState?: number; +} + +export interface Fold { + trainIndex: Int32Array; + testIndex: Int32Array; +} + +/** + * K-Folds cross-validator. + * Mirrors sklearn.model_selection.KFold. + */ +export class KFold { + nSplits: number; + shuffle: boolean; + randomState: number; + + constructor(options: KFoldOptions = {}) { + this.nSplits = options.nSplits ?? 5; + this.shuffle = options.shuffle ?? false; + this.randomState = options.randomState ?? 0; + } + + /** Generate indices to split data into training and test sets. */ + *split(X: Float64Array[]): Generator { + const n = X.length; + if (this.nSplits > n) { + throw new ValueError( + `Cannot have number of splits n_splits=${this.nSplits} greater than the number of samples=${n}`, + ); + } + + const rng = lcg(this.randomState); + const indices = this.shuffle ? shuffleIndices(n, rng) : (() => { + const idx = new Int32Array(n); + for (let i = 0; i < n; i++) idx[i] = i; + return idx; + })(); + + const foldSizes = new Int32Array(this.nSplits).fill(Math.floor(n / this.nSplits)); + for (let i = 0; i < n % this.nSplits; i++) { + foldSizes[i] = (foldSizes[i] ?? 0) + 1; + } + + let current = 0; + for (let fold = 0; fold < this.nSplits; fold++) { + const start = current; + const stop = current + (foldSizes[fold] ?? 
0); + const testIndex = indices.slice(start, stop); + const trainIndex = new Int32Array([ + ...Array.from(indices.slice(0, start)), + ...Array.from(indices.slice(stop)), + ]); + yield { trainIndex, testIndex }; + current = stop; + } + } + + getNumSplits(): number { + return this.nSplits; + } +} + +export interface StratifiedKFoldOptions { + nSplits?: number; + shuffle?: boolean; + randomState?: number; +} + +/** + * Stratified K-Folds cross-validator. + * Mirrors sklearn.model_selection.StratifiedKFold. + */ +export class StratifiedKFold { + nSplits: number; + shuffle: boolean; + randomState: number; + + constructor(options: StratifiedKFoldOptions = {}) { + this.nSplits = options.nSplits ?? 5; + this.shuffle = options.shuffle ?? false; + this.randomState = options.randomState ?? 0; + } + + *split(X: Float64Array[], y: Float64Array | Int32Array): Generator { + const n = X.length; + const rng = lcg(this.randomState); + + // Group indices by class + const classIndices = new Map(); + for (let i = 0; i < n; i++) { + const c = y[i] ?? 0; + if (!classIndices.has(c)) classIndices.set(c, []); + (classIndices.get(c) as number[]).push(i); + } + + // Assign indices to folds + const foldIndices: number[][] = Array.from({ length: this.nSplits }, () => []); + for (const [, idxList] of classIndices) { + const shuffled = this.shuffle ? 
[...idxList].sort(() => rng() - 0.5) : idxList; + shuffled.forEach((idx, i) => { + (foldIndices[i % this.nSplits] as number[]).push(idx); + }); + } + + for (let fold = 0; fold < this.nSplits; fold++) { + const testIndex = new Int32Array(foldIndices[fold] as number[]); + const trainIndicesList: number[] = []; + for (let f = 0; f < this.nSplits; f++) { + if (f !== fold) trainIndicesList.push(...(foldIndices[f] as number[])); + } + yield { trainIndex: new Int32Array(trainIndicesList), testIndex }; + } + } +} diff --git a/src/preprocessing/index.ts b/src/preprocessing/index.ts new file mode 100644 index 0000000..7c8f35b --- /dev/null +++ b/src/preprocessing/index.ts @@ -0,0 +1,4 @@ +export * from "./standard_scaler.js"; +export * from "./minmax_scaler.js"; +export * from "./label_encoder.js"; +export * from "./normalizer.js"; diff --git a/src/preprocessing/label_encoder.ts b/src/preprocessing/label_encoder.ts new file mode 100644 index 0000000..234dfb5 --- /dev/null +++ b/src/preprocessing/label_encoder.ts @@ -0,0 +1,54 @@ +/** + * LabelEncoder — encode target labels with value between 0 and n_classes-1. + * Mirrors sklearn.preprocessing.LabelEncoder. + */ + +import { BaseEstimator } from "../base.js"; +import { ValueError } from "../exceptions.js"; + +export class LabelEncoder extends BaseEstimator { + classes_?: Int32Array; + + fit(y: Float64Array | Int32Array): this { + const unique = new Set(); + for (const v of y) unique.add(v); + this.classes_ = new Int32Array([...unique].sort((a, b) => a - b)); + return this; + } + + transform(y: Float64Array | Int32Array): Int32Array { + this._check_is_fitted(["classes_"]); + const classes = this.classes_ as Int32Array; + const classMap = new Map(); + for (let i = 0; i < classes.length; i++) { + classMap.set(classes[i] ?? 0, i); + } + const result = new Int32Array(y.length); + for (let i = 0; i < y.length; i++) { + const encoded = classMap.get(y[i] ?? 
0); + if (encoded === undefined) { + throw new ValueError(`y contains previously unseen labels: ${String(y[i])}`); + } + result[i] = encoded; + } + return result; + } + + inverse_transform(y: Int32Array): Int32Array { + this._check_is_fitted(["classes_"]); + const classes = this.classes_ as Int32Array; + const result = new Int32Array(y.length); + for (let i = 0; i < y.length; i++) { + const idx = y[i] ?? 0; + if (idx < 0 || idx >= classes.length) { + throw new ValueError(`y contains values not in the fitted classes`); + } + result[i] = classes[idx] ?? 0; + } + return result; + } + + fit_transform(y: Float64Array | Int32Array): Int32Array { + return this.fit(y).transform(y); + } +} diff --git a/src/preprocessing/minmax_scaler.ts b/src/preprocessing/minmax_scaler.ts new file mode 100644 index 0000000..83276f7 --- /dev/null +++ b/src/preprocessing/minmax_scaler.ts @@ -0,0 +1,105 @@ +/** + * MinMaxScaler — scales features to a given range. + * Mirrors sklearn.preprocessing.MinMaxScaler. + */ + +import { BaseEstimator } from "../base.js"; +import { checkArray } from "../utils/validation.js"; +import { ValueError } from "../exceptions.js"; + +export interface MinMaxScalerParams { + feature_range?: [number, number]; + copy?: boolean; + clip?: boolean; +} + +export class MinMaxScaler extends BaseEstimator { + feature_range: [number, number]; + copy: boolean; + clip: boolean; + + data_min_?: Float64Array; + data_max_?: Float64Array; + data_range_?: Float64Array; + scale_?: Float64Array; + min_?: Float64Array; + n_features_in_?: number; + n_samples_seen_?: number; + + constructor(params: MinMaxScalerParams = {}) { + super(); + this.feature_range = params.feature_range ?? [0, 1]; + this.copy = params.copy ?? true; + this.clip = params.clip ?? 
false; + } + + fit(X: Float64Array[], _y?: Float64Array | Int32Array): this { + checkArray(X); + const [rMin, rMax] = this.feature_range; + if (rMin >= rMax) { + throw new ValueError( + `Minimum of desired feature range must be smaller than maximum. Got ${String(this.feature_range)}.`, + ); + } + const n = X.length; + const p = (X[0] ?? new Float64Array(0)).length; + this.n_samples_seen_ = n; + this.n_features_in_ = p; + + const dataMin = new Float64Array(p).fill(Infinity); + const dataMax = new Float64Array(p).fill(-Infinity); + for (const row of X) { + for (let j = 0; j < p; j++) { + const v = row[j] ?? 0; + if (v < (dataMin[j] ?? Infinity)) dataMin[j] = v; + if (v > (dataMax[j] ?? -Infinity)) dataMax[j] = v; + } + } + this.data_min_ = dataMin; + this.data_max_ = dataMax; + this.data_range_ = Float64Array.from(dataMax, (v, i) => v - (dataMin[i] ?? 0)); + const rangeScale = rMax - rMin; + this.scale_ = Float64Array.from(this.data_range_, (v) => + v === 0 ? 0 : rangeScale / v, + ); + this.min_ = Float64Array.from(this.scale_, (v, i) => + rMin - v * (dataMin[i] ?? 0), + ); + return this; + } + + transform(X: Float64Array[]): Float64Array[] { + this._check_is_fitted(["scale_", "min_"]); + const scale = this.scale_ as Float64Array; + const min = this.min_ as Float64Array; + const [rMin, rMax] = this.feature_range; + return X.map((row) => { + const out = this.copy ? new Float64Array(row) : row; + for (let j = 0; j < out.length; j++) { + out[j] = (out[j] ?? 0) * (scale[j] ?? 1) + (min[j] ?? 0); + if (this.clip) { + out[j] = Math.max(rMin, Math.min(rMax, out[j] ?? 0)); + } + } + return out; + }); + } + + inverse_transform(X: Float64Array[]): Float64Array[] { + this._check_is_fitted(["scale_", "min_"]); + const scale = this.scale_ as Float64Array; + const min = this.min_ as Float64Array; + return X.map((row) => { + const out = new Float64Array(row); + for (let j = 0; j < out.length; j++) { + const s = scale[j] ?? 0; + out[j] = s !== 0 ? ((out[j] ?? 0) - (min[j] ?? 
0)) / s : 0; + } + return out; + }); + } + + fit_transform(X: Float64Array[], y?: Float64Array | Int32Array): Float64Array[] { + return this.fit(X, y).transform(X); + } +} diff --git a/src/preprocessing/normalizer.ts b/src/preprocessing/normalizer.ts new file mode 100644 index 0000000..7af81b9 --- /dev/null +++ b/src/preprocessing/normalizer.ts @@ -0,0 +1,68 @@ +/** + * Normalizer — normalize samples individually to unit norm. + * Mirrors sklearn.preprocessing.Normalizer. + */ + +import { BaseEstimator } from "../base.js"; +import { ValueError } from "../exceptions.js"; + +export type NormType = "l1" | "l2" | "max"; + +export interface NormalizerParams { + norm?: NormType; + copy?: boolean; +} + +export class Normalizer extends BaseEstimator { + norm: NormType; + copy: boolean; + + constructor(params: NormalizerParams = {}) { + super(); + this.norm = params.norm ?? "l2"; + this.copy = params.copy ?? true; + } + + fit(_X: Float64Array[], _y?: Float64Array | Int32Array): this { + // Normalizer is stateless — nothing to fit + return this; + } + + transform(X: Float64Array[]): Float64Array[] { + return X.map((row) => { + const out = this.copy ? new Float64Array(row) : row; + const norm = this._computeNorm(out); + if (norm === 0) return out; + for (let j = 0; j < out.length; j++) { + out[j] = (out[j] ?? 
0) / norm; + } + return out; + }); + } + + fit_transform(X: Float64Array[], _y?: Float64Array | Int32Array): Float64Array[] { + return this.transform(X); + } + + private _computeNorm(row: Float64Array): number { + switch (this.norm) { + case "l1": { + let sum = 0; + for (const v of row) sum += Math.abs(v); + return sum; + } + case "l2": { + let sum = 0; + for (const v of row) sum += v * v; + return Math.sqrt(sum); + } + case "max": { + let max = 0; + for (const v of row) max = Math.max(max, Math.abs(v)); + return max; + } + default: + throw new ValueError(`Unknown norm: ${String(this.norm)}`); + } + } +} diff --git a/src/preprocessing/standard_scaler.ts b/src/preprocessing/standard_scaler.ts new file mode 100644 index 0000000..3154b41 --- /dev/null +++ b/src/preprocessing/standard_scaler.ts @@ -0,0 +1,96 @@ +/** + * StandardScaler — zero-mean, unit-variance normalization. + * Mirrors sklearn.preprocessing.StandardScaler. + */ + +import { BaseEstimator, TransformerMixin } from "../base.js"; +import { checkArray, checkFeaturesConsistency } from "../utils/validation.js"; +import { ValueError } from "../exceptions.js"; + +export interface StandardScalerParams { + copy?: boolean; + with_mean?: boolean; + with_std?: boolean; +} + +export class StandardScaler extends BaseEstimator { + copy: boolean; + with_mean: boolean; + with_std: boolean; + + mean_?: Float64Array; + scale_?: Float64Array; + var_?: Float64Array; + n_features_in_?: number; + n_samples_seen_?: number; + + constructor(params: StandardScalerParams = {}) { + super(); + this.copy = params.copy ?? true; + this.with_mean = params.with_mean ?? true; + this.with_std = params.with_std ?? true; + } + + fit(X: Float64Array[], _y?: Float64Array | Int32Array): this { + checkArray(X); + const n = X.length; + const p = (X[0] ?? 
new Float64Array(0)).length; + this.n_samples_seen_ = n; + this.n_features_in_ = p; + + const mean = new Float64Array(p); + const M2 = new Float64Array(p); + + // Welford's online algorithm for mean and variance + for (let i = 0; i < n; i++) { + const row = X[i] ?? new Float64Array(p); + for (let j = 0; j < p; j++) { + const x = row[j] ?? 0; + const delta = x - (mean[j] ?? 0); + mean[j] = (mean[j] ?? 0) + delta / (i + 1); + M2[j] = (M2[j] ?? 0) + delta * (x - (mean[j] ?? 0)); + } + } + + this.mean_ = mean; + const variance = n > 1 + ? Float64Array.from(M2, (v) => v / (n - 1)) + : new Float64Array(p); + this.var_ = variance; + this.scale_ = Float64Array.from(variance, (v) => Math.sqrt(v) || 1.0); + return this; + } + + transform(X: Float64Array[]): Float64Array[] { + this._check_is_fitted(["mean_", "scale_"]); + checkFeaturesConsistency(X, X); // just shape check + const mean = this.mean_ as Float64Array; + const scale = this.scale_ as Float64Array; + return X.map((row) => { + const out = this.copy ? new Float64Array(row) : row; + for (let j = 0; j < out.length; j++) { + if (this.with_mean) out[j] = (out[j] ?? 0) - (mean[j] ?? 0); + if (this.with_std) out[j] = (out[j] ?? 0) / (scale[j] ?? 1); + } + return out; + }); + } + + inverse_transform(X: Float64Array[]): Float64Array[] { + this._check_is_fitted(["mean_", "scale_"]); + const mean = this.mean_ as Float64Array; + const scale = this.scale_ as Float64Array; + return X.map((row) => { + const out = new Float64Array(row); + for (let j = 0; j < out.length; j++) { + if (this.with_std) out[j] = (out[j] ?? 0) * (scale[j] ?? 1); + if (this.with_mean) out[j] = (out[j] ?? 0) + (mean[j] ?? 
0); + } + return out; + }); + } + + fit_transform(X: Float64Array[], y?: Float64Array | Int32Array): Float64Array[] { + return this.fit(X, y).transform(X); + } +} diff --git a/src/utils/class_weight.ts b/src/utils/class_weight.ts new file mode 100644 index 0000000..9c23c4b --- /dev/null +++ b/src/utils/class_weight.ts @@ -0,0 +1,69 @@ +/** + * Class weight utilities. + * Mirrors sklearn.utils.class_weight. + */ + +import { ValueError } from "../exceptions.js"; + +/** + * Compute class weights for imbalanced datasets. + * For 'balanced': n_samples / (n_classes * bincount(y)) + */ +export function computeClassWeight( + classWeight: "balanced" | Record, + classes: Int32Array, + y: Float64Array | Int32Array, +): Float64Array { + const weights = new Float64Array(classes.length); + + if (classWeight === "balanced") { + const nSamples = y.length; + const nClasses = classes.length; + const counts = new Map(); + for (const c of classes) counts.set(c, 0); + for (const v of y) { + const cur = counts.get(v); + if (cur !== undefined) counts.set(v, cur + 1); + } + for (let i = 0; i < classes.length; i++) { + const c = classes[i] ?? 0; + const count = counts.get(c) ?? 0; + if (count === 0) { + throw new ValueError(`Class ${c} is not present in y`); + } + weights[i] = nSamples / (nClasses * count); + } + } else { + for (let i = 0; i < classes.length; i++) { + const c = classes[i] ?? 0; + const w = classWeight[c]; + if (w === undefined) { + throw new ValueError(`Class ${c} is not in classWeight`); + } + weights[i] = w; + } + } + return weights; +} + +/** + * Compute per-sample weights from class weights. 
+ */ +export function computeSampleWeight( + classWeight: "balanced" | Record, + y: Float64Array | Int32Array, +): Float64Array { + const uniqueClasses = new Set(); + for (const v of y) uniqueClasses.add(v); + const classes = new Int32Array([...uniqueClasses].sort((a, b) => a - b)); + const cw = computeClassWeight(classWeight, classes, y); + const classToWeight = new Map(); + for (let i = 0; i < classes.length; i++) { + classToWeight.set(classes[i] ?? 0, cw[i] ?? 1.0); + } + const sampleWeights = new Float64Array(y.length); + for (let i = 0; i < y.length; i++) { + sampleWeights[i] = classToWeight.get(y[i] ?? 0) ?? 1.0; + } + return sampleWeights; +} diff --git a/src/utils/extmath.ts b/src/utils/extmath.ts new file mode 100644 index 0000000..9f84714 --- /dev/null +++ b/src/utils/extmath.ts @@ -0,0 +1,196 @@ +/** + * Mathematical utilities for tsikit-learn. + * Mirrors sklearn.utils.extmath. + */ + +/** Compute the log of the logistic function element-wise. */ +export function logLogistic(x: Float64Array): Float64Array { + const result = new Float64Array(x.length); + for (let i = 0; i < x.length; i++) { + const xi = x[i] ?? 0; + result[i] = xi >= 0 ? -Math.log1p(Math.exp(-xi)) : xi - Math.log1p(Math.exp(xi)); + } + return result; +} + +/** Compute softmax values for each row of X. */ +export function softmax(X: Float64Array[], copy = true): Float64Array[] { + const result = copy ? X.map((row) => new Float64Array(row)) : X; + for (const row of result) { + const maxVal = Math.max(...row); + let sum = 0; + for (let j = 0; j < row.length; j++) { + row[j] = Math.exp((row[j] ?? 0) - maxVal); + sum += row[j] ?? 0; + } + for (let j = 0; j < row.length; j++) { + row[j] = (row[j] ?? 0) / sum; + } + } + return result; +} + +/** Compute row norms of a matrix. */ +export function rowNorms(X: Float64Array[], squared = false): Float64Array { + const norms = new Float64Array(X.length); + for (let i = 0; i < X.length; i++) { + const row = X[i] ?? 
new Float64Array(0); + let norm2 = 0; + for (const v of row) norm2 += v * v; + norms[i] = squared ? norm2 : Math.sqrt(norm2); + } + return norms; +} + +/** Safe sparse dot (dense version). Computes X @ y. */ +export function safeDot(X: Float64Array[], y: Float64Array): Float64Array { + const n = X.length; + const result = new Float64Array(n); + for (let i = 0; i < n; i++) { + const row = X[i] ?? new Float64Array(0); + let dot = 0; + for (let j = 0; j < row.length; j++) { + dot += (row[j] ?? 0) * (y[j] ?? 0); + } + result[i] = dot; + } + return result; +} + +/** Matrix transpose. */ +export function transpose(X: Float64Array[]): Float64Array[] { + if (X.length === 0) return []; + const nRows = X.length; + const nCols = (X[0] ?? new Float64Array(0)).length; + const result: Float64Array[] = Array.from({ length: nCols }, () => new Float64Array(nRows)); + for (let i = 0; i < nRows; i++) { + for (let j = 0; j < nCols; j++) { + (result[j] ?? new Float64Array(0))[i] = (X[i] ?? new Float64Array(0))[j] ?? 0; + } + } + return result; +} + +/** Matrix-matrix multiply: A @ B. */ +export function matMul(A: Float64Array[], B: Float64Array[]): Float64Array[] { + if (A.length === 0 || B.length === 0) return []; + const nRows = A.length; + const nCols = (B[0] ?? new Float64Array(0)).length; + const nInner = B.length; + const result: Float64Array[] = Array.from({ length: nRows }, () => new Float64Array(nCols)); + for (let i = 0; i < nRows; i++) { + for (let k = 0; k < nInner; k++) { + const aik = (A[i] ?? new Float64Array(0))[k] ?? 0; + if (aik === 0) continue; + for (let j = 0; j < nCols; j++) { + result[i]![j] = (result[i]![j] ?? 0) + aik * ((B[k] ?? new Float64Array(0))[j] ?? 0); + } + } + } + return result; +} + +/** + * Solve a lower triangular system Lx = b using forward substitution. 
+ */ +export function forwardSubstitution(L: Float64Array[], b: Float64Array): Float64Array { + const n = b.length; + const x = new Float64Array(n); + for (let i = 0; i < n; i++) { + let sum = b[i] ?? 0; + for (let j = 0; j < i; j++) { + sum -= ((L[i] ?? new Float64Array(0))[j] ?? 0) * (x[j] ?? 0); + } + x[i] = sum / ((L[i] ?? new Float64Array(0))[i] ?? 1); + } + return x; +} + +/** + * Solve an upper triangular system Ux = b using back substitution. + */ +export function backSubstitution(U: Float64Array[], b: Float64Array): Float64Array { + const n = b.length; + const x = new Float64Array(n); + for (let i = n - 1; i >= 0; i--) { + let sum = b[i] ?? 0; + for (let j = i + 1; j < n; j++) { + sum -= ((U[i] ?? new Float64Array(0))[j] ?? 0) * (x[j] ?? 0); + } + x[i] = sum / ((U[i] ?? new Float64Array(0))[i] ?? 1); + } + return x; +} + +/** + * Cholesky decomposition of a symmetric positive definite matrix. + * Returns L such that A = L @ L.T + */ +export function cholesky(A: Float64Array[]): Float64Array[] { + const n = A.length; + const L: Float64Array[] = Array.from({ length: n }, () => new Float64Array(n)); + for (let i = 0; i < n; i++) { + for (let j = 0; j <= i; j++) { + let sum = (A[i] ?? new Float64Array(0))[j] ?? 0; + for (let k = 0; k < j; k++) { + sum -= ((L[i] ?? new Float64Array(0))[k] ?? 0) * ((L[j] ?? new Float64Array(0))[k] ?? 0); + } + if (i === j) { + (L[i] ?? new Float64Array(0))[j] = Math.sqrt(Math.max(sum, 0)); + } else { + const ljj = (L[j] ?? new Float64Array(0))[j] ?? 1; + (L[i] ?? new Float64Array(0))[j] = ljj !== 0 ? sum / ljj : 0; + } + } + } + return L; +} + +/** + * Solve the linear system Ax = b using Cholesky decomposition. + * A must be symmetric positive definite. + */ +export function choleskyLinsolve(A: Float64Array[], b: Float64Array): Float64Array { + const L = cholesky(A); + const y = forwardSubstitution(L, b); + const Lt = transpose(L); + return backSubstitution(Lt, y); +} + +/** Compute the Euclidean distance between two vectors. 
*/ +export function euclideanDistance(a: Float64Array, b: Float64Array): number { + let sum = 0; + for (let i = 0; i < a.length; i++) { + const diff = (a[i] ?? 0) - (b[i] ?? 0); + sum += diff * diff; + } + return Math.sqrt(sum); +} + +/** Add identity * alpha to a matrix (in-place). */ +export function addDiagonal(A: Float64Array[], alpha: number): Float64Array[] { + for (let i = 0; i < A.length; i++) { + (A[i] ?? new Float64Array(0))[i] = ((A[i] ?? new Float64Array(0))[i] ?? 0) + alpha; + } + return A; +} + +/** Compute X.T @ X (Gram matrix). */ +export function gramMatrix(X: Float64Array[]): Float64Array[] { + const Xt = transpose(X); + return matMul(Xt, X); +} + +/** Compute X.T @ y. */ +export function xtDotY(X: Float64Array[], y: Float64Array): Float64Array { + const p = (X[0] ?? new Float64Array(0)).length; + const result = new Float64Array(p); + for (let i = 0; i < X.length; i++) { + const yi = y[i] ?? 0; + const row = X[i] ?? new Float64Array(0); + for (let j = 0; j < p; j++) { + result[j] = (result[j] ?? 0) + (row[j] ?? 0) * yi; + } + } + return result; +} diff --git a/src/utils/index.ts b/src/utils/index.ts new file mode 100644 index 0000000..2ea8323 --- /dev/null +++ b/src/utils/index.ts @@ -0,0 +1,4 @@ +export * from "./extmath.js"; +export * from "./validation.js"; +export * from "./multiclass.js"; +export * from "./class_weight.js"; diff --git a/src/utils/multiclass.ts b/src/utils/multiclass.ts new file mode 100644 index 0000000..cd461ad --- /dev/null +++ b/src/utils/multiclass.ts @@ -0,0 +1,68 @@ +/** + * Multiclass utilities. + * Mirrors sklearn.utils.multiclass. + */ + +import { ValueError } from "../exceptions.js"; + +export type MulticlassType = + | "binary" + | "multiclass" + | "multiclass-multioutput" + | "multilabel-indicator" + | "continuous" + | "continuous-multioutput" + | "unknown"; + +/** Determine the type of target variable. 
*/ +export function typeOfTarget(y: Float64Array | Int32Array): MulticlassType { + const unique = new Set(); + for (const v of y) unique.add(v); + const nUnique = unique.size; + + // Check if all values are integers + const allInt = Array.from(unique).every((v) => Number.isInteger(v)); + if (!allInt) return "continuous"; + + if (nUnique <= 2) return "binary"; + return "multiclass"; +} + +/** Return sorted unique class labels. */ +export function uniqueLabels(...ys: (Float64Array | Int32Array)[]): Int32Array { + const all = new Set(); + for (const y of ys) { + for (const v of y) all.add(v); + } + return new Int32Array([...all].sort((a, b) => a - b)); +} + +/** Check if classification is binary. */ +export function isBinaryClassification(y: Float64Array | Int32Array): boolean { + const unique = new Set(); + for (const v of y) unique.add(v); + return unique.size === 2; +} + +/** Check if classification is multilabel. */ +export function isMultilabel(_y: Float64Array[]): boolean { + // For dense arrays this is always false in our simplified implementation + return false; +} + +/** Return the number of classes for a label array. */ +export function classCount(y: Float64Array | Int32Array): number { + const unique = new Set(); + for (const v of y) unique.add(v); + return unique.size; +} + +/** Validate that y only contains values in the expected classes. */ +export function checkClassificationTargets(y: Float64Array | Int32Array): void { + const t = typeOfTarget(y); + if (t === "continuous") { + throw new ValueError( + `Unknown label type: ${t}. Maybe you are trying to fit a classifier, which expects discrete classes.`, + ); + } +} diff --git a/src/utils/validation.ts b/src/utils/validation.ts new file mode 100644 index 0000000..2e4f2af --- /dev/null +++ b/src/utils/validation.ts @@ -0,0 +1,104 @@ +/** + * Input validation utilities. + * Mirrors sklearn.utils.validation. 
+ */ + +import { ValueError } from "../exceptions.js"; + +/** Validate that X is a non-empty 2D array of Float64Arrays. */ +export function checkArray( + X: Float64Array[], + options: { + minSamples?: number; + minFeatures?: number; + allowNd?: boolean; + } = {}, +): Float64Array[] { + const { minSamples = 1, minFeatures = 1 } = options; + if (!Array.isArray(X)) { + throw new ValueError("X must be an array of Float64Arrays"); + } + if (X.length < minSamples) { + throw new ValueError(`X must have at least ${minSamples} samples, got ${X.length}`); + } + const nFeatures = (X[0] ?? new Float64Array(0)).length; + if (nFeatures < minFeatures) { + throw new ValueError(`X must have at least ${minFeatures} features, got ${nFeatures}`); + } + for (let i = 0; i < X.length; i++) { + const row = X[i]; + if (!(row instanceof Float64Array)) { + throw new ValueError(`X[${i}] must be a Float64Array`); + } + if (row.length !== nFeatures) { + throw new ValueError( + `X rows must all have the same length. Row 0 has ${nFeatures}, row ${i} has ${row.length}`, + ); + } + } + return X; +} + +/** Validate that X and y have compatible shapes. */ +export function checkXy( + X: Float64Array[], + y: Float64Array | Int32Array, +): [Float64Array[], Float64Array | Int32Array] { + checkArray(X); + if (X.length !== y.length) { + throw new ValueError( + `X and y have inconsistent first dimensions: X has ${X.length} samples, y has ${y.length}`, + ); + } + return [X, y]; +} + +/** Return the number of features in X. */ +export function getNumFeatures(X: Float64Array[]): number { + if (X.length === 0) return 0; + return (X[0] ?? new Float64Array(0)).length; +} + +/** Validate that test features match training features. 
*/ +export function checkFeaturesConsistency( + XTrain: Float64Array[], + XTest: Float64Array[], +): void { + const trainFeats = getNumFeatures(XTrain); + const testFeats = getNumFeatures(XTest); + if (trainFeats !== testFeats) { + throw new ValueError( + `X has ${testFeats} features, but the estimator was trained with ${trainFeats} features`, + ); + } +} + +/** Convert a number array to Float64Array. */ +export function asFloat64Array(arr: number[] | Float64Array): Float64Array { + if (arr instanceof Float64Array) return arr; + return new Float64Array(arr); +} + +/** Convert a number array to Int32Array. */ +export function asInt32Array(arr: number[] | Int32Array): Int32Array { + if (arr instanceof Int32Array) return arr; + return new Int32Array(arr); +} + +/** Validate sample weights, returning a uniform weight array if null. */ +export function checkSampleWeight( + sampleWeight: Float64Array | null | undefined, + nSamples: number, +): Float64Array { + if (sampleWeight == null) { + const w = new Float64Array(nSamples); + w.fill(1.0); + return w; + } + if (sampleWeight.length !== nSamples) { + throw new ValueError( + `sampleWeight.length (${sampleWeight.length}) != n_samples (${nSamples})`, + ); + } + return sampleWeight; +} diff --git a/tests/base.test.ts b/tests/base.test.ts new file mode 100644 index 0000000..08f43a4 --- /dev/null +++ b/tests/base.test.ts @@ -0,0 +1,63 @@ +import { describe, it, expect } from "bun:test"; +import { BaseEstimator, ClassifierMixin, RegressorMixin, clone, check_is_fitted } from "../src/base.ts"; +import { NotFittedError } from "../src/exceptions.ts"; + +class DummyEstimator extends BaseEstimator { + alpha: number; + beta: string; + fitted_?: boolean; + + constructor(alpha = 1.0, beta = "test") { + super(); + this.alpha = alpha; + this.beta = beta; + } + + fit(): this { + this.fitted_ = true; + return this; + } +} + +describe("BaseEstimator", () => { + it("get_params returns constructor params", () => { + const est = new 
DummyEstimator(2.0, "hello"); + const params = est.get_params(); + expect(params["alpha"]).toBe(2.0); + expect(params["beta"]).toBe("hello"); + }); + + it("set_params updates params", () => { + const est = new DummyEstimator(); + est.set_params({ alpha: 5.0 }); + expect(est.alpha).toBe(5.0); + }); + + it("check_is_fitted throws NotFittedError when not fitted", () => { + const est = new DummyEstimator(); + expect(() => est.fit()._check_is_fitted(["fitted_"])).not.toThrow(); + const est2 = new DummyEstimator(); + expect(() => est2["_check_is_fitted"](["fitted_"])).toThrow(NotFittedError); + }); +}); + +describe("clone", () => { + it("creates a new instance with same params", () => { + const est = new DummyEstimator(3.0, "foo"); + const cloned = clone(est); + expect(cloned).not.toBe(est); + expect(cloned.alpha).toBe(3.0); + expect(cloned.beta).toBe("foo"); + }); +}); + +describe("Exceptions", () => { + it("NotFittedError has correct name", () => { + const err = new NotFittedError(); + expect(err.name).toBe("NotFittedError"); + }); + + it("NotFittedError is an Error", () => { + expect(new NotFittedError()).toBeInstanceOf(Error); + }); +}); diff --git a/tests/linear_model.test.ts b/tests/linear_model.test.ts new file mode 100644 index 0000000..0b1ad2f --- /dev/null +++ b/tests/linear_model.test.ts @@ -0,0 +1,174 @@ +import { describe, it, expect } from "bun:test"; +import { LinearRegression } from "../src/linear_model/linear_regression.ts"; +import { Ridge } from "../src/linear_model/ridge.ts"; + +describe("LinearRegression", () => { + it("fits a simple 1D linear relationship", () => { + const X = [ + new Float64Array([1]), + new Float64Array([2]), + new Float64Array([3]), + new Float64Array([4]), + new Float64Array([5]), + ]; + const y = new Float64Array([2, 4, 6, 8, 10]); + const reg = new LinearRegression(); + reg.fit(X, y); + + expect(reg.coef_).toBeDefined(); + expect(Math.abs((reg.coef_ as Float64Array)[0]! 
- 2)).toBeLessThan(1e-6); + expect(Math.abs((reg.intercept_ as number))).toBeLessThan(1e-6); + }); + + it("fits with intercept", () => { + const X = [ + new Float64Array([0]), + new Float64Array([1]), + new Float64Array([2]), + ]; + const y = new Float64Array([1, 3, 5]); // y = 2x + 1 + const reg = new LinearRegression(); + reg.fit(X, y); + + expect(Math.abs((reg.coef_ as Float64Array)[0]! - 2)).toBeLessThan(1e-6); + expect(Math.abs((reg.intercept_ as number) - 1)).toBeLessThan(1e-6); + }); + + it("fits without intercept", () => { + const X = [ + new Float64Array([1]), + new Float64Array([2]), + new Float64Array([3]), + ]; + const y = new Float64Array([3, 6, 9]); // y = 3x + const reg = new LinearRegression({ fit_intercept: false }); + reg.fit(X, y); + + expect(Math.abs((reg.coef_ as Float64Array)[0]! - 3)).toBeLessThan(1e-6); + expect(reg.intercept_).toBe(0); + }); + + it("predicts correctly", () => { + const X = [new Float64Array([1]), new Float64Array([2])]; + const y = new Float64Array([1, 2]); + const reg = new LinearRegression(); + reg.fit(X, y); + + const pred = reg.predict([new Float64Array([3])]); + expect(Math.abs(pred[0]! - 3)).toBeLessThan(1e-4); + }); + + it("fits multiple features", () => { + // y = 1*x1 + 2*x2 + const X = [ + new Float64Array([1, 2]), + new Float64Array([2, 1]), + new Float64Array([3, 3]), + new Float64Array([4, 2]), + ]; + const y = new Float64Array([5, 4, 9, 8]); + const reg = new LinearRegression({ fit_intercept: false }); + reg.fit(X, y); + + const pred = reg.predict([new Float64Array([1, 2])]); + expect(Math.abs(pred[0]! 
- 5)).toBeLessThan(0.1); + }); + + it("computes R² score", () => { + const X = [ + new Float64Array([1]), + new Float64Array([2]), + new Float64Array([3]), + new Float64Array([4]), + ]; + const y = new Float64Array([2, 4, 6, 8]); + const reg = new LinearRegression(); + reg.fit(X, y); + + const score = reg.score(X, y); + expect(score).toBeCloseTo(1.0, 5); + }); + + it("returns R² close to 1 for perfect linear data", () => { + const X = Array.from({ length: 20 }, (_, i) => + new Float64Array([i, i * 2])); + const y = new Float64Array(Array.from({ length: 20 }, (_, i) => i * 3 + 1)); + const reg = new LinearRegression(); + reg.fit(X, y); + expect(reg.score(X, y)).toBeGreaterThan(0.999); + }); + + it("throws NotFittedError when predicting before fit", () => { + const reg = new LinearRegression(); + expect(() => reg.predict([new Float64Array([1])])).toThrow(); + }); + + it("get_params returns all params", () => { + const reg = new LinearRegression({ alpha: 0 } as never); + const params = reg.get_params(); + expect("fit_intercept" in params).toBe(true); + }); +}); + +describe("Ridge", () => { + it("fits a simple linear relationship with regularization", () => { + const X = [ + new Float64Array([1]), + new Float64Array([2]), + new Float64Array([3]), + new Float64Array([4]), + new Float64Array([5]), + ]; + const y = new Float64Array([2, 4, 6, 8, 10]); + const reg = new Ridge({ alpha: 0.0001 }); + reg.fit(X, y); + + // With tiny alpha, should be close to OLS + expect(Math.abs((reg.coef_ as Float64Array)[0]! 
- 2)).toBeLessThan(0.01); + }); + + it("shrinks coefficients with large alpha", () => { + const X = [ + new Float64Array([1, 0]), + new Float64Array([0, 1]), + new Float64Array([1, 1]), + ]; + const y = new Float64Array([2, 3, 5]); + + const regLowAlpha = new Ridge({ alpha: 0.001 }); + const regHighAlpha = new Ridge({ alpha: 100.0 }); + regLowAlpha.fit(X, y); + regHighAlpha.fit(X, y); + + const normLow = Array.from(regLowAlpha.coef_ as Float64Array) + .reduce((a, b) => a + b * b, 0); + const normHigh = Array.from(regHighAlpha.coef_ as Float64Array) + .reduce((a, b) => a + b * b, 0); + + // Higher alpha → smaller coefficients + expect(normHigh).toBeLessThan(normLow); + }); + + it("predicts correctly", () => { + const X = [new Float64Array([1]), new Float64Array([2]), new Float64Array([3])]; + const y = new Float64Array([1, 2, 3]); + const reg = new Ridge({ alpha: 0.001 }); + reg.fit(X, y); + + const pred = reg.predict([new Float64Array([4])]); + expect(Math.abs(pred[0]! - 4)).toBeLessThan(0.1); + }); + + it("score is R²", () => { + const X = Array.from({ length: 20 }, (_, i) => new Float64Array([i])); + const y = new Float64Array(Array.from({ length: 20 }, (_, i) => i * 2 + 1)); + const reg = new Ridge({ alpha: 0.001 }); + reg.fit(X, y); + expect(reg.score(X, y)).toBeGreaterThan(0.99); + }); + + it("throws NotFittedError when predicting before fit", () => { + const reg = new Ridge(); + expect(() => reg.predict([new Float64Array([1])])).toThrow(); + }); +}); diff --git a/tests/metrics_model_selection.test.ts b/tests/metrics_model_selection.test.ts new file mode 100644 index 0000000..e46d879 --- /dev/null +++ b/tests/metrics_model_selection.test.ts @@ -0,0 +1,109 @@ +import { describe, it, expect } from "bun:test"; +import { + mean_squared_error, + mean_absolute_error, + r2_score, +} from "../src/metrics/regression.ts"; +import { + accuracy_score, + confusion_matrix, + precision_score, + recall_score, + f1_score, +} from "../src/metrics/classification.ts"; +import { 
train_test_split, KFold } from "../src/model_selection/split.ts"; + +describe("Regression metrics", () => { + it("MSE is 0 for perfect prediction", () => { + const y = new Float64Array([1, 2, 3]); + expect(mean_squared_error(y, y)).toBe(0); + }); + + it("MAE is 0 for perfect prediction", () => { + const y = new Float64Array([1, 2, 3]); + expect(mean_absolute_error(y, y)).toBe(0); + }); + + it("R² is 1 for perfect prediction", () => { + const y = new Float64Array([1, 2, 3]); + expect(r2_score(y, y)).toBe(1); + }); + + it("MSE is correct", () => { + const yTrue = new Float64Array([1, 2, 3]); + const yPred = new Float64Array([2, 3, 4]); // all off by 1 + expect(mean_squared_error(yTrue, yPred)).toBe(1); + }); +}); + +describe("Classification metrics", () => { + it("accuracy is 1 for perfect prediction", () => { + const y = new Int32Array([0, 1, 2]); + expect(accuracy_score(y, y)).toBe(1); + }); + + it("accuracy counts correct predictions", () => { + const yTrue = new Int32Array([0, 1, 1, 0]); + const yPred = new Int32Array([0, 1, 0, 0]); + expect(accuracy_score(yTrue, yPred)).toBe(0.75); + }); + + it("confusion matrix is correct for binary", () => { + const yTrue = new Int32Array([0, 1, 0, 1, 0]); + const yPred = new Int32Array([0, 1, 1, 1, 0]); + const cm = confusion_matrix(yTrue, yPred); + // [[TN, FP], [FN, TP]] + expect((cm[0] as number[])[0]).toBe(2); // TN + expect((cm[0] as number[])[1]).toBe(1); // FP + expect((cm[1] as number[])[0]).toBe(0); // FN + expect((cm[1] as number[])[1]).toBe(2); // TP + }); + + it("f1 is 1 for perfect predictions", () => { + const y = new Int32Array([0, 1, 0, 1]); + expect(f1_score(y, y)).toBeCloseTo(1); + }); +}); + +describe("train_test_split", () => { + it("splits data correctly", () => { + const X = Array.from({ length: 100 }, (_, i) => new Float64Array([i])); + const y = new Float64Array(Array.from({ length: 100 }, (_, i) => i)); + const { XTrain, XTest, yTrain, yTest } = train_test_split(X, y, { testSize: 0.2 }); + 
expect(XTrain.length).toBe(80); + expect(XTest.length).toBe(20); + expect(yTrain.length).toBe(80); + expect(yTest.length).toBe(20); + }); + + it("is reproducible with randomState", () => { + const X = Array.from({ length: 20 }, (_, i) => new Float64Array([i])); + const y = new Float64Array(Array.from({ length: 20 }, (_, i) => i)); + const r1 = train_test_split(X, y, { randomState: 42 }); + const r2 = train_test_split(X, y, { randomState: 42 }); + expect(Array.from(r1.yTest)).toEqual(Array.from(r2.yTest)); + }); +}); + +describe("KFold", () => { + it("generates k folds", () => { + const X = Array.from({ length: 10 }, (_, i) => new Float64Array([i])); + const kf = new KFold({ nSplits: 5 }); + const folds = [...kf.split(X)]; + expect(folds.length).toBe(5); + for (const fold of folds) { + expect(fold.trainIndex.length).toBe(8); + expect(fold.testIndex.length).toBe(2); + } + }); + + it("covers all samples exactly once", () => { + const X = Array.from({ length: 9 }, (_, i) => new Float64Array([i])); + const kf = new KFold({ nSplits: 3 }); + const allTest = new Set(); + for (const fold of kf.split(X)) { + for (const idx of fold.testIndex) allTest.add(idx); + } + expect(allTest.size).toBe(9); + }); +}); diff --git a/tests/preprocessing.test.ts b/tests/preprocessing.test.ts new file mode 100644 index 0000000..f6966f0 --- /dev/null +++ b/tests/preprocessing.test.ts @@ -0,0 +1,119 @@ +import { describe, it, expect } from "bun:test"; +import { StandardScaler } from "../src/preprocessing/standard_scaler.ts"; +import { MinMaxScaler } from "../src/preprocessing/minmax_scaler.ts"; +import { LabelEncoder } from "../src/preprocessing/label_encoder.ts"; +import { Normalizer } from "../src/preprocessing/normalizer.ts"; +import { NotFittedError } from "../src/exceptions.ts"; + +describe("StandardScaler", () => { + const X = [ + new Float64Array([1, 2]), + new Float64Array([3, 4]), + new Float64Array([5, 6]), + ]; + + it("computes mean and std correctly", () => { + const scaler = new 
StandardScaler(); + scaler.fit(X); + expect(scaler.mean_).toBeDefined(); + expect(Math.abs((scaler.mean_ as Float64Array)[0]! - 3)).toBeLessThan(1e-10); + expect(Math.abs((scaler.mean_ as Float64Array)[1]! - 4)).toBeLessThan(1e-10); + }); + + it("transforms to zero mean", () => { + const scaler = new StandardScaler(); + const Xt = scaler.fit_transform(X); + const mean0 = Xt.reduce((a, r) => a + (r[0] ?? 0), 0) / Xt.length; + expect(Math.abs(mean0)).toBeLessThan(1e-10); + }); + + it("inverse_transform recovers original", () => { + const scaler = new StandardScaler(); + const Xt = scaler.fit_transform(X); + const Xr = scaler.inverse_transform(Xt); + for (let i = 0; i < X.length; i++) { + for (let j = 0; j < (X[i] as Float64Array).length; j++) { + expect(Math.abs((Xr[i] as Float64Array)[j]! - (X[i] as Float64Array)[j]!)).toBeLessThan(1e-8); + } + } + }); + + it("throws when not fitted", () => { + const scaler = new StandardScaler(); + expect(() => scaler.transform(X)).toThrow(NotFittedError); + }); +}); + +describe("MinMaxScaler", () => { + const X = [ + new Float64Array([0, 2]), + new Float64Array([5, 4]), + new Float64Array([10, 6]), + ]; + + it("scales to [0, 1] by default", () => { + const scaler = new MinMaxScaler(); + const Xt = scaler.fit_transform(X); + expect((Xt[0] as Float64Array)[0]).toBeCloseTo(0, 8); + expect((Xt[2] as Float64Array)[0]).toBeCloseTo(1, 8); + }); + + it("scales to custom range", () => { + const scaler = new MinMaxScaler({ feature_range: [-1, 1] }); + const Xt = scaler.fit_transform(X); + expect((Xt[0] as Float64Array)[0]).toBeCloseTo(-1, 6); + expect((Xt[2] as Float64Array)[0]).toBeCloseTo(1, 6); + }); + + it("inverse_transform recovers original", () => { + const scaler = new MinMaxScaler(); + const Xt = scaler.fit_transform(X); + const Xr = scaler.inverse_transform(Xt); + for (let i = 0; i < X.length; i++) { + for (let j = 0; j < (X[i] as Float64Array).length; j++) { + expect(Math.abs((Xr[i] as Float64Array)[j]! 
- (X[i] as Float64Array)[j]!)).toBeLessThan(1e-8); + } + } + }); +}); + +describe("LabelEncoder", () => { + it("encodes labels", () => { + const le = new LabelEncoder(); + const y = new Int32Array([3, 1, 2, 1, 3]); + const encoded = le.fit_transform(y); + expect(Array.from(encoded)).toEqual([2, 0, 1, 0, 2]); + }); + + it("inverse_transform recovers original", () => { + const le = new LabelEncoder(); + const y = new Int32Array([10, 20, 30]); + const encoded = le.fit_transform(y); + const decoded = le.inverse_transform(encoded); + expect(Array.from(decoded)).toEqual([10, 20, 30]); + }); + + it("throws on unseen labels", () => { + const le = new LabelEncoder(); + le.fit(new Int32Array([1, 2, 3])); + expect(() => le.transform(new Int32Array([4]))).toThrow(); + }); +}); + +describe("Normalizer", () => { + it("normalizes to unit L2 norm", () => { + const norm = new Normalizer({ norm: "l2" }); + const X = [new Float64Array([3, 4])]; // 3² + 4² = 25, norm = 5 + const Xt = norm.transform(X); + expect((Xt[0] as Float64Array)[0]).toBeCloseTo(0.6, 8); + expect((Xt[0] as Float64Array)[1]).toBeCloseTo(0.8, 8); + }); + + it("normalizes to unit L1 norm", () => { + const norm = new Normalizer({ norm: "l1" }); + const X = [new Float64Array([1, 3])]; // sum = 4 + const Xt = norm.transform(X); + expect((Xt[0] as Float64Array)[0]).toBeCloseTo(0.25, 8); + expect((Xt[0] as Float64Array)[1]).toBeCloseTo(0.75, 8); + }); +}); diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..989d8ca --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,24 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "lib": ["ES2022", "DOM"], + "strict": true, + "noUncheckedIndexedAccess": true, + "exactOptionalPropertyTypes": true, + "noImplicitOverride": true, + "noImplicitReturns": true, + "noPropertyAccessFromIndexSignature": true, + "noFallthroughCasesInSwitch": true, + "verbatimModuleSyntax": true, + "declaration": true, + 
"declarationMap": true, + "sourceMap": true, + "outDir": "./dist", + "rootDir": "./src", + "skipLibCheck": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "tests", "playground"] +} From 7e2f7d961469012f7b661557b0b333dfce8375db Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 14:47:35 +0000 Subject: [PATCH 3/4] Limit CI workflow permissions Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2bd1ead..8a519bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,9 @@ on: pull_request: branches: [main] +permissions: + contents: read + jobs: test: name: Test @@ -73,6 +76,7 @@ jobs: needs: [test, playground] if: github.ref == 'refs/heads/main' permissions: + contents: read pages: write id-token: write environment: From d341da9c2787dc6f374508cbdf4e306820db2e67 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 15:08:37 +0000 Subject: [PATCH 4/4] Fix CI lint failures Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- src/base.ts | 25 ++++++++++---- src/exceptions.ts | 4 ++- src/linear_model/linear_regression.ts | 10 +++--- src/linear_model/ridge.ts | 10 +++--- src/metrics/classification.ts | 18 ++++++---- src/model_selection/split.ts | 47 ++++++++++++++++--------- src/preprocessing/label_encoder.ts | 6 ++-- src/preprocessing/minmax_scaler.ts | 25 +++++++++----- src/preprocessing/normalizer.ts | 5 ++- src/preprocessing/standard_scaler.ts | 12 ++++--- src/utils/extmath.ts | 47 +++++++++++++++++++------ src/utils/validation.ts | 8 +++-- tests/base.test.ts | 16 ++++++--- tests/linear_model.test.ts | 50 ++++++++++++++++++--------- tests/metrics_model_selection.test.ts | 20 ++++++----- tests/preprocessing.test.ts | 30 
+++++++++++----- 16 files changed, 225 insertions(+), 108 deletions(-) diff --git a/src/base.ts b/src/base.ts index d7af2e1..236df4d 100644 --- a/src/base.ts +++ b/src/base.ts @@ -21,7 +21,8 @@ export abstract class BaseEstimator { for (const key of Object.keys(this)) { const val = (this as Record)[key]; if (typeof val !== "function") { - out[key] = deep && val instanceof BaseEstimator ? val.get_params(deep) : val; + out[key] = + deep && val instanceof BaseEstimator ? val.get_params(deep) : val; } } return out; @@ -37,7 +38,9 @@ export abstract class BaseEstimator { /** Assert the estimator is fitted. */ protected _check_is_fitted(attributes: string[]): void { - const missing = attributes.filter((a) => (this as Record)[a] === undefined); + const missing = attributes.filter( + (a) => (this as Record)[a] === undefined, + ); if (missing.length > 0) { throw new NotFittedError( `This ${this.constructor.name} instance is not fitted yet. Call 'fit' first.`, @@ -90,7 +93,10 @@ export abstract class TransformerMixin { readonly _estimator_type = "transformer" as const; /** Fit and transform in one step. */ - fit_transform(X: Float64Array[], y?: Float64Array | Int32Array): Float64Array[] { + fit_transform( + X: Float64Array[], + y?: Float64Array | Int32Array, + ): Float64Array[] { return this.fit(X, y).transform(X); } @@ -120,14 +126,21 @@ export function clone(estimator: T): T { } /** Check if an estimator is fitted by looking for a trailing underscore attribute. */ -export function check_is_fitted(estimator: BaseEstimator, attributes?: string[]): void { - const attrs = attributes ?? Object.keys(estimator).filter((k) => k.endsWith("_") && !k.startsWith("_")); +export function check_is_fitted( + estimator: BaseEstimator, + attributes?: string[], +): void { + const attrs = + attributes ?? 
+ Object.keys(estimator).filter((k) => k.endsWith("_") && !k.startsWith("_")); if (attrs.length === 0) { throw new NotFittedError( `This ${estimator.constructor.name} instance is not fitted yet.`, ); } - const missing = attrs.filter((a) => (estimator as unknown as Record)[a] === undefined); + const missing = attrs.filter( + (a) => (estimator as unknown as Record)[a] === undefined, + ); if (missing.length > 0) { throw new NotFittedError( `This ${estimator.constructor.name} instance is not fitted yet. Missing attributes: ${missing.join(", ")}.`, diff --git a/src/exceptions.ts b/src/exceptions.ts index 5314a48..f1a2bce 100644 --- a/src/exceptions.ts +++ b/src/exceptions.ts @@ -6,7 +6,9 @@ /** Raised when an estimator is used before being fitted. */ export class NotFittedError extends Error { override readonly name = "NotFittedError"; - constructor(message = "This estimator is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.") { + constructor( + message = "This estimator is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.", + ) { super(message); } } diff --git a/src/linear_model/linear_regression.ts b/src/linear_model/linear_regression.ts index ff22f57..bee73b2 100644 --- a/src/linear_model/linear_regression.ts +++ b/src/linear_model/linear_regression.ts @@ -7,14 +7,14 @@ */ import { BaseEstimator, RegressorMixin } from "../base.js"; -import { checkArray, checkXy } from "../utils/validation.js"; import { - gramMatrix, - xtDotY, + addDiagonal, choleskyLinsolve, + gramMatrix, safeDot, - addDiagonal, + xtDotY, } from "../utils/extmath.js"; +import { checkArray, checkXy } from "../utils/validation.js"; export interface LinearRegressionParams { fit_intercept?: boolean; @@ -91,7 +91,7 @@ export class LinearRegression extends BaseEstimator { XCenter = X.map((row) => { const centered = new Float64Array(row); for (let j = 0; j < centered.length; j++) { - centered[j] = (centered[j] ?? 
0) - (xMean as Float64Array)[j]!; + centered[j] = (centered[j] ?? 0) - ((xMean as Float64Array)[j] ?? 0); } return centered; }); diff --git a/src/linear_model/ridge.ts b/src/linear_model/ridge.ts index f431b40..eab9a65 100644 --- a/src/linear_model/ridge.ts +++ b/src/linear_model/ridge.ts @@ -7,14 +7,14 @@ */ import { BaseEstimator } from "../base.js"; -import { checkArray, checkXy } from "../utils/validation.js"; import { - gramMatrix, - xtDotY, + addDiagonal, choleskyLinsolve, + gramMatrix, safeDot, - addDiagonal, + xtDotY, } from "../utils/extmath.js"; +import { checkArray, checkXy } from "../utils/validation.js"; export interface RidgeParams { alpha?: number; @@ -95,7 +95,7 @@ export class Ridge extends BaseEstimator { XCenter = X.map((row) => { const centered = new Float64Array(row); for (let j = 0; j < centered.length; j++) { - centered[j] = (centered[j] ?? 0) - (xMean as Float64Array)[j]!; + centered[j] = (centered[j] ?? 0) - ((xMean as Float64Array)[j] ?? 0); } return centered; }); diff --git a/src/metrics/classification.ts b/src/metrics/classification.ts index 34fbdc9..408f55a 100644 --- a/src/metrics/classification.ts +++ b/src/metrics/classification.ts @@ -27,18 +27,22 @@ export function confusion_matrix( yPred: Float64Array | Int32Array, labels?: Int32Array, ): number[][] { - const labelSet = labels ?? (() => { - const s = new Set(); - for (const v of yTrue) s.add(v); - for (const v of yPred) s.add(v); - return new Int32Array([...s].sort((a, b) => a - b)); - })(); + const labelSet = + labels ?? + (() => { + const s = new Set(); + for (const v of yTrue) s.add(v); + for (const v of yPred) s.add(v); + return new Int32Array([...s].sort((a, b) => a - b)); + })(); const n = labelSet.length; const labelIdx = new Map(); for (let i = 0; i < n; i++) labelIdx.set(labelSet[i] ?? 
0, i); - const matrix: number[][] = Array.from({ length: n }, () => new Array(n).fill(0)); + const matrix: number[][] = Array.from({ length: n }, () => + new Array(n).fill(0), + ); for (let i = 0; i < yTrue.length; i++) { const ti = labelIdx.get(yTrue[i] ?? 0); const pi = labelIdx.get(yPred[i] ?? 0); diff --git a/src/model_selection/split.ts b/src/model_selection/split.ts index 781928e..c693e53 100644 --- a/src/model_selection/split.ts +++ b/src/model_selection/split.ts @@ -57,15 +57,19 @@ export function train_test_split( const nTrain = n - nTest; if (nTrain <= 0) { - throw new ValueError(`With n_samples=${n} and test_size=${testSize}, the resulting train set would be empty.`); + throw new ValueError( + `With n_samples=${n} and test_size=${testSize}, the resulting train set would be empty.`, + ); } const rng = lcg(randomState); - const indices = shuffle ? shuffleIndices(n, rng) : (() => { - const idx = new Int32Array(n); - for (let i = 0; i < n; i++) idx[i] = i; - return idx; - })(); + const indices = shuffle + ? shuffleIndices(n, rng) + : (() => { + const idx = new Int32Array(n); + for (let i = 0; i < n; i++) idx[i] = i; + return idx; + })(); const trainIdx = indices.slice(0, nTrain); const testIdx = indices.slice(nTrain); @@ -76,7 +80,9 @@ export function train_test_split( const isInt = y instanceof Int32Array; const yTrain = isInt ? new Int32Array(Array.from(trainIdx, (i) => (y as Int32Array)[i] ?? 0)) - : new Float64Array(Array.from(trainIdx, (i) => (y as Float64Array)[i] ?? 0)); + : new Float64Array( + Array.from(trainIdx, (i) => (y as Float64Array)[i] ?? 0), + ); const yTest = isInt ? new Int32Array(Array.from(testIdx, (i) => (y as Int32Array)[i] ?? 0)) : new Float64Array(Array.from(testIdx, (i) => (y as Float64Array)[i] ?? 0)); @@ -120,13 +126,17 @@ export class KFold { } const rng = lcg(this.randomState); - const indices = this.shuffle ? 
shuffleIndices(n, rng) : (() => { - const idx = new Int32Array(n); - for (let i = 0; i < n; i++) idx[i] = i; - return idx; - })(); - - const foldSizes = new Int32Array(this.nSplits).fill(Math.floor(n / this.nSplits)); + const indices = this.shuffle + ? shuffleIndices(n, rng) + : (() => { + const idx = new Int32Array(n); + for (let i = 0; i < n; i++) idx[i] = i; + return idx; + })(); + + const foldSizes = new Int32Array(this.nSplits).fill( + Math.floor(n / this.nSplits), + ); for (let i = 0; i < n % this.nSplits; i++) { foldSizes[i] = (foldSizes[i] ?? 0) + 1; } @@ -184,9 +194,14 @@ export class StratifiedKFold { } // Assign indices to folds - const foldIndices: number[][] = Array.from({ length: this.nSplits }, () => []); + const foldIndices: number[][] = Array.from( + { length: this.nSplits }, + () => [], + ); for (const [, idxList] of classIndices) { - const shuffled = this.shuffle ? [...idxList].sort(() => rng() - 0.5) : idxList; + const shuffled = this.shuffle + ? [...idxList].sort(() => rng() - 0.5) + : idxList; shuffled.forEach((idx, i) => { (foldIndices[i % this.nSplits] as number[]).push(idx); }); diff --git a/src/preprocessing/label_encoder.ts b/src/preprocessing/label_encoder.ts index 234dfb5..e6bdd91 100644 --- a/src/preprocessing/label_encoder.ts +++ b/src/preprocessing/label_encoder.ts @@ -27,7 +27,9 @@ export class LabelEncoder extends BaseEstimator { for (let i = 0; i < y.length; i++) { const encoded = classMap.get(y[i] ?? 0); if (encoded === undefined) { - throw new ValueError(`y contains previously unseen labels: ${String(y[i])}`); + throw new ValueError( + `y contains previously unseen labels: ${String(y[i])}`, + ); } result[i] = encoded; } @@ -41,7 +43,7 @@ export class LabelEncoder extends BaseEstimator { for (let i = 0; i < y.length; i++) { const idx = y[i] ?? 
0; if (idx < 0 || idx >= classes.length) { - throw new ValueError(`y contains values not in the fitted classes`); + throw new ValueError("y contains values not in the fitted classes"); } result[i] = classes[idx] ?? 0; } diff --git a/src/preprocessing/minmax_scaler.ts b/src/preprocessing/minmax_scaler.ts index 83276f7..2773fc8 100644 --- a/src/preprocessing/minmax_scaler.ts +++ b/src/preprocessing/minmax_scaler.ts @@ -4,8 +4,8 @@ */ import { BaseEstimator } from "../base.js"; -import { checkArray } from "../utils/validation.js"; import { ValueError } from "../exceptions.js"; +import { checkArray } from "../utils/validation.js"; export interface MinMaxScalerParams { feature_range?: [number, number]; @@ -46,24 +46,28 @@ export class MinMaxScaler extends BaseEstimator { this.n_samples_seen_ = n; this.n_features_in_ = p; - const dataMin = new Float64Array(p).fill(Infinity); - const dataMax = new Float64Array(p).fill(-Infinity); + const dataMin = new Float64Array(p).fill(Number.POSITIVE_INFINITY); + const dataMax = new Float64Array(p).fill(Number.NEGATIVE_INFINITY); for (const row of X) { for (let j = 0; j < p; j++) { const v = row[j] ?? 0; - if (v < (dataMin[j] ?? Infinity)) dataMin[j] = v; - if (v > (dataMax[j] ?? -Infinity)) dataMax[j] = v; + if (v < (dataMin[j] ?? Number.POSITIVE_INFINITY)) dataMin[j] = v; + if (v > (dataMax[j] ?? Number.NEGATIVE_INFINITY)) dataMax[j] = v; } } this.data_min_ = dataMin; this.data_max_ = dataMax; - this.data_range_ = Float64Array.from(dataMax, (v, i) => v - (dataMin[i] ?? 0)); + this.data_range_ = Float64Array.from( + dataMax, + (v, i) => v - (dataMin[i] ?? 0), + ); const rangeScale = rMax - rMin; this.scale_ = Float64Array.from(this.data_range_, (v) => v === 0 ? 0 : rangeScale / v, ); - this.min_ = Float64Array.from(this.scale_, (v, i) => - rMin - v * (dataMin[i] ?? 0), + this.min_ = Float64Array.from( + this.scale_, + (v, i) => rMin - v * (dataMin[i] ?? 
0), ); return this; } @@ -99,7 +103,10 @@ export class MinMaxScaler extends BaseEstimator { }); } - fit_transform(X: Float64Array[], y?: Float64Array | Int32Array): Float64Array[] { + fit_transform( + X: Float64Array[], + y?: Float64Array | Int32Array, + ): Float64Array[] { return this.fit(X, y).transform(X); } } diff --git a/src/preprocessing/normalizer.ts b/src/preprocessing/normalizer.ts index 7af81b9..ab7ef5d 100644 --- a/src/preprocessing/normalizer.ts +++ b/src/preprocessing/normalizer.ts @@ -40,7 +40,10 @@ export class Normalizer extends BaseEstimator { }); } - fit_transform(X: Float64Array[], _y?: Float64Array | Int32Array): Float64Array[] { + fit_transform( + X: Float64Array[], + _y?: Float64Array | Int32Array, + ): Float64Array[] { return this.transform(X); } diff --git a/src/preprocessing/standard_scaler.ts b/src/preprocessing/standard_scaler.ts index 3154b41..576cb12 100644 --- a/src/preprocessing/standard_scaler.ts +++ b/src/preprocessing/standard_scaler.ts @@ -4,8 +4,8 @@ */ import { BaseEstimator, TransformerMixin } from "../base.js"; -import { checkArray, checkFeaturesConsistency } from "../utils/validation.js"; import { ValueError } from "../exceptions.js"; +import { checkArray, checkFeaturesConsistency } from "../utils/validation.js"; export interface StandardScalerParams { copy?: boolean; @@ -53,9 +53,8 @@ export class StandardScaler extends BaseEstimator { } this.mean_ = mean; - const variance = n > 1 - ? Float64Array.from(M2, (v) => v / (n - 1)) - : new Float64Array(p); + const variance = + n > 1 ? 
Float64Array.from(M2, (v) => v / (n - 1)) : new Float64Array(p); this.var_ = variance; this.scale_ = Float64Array.from(variance, (v) => Math.sqrt(v) || 1.0); return this; @@ -90,7 +89,10 @@ export class StandardScaler extends BaseEstimator { }); } - fit_transform(X: Float64Array[], y?: Float64Array | Int32Array): Float64Array[] { + fit_transform( + X: Float64Array[], + y?: Float64Array | Int32Array, + ): Float64Array[] { return this.fit(X, y).transform(X); } } diff --git a/src/utils/extmath.ts b/src/utils/extmath.ts index 9f84714..43a42cb 100644 --- a/src/utils/extmath.ts +++ b/src/utils/extmath.ts @@ -8,7 +8,8 @@ export function logLogistic(x: Float64Array): Float64Array { const result = new Float64Array(x.length); for (let i = 0; i < x.length; i++) { const xi = x[i] ?? 0; - result[i] = xi >= 0 ? -Math.log1p(Math.exp(-xi)) : xi - Math.log1p(Math.exp(xi)); + result[i] = + xi >= 0 ? -Math.log1p(Math.exp(-xi)) : xi - Math.log1p(Math.exp(xi)); } return result; } @@ -62,10 +63,14 @@ export function transpose(X: Float64Array[]): Float64Array[] { if (X.length === 0) return []; const nRows = X.length; const nCols = (X[0] ?? new Float64Array(0)).length; - const result: Float64Array[] = Array.from({ length: nCols }, () => new Float64Array(nRows)); + const result: Float64Array[] = Array.from( + { length: nCols }, + () => new Float64Array(nRows), + ); for (let i = 0; i < nRows; i++) { for (let j = 0; j < nCols; j++) { - (result[j] ?? new Float64Array(0))[i] = (X[i] ?? new Float64Array(0))[j] ?? 0; + (result[j] ?? new Float64Array(0))[i] = + (X[i] ?? new Float64Array(0))[j] ?? 0; } } return result; @@ -77,13 +82,18 @@ export function matMul(A: Float64Array[], B: Float64Array[]): Float64Array[] { const nRows = A.length; const nCols = (B[0] ?? 
new Float64Array(0)).length; const nInner = B.length; - const result: Float64Array[] = Array.from({ length: nRows }, () => new Float64Array(nCols)); + const result: Float64Array[] = Array.from( + { length: nRows }, + () => new Float64Array(nCols), + ); for (let i = 0; i < nRows; i++) { for (let k = 0; k < nInner; k++) { const aik = (A[i] ?? new Float64Array(0))[k] ?? 0; if (aik === 0) continue; for (let j = 0; j < nCols; j++) { - result[i]![j] = (result[i]![j] ?? 0) + aik * ((B[k] ?? new Float64Array(0))[j] ?? 0); + const resultRow = result[i] ?? new Float64Array(0); + resultRow[j] = + (resultRow[j] ?? 0) + aik * ((B[k] ?? new Float64Array(0))[j] ?? 0); } } } @@ -93,7 +103,10 @@ export function matMul(A: Float64Array[], B: Float64Array[]): Float64Array[] { /** * Solve a lower triangular system Lx = b using forward substitution. */ -export function forwardSubstitution(L: Float64Array[], b: Float64Array): Float64Array { +export function forwardSubstitution( + L: Float64Array[], + b: Float64Array, +): Float64Array { const n = b.length; const x = new Float64Array(n); for (let i = 0; i < n; i++) { @@ -109,7 +122,10 @@ export function forwardSubstitution(L: Float64Array[], b: Float64Array): Float64 /** * Solve an upper triangular system Ux = b using back substitution. */ -export function backSubstitution(U: Float64Array[], b: Float64Array): Float64Array { +export function backSubstitution( + U: Float64Array[], + b: Float64Array, +): Float64Array { const n = b.length; const x = new Float64Array(n); for (let i = n - 1; i >= 0; i--) { @@ -128,12 +144,17 @@ export function backSubstitution(U: Float64Array[], b: Float64Array): Float64Arr */ export function cholesky(A: Float64Array[]): Float64Array[] { const n = A.length; - const L: Float64Array[] = Array.from({ length: n }, () => new Float64Array(n)); + const L: Float64Array[] = Array.from( + { length: n }, + () => new Float64Array(n), + ); for (let i = 0; i < n; i++) { for (let j = 0; j <= i; j++) { let sum = (A[i] ?? 
new Float64Array(0))[j] ?? 0; for (let k = 0; k < j; k++) { - sum -= ((L[i] ?? new Float64Array(0))[k] ?? 0) * ((L[j] ?? new Float64Array(0))[k] ?? 0); + sum -= + ((L[i] ?? new Float64Array(0))[k] ?? 0) * + ((L[j] ?? new Float64Array(0))[k] ?? 0); } if (i === j) { (L[i] ?? new Float64Array(0))[j] = Math.sqrt(Math.max(sum, 0)); @@ -150,7 +171,10 @@ export function cholesky(A: Float64Array[]): Float64Array[] { * Solve the linear system Ax = b using Cholesky decomposition. * A must be symmetric positive definite. */ -export function choleskyLinsolve(A: Float64Array[], b: Float64Array): Float64Array { +export function choleskyLinsolve( + A: Float64Array[], + b: Float64Array, +): Float64Array { const L = cholesky(A); const y = forwardSubstitution(L, b); const Lt = transpose(L); @@ -170,7 +194,8 @@ export function euclideanDistance(a: Float64Array, b: Float64Array): number { /** Add identity * alpha to a matrix (in-place). */ export function addDiagonal(A: Float64Array[], alpha: number): Float64Array[] { for (let i = 0; i < A.length; i++) { - (A[i] ?? new Float64Array(0))[i] = ((A[i] ?? new Float64Array(0))[i] ?? 0) + alpha; + (A[i] ?? new Float64Array(0))[i] = + ((A[i] ?? new Float64Array(0))[i] ?? 0) + alpha; } return A; } diff --git a/src/utils/validation.ts b/src/utils/validation.ts index 2e4f2af..e366ffc 100644 --- a/src/utils/validation.ts +++ b/src/utils/validation.ts @@ -19,11 +19,15 @@ export function checkArray( throw new ValueError("X must be an array of Float64Arrays"); } if (X.length < minSamples) { - throw new ValueError(`X must have at least ${minSamples} samples, got ${X.length}`); + throw new ValueError( + `X must have at least ${minSamples} samples, got ${X.length}`, + ); } const nFeatures = (X[0] ?? 
new Float64Array(0)).length; if (nFeatures < minFeatures) { - throw new ValueError(`X must have at least ${minFeatures} features, got ${nFeatures}`); + throw new ValueError( + `X must have at least ${minFeatures} features, got ${nFeatures}`, + ); } for (let i = 0; i < X.length; i++) { const row = X[i]; diff --git a/tests/base.test.ts b/tests/base.test.ts index 08f43a4..550000e 100644 --- a/tests/base.test.ts +++ b/tests/base.test.ts @@ -1,5 +1,11 @@ -import { describe, it, expect } from "bun:test"; -import { BaseEstimator, ClassifierMixin, RegressorMixin, clone, check_is_fitted } from "../src/base.ts"; +import { describe, expect, it } from "bun:test"; +import { + BaseEstimator, + ClassifierMixin, + RegressorMixin, + check_is_fitted, + clone, +} from "../src/base.ts"; import { NotFittedError } from "../src/exceptions.ts"; class DummyEstimator extends BaseEstimator { @@ -23,8 +29,8 @@ describe("BaseEstimator", () => { it("get_params returns constructor params", () => { const est = new DummyEstimator(2.0, "hello"); const params = est.get_params(); - expect(params["alpha"]).toBe(2.0); - expect(params["beta"]).toBe("hello"); + expect(params.alpha).toBe(2.0); + expect(params.beta).toBe("hello"); }); it("set_params updates params", () => { @@ -37,7 +43,7 @@ describe("BaseEstimator", () => { const est = new DummyEstimator(); expect(() => est.fit()._check_is_fitted(["fitted_"])).not.toThrow(); const est2 = new DummyEstimator(); - expect(() => est2["_check_is_fitted"](["fitted_"])).toThrow(NotFittedError); + expect(() => est2._check_is_fitted(["fitted_"])).toThrow(NotFittedError); }); }); diff --git a/tests/linear_model.test.ts b/tests/linear_model.test.ts index 0b1ad2f..02dd9b0 100644 --- a/tests/linear_model.test.ts +++ b/tests/linear_model.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect } from "bun:test"; +import { describe, expect, it } from "bun:test"; import { LinearRegression } from "../src/linear_model/linear_regression.ts"; import { Ridge } from 
"../src/linear_model/ridge.ts"; @@ -16,8 +16,10 @@ describe("LinearRegression", () => { reg.fit(X, y); expect(reg.coef_).toBeDefined(); - expect(Math.abs((reg.coef_ as Float64Array)[0]! - 2)).toBeLessThan(1e-6); - expect(Math.abs((reg.intercept_ as number))).toBeLessThan(1e-6); + expect(Math.abs(((reg.coef_ as Float64Array)[0] ?? 0) - 2)).toBeLessThan( + 1e-6, + ); + expect(Math.abs(reg.intercept_ as number)).toBeLessThan(1e-6); }); it("fits with intercept", () => { @@ -30,7 +32,9 @@ describe("LinearRegression", () => { const reg = new LinearRegression(); reg.fit(X, y); - expect(Math.abs((reg.coef_ as Float64Array)[0]! - 2)).toBeLessThan(1e-6); + expect(Math.abs(((reg.coef_ as Float64Array)[0] ?? 0) - 2)).toBeLessThan( + 1e-6, + ); expect(Math.abs((reg.intercept_ as number) - 1)).toBeLessThan(1e-6); }); @@ -44,7 +48,9 @@ describe("LinearRegression", () => { const reg = new LinearRegression({ fit_intercept: false }); reg.fit(X, y); - expect(Math.abs((reg.coef_ as Float64Array)[0]! - 3)).toBeLessThan(1e-6); + expect(Math.abs(((reg.coef_ as Float64Array)[0] ?? 0) - 3)).toBeLessThan( + 1e-6, + ); expect(reg.intercept_).toBe(0); }); @@ -55,7 +61,7 @@ describe("LinearRegression", () => { reg.fit(X, y); const pred = reg.predict([new Float64Array([3])]); - expect(Math.abs(pred[0]! - 3)).toBeLessThan(1e-4); + expect(Math.abs((pred[0] ?? 0) - 3)).toBeLessThan(1e-4); }); it("fits multiple features", () => { @@ -71,7 +77,7 @@ describe("LinearRegression", () => { reg.fit(X, y); const pred = reg.predict([new Float64Array([1, 2])]); - expect(Math.abs(pred[0]! - 5)).toBeLessThan(0.1); + expect(Math.abs((pred[0] ?? 
0) - 5)).toBeLessThan(0.1); }); it("computes R² score", () => { @@ -90,8 +96,10 @@ describe("LinearRegression", () => { }); it("returns R² close to 1 for perfect linear data", () => { - const X = Array.from({ length: 20 }, (_, i) => - new Float64Array([i, i * 2])); + const X = Array.from( + { length: 20 }, + (_, i) => new Float64Array([i, i * 2]), + ); const y = new Float64Array(Array.from({ length: 20 }, (_, i) => i * 3 + 1)); const reg = new LinearRegression(); reg.fit(X, y); @@ -124,7 +132,9 @@ describe("Ridge", () => { reg.fit(X, y); // With tiny alpha, should be close to OLS - expect(Math.abs((reg.coef_ as Float64Array)[0]! - 2)).toBeLessThan(0.01); + expect(Math.abs(((reg.coef_ as Float64Array)[0] ?? 0) - 2)).toBeLessThan( + 0.01, + ); }); it("shrinks coefficients with large alpha", () => { @@ -140,23 +150,31 @@ describe("Ridge", () => { regLowAlpha.fit(X, y); regHighAlpha.fit(X, y); - const normLow = Array.from(regLowAlpha.coef_ as Float64Array) - .reduce((a, b) => a + b * b, 0); - const normHigh = Array.from(regHighAlpha.coef_ as Float64Array) - .reduce((a, b) => a + b * b, 0); + const normLow = Array.from(regLowAlpha.coef_ as Float64Array).reduce( + (a, b) => a + b * b, + 0, + ); + const normHigh = Array.from(regHighAlpha.coef_ as Float64Array).reduce( + (a, b) => a + b * b, + 0, + ); // Higher alpha → smaller coefficients expect(normHigh).toBeLessThan(normLow); }); it("predicts correctly", () => { - const X = [new Float64Array([1]), new Float64Array([2]), new Float64Array([3])]; + const X = [ + new Float64Array([1]), + new Float64Array([2]), + new Float64Array([3]), + ]; const y = new Float64Array([1, 2, 3]); const reg = new Ridge({ alpha: 0.001 }); reg.fit(X, y); const pred = reg.predict([new Float64Array([4])]); - expect(Math.abs(pred[0]! - 4)).toBeLessThan(0.1); + expect(Math.abs((pred[0] ?? 
0) - 4)).toBeLessThan(0.1); }); it("score is R²", () => { diff --git a/tests/metrics_model_selection.test.ts b/tests/metrics_model_selection.test.ts index e46d879..1dfb861 100644 --- a/tests/metrics_model_selection.test.ts +++ b/tests/metrics_model_selection.test.ts @@ -1,17 +1,17 @@ -import { describe, it, expect } from "bun:test"; -import { - mean_squared_error, - mean_absolute_error, - r2_score, -} from "../src/metrics/regression.ts"; +import { describe, expect, it } from "bun:test"; import { accuracy_score, confusion_matrix, + f1_score, precision_score, recall_score, - f1_score, } from "../src/metrics/classification.ts"; -import { train_test_split, KFold } from "../src/model_selection/split.ts"; +import { + mean_absolute_error, + mean_squared_error, + r2_score, +} from "../src/metrics/regression.ts"; +import { KFold, train_test_split } from "../src/model_selection/split.ts"; describe("Regression metrics", () => { it("MSE is 0 for perfect prediction", () => { @@ -69,7 +69,9 @@ describe("train_test_split", () => { it("splits data correctly", () => { const X = Array.from({ length: 100 }, (_, i) => new Float64Array([i])); const y = new Float64Array(Array.from({ length: 100 }, (_, i) => i)); - const { XTrain, XTest, yTrain, yTest } = train_test_split(X, y, { testSize: 0.2 }); + const { XTrain, XTest, yTrain, yTest } = train_test_split(X, y, { + testSize: 0.2, + }); expect(XTrain.length).toBe(80); expect(XTest.length).toBe(20); expect(yTrain.length).toBe(80); diff --git a/tests/preprocessing.test.ts b/tests/preprocessing.test.ts index f6966f0..cc11d17 100644 --- a/tests/preprocessing.test.ts +++ b/tests/preprocessing.test.ts @@ -1,9 +1,9 @@ -import { describe, it, expect } from "bun:test"; -import { StandardScaler } from "../src/preprocessing/standard_scaler.ts"; -import { MinMaxScaler } from "../src/preprocessing/minmax_scaler.ts"; +import { describe, expect, it } from "bun:test"; +import { NotFittedError } from "../src/exceptions.ts"; import { LabelEncoder } from 
"../src/preprocessing/label_encoder.ts"; +import { MinMaxScaler } from "../src/preprocessing/minmax_scaler.ts"; import { Normalizer } from "../src/preprocessing/normalizer.ts"; -import { NotFittedError } from "../src/exceptions.ts"; +import { StandardScaler } from "../src/preprocessing/standard_scaler.ts"; describe("StandardScaler", () => { const X = [ @@ -16,8 +16,12 @@ describe("StandardScaler", () => { const scaler = new StandardScaler(); scaler.fit(X); expect(scaler.mean_).toBeDefined(); - expect(Math.abs((scaler.mean_ as Float64Array)[0]! - 3)).toBeLessThan(1e-10); - expect(Math.abs((scaler.mean_ as Float64Array)[1]! - 4)).toBeLessThan(1e-10); + expect(Math.abs(((scaler.mean_ as Float64Array)[0] ?? 0) - 3)).toBeLessThan( + 1e-10, + ); + expect(Math.abs(((scaler.mean_ as Float64Array)[1] ?? 0) - 4)).toBeLessThan( + 1e-10, + ); }); it("transforms to zero mean", () => { @@ -33,7 +37,12 @@ describe("StandardScaler", () => { const Xr = scaler.inverse_transform(Xt); for (let i = 0; i < X.length; i++) { for (let j = 0; j < (X[i] as Float64Array).length; j++) { - expect(Math.abs((Xr[i] as Float64Array)[j]! - (X[i] as Float64Array)[j]!)).toBeLessThan(1e-8); + expect( + Math.abs( + ((Xr[i] as Float64Array)[j] ?? 0) - + ((X[i] as Float64Array)[j] ?? 0), + ), + ).toBeLessThan(1e-8); } } }); @@ -71,7 +80,12 @@ describe("MinMaxScaler", () => { const Xr = scaler.inverse_transform(Xt); for (let i = 0; i < X.length; i++) { for (let j = 0; j < (X[i] as Float64Array).length; j++) { - expect(Math.abs((Xr[i] as Float64Array)[j]! - (X[i] as Float64Array)[j]!)).toBeLessThan(1e-8); + expect( + Math.abs( + ((Xr[i] as Float64Array)[j] ?? 0) - + ((X[i] as Float64Array)[j] ?? 0), + ), + ).toBeLessThan(1e-8); } } });