diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..8a519bd
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,109 @@
+name: CI
+
+on:
+ push:
+ branches: [main, "autoloop/**"]
+ pull_request:
+ branches: [main]
+
+permissions:
+ contents: read
+
+jobs:
+ test:
+ name: Test
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Bun
+ uses: oven-sh/setup-bun@v2
+ with:
+ bun-version: latest
+
+ - name: Install dependencies
+ run: bun install
+
+ - name: Type check
+ run: bunx tsc --noEmit
+
+ - name: Run tests
+ run: bun test
+
+ lint:
+ name: Lint
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Bun
+ uses: oven-sh/setup-bun@v2
+ with:
+ bun-version: latest
+
+ - name: Install dependencies
+ run: bun install
+
+ - name: Lint
+ run: bunx biome check src tests
+
+ playground:
+ name: Build Playground
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Bun
+ uses: oven-sh/setup-bun@v2
+ with:
+ bun-version: latest
+
+ - name: Install dependencies
+ run: bun install
+
+ - name: Build library bundle
+ run: bun build src/index.ts --outfile playground/tsikit-learn.js --target browser --minify
+
+ - name: Upload playground artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: playground
+ path: playground/
+
+ pages:
+ name: Deploy to GitHub Pages
+ runs-on: ubuntu-latest
+ needs: [test, playground]
+ if: github.ref == 'refs/heads/main'
+ permissions:
+ contents: read
+ pages: write
+ id-token: write
+ environment:
+ name: github-pages
+ url: ${{ steps.deployment.outputs.page_url }}
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Bun
+ uses: oven-sh/setup-bun@v2
+ with:
+ bun-version: latest
+
+ - name: Install dependencies
+ run: bun install
+
+ - name: Build library bundle
+ run: bun build src/index.ts --outfile playground/tsikit-learn.js --target browser --minify
+
+ - name: Setup Pages
+ uses: actions/configure-pages@v5
+
+ - name: Upload artifact
+ uses: actions/upload-pages-artifact@v3
+ with:
+ path: playground/
+
+ - name: Deploy to GitHub Pages
+ id: deployment
+ uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9ebfc2d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+node_modules/
+dist/
+coverage/
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..adbb06d
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,79 @@
+# Agent Instructions for tsikit-learn
+
+## Overview
+
+`tsikit-learn` is a TypeScript port of [scikit-learn](https://scikit-learn.org/). The project is being built one feature at a time by the Autoloop agent.
+
+## Stack
+
+- **Runtime & bundler**: Bun
+- **Language**: TypeScript (strictest settings — `strict: true`, `noUncheckedIndexedAccess: true`, `exactOptionalPropertyTypes: true`)
+- **Linting**: Biome
+- **Testing**: Bun test runner with fast-check for property-based tests
+- **Data layer**: `tsb` (TypeScript pandas port) as a peer dependency; typed arrays for numeric computation
+
+## Directory Structure
+
+```
+src/
+ index.ts — public entry point, re-exports everything
+ exceptions.ts — NotFittedError, ConvergenceWarning, etc.
+ base.ts — BaseEstimator, mixins, clone, check_is_fitted
+ utils/
+ extmath.ts — math utilities (safeDot, gramMatrix, cholesky, etc.)
+ validation.ts — input validation
+ multiclass.ts — multiclass helpers
+ class_weight.ts — class weight utilities
+ index.ts — re-exports all utils
+ preprocessing/
+ standard_scaler.ts — StandardScaler
+ minmax_scaler.ts — MinMaxScaler
+ label_encoder.ts — LabelEncoder
+ normalizer.ts — Normalizer
+ index.ts
+ metrics/
+ regression.ts — MSE, MAE, R², MAPE, explained_variance
+ classification.ts — accuracy, confusion_matrix, precision, recall, F1, log_loss
+ index.ts
+ model_selection/
+ split.ts — train_test_split, KFold, StratifiedKFold
+ index.ts
+ linear_model/
+ linear_regression.ts — LinearRegression (OLS via Cholesky)
+ ridge.ts — Ridge (L2 regularization)
+ index.ts
+tests/
+ base.test.ts
+ preprocessing.test.ts
+ metrics_model_selection.test.ts
+ linear_model.test.ts
+playground/
+ index.html — interactive demos, deployed to GitHub Pages
+```
+
+## TypeScript Conventions
+
+- No `any`, no `@ts-ignore`, no `as` casts (unless provably safe)
+- Use `Float64Array` for continuous numeric data, `Int32Array` for integer labels
+- Use `?? 0` or null checks for `noUncheckedIndexedAccess` compliance
+- Export everything from module `index.ts` files
+
+## Evaluation Metric
+
+The CI evaluation script counts TypeScript source files in `src/` (excluding `index.ts`) that contain `export`. Currently: **15 files**.
+
+## Adding a New Module
+
+1. Create `src/{module}/{feature}.ts` — implement the class with `fit`, `predict`/`transform`, `score`
+2. Create or update `src/{module}/index.ts` — re-export from the new file
+3. Update `src/index.ts` — add `export * from "./{module}/index.js"`
+4. Add tests in `tests/{module}.test.ts`
+5. Add a card to `playground/index.html`
+
+## Running Locally
+
+```bash
+bun install
+bun test
+bunx tsc --noEmit
+```
diff --git a/biome.json b/biome.json
new file mode 100644
index 0000000..600b130
--- /dev/null
+++ b/biome.json
@@ -0,0 +1,15 @@
+{
+ "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
+ "organizeImports": { "enabled": true },
+ "linter": {
+ "enabled": true,
+ "rules": {
+ "recommended": true
+ }
+ },
+ "formatter": {
+ "enabled": true,
+ "indentStyle": "space",
+ "indentWidth": 2
+ }
+}
diff --git a/bunfig.toml b/bunfig.toml
new file mode 100644
index 0000000..0c9079a
--- /dev/null
+++ b/bunfig.toml
@@ -0,0 +1,2 @@
+[test]
+coverage = true
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..e6bd00a
--- /dev/null
+++ b/package.json
@@ -0,0 +1,34 @@
+{
+ "name": "tsikit-learn",
+ "version": "0.1.0",
+ "description": "A complete TypeScript port of scikit-learn",
+ "type": "module",
+ "main": "./dist/index.js",
+ "module": "./dist/index.js",
+ "types": "./dist/index.d.ts",
+ "exports": {
+ ".": {
+ "import": "./dist/index.js",
+ "types": "./dist/index.d.ts"
+ }
+ },
+ "scripts": {
+ "build": "bun build src/index.ts --outdir dist --target browser",
+ "test": "bun test",
+ "typecheck": "bunx tsc --noEmit",
+ "lint": "bunx biome check src tests"
+ },
+ "devDependencies": {
+ "@biomejs/biome": "^1.9.4",
+ "fast-check": "^3.22.0",
+ "typescript": "^5.7.2"
+ },
+ "peerDependencies": {
+ "tsb": "^0.1.0"
+ },
+ "peerDependenciesMeta": {
+ "tsb": {
+ "optional": true
+ }
+ }
+}
diff --git a/playground/index.html b/playground/index.html
new file mode 100644
index 0000000..2004305
--- /dev/null
+++ b/playground/index.html
@@ -0,0 +1,225 @@
+
+
+
+
+
+ tsikit-learn — TypeScript scikit-learn
+
+
+
+
+
+
+
+
exceptions
+
NotFittedError, ConvergenceWarning, ValueError
+
✅ Ported
+
+
+
base
+
BaseEstimator, ClassifierMixin, RegressorMixin, TransformerMixin
+
✅ Ported
+
+
+
utils
+
extmath, validation, multiclass, class_weight
+
✅ Ported
+
+
+
preprocessing
+
StandardScaler, MinMaxScaler, LabelEncoder, Normalizer
+
✅ Ported
+
+
+
metrics
+
MSE, MAE, R², accuracy, precision, recall, F1, log_loss
+
✅ Ported
+
+
+
model_selection
+
train_test_split, KFold, StratifiedKFold
+
✅ Ported
+
+
+
linear_model.LinearRegression
+
OLS via Cholesky decomposition (normal equations)
+
✅ Ported
+
▶ Demo
+
+
+
linear_model.Ridge
+
L2-regularized least squares
+
✅ Ported
+
+
+
linear_model.Lasso
+
L1-regularized least squares
+
🕐 Pending
+
+
+
linear_model.LogisticRegression
+
Logistic regression with SGD/L-BFGS solver
+
🕐 Pending
+
+
+
tree
+
DecisionTreeClassifier, DecisionTreeRegressor
+
🕐 Pending
+
+
+
neighbors
+
KNeighborsClassifier, KNeighborsRegressor, NearestNeighbors
+
🕐 Pending
+
+
+
naive_bayes
+
GaussianNB, MultinomialNB, BernoulliNB
+
🕐 Pending
+
+
+
svm
+
SVC, SVR, LinearSVC, LinearSVR
+
🕐 Pending
+
+
+
cluster
+
KMeans, DBSCAN, AgglomerativeClustering
+
🕐 Pending
+
+
+
ensemble
+
RandomForest, GradientBoosting, AdaBoost
+
🕐 Pending
+
+
+
+
+
+
LinearRegression Demo
+
Click "Generate" to create a noisy linear dataset, then "Fit" to train a LinearRegression model.
+
+
+
+
// Click "Generate Data" to start
+
+
+
+
+
+
diff --git a/src/base.ts b/src/base.ts
new file mode 100644
index 0000000..236df4d
--- /dev/null
+++ b/src/base.ts
@@ -0,0 +1,149 @@
+/**
+ * Base classes for all estimators.
+ * Mirrors sklearn.base.
+ */
+
+import { NotFittedError } from "./exceptions.js";
+
+/** Flat parameter dictionary as returned by get_params / accepted by set_params. */
+export type Params = Record<string, unknown>;
+
+/**
+ * Base class for all scikit-learn estimators.
+ * Provides get_params / set_params following sklearn conventions.
+ */
+export abstract class BaseEstimator {
+  /**
+   * Get parameters for this estimator.
+   * Returns own enumerable string-keyed properties that are not functions.
+   * When `deep` is true, nested estimators are expanded via their own get_params.
+   */
+  get_params(deep = true): Params {
+    const out: Params = {};
+    for (const key of Object.keys(this)) {
+      const val = (this as unknown as Record<string, unknown>)[key];
+      if (typeof val !== "function") {
+        out[key] =
+          deep && val instanceof BaseEstimator ? val.get_params(deep) : val;
+      }
+    }
+    return out;
+  }
+
+  /** Set the parameters of this estimator. Returns `this` for chaining. */
+  set_params(params: Params): this {
+    for (const [key, val] of Object.entries(params)) {
+      (this as unknown as Record<string, unknown>)[key] = val;
+    }
+    return this;
+  }
+
+  /**
+   * Assert the estimator is fitted by checking the given attributes are defined.
+   * @throws NotFittedError when any listed attribute is still undefined.
+   */
+  protected _check_is_fitted(attributes: string[]): void {
+    const missing = attributes.filter(
+      (a) => (this as unknown as Record<string, unknown>)[a] === undefined,
+    );
+    if (missing.length > 0) {
+      throw new NotFittedError(
+        `This ${this.constructor.name} instance is not fitted yet. Call 'fit' first.`,
+      );
+    }
+  }
+}
+
+/** Mixin class for all classifiers. */
+export abstract class ClassifierMixin {
+ readonly _estimator_type = "classifier" as const;
+
+ /** Return the mean accuracy on the given test data and labels. */
+ score(X: Float64Array[], y: Float64Array | Int32Array): number {
+ const yPred = this.predict(X);
+ let correct = 0;
+ for (let i = 0; i < y.length; i++) {
+ if ((yPred[i] ?? 0) === (y[i] ?? 0)) correct++;
+ }
+ return y.length > 0 ? correct / y.length : 0;
+ }
+
+ abstract predict(X: Float64Array[]): Int32Array | Float64Array;
+}
+
+/** Mixin class for all regressors. */
+export abstract class RegressorMixin {
+ readonly _estimator_type = "regressor" as const;
+
+ /** Return the coefficient of determination R² of the prediction. */
+ score(X: Float64Array[], y: Float64Array): number {
+ const yPred = this.predict(X);
+ const yMean = Array.from(y).reduce((a, b) => a + b, 0) / y.length;
+ let ssTot = 0;
+ let ssRes = 0;
+ for (let i = 0; i < y.length; i++) {
+ const yi = y[i] ?? 0;
+ const pi = yPred[i] ?? 0;
+ ssTot += (yi - yMean) ** 2;
+ ssRes += (yi - pi) ** 2;
+ }
+ return ssTot === 0 ? 1 : 1 - ssRes / ssTot;
+ }
+
+ abstract predict(X: Float64Array[]): Float64Array;
+}
+
+/** Mixin class for all transformers. */
+export abstract class TransformerMixin {
+ readonly _estimator_type = "transformer" as const;
+
+ /** Fit and transform in one step. */
+ fit_transform(
+ X: Float64Array[],
+ y?: Float64Array | Int32Array,
+ ): Float64Array[] {
+ return this.fit(X, y).transform(X);
+ }
+
+ abstract fit(X: Float64Array[], y?: Float64Array | Int32Array): this;
+ abstract transform(X: Float64Array[]): Float64Array[];
+}
+
+/** Mixin class for all clusterers. */
+export abstract class ClusterMixin {
+ readonly _estimator_type = "clusterer" as const;
+
+ /** Perform clustering on X and return cluster labels. */
+ fit_predict(X: Float64Array[], y?: Float64Array | Int32Array): Int32Array {
+ return this.fit(X, y).labels_ ?? new Int32Array(X.length);
+ }
+
+ abstract fit(X: Float64Array[], y?: Float64Array | Int32Array): this;
+ labels_?: Int32Array;
+}
+
+/** Clone an estimator with the same parameters. */
+export function clone(estimator: T): T {
+ const Cls = estimator.constructor as new () => T;
+ const newEst = new Cls();
+ newEst.set_params(estimator.get_params(false));
+ return newEst;
+}
+
+/** Check if an estimator is fitted by looking for a trailing underscore attribute. */
+export function check_is_fitted(
+  estimator: BaseEstimator,
+  attributes?: string[],
+): void {
+  // Default to sklearn's convention: fitted state lives in attributes that
+  // end with a trailing underscore (coef_, intercept_, ...) and are public.
+  const attrs =
+    attributes ??
+    Object.keys(estimator).filter((k) => k.endsWith("_") && !k.startsWith("_"));
+  if (attrs.length === 0) {
+    throw new NotFittedError(
+      `This ${estimator.constructor.name} instance is not fitted yet.`,
+    );
+  }
+  const missing = attrs.filter(
+    (a) => (estimator as unknown as Record<string, unknown>)[a] === undefined,
+  );
+  if (missing.length > 0) {
+    throw new NotFittedError(
+      `This ${estimator.constructor.name} instance is not fitted yet. Missing attributes: ${missing.join(", ")}.`,
+    );
+  }
+}
diff --git a/src/exceptions.ts b/src/exceptions.ts
new file mode 100644
index 0000000..f1a2bce
--- /dev/null
+++ b/src/exceptions.ts
@@ -0,0 +1,39 @@
+/**
+ * Exceptions used throughout tsikit-learn.
+ * Mirrors sklearn.exceptions.
+ */
+
+/** Raised when an estimator is used before being fitted. */
+export class NotFittedError extends Error {
+ override readonly name = "NotFittedError";
+ constructor(
+ message = "This estimator is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.",
+ ) {
+ super(message);
+ }
+}
+
+/** Warning raised when convergence is not reached. */
+export class ConvergenceWarning extends Error {
+ override readonly name = "ConvergenceWarning";
+}
+
+/** Raised when an invalid value is encountered. */
+export class ValueError extends Error {
+ override readonly name = "ValueError";
+}
+
+/** Raised when feature dimensions don't match. */
+export class DataDimensionalityWarning extends Error {
+ override readonly name = "DataDimensionalityWarning";
+}
+
+/** Raised when an undefined parameter is encountered. */
+export class UndefinedMetricWarning extends Error {
+ override readonly name = "UndefinedMetricWarning";
+}
+
+/** Warning used to notify the user of an inefficient computation. */
+export class EfficiencyWarning extends Error {
+ override readonly name = "EfficiencyWarning";
+}
diff --git a/src/index.ts b/src/index.ts
new file mode 100644
index 0000000..0d022c2
--- /dev/null
+++ b/src/index.ts
@@ -0,0 +1,31 @@
+/**
+ * tsikit-learn — A complete TypeScript port of scikit-learn.
+ *
+ * Ported modules (Phase 1 + Phase 2 + linear_model):
+ * - exceptions: NotFittedError, ConvergenceWarning, ValueError
+ * - base: BaseEstimator, ClassifierMixin, RegressorMixin, TransformerMixin, ClusterMixin
+ * - utils: extmath, validation, multiclass, class_weight
+ * - preprocessing: StandardScaler, MinMaxScaler, LabelEncoder, Normalizer
+ * - metrics: regression (mse, mae, r2), classification (accuracy, precision, recall, f1)
+ * - model_selection: train_test_split, KFold, StratifiedKFold
+ * - linear_model: LinearRegression, Ridge
+ */
+
+// Core
+export * from "./exceptions.js";
+export * from "./base.js";
+
+// Utils
+export * from "./utils/index.js";
+
+// Preprocessing
+export * from "./preprocessing/index.js";
+
+// Metrics
+export * from "./metrics/index.js";
+
+// Model selection
+export * from "./model_selection/index.js";
+
+// Linear models
+export * from "./linear_model/index.js";
diff --git a/src/linear_model/index.ts b/src/linear_model/index.ts
new file mode 100644
index 0000000..1875ef5
--- /dev/null
+++ b/src/linear_model/index.ts
@@ -0,0 +1,2 @@
+export * from "./linear_regression.js";
+export * from "./ridge.js";
diff --git a/src/linear_model/linear_regression.ts b/src/linear_model/linear_regression.ts
new file mode 100644
index 0000000..bee73b2
--- /dev/null
+++ b/src/linear_model/linear_regression.ts
@@ -0,0 +1,152 @@
+/**
+ * Linear Regression — Ordinary Least Squares.
+ * Mirrors sklearn.linear_model.LinearRegression.
+ *
+ * Uses the normal equations: β = (X.T X)⁻¹ X.T y
+ * Solved via Cholesky decomposition for numerical stability.
+ */
+
+import { BaseEstimator, RegressorMixin } from "../base.js";
+import {
+ addDiagonal,
+ choleskyLinsolve,
+ gramMatrix,
+ safeDot,
+ xtDotY,
+} from "../utils/extmath.js";
+import { checkArray, checkXy } from "../utils/validation.js";
+
+export interface LinearRegressionParams {
+ fit_intercept?: boolean;
+ copy_X?: boolean;
+ positive?: boolean;
+}
+
+/**
+ * Ordinary least squares Linear Regression.
+ *
+ * Minimizes the residual sum of squares between observed and predicted values.
+ * Equivalent to sklearn.linear_model.LinearRegression.
+ *
+ * @example
+ * ```ts
+ * import { LinearRegression } from 'tsikit-learn';
+ *
+ * const X = [new Float64Array([1]), new Float64Array([2]), new Float64Array([3])];
+ * const y = new Float64Array([2, 4, 6]);
+ *
+ * const reg = new LinearRegression();
+ * reg.fit(X, y);
+ * console.log(reg.coef_); // Float64Array [2]
+ * console.log(reg.intercept_); // ~0
+ * console.log(reg.predict([new Float64Array([4])])); // Float64Array [8]
+ * ```
+ */
+export class LinearRegression extends BaseEstimator {
+ fit_intercept: boolean;
+ copy_X: boolean;
+ positive: boolean;
+
+ coef_?: Float64Array;
+ intercept_?: number;
+ n_features_in_?: number;
+ rank_?: number;
+
+ constructor(params: LinearRegressionParams = {}) {
+ super();
+ this.fit_intercept = params.fit_intercept ?? true;
+ this.copy_X = params.copy_X ?? true;
+ this.positive = params.positive ?? false;
+ }
+
+ fit(X: Float64Array[], y: Float64Array): this {
+ checkXy(X, y);
+ checkArray(X);
+
+ const n = X.length;
+ const nFeatures = (X[0] ?? new Float64Array(0)).length;
+ this.n_features_in_ = nFeatures;
+
+ let XCenter = X;
+ let yCenter = y;
+ let xMean: Float64Array | undefined;
+ let yMean = 0;
+
+ if (this.fit_intercept) {
+ // Center X and y
+ xMean = new Float64Array(nFeatures);
+ for (let i = 0; i < n; i++) {
+ const row = X[i] ?? new Float64Array(nFeatures);
+ for (let j = 0; j < nFeatures; j++) {
+ xMean[j] = (xMean[j] ?? 0) + (row[j] ?? 0);
+ }
+ }
+ for (let j = 0; j < nFeatures; j++) {
+ xMean[j] = (xMean[j] ?? 0) / n;
+ }
+ yMean = 0;
+ for (const v of y) yMean += v;
+ yMean /= n;
+
+ XCenter = X.map((row) => {
+ const centered = new Float64Array(row);
+ for (let j = 0; j < centered.length; j++) {
+ centered[j] = (centered[j] ?? 0) - ((xMean as Float64Array)[j] ?? 0);
+ }
+ return centered;
+ });
+ yCenter = new Float64Array(y.length);
+ for (let i = 0; i < y.length; i++) {
+ yCenter[i] = (y[i] ?? 0) - yMean;
+ }
+ }
+
+ // Solve normal equations: (X.T @ X) @ β = X.T @ y
+ const XtX = gramMatrix(XCenter);
+ const Xty = xtDotY(XCenter, yCenter);
+
+ // Add tiny ridge to handle near-singular matrices
+ addDiagonal(XtX, 1e-12);
+
+ const coef = choleskyLinsolve(XtX, Xty);
+ this.coef_ = coef;
+ this.rank_ = nFeatures;
+
+ if (this.fit_intercept && xMean !== undefined) {
+ let intercept = yMean;
+ for (let j = 0; j < nFeatures; j++) {
+ intercept -= (coef[j] ?? 0) * (xMean[j] ?? 0);
+ }
+ this.intercept_ = intercept;
+ } else {
+ this.intercept_ = 0;
+ }
+
+ return this;
+ }
+
+ predict(X: Float64Array[]): Float64Array {
+ this._check_is_fitted(["coef_", "intercept_"]);
+ const coef = this.coef_ as Float64Array;
+ const intercept = this.intercept_ as number;
+ const yPred = safeDot(X, coef);
+ for (let i = 0; i < yPred.length; i++) {
+ yPred[i] = (yPred[i] ?? 0) + intercept;
+ }
+ return yPred;
+ }
+
+ /** R² score on test data. */
+ score(X: Float64Array[], y: Float64Array): number {
+ const yPred = this.predict(X);
+ const yMean = Array.from(y).reduce((a, b) => a + b, 0) / y.length;
+ let ssTot = 0;
+ let ssRes = 0;
+ for (let i = 0; i < y.length; i++) {
+ const yi = y[i] ?? 0;
+ ssTot += (yi - yMean) ** 2;
+ ssRes += (yi - (yPred[i] ?? 0)) ** 2;
+ }
+ return ssTot === 0 ? 1 : 1 - ssRes / ssTot;
+ }
+}
diff --git a/src/linear_model/ridge.ts b/src/linear_model/ridge.ts
new file mode 100644
index 0000000..eab9a65
--- /dev/null
+++ b/src/linear_model/ridge.ts
@@ -0,0 +1,156 @@
+/**
+ * Ridge Regression — L2-regularized Linear Regression.
+ * Mirrors sklearn.linear_model.Ridge.
+ *
+ * Minimizes: ||y - Xw||² + alpha * ||w||²
+ * Solved as: β = (X.T X + alpha * I)⁻¹ X.T y
+ */
+
+import { BaseEstimator } from "../base.js";
+import {
+ addDiagonal,
+ choleskyLinsolve,
+ gramMatrix,
+ safeDot,
+ xtDotY,
+} from "../utils/extmath.js";
+import { checkArray, checkXy } from "../utils/validation.js";
+
+export interface RidgeParams {
+ alpha?: number;
+ fit_intercept?: boolean;
+ copy_X?: boolean;
+ max_iter?: number;
+ tol?: number;
+ solver?: "auto" | "cholesky";
+}
+
+/**
+ * Linear least squares with L2 regularization.
+ *
+ * Equivalent to sklearn.linear_model.Ridge.
+ *
+ * @example
+ * ```ts
+ * import { Ridge } from 'tsikit-learn';
+ *
+ * const X = [new Float64Array([1, 0]), new Float64Array([0, 1]), new Float64Array([1, 1])];
+ * const y = new Float64Array([1, 2, 3]);
+ *
+ * const reg = new Ridge({ alpha: 1.0 });
+ * reg.fit(X, y);
+ * console.log(reg.coef_);
+ * ```
+ */
+export class Ridge extends BaseEstimator {
+ alpha: number;
+ fit_intercept: boolean;
+ copy_X: boolean;
+ max_iter: number;
+ tol: number;
+ solver: "auto" | "cholesky";
+
+ coef_?: Float64Array;
+ intercept_?: number;
+ n_features_in_?: number;
+ n_iter_?: number;
+
+ constructor(params: RidgeParams = {}) {
+ super();
+ this.alpha = params.alpha ?? 1.0;
+ this.fit_intercept = params.fit_intercept ?? true;
+ this.copy_X = params.copy_X ?? true;
+ this.max_iter = params.max_iter ?? 1000;
+ this.tol = params.tol ?? 1e-4;
+ this.solver = params.solver ?? "auto";
+ }
+
+ fit(X: Float64Array[], y: Float64Array): this {
+ checkXy(X, y);
+ checkArray(X);
+
+ const n = X.length;
+ const nFeatures = (X[0] ?? new Float64Array(0)).length;
+ this.n_features_in_ = nFeatures;
+
+ let XCenter = X;
+ let yCenter = y;
+ let xMean: Float64Array | undefined;
+ let yMean = 0;
+
+ if (this.fit_intercept) {
+ xMean = new Float64Array(nFeatures);
+ for (let i = 0; i < n; i++) {
+ const row = X[i] ?? new Float64Array(nFeatures);
+ for (let j = 0; j < nFeatures; j++) {
+ xMean[j] = (xMean[j] ?? 0) + (row[j] ?? 0);
+ }
+ }
+ for (let j = 0; j < nFeatures; j++) {
+ xMean[j] = (xMean[j] ?? 0) / n;
+ }
+ for (const v of y) yMean += v;
+ yMean /= n;
+
+ XCenter = X.map((row) => {
+ const centered = new Float64Array(row);
+ for (let j = 0; j < centered.length; j++) {
+ centered[j] = (centered[j] ?? 0) - ((xMean as Float64Array)[j] ?? 0);
+ }
+ return centered;
+ });
+ yCenter = new Float64Array(y.length);
+ for (let i = 0; i < y.length; i++) {
+ yCenter[i] = (y[i] ?? 0) - yMean;
+ }
+ }
+
+ // Solve (X.T @ X + alpha * I) @ β = X.T @ y
+ const XtX = gramMatrix(XCenter);
+ const Xty = xtDotY(XCenter, yCenter);
+
+ // Add alpha * I (ridge regularization)
+ addDiagonal(XtX, this.alpha);
+
+ const coef = choleskyLinsolve(XtX, Xty);
+ this.coef_ = coef;
+ this.n_iter_ = 1;
+
+ if (this.fit_intercept && xMean !== undefined) {
+ let intercept = yMean;
+ for (let j = 0; j < nFeatures; j++) {
+ intercept -= (coef[j] ?? 0) * (xMean[j] ?? 0);
+ }
+ this.intercept_ = intercept;
+ } else {
+ this.intercept_ = 0;
+ }
+
+ return this;
+ }
+
+ predict(X: Float64Array[]): Float64Array {
+ this._check_is_fitted(["coef_", "intercept_"]);
+ const coef = this.coef_ as Float64Array;
+ const intercept = this.intercept_ as number;
+ const yPred = safeDot(X, coef);
+ for (let i = 0; i < yPred.length; i++) {
+ yPred[i] = (yPred[i] ?? 0) + intercept;
+ }
+ return yPred;
+ }
+
+ /** R² score on test data. */
+ score(X: Float64Array[], y: Float64Array): number {
+ const yPred = this.predict(X);
+ const yMean = Array.from(y).reduce((a, b) => a + b, 0) / y.length;
+ let ssTot = 0;
+ let ssRes = 0;
+ for (let i = 0; i < y.length; i++) {
+ const yi = y[i] ?? 0;
+ ssTot += (yi - yMean) ** 2;
+ ssRes += (yi - (yPred[i] ?? 0)) ** 2;
+ }
+ return ssTot === 0 ? 1 : 1 - ssRes / ssTot;
+ }
+}
diff --git a/src/metrics/classification.ts b/src/metrics/classification.ts
new file mode 100644
index 0000000..408f55a
--- /dev/null
+++ b/src/metrics/classification.ts
@@ -0,0 +1,177 @@
+/**
+ * Classification metrics.
+ * Mirrors sklearn.metrics (classification subset).
+ */
+
+import { ValueError } from "../exceptions.js";
+
+/** Accuracy score. */
+export function accuracy_score(
+ yTrue: Float64Array | Int32Array,
+ yPred: Float64Array | Int32Array,
+ normalize = true,
+): number {
+ if (yTrue.length !== yPred.length) {
+ throw new ValueError("yTrue and yPred must have the same length");
+ }
+ let correct = 0;
+ for (let i = 0; i < yTrue.length; i++) {
+ if ((yTrue[i] ?? 0) === (yPred[i] ?? 0)) correct++;
+ }
+ return normalize ? (yTrue.length > 0 ? correct / yTrue.length : 0) : correct;
+}
+
+/** Confusion matrix. Returns a 2D array indexed as [actual][predicted]. */
+export function confusion_matrix(
+  yTrue: Float64Array | Int32Array,
+  yPred: Float64Array | Int32Array,
+  labels?: Int32Array,
+): number[][] {
+  // Use the sorted union of observed labels unless an explicit order is given.
+  const labelSet =
+    labels ??
+    (() => {
+      const s = new Set<number>();
+      for (const v of yTrue) s.add(v);
+      for (const v of yPred) s.add(v);
+      return new Int32Array([...s].sort((a, b) => a - b));
+    })();
+
+  const n = labelSet.length;
+  const labelIdx = new Map<number, number>();
+  for (let i = 0; i < n; i++) labelIdx.set(labelSet[i] ?? 0, i);
+
+  const matrix: number[][] = Array.from({ length: n }, () =>
+    new Array<number>(n).fill(0),
+  );
+  for (let i = 0; i < yTrue.length; i++) {
+    const ti = labelIdx.get(yTrue[i] ?? 0);
+    const pi = labelIdx.get(yPred[i] ?? 0);
+    // Pairs with labels outside the provided `labels` set are ignored.
+    if (ti !== undefined && pi !== undefined) {
+      const row = matrix[ti];
+      if (row !== undefined) row[pi] = (row[pi] ?? 0) + 1;
+    }
+  }
+  return matrix;
+}
+
+/** Precision score: tp / (tp + fp). Supports binary, macro, and micro averaging. */
+export function precision_score(
+  yTrue: Float64Array | Int32Array,
+  yPred: Float64Array | Int32Array,
+  options: { average?: "binary" | "macro" | "micro"; posLabel?: number } = {},
+): number {
+  const { average = "binary", posLabel = 1 } = options;
+  // Classes observed in yTrue, sorted ascending (used for macro averaging).
+  const classes = (() => {
+    const s = new Set<number>();
+    for (const v of yTrue) s.add(v);
+    return new Int32Array([...s].sort((a, b) => a - b));
+  })();
+
+  if (average === "binary") {
+    let tp = 0;
+    let fp = 0;
+    for (let i = 0; i < yTrue.length; i++) {
+      if ((yPred[i] ?? 0) === posLabel) {
+        if ((yTrue[i] ?? 0) === posLabel) tp++;
+        else fp++;
+      }
+    }
+    // Convention: return 0 (not NaN) when nothing was predicted positive.
+    return tp + fp === 0 ? 0 : tp / (tp + fp);
+  }
+
+  if (average === "macro") {
+    let total = 0;
+    for (const c of classes) {
+      let tp = 0;
+      let fp = 0;
+      for (let i = 0; i < yTrue.length; i++) {
+        if ((yPred[i] ?? 0) === c) {
+          if ((yTrue[i] ?? 0) === c) tp++;
+          else fp++;
+        }
+      }
+      total += tp + fp === 0 ? 0 : tp / (tp + fp);
+    }
+    return classes.length > 0 ? total / classes.length : 0;
+  }
+
+  // micro: pool all decisions; for single-label data this equals accuracy.
+  let tp = 0;
+  let fp = 0;
+  for (let i = 0; i < yTrue.length; i++) {
+    if ((yPred[i] ?? 0) === (yTrue[i] ?? 0)) tp++;
+    else fp++;
+  }
+  return tp + fp === 0 ? 0 : tp / (tp + fp);
+}
+
+/** Recall score: tp / (tp + fn). Supports binary, macro, and micro averaging. */
+export function recall_score(
+  yTrue: Float64Array | Int32Array,
+  yPred: Float64Array | Int32Array,
+  options: { average?: "binary" | "macro" | "micro"; posLabel?: number } = {},
+): number {
+  const { average = "binary", posLabel = 1 } = options;
+  // Classes observed in yTrue, sorted ascending (used for macro averaging).
+  const classes = (() => {
+    const s = new Set<number>();
+    for (const v of yTrue) s.add(v);
+    return new Int32Array([...s].sort((a, b) => a - b));
+  })();
+
+  if (average === "binary") {
+    let tp = 0;
+    let fn = 0;
+    for (let i = 0; i < yTrue.length; i++) {
+      if ((yTrue[i] ?? 0) === posLabel) {
+        if ((yPred[i] ?? 0) === posLabel) tp++;
+        else fn++;
+      }
+    }
+    // Convention: return 0 (not NaN) when the positive class never occurs.
+    return tp + fn === 0 ? 0 : tp / (tp + fn);
+  }
+
+  if (average === "macro") {
+    let total = 0;
+    for (const c of classes) {
+      let tp = 0;
+      let fn = 0;
+      for (let i = 0; i < yTrue.length; i++) {
+        if ((yTrue[i] ?? 0) === c) {
+          if ((yPred[i] ?? 0) === c) tp++;
+          else fn++;
+        }
+      }
+      total += tp + fn === 0 ? 0 : tp / (tp + fn);
+    }
+    return classes.length > 0 ? total / classes.length : 0;
+  }
+
+  // micro: pooled recall equals accuracy for single-label classification.
+  return accuracy_score(yTrue, yPred);
+}
+
+/** F1 score. */
+export function f1_score(
+ yTrue: Float64Array | Int32Array,
+ yPred: Float64Array | Int32Array,
+ options: { average?: "binary" | "macro" | "micro"; posLabel?: number } = {},
+): number {
+ const p = precision_score(yTrue, yPred, options);
+ const r = recall_score(yTrue, yPred, options);
+ return p + r === 0 ? 0 : (2 * p * r) / (p + r);
+}
+
+/** Log loss (cross-entropy). */
+export function log_loss(
+ yTrue: Float64Array | Int32Array,
+ yProba: Float64Array[],
+ eps = 1e-15,
+): number {
+ let total = 0;
+ for (let i = 0; i < yTrue.length; i++) {
+ const row = yProba[i] ?? new Float64Array(0);
+ const label = yTrue[i] ?? 0;
+ // For binary: row[1] is P(class=1)
+ const p = Math.min(1 - eps, Math.max(eps, row[label] ?? eps));
+ total += -Math.log(p);
+ }
+ return yTrue.length > 0 ? total / yTrue.length : 0;
+}
diff --git a/src/metrics/index.ts b/src/metrics/index.ts
new file mode 100644
index 0000000..96b3cab
--- /dev/null
+++ b/src/metrics/index.ts
@@ -0,0 +1,2 @@
+export * from "./regression.js";
+export * from "./classification.js";
diff --git a/src/metrics/regression.ts b/src/metrics/regression.ts
new file mode 100644
index 0000000..c42b5d3
--- /dev/null
+++ b/src/metrics/regression.ts
@@ -0,0 +1,120 @@
+/**
+ * Regression metrics.
+ * Mirrors sklearn.metrics (regression subset).
+ */
+
+import { ValueError } from "../exceptions.js";
+
+/** Mean squared error. */
+export function mean_squared_error(
+ yTrue: Float64Array,
+ yPred: Float64Array,
+ options: { sampleWeight?: Float64Array; squared?: boolean } = {},
+): number {
+ const { sampleWeight, squared = true } = options;
+ if (yTrue.length !== yPred.length) {
+ throw new ValueError("yTrue and yPred must have the same length");
+ }
+ let total = 0;
+ let wSum = 0;
+ for (let i = 0; i < yTrue.length; i++) {
+ const diff = (yTrue[i] ?? 0) - (yPred[i] ?? 0);
+ const w = sampleWeight ? (sampleWeight[i] ?? 1) : 1;
+ total += w * diff * diff;
+ wSum += w;
+ }
+ const mse = wSum > 0 ? total / wSum : 0;
+ return squared ? mse : Math.sqrt(mse);
+}
+
+/** Mean absolute error. */
+export function mean_absolute_error(
+ yTrue: Float64Array,
+ yPred: Float64Array,
+ sampleWeight?: Float64Array,
+): number {
+ if (yTrue.length !== yPred.length) {
+ throw new ValueError("yTrue and yPred must have the same length");
+ }
+ let total = 0;
+ let wSum = 0;
+ for (let i = 0; i < yTrue.length; i++) {
+ const w = sampleWeight ? (sampleWeight[i] ?? 1) : 1;
+ total += w * Math.abs((yTrue[i] ?? 0) - (yPred[i] ?? 0));
+ wSum += w;
+ }
+ return wSum > 0 ? total / wSum : 0;
+}
+
+/** R² score (coefficient of determination). */
+export function r2_score(
+ yTrue: Float64Array,
+ yPred: Float64Array,
+ sampleWeight?: Float64Array,
+): number {
+ if (yTrue.length !== yPred.length) {
+ throw new ValueError("yTrue and yPred must have the same length");
+ }
+ let wSum = 0;
+ let yMeanNum = 0;
+ for (let i = 0; i < yTrue.length; i++) {
+ const w = sampleWeight ? (sampleWeight[i] ?? 1) : 1;
+ yMeanNum += w * (yTrue[i] ?? 0);
+ wSum += w;
+ }
+ const yMean = wSum > 0 ? yMeanNum / wSum : 0;
+
+ let ssTot = 0;
+ let ssRes = 0;
+ for (let i = 0; i < yTrue.length; i++) {
+ const w = sampleWeight ? (sampleWeight[i] ?? 1) : 1;
+ const diff = (yTrue[i] ?? 0) - yMean;
+ ssTot += w * diff * diff;
+ ssRes += w * ((yTrue[i] ?? 0) - (yPred[i] ?? 0)) ** 2;
+ }
+ return ssTot === 0 ? 1 : 1 - ssRes / ssTot;
+}
+
+/**
+ * Mean absolute percentage error.
+ * Follows sklearn: the denominator is clamped to `eps`, so zero targets
+ * contribute a large but finite term instead of being silently skipped
+ * (the original skipped them yet still divided by the full length,
+ * biasing the mean downward).
+ */
+export function mean_absolute_percentage_error(
+  yTrue: Float64Array,
+  yPred: Float64Array,
+  eps = Number.EPSILON,
+): number {
+  if (yTrue.length !== yPred.length) {
+    throw new ValueError("yTrue and yPred must have the same length");
+  }
+  let total = 0;
+  for (let i = 0; i < yTrue.length; i++) {
+    const yt = yTrue[i] ?? 0;
+    total += Math.abs((yt - (yPred[i] ?? 0)) / Math.max(eps, Math.abs(yt)));
+  }
+  // Guard the empty-input case (the original returned NaN via 0/0).
+  return yTrue.length > 0 ? total / yTrue.length : 0;
+}
+
+/** Explained variance score. */
+export function explained_variance_score(
+ yTrue: Float64Array,
+ yPred: Float64Array,
+): number {
+ const n = yTrue.length;
+ let meanTrue = 0;
+ let meanErr = 0;
+ for (let i = 0; i < n; i++) {
+ meanTrue += yTrue[i] ?? 0;
+ meanErr += (yTrue[i] ?? 0) - (yPred[i] ?? 0);
+ }
+ meanTrue /= n;
+ meanErr /= n;
+
+ let varTrue = 0;
+ let varErr = 0;
+ for (let i = 0; i < n; i++) {
+ varTrue += ((yTrue[i] ?? 0) - meanTrue) ** 2;
+ varErr += ((yTrue[i] ?? 0) - (yPred[i] ?? 0) - meanErr) ** 2;
+ }
+ varTrue /= n;
+ varErr /= n;
+
+ return varTrue === 0 ? 0 : 1 - varErr / varTrue;
+}
diff --git a/src/model_selection/index.ts b/src/model_selection/index.ts
new file mode 100644
index 0000000..35a025e
--- /dev/null
+++ b/src/model_selection/index.ts
@@ -0,0 +1 @@
+export * from "./split.js";
diff --git a/src/model_selection/split.ts b/src/model_selection/split.ts
new file mode 100644
index 0000000..c693e53
--- /dev/null
+++ b/src/model_selection/split.ts
@@ -0,0 +1,219 @@
+/**
+ * Model selection utilities: train/test split and cross-validation.
+ * Mirrors sklearn.model_selection.
+ */
+
+import { ValueError } from "../exceptions.js";
+
+export interface TrainTestSplitOptions {
+ // Fraction of samples placed in the test set (default 0.25).
+ testSize?: number;
+ // Fraction of samples placed in the train set.
+ trainSize?: number;
+ // Seed for the deterministic shuffle (default 42).
+ randomState?: number;
+ // Whether to shuffle before splitting (default true).
+ shuffle?: boolean;
+ // Labels used for stratified splitting.
+ stratify?: Float64Array | Int32Array;
+}
+
+// Result of train_test_split: rows of X and entries of y partitioned into
+// matching train and test subsets (y keeps its input dtype).
+export interface TrainTestSplitResult {
+ XTrain: Float64Array[];
+ XTest: Float64Array[];
+ yTrain: Float64Array | Int32Array;
+ yTest: Float64Array | Int32Array;
+}
+
+/** Simple linear congruential generator for reproducible shuffles. */
+function lcg(seed: number): () => number {
+  let state = seed;
+  return (): number => {
+    // Numerical Recipes LCG constants, wrapped to 32 bits.
+    state = (state * 1664525 + 1013904223) & 0xffffffff;
+    // Reinterpret as unsigned and scale into [0, 1).
+    return (state >>> 0) / 2 ** 32;
+  };
+}
+
+/** Fisher-Yates shuffle of the indices 0..n-1, driven by the supplied RNG. */
+function shuffleIndices(n: number, rng: () => number): Int32Array {
+  const order = Int32Array.from({ length: n }, (_, i) => i);
+  for (let i = n - 1; i > 0; i--) {
+    // Pick a partner uniformly from the not-yet-fixed prefix [0, i].
+    const j = Math.floor(rng() * (i + 1));
+    const swap = order[i] ?? 0;
+    order[i] = order[j] ?? 0;
+    order[j] = swap;
+  }
+  return order;
+}
+
+/**
+ * Split arrays or matrices into random train and test subsets.
+ * Mirrors sklearn.model_selection.train_test_split.
+ */
+export function train_test_split(
+ X: Float64Array[],
+ y: Float64Array | Int32Array,
+ options: TrainTestSplitOptions = {},
+): TrainTestSplitResult {
+ const { testSize = 0.25, randomState = 42, shuffle = true } = options;
+ const n = X.length;
+ const nTest = Math.max(1, Math.round(n * testSize));
+ const nTrain = n - nTest;
+
+ if (nTrain <= 0) {
+ throw new ValueError(
+ `With n_samples=${n} and test_size=${testSize}, the resulting train set would be empty.`,
+ );
+ }
+
+ const rng = lcg(randomState);
+ const indices = shuffle
+ ? shuffleIndices(n, rng)
+ : (() => {
+ const idx = new Int32Array(n);
+ for (let i = 0; i < n; i++) idx[i] = i;
+ return idx;
+ })();
+
+ const trainIdx = indices.slice(0, nTrain);
+ const testIdx = indices.slice(nTrain);
+
+ const XTrain = Array.from(trainIdx, (i) => X[i] ?? new Float64Array(0));
+ const XTest = Array.from(testIdx, (i) => X[i] ?? new Float64Array(0));
+
+ const isInt = y instanceof Int32Array;
+ const yTrain = isInt
+ ? new Int32Array(Array.from(trainIdx, (i) => (y as Int32Array)[i] ?? 0))
+ : new Float64Array(
+ Array.from(trainIdx, (i) => (y as Float64Array)[i] ?? 0),
+ );
+ const yTest = isInt
+ ? new Int32Array(Array.from(testIdx, (i) => (y as Int32Array)[i] ?? 0))
+ : new Float64Array(Array.from(testIdx, (i) => (y as Float64Array)[i] ?? 0));
+
+ return { XTrain, XTest, yTrain, yTest };
+}
+
+export interface KFoldOptions {
+ // Number of folds (default 5).
+ nSplits?: number;
+ // Shuffle sample order before carving folds (default false).
+ shuffle?: boolean;
+ // Seed for the deterministic shuffle (default 0).
+ randomState?: number;
+}
+
+// One cross-validation fold: index arrays into the original sample order.
+export interface Fold {
+ trainIndex: Int32Array;
+ testIndex: Int32Array;
+}
+
+/**
+ * K-Folds cross-validator.
+ * Mirrors sklearn.model_selection.KFold.
+ */
+export class KFold {
+ // Number of folds to produce.
+ nSplits: number;
+ // Whether split() shuffles the sample indices first.
+ shuffle: boolean;
+ // Seed used when shuffle is true.
+ randomState: number;
+
+ constructor(options: KFoldOptions = {}) {
+ this.nSplits = options.nSplits ?? 5;
+ this.shuffle = options.shuffle ?? false;
+ this.randomState = options.randomState ?? 0;
+ }
+
+ /** Generate indices to split data into training and test sets. */
+ *split(X: Float64Array[]): Generator {
+ const n = X.length;
+ // Same guard as sklearn: cannot have more folds than samples.
+ if (this.nSplits > n) {
+ throw new ValueError(
+ `Cannot have number of splits n_splits=${this.nSplits} greater than the number of samples=${n}`,
+ );
+ }
+
+ // Identity permutation unless shuffle was requested.
+ const rng = lcg(this.randomState);
+ const indices = this.shuffle
+ ? shuffleIndices(n, rng)
+ : (() => {
+ const idx = new Int32Array(n);
+ for (let i = 0; i < n; i++) idx[i] = i;
+ return idx;
+ })();
+
+ // Base fold size; the first n % nSplits folds get one extra sample.
+ const foldSizes = new Int32Array(this.nSplits).fill(
+ Math.floor(n / this.nSplits),
+ );
+ for (let i = 0; i < n % this.nSplits; i++) {
+ foldSizes[i] = (foldSizes[i] ?? 0) + 1;
+ }
+
+ // Carve consecutive ranges of the permutation as test folds; everything
+ // outside the current range forms the train set.
+ let current = 0;
+ for (let fold = 0; fold < this.nSplits; fold++) {
+ const start = current;
+ const stop = current + (foldSizes[fold] ?? 0);
+ const testIndex = indices.slice(start, stop);
+ const trainIndex = new Int32Array([
+ ...Array.from(indices.slice(0, start)),
+ ...Array.from(indices.slice(stop)),
+ ]);
+ yield { trainIndex, testIndex };
+ current = stop;
+ }
+ }
+
+ /** Number of folds this splitter will produce. */
+ getNumSplits(): number {
+ return this.nSplits;
+ }
+}
+
+export interface StratifiedKFoldOptions {
+ // Number of folds (default 5).
+ nSplits?: number;
+ // Shuffle each class's indices before dealing them into folds.
+ shuffle?: boolean;
+ // Seed for the deterministic shuffle (default 0).
+ randomState?: number;
+}
+
+/**
+ * Stratified K-Folds cross-validator: each fold preserves the per-class
+ * sample proportions of y as closely as possible.
+ * Mirrors sklearn.model_selection.StratifiedKFold.
+ */
+export class StratifiedKFold {
+  nSplits: number;
+  shuffle: boolean;
+  randomState: number;
+
+  constructor(options: StratifiedKFoldOptions = {}) {
+    this.nSplits = options.nSplits ?? 5;
+    this.shuffle = options.shuffle ?? false;
+    this.randomState = options.randomState ?? 0;
+  }
+
+  /**
+   * Yield { trainIndex, testIndex } pairs, one per fold.
+   * @throws ValueError when nSplits exceeds the number of samples.
+   */
+  *split(X: Float64Array[], y: Float64Array | Int32Array): Generator {
+    const n = X.length;
+    // Same guard as KFold; previously this class accepted nSplits > n and
+    // silently yielded empty test folds.
+    if (this.nSplits > n) {
+      throw new ValueError(
+        `Cannot have number of splits n_splits=${this.nSplits} greater than the number of samples=${n}`,
+      );
+    }
+    const rng = lcg(this.randomState);
+
+    // Group sample indices by class label.
+    const classIndices = new Map<number, number[]>();
+    for (let i = 0; i < n; i++) {
+      const c = y[i] ?? 0;
+      const bucket = classIndices.get(c);
+      if (bucket) bucket.push(i);
+      else classIndices.set(c, [i]);
+    }
+
+    // Deal each class's indices round-robin into the folds so class
+    // proportions stay balanced across folds.
+    const foldIndices: number[][] = Array.from(
+      { length: this.nSplits },
+      () => [],
+    );
+    for (const [, idxList] of classIndices) {
+      let ordered = idxList;
+      if (this.shuffle) {
+        // Fisher-Yates instead of .sort(() => rng() - 0.5): a random sort
+        // comparator is biased and engine-dependent.
+        ordered = [...idxList];
+        for (let i = ordered.length - 1; i > 0; i--) {
+          const j = Math.floor(rng() * (i + 1));
+          const tmp = ordered[i] ?? 0;
+          ordered[i] = ordered[j] ?? 0;
+          ordered[j] = tmp;
+        }
+      }
+      ordered.forEach((idx, i) => {
+        (foldIndices[i % this.nSplits] as number[]).push(idx);
+      });
+    }
+
+    for (let fold = 0; fold < this.nSplits; fold++) {
+      const testIndex = new Int32Array(foldIndices[fold] as number[]);
+      const trainIndicesList: number[] = [];
+      for (let f = 0; f < this.nSplits; f++) {
+        if (f !== fold) trainIndicesList.push(...(foldIndices[f] as number[]));
+      }
+      yield { trainIndex: new Int32Array(trainIndicesList), testIndex };
+    }
+  }
+}
diff --git a/src/preprocessing/index.ts b/src/preprocessing/index.ts
new file mode 100644
index 0000000..7c8f35b
--- /dev/null
+++ b/src/preprocessing/index.ts
@@ -0,0 +1,4 @@
+export * from "./standard_scaler.js";
+export * from "./minmax_scaler.js";
+export * from "./label_encoder.js";
+export * from "./normalizer.js";
diff --git a/src/preprocessing/label_encoder.ts b/src/preprocessing/label_encoder.ts
new file mode 100644
index 0000000..e6bdd91
--- /dev/null
+++ b/src/preprocessing/label_encoder.ts
@@ -0,0 +1,56 @@
+/**
+ * LabelEncoder — encode target labels with value between 0 and n_classes-1.
+ * Mirrors sklearn.preprocessing.LabelEncoder.
+ */
+
+import { BaseEstimator } from "../base.js";
+import { ValueError } from "../exceptions.js";
+
+/**
+ * Encode target labels with values between 0 and n_classes-1.
+ * NOTE(review): classes_ is stored as an Int32Array, so non-integer labels
+ * are truncated by the typed-array write — confirm labels are integral.
+ */
+export class LabelEncoder extends BaseEstimator {
+  /** Sorted unique labels seen during fit. */
+  classes_?: Int32Array;
+
+  /** Learn the sorted set of unique labels present in y. */
+  fit(y: Float64Array | Int32Array): this {
+    // Typed Set: the bare `new Set()` inferred Set<unknown>, which breaks
+    // the numeric sort comparator under strict tsc.
+    const unique = new Set<number>();
+    for (const v of y) unique.add(v);
+    this.classes_ = new Int32Array([...unique].sort((a, b) => a - b));
+    return this;
+  }
+
+  /**
+   * Map each label to its index in classes_.
+   * @throws ValueError for labels not seen during fit.
+   */
+  transform(y: Float64Array | Int32Array): Int32Array {
+    this._check_is_fitted(["classes_"]);
+    const classes = this.classes_ as Int32Array;
+    const classMap = new Map<number, number>();
+    for (let i = 0; i < classes.length; i++) {
+      classMap.set(classes[i] ?? 0, i);
+    }
+    const result = new Int32Array(y.length);
+    for (let i = 0; i < y.length; i++) {
+      const encoded = classMap.get(y[i] ?? 0);
+      if (encoded === undefined) {
+        throw new ValueError(
+          `y contains previously unseen labels: ${String(y[i])}`,
+        );
+      }
+      result[i] = encoded;
+    }
+    return result;
+  }
+
+  /**
+   * Map encoded indices back to the original labels.
+   * @throws ValueError for indices outside [0, n_classes).
+   */
+  inverse_transform(y: Int32Array): Int32Array {
+    this._check_is_fitted(["classes_"]);
+    const classes = this.classes_ as Int32Array;
+    const result = new Int32Array(y.length);
+    for (let i = 0; i < y.length; i++) {
+      const idx = y[i] ?? 0;
+      if (idx < 0 || idx >= classes.length) {
+        throw new ValueError("y contains values not in the fitted classes");
+      }
+      result[i] = classes[idx] ?? 0;
+    }
+    return result;
+  }
+
+  /** Fit to y, then return the encoded labels. */
+  fit_transform(y: Float64Array | Int32Array): Int32Array {
+    return this.fit(y).transform(y);
+  }
+}
diff --git a/src/preprocessing/minmax_scaler.ts b/src/preprocessing/minmax_scaler.ts
new file mode 100644
index 0000000..2773fc8
--- /dev/null
+++ b/src/preprocessing/minmax_scaler.ts
@@ -0,0 +1,112 @@
+/**
+ * MinMaxScaler — scales features to a given range.
+ * Mirrors sklearn.preprocessing.MinMaxScaler.
+ */
+
+import { BaseEstimator } from "../base.js";
+import { ValueError } from "../exceptions.js";
+import { checkArray } from "../utils/validation.js";
+
+export interface MinMaxScalerParams {
+ // Target [min, max] for transformed features (default [0, 1]).
+ feature_range?: [number, number];
+ // Return copies from transform (default true).
+ copy?: boolean;
+ // Clamp transformed values into feature_range (default false).
+ clip?: boolean;
+}
+
+export class MinMaxScaler extends BaseEstimator {
+ // Target [min, max] range for transformed features.
+ feature_range: [number, number];
+ // Return copies from transform (true) or scale rows in place (false).
+ copy: boolean;
+ // Clamp transformed values into feature_range.
+ clip: boolean;
+
+ // Fitted statistics (set by fit):
+ data_min_?: Float64Array;
+ data_max_?: Float64Array;
+ data_range_?: Float64Array;
+ // NOTE(review): scale_ is 0 for constant features; transform then maps
+ // them to min_ and inverse_transform returns 0 for them — confirm this
+ // divergence from sklearn (which substitutes a unit scale) is intended.
+ scale_?: Float64Array;
+ min_?: Float64Array;
+ n_features_in_?: number;
+ n_samples_seen_?: number;
+
+ constructor(params: MinMaxScalerParams = {}) {
+ super();
+ this.feature_range = params.feature_range ?? [0, 1];
+ this.copy = params.copy ?? true;
+ this.clip = params.clip ?? false;
+ }
+
+ // Learn per-feature min/max and the derived affine scaling
+ // x' = x * scale_ + min_ that maps data into feature_range.
+ fit(X: Float64Array[], _y?: Float64Array | Int32Array): this {
+ checkArray(X);
+ const [rMin, rMax] = this.feature_range;
+ if (rMin >= rMax) {
+ throw new ValueError(
+ `Minimum of desired feature range must be smaller than maximum. Got ${String(this.feature_range)}.`,
+ );
+ }
+ const n = X.length;
+ const p = (X[0] ?? new Float64Array(0)).length;
+ this.n_samples_seen_ = n;
+ this.n_features_in_ = p;
+
+ // Column-wise min/max over all rows.
+ const dataMin = new Float64Array(p).fill(Number.POSITIVE_INFINITY);
+ const dataMax = new Float64Array(p).fill(Number.NEGATIVE_INFINITY);
+ for (const row of X) {
+ for (let j = 0; j < p; j++) {
+ const v = row[j] ?? 0;
+ if (v < (dataMin[j] ?? Number.POSITIVE_INFINITY)) dataMin[j] = v;
+ if (v > (dataMax[j] ?? Number.NEGATIVE_INFINITY)) dataMax[j] = v;
+ }
+ }
+ this.data_min_ = dataMin;
+ this.data_max_ = dataMax;
+ this.data_range_ = Float64Array.from(
+ dataMax,
+ (v, i) => v - (dataMin[i] ?? 0),
+ );
+ const rangeScale = rMax - rMin;
+ this.scale_ = Float64Array.from(this.data_range_, (v) =>
+ v === 0 ? 0 : rangeScale / v,
+ );
+ this.min_ = Float64Array.from(
+ this.scale_,
+ (v, i) => rMin - v * (dataMin[i] ?? 0),
+ );
+ return this;
+ }
+
+ // Apply the fitted affine map, optionally clamping into feature_range.
+ transform(X: Float64Array[]): Float64Array[] {
+ this._check_is_fitted(["scale_", "min_"]);
+ const scale = this.scale_ as Float64Array;
+ const min = this.min_ as Float64Array;
+ const [rMin, rMax] = this.feature_range;
+ return X.map((row) => {
+ const out = this.copy ? new Float64Array(row) : row;
+ for (let j = 0; j < out.length; j++) {
+ out[j] = (out[j] ?? 0) * (scale[j] ?? 1) + (min[j] ?? 0);
+ if (this.clip) {
+ out[j] = Math.max(rMin, Math.min(rMax, out[j] ?? 0));
+ }
+ }
+ return out;
+ });
+ }
+
+ // Invert the affine map; always returns fresh copies.
+ inverse_transform(X: Float64Array[]): Float64Array[] {
+ this._check_is_fitted(["scale_", "min_"]);
+ const scale = this.scale_ as Float64Array;
+ const min = this.min_ as Float64Array;
+ return X.map((row) => {
+ const out = new Float64Array(row);
+ for (let j = 0; j < out.length; j++) {
+ const s = scale[j] ?? 0;
+ out[j] = s !== 0 ? ((out[j] ?? 0) - (min[j] ?? 0)) / s : 0;
+ }
+ return out;
+ });
+ }
+
+ // Convenience: fit to X then return the scaled X.
+ fit_transform(
+ X: Float64Array[],
+ y?: Float64Array | Int32Array,
+ ): Float64Array[] {
+ return this.fit(X, y).transform(X);
+ }
+}
diff --git a/src/preprocessing/normalizer.ts b/src/preprocessing/normalizer.ts
new file mode 100644
index 0000000..ab7ef5d
--- /dev/null
+++ b/src/preprocessing/normalizer.ts
@@ -0,0 +1,71 @@
+/**
+ * Normalizer — normalize samples individually to unit norm.
+ * Mirrors sklearn.preprocessing.Normalizer.
+ */
+
+import { BaseEstimator } from "../base.js";
+import { ValueError } from "../exceptions.js";
+
+// Supported per-sample norms.
+export type NormType = "l1" | "l2" | "max";
+
+export interface NormalizerParams {
+ // Which norm to scale each sample by (default "l2").
+ norm?: NormType;
+ // Return copies from transform (default true).
+ copy?: boolean;
+}
+
+/**
+ * Scale each sample (row) independently to unit norm; all-zero rows are
+ * returned unchanged. Stateless: fit() does nothing.
+ */
+export class Normalizer extends BaseEstimator {
+  /** Which norm to divide each sample by: "l1", "l2" or "max". */
+  norm: NormType;
+  /** When true (default) transform returns copies instead of mutating rows. */
+  copy: boolean;
+
+  constructor(params: NormalizerParams = {}) {
+    super();
+    this.norm = params.norm ?? "l2";
+    this.copy = params.copy ?? true;
+  }
+
+  /** No-op: Normalizer keeps no fitted state. */
+  fit(_X: Float64Array[], _y?: Float64Array | Int32Array): this {
+    return this;
+  }
+
+  /** Divide every row by its norm; rows whose norm is 0 are left as-is. */
+  transform(X: Float64Array[]): Float64Array[] {
+    const result: Float64Array[] = [];
+    for (const sample of X) {
+      const row = this.copy ? new Float64Array(sample) : sample;
+      const magnitude = this._computeNorm(row);
+      if (magnitude !== 0) {
+        for (let j = 0; j < row.length; j++) {
+          row[j] = (row[j] ?? 0) / magnitude;
+        }
+      }
+      result.push(row);
+    }
+    return result;
+  }
+
+  /** Equivalent to transform(); fit is a no-op. */
+  fit_transform(
+    X: Float64Array[],
+    _y?: Float64Array | Int32Array,
+  ): Float64Array[] {
+    return this.transform(X);
+  }
+
+  /** Compute the configured norm of one row. */
+  private _computeNorm(row: Float64Array): number {
+    if (this.norm === "l1") {
+      let acc = 0;
+      for (const v of row) acc += Math.abs(v);
+      return acc;
+    }
+    if (this.norm === "l2") {
+      let acc = 0;
+      for (const v of row) acc += v * v;
+      return Math.sqrt(acc);
+    }
+    if (this.norm === "max") {
+      let best = 0;
+      for (const v of row) best = Math.max(best, Math.abs(v));
+      return best;
+    }
+    throw new ValueError(`Unknown norm: ${String(this.norm)}`);
+  }
+}
diff --git a/src/preprocessing/standard_scaler.ts b/src/preprocessing/standard_scaler.ts
new file mode 100644
index 0000000..576cb12
--- /dev/null
+++ b/src/preprocessing/standard_scaler.ts
@@ -0,0 +1,98 @@
+/**
+ * StandardScaler — zero-mean, unit-variance normalization.
+ * Mirrors sklearn.preprocessing.StandardScaler.
+ */
+
+import { BaseEstimator, TransformerMixin } from "../base.js";
+import { ValueError } from "../exceptions.js";
+import { checkArray, checkFeaturesConsistency } from "../utils/validation.js";
+
+export interface StandardScalerParams {
+ // Return copies from transform (default true).
+ copy?: boolean;
+ // Subtract the per-feature mean (default true).
+ with_mean?: boolean;
+ // Divide by the per-feature standard deviation (default true).
+ with_std?: boolean;
+}
+
+/**
+ * Zero-mean, unit-variance standardization of features.
+ */
+export class StandardScaler extends BaseEstimator {
+  /** Return copies from transform (true) or scale rows in place (false). */
+  copy: boolean;
+  /** Subtract the per-feature mean when transforming. */
+  with_mean: boolean;
+  /** Divide by the per-feature standard deviation when transforming. */
+  with_std: boolean;
+
+  mean_?: Float64Array; // per-feature mean learned by fit
+  scale_?: Float64Array; // per-feature std (1.0 for zero-variance columns)
+  var_?: Float64Array; // per-feature sample variance (ddof = 1)
+  n_features_in_?: number; // feature count seen at fit time
+  n_samples_seen_?: number; // sample count seen at fit time
+
+  constructor(params: StandardScalerParams = {}) {
+    super();
+    this.copy = params.copy ?? true;
+    this.with_mean = params.with_mean ?? true;
+    this.with_std = params.with_std ?? true;
+  }
+
+  /** Learn per-feature mean and variance via Welford's online algorithm. */
+  fit(X: Float64Array[], _y?: Float64Array | Int32Array): this {
+    checkArray(X);
+    const n = X.length;
+    const p = (X[0] ?? new Float64Array(0)).length;
+    this.n_samples_seen_ = n;
+    this.n_features_in_ = p;
+
+    const mean = new Float64Array(p);
+    const M2 = new Float64Array(p);
+
+    // Welford's online algorithm for mean and variance
+    for (let i = 0; i < n; i++) {
+      const row = X[i] ?? new Float64Array(p);
+      for (let j = 0; j < p; j++) {
+        const x = row[j] ?? 0;
+        const delta = x - (mean[j] ?? 0);
+        mean[j] = (mean[j] ?? 0) + delta / (i + 1);
+        M2[j] = (M2[j] ?? 0) + delta * (x - (mean[j] ?? 0));
+      }
+    }
+
+    this.mean_ = mean;
+    const variance =
+      n > 1 ? Float64Array.from(M2, (v) => v / (n - 1)) : new Float64Array(p);
+    this.var_ = variance;
+    // `|| 1.0` keeps zero-variance features from causing divide-by-zero.
+    this.scale_ = Float64Array.from(variance, (v) => Math.sqrt(v) || 1.0);
+    return this;
+  }
+
+  /**
+   * Standardize X using the fitted statistics.
+   * @throws ValueError when X's feature count differs from the fit-time X.
+   */
+  transform(X: Float64Array[]): Float64Array[] {
+    this._check_is_fitted(["mean_", "scale_"]);
+    // Fix: the original called checkFeaturesConsistency(X, X), comparing X
+    // against itself — a check that could never fail. Validate against the
+    // feature count recorded during fit instead.
+    const expected = this.n_features_in_ ?? 0;
+    const got = (X[0] ?? new Float64Array(0)).length;
+    if (X.length > 0 && got !== expected) {
+      throw new ValueError(
+        `X has ${got} features, but the estimator was trained with ${expected} features`,
+      );
+    }
+    const mean = this.mean_ as Float64Array;
+    const scale = this.scale_ as Float64Array;
+    return X.map((row) => {
+      const out = this.copy ? new Float64Array(row) : row;
+      for (let j = 0; j < out.length; j++) {
+        if (this.with_mean) out[j] = (out[j] ?? 0) - (mean[j] ?? 0);
+        if (this.with_std) out[j] = (out[j] ?? 0) / (scale[j] ?? 1);
+      }
+      return out;
+    });
+  }
+
+  /** Undo transform: multiply by scale then add the mean back. */
+  inverse_transform(X: Float64Array[]): Float64Array[] {
+    this._check_is_fitted(["mean_", "scale_"]);
+    const mean = this.mean_ as Float64Array;
+    const scale = this.scale_ as Float64Array;
+    return X.map((row) => {
+      const out = new Float64Array(row);
+      for (let j = 0; j < out.length; j++) {
+        if (this.with_std) out[j] = (out[j] ?? 0) * (scale[j] ?? 1);
+        if (this.with_mean) out[j] = (out[j] ?? 0) + (mean[j] ?? 0);
+      }
+      return out;
+    });
+  }
+
+  /** Convenience: fit to X then return the standardized X. */
+  fit_transform(
+    X: Float64Array[],
+    y?: Float64Array | Int32Array,
+  ): Float64Array[] {
+    return this.fit(X, y).transform(X);
+  }
+}
diff --git a/src/utils/class_weight.ts b/src/utils/class_weight.ts
new file mode 100644
index 0000000..9c23c4b
--- /dev/null
+++ b/src/utils/class_weight.ts
@@ -0,0 +1,69 @@
+/**
+ * Class weight utilities.
+ * Mirrors sklearn.utils.class_weight.
+ */
+
+import { ValueError } from "../exceptions.js";
+
+/**
+ * Compute class weights for imbalanced datasets.
+ * For 'balanced': n_samples / (n_classes * bincount(y))
+ * @throws ValueError for classes absent from y or missing from the mapping.
+ */
+export function computeClassWeight(
+  // Fix: bare `Record` has no type arguments and does not compile under tsc.
+  classWeight: "balanced" | Record<number, number>,
+  classes: Int32Array,
+  y: Float64Array | Int32Array,
+): Float64Array {
+  const weights = new Float64Array(classes.length);
+
+  if (classWeight === "balanced") {
+    const nSamples = y.length;
+    const nClasses = classes.length;
+    // Count occurrences of each expected class in y.
+    const counts = new Map<number, number>();
+    for (const c of classes) counts.set(c, 0);
+    for (const v of y) {
+      const cur = counts.get(v);
+      if (cur !== undefined) counts.set(v, cur + 1);
+    }
+    for (let i = 0; i < classes.length; i++) {
+      const c = classes[i] ?? 0;
+      const count = counts.get(c) ?? 0;
+      if (count === 0) {
+        throw new ValueError(`Class ${c} is not present in y`);
+      }
+      weights[i] = nSamples / (nClasses * count);
+    }
+  } else {
+    // Explicit per-class weights supplied by the caller.
+    for (let i = 0; i < classes.length; i++) {
+      const c = classes[i] ?? 0;
+      const w = classWeight[c];
+      if (w === undefined) {
+        throw new ValueError(`Class ${c} is not in classWeight`);
+      }
+      weights[i] = w;
+    }
+  }
+  return weights;
+}
+
+/**
+ * Compute per-sample weights from class weights: each sample receives the
+ * weight of its class as produced by computeClassWeight.
+ */
+export function computeSampleWeight(
+  // Fix: bare `Record` has no type arguments and does not compile under tsc.
+  classWeight: "balanced" | Record<number, number>,
+  y: Float64Array | Int32Array,
+): Float64Array {
+  const uniqueClasses = new Set<number>();
+  for (const v of y) uniqueClasses.add(v);
+  const classes = new Int32Array([...uniqueClasses].sort((a, b) => a - b));
+  const cw = computeClassWeight(classWeight, classes, y);
+  // Index the per-class weights by label for O(1) per-sample lookup.
+  const classToWeight = new Map<number, number>();
+  for (let i = 0; i < classes.length; i++) {
+    classToWeight.set(classes[i] ?? 0, cw[i] ?? 1.0);
+  }
+  const sampleWeights = new Float64Array(y.length);
+  for (let i = 0; i < y.length; i++) {
+    sampleWeights[i] = classToWeight.get(y[i] ?? 0) ?? 1.0;
+  }
+  return sampleWeights;
+}
diff --git a/src/utils/extmath.ts b/src/utils/extmath.ts
new file mode 100644
index 0000000..43a42cb
--- /dev/null
+++ b/src/utils/extmath.ts
@@ -0,0 +1,221 @@
+/**
+ * Mathematical utilities for tsikit-learn.
+ * Mirrors sklearn.utils.extmath.
+ */
+
+/** Compute the log of the logistic function element-wise. */
+export function logLogistic(x: Float64Array): Float64Array {
+  // log(1 / (1 + exp(-x))) in a numerically stable branch-per-sign form.
+  return Float64Array.from(x, (xi) =>
+    xi >= 0 ? -Math.log1p(Math.exp(-xi)) : xi - Math.log1p(Math.exp(xi)),
+  );
+}
+
+/**
+ * Compute softmax values for each row of X (in place unless copy=true).
+ * Uses the max-subtraction trick for numerical stability.
+ */
+export function softmax(X: Float64Array[], copy = true): Float64Array[] {
+  const result = copy ? X.map((row) => new Float64Array(row)) : X;
+  for (const row of result) {
+    if (row.length === 0) continue; // avoid 0/0 -> NaN on empty rows
+    // Loop instead of Math.max(...row): spreading a very large row can
+    // exceed the engine's argument limit and throw.
+    let maxVal = Number.NEGATIVE_INFINITY;
+    for (const v of row) if (v > maxVal) maxVal = v;
+    let sum = 0;
+    for (let j = 0; j < row.length; j++) {
+      row[j] = Math.exp((row[j] ?? 0) - maxVal);
+      sum += row[j] ?? 0;
+    }
+    for (let j = 0; j < row.length; j++) {
+      row[j] = (row[j] ?? 0) / sum;
+    }
+  }
+  return result;
+}
+
+/** Compute row norms of a matrix (L2 norms, or squared norms if requested). */
+export function rowNorms(X: Float64Array[], squared = false): Float64Array {
+  return Float64Array.from(X, (row) => {
+    let sq = 0;
+    for (const v of row) sq += v * v;
+    return squared ? sq : Math.sqrt(sq);
+  });
+}
+
+/** Safe sparse dot (dense version). Computes X @ y. */
+export function safeDot(X: Float64Array[], y: Float64Array): Float64Array {
+  return Float64Array.from(X, (row) => {
+    let acc = 0;
+    row.forEach((value, j) => {
+      acc += value * (y[j] ?? 0);
+    });
+    return acc;
+  });
+}
+
+/** Matrix transpose. */
+export function transpose(X: Float64Array[]): Float64Array[] {
+  if (X.length === 0) return [];
+  const rows = X.length;
+  const cols = (X[0] ?? new Float64Array(0)).length;
+  const out: Float64Array[] = [];
+  // Build one output row (input column) at a time.
+  for (let j = 0; j < cols; j++) {
+    const column = new Float64Array(rows);
+    for (let i = 0; i < rows; i++) {
+      column[i] = (X[i] ?? new Float64Array(0))[j] ?? 0;
+    }
+    out.push(column);
+  }
+  return out;
+}
+
+/** Matrix-matrix multiply: A @ B. */
+// Assumes A's column count equals B.length; no dimension check is performed.
+export function matMul(A: Float64Array[], B: Float64Array[]): Float64Array[] {
+ if (A.length === 0 || B.length === 0) return [];
+ const nRows = A.length;
+ const nCols = (B[0] ?? new Float64Array(0)).length;
+ const nInner = B.length;
+ const result: Float64Array[] = Array.from(
+ { length: nRows },
+ () => new Float64Array(nCols),
+ );
+ // i-k-j loop order walks each B[k] row sequentially.
+ for (let i = 0; i < nRows; i++) {
+ for (let k = 0; k < nInner; k++) {
+ const aik = (A[i] ?? new Float64Array(0))[k] ?? 0;
+ // Skip zero A entries — avoids the inner loop for sparse-ish inputs.
+ if (aik === 0) continue;
+ for (let j = 0; j < nCols; j++) {
+ const resultRow = result[i] ?? new Float64Array(0);
+ resultRow[j] =
+ (resultRow[j] ?? 0) + aik * ((B[k] ?? new Float64Array(0))[j] ?? 0);
+ }
+ }
+ }
+ return result;
+}
+
+/**
+ * Solve a lower triangular system Lx = b using forward substitution.
+ * NOTE(review): the `?? 1` fallback only covers a missing diagonal entry;
+ * an explicit zero pivot still divides by zero (±Infinity/NaN) — confirm
+ * callers pass non-singular L.
+ */
+export function forwardSubstitution(
+ L: Float64Array[],
+ b: Float64Array,
+): Float64Array {
+ const n = b.length;
+ const x = new Float64Array(n);
+ for (let i = 0; i < n; i++) {
+ // Subtract the already-solved components from b[i], then divide by L[i][i].
+ let sum = b[i] ?? 0;
+ for (let j = 0; j < i; j++) {
+ sum -= ((L[i] ?? new Float64Array(0))[j] ?? 0) * (x[j] ?? 0);
+ }
+ x[i] = sum / ((L[i] ?? new Float64Array(0))[i] ?? 1);
+ }
+ return x;
+}
+
+/**
+ * Solve an upper triangular system Ux = b using back substitution.
+ * NOTE(review): as with forwardSubstitution, the `?? 1` fallback does not
+ * protect against an explicit zero pivot — confirm U is non-singular.
+ */
+export function backSubstitution(
+ U: Float64Array[],
+ b: Float64Array,
+): Float64Array {
+ const n = b.length;
+ const x = new Float64Array(n);
+ // Iterate from the last row upward; each x[i] depends on x[i+1..n-1].
+ for (let i = n - 1; i >= 0; i--) {
+ let sum = b[i] ?? 0;
+ for (let j = i + 1; j < n; j++) {
+ sum -= ((U[i] ?? new Float64Array(0))[j] ?? 0) * (x[j] ?? 0);
+ }
+ x[i] = sum / ((U[i] ?? new Float64Array(0))[i] ?? 1);
+ }
+ return x;
+}
+
+/**
+ * Cholesky decomposition of a symmetric positive definite matrix.
+ * Returns L such that A = L @ L.T
+ * NOTE(review): Math.max(sum, 0) clamps negative diagonal terms, so a
+ * non-positive-definite input silently yields a degenerate factor instead
+ * of an error — confirm callers guarantee SPD inputs.
+ */
+export function cholesky(A: Float64Array[]): Float64Array[] {
+ const n = A.length;
+ const L: Float64Array[] = Array.from(
+ { length: n },
+ () => new Float64Array(n),
+ );
+ // Standard Cholesky-Crout: fill the lower triangle column by column.
+ for (let i = 0; i < n; i++) {
+ for (let j = 0; j <= i; j++) {
+ let sum = (A[i] ?? new Float64Array(0))[j] ?? 0;
+ for (let k = 0; k < j; k++) {
+ sum -=
+ ((L[i] ?? new Float64Array(0))[k] ?? 0) *
+ ((L[j] ?? new Float64Array(0))[k] ?? 0);
+ }
+ if (i === j) {
+ (L[i] ?? new Float64Array(0))[j] = Math.sqrt(Math.max(sum, 0));
+ } else {
+ // Guard against a zero diagonal produced by the clamp above.
+ const ljj = (L[j] ?? new Float64Array(0))[j] ?? 1;
+ (L[i] ?? new Float64Array(0))[j] = ljj !== 0 ? sum / ljj : 0;
+ }
+ }
+ }
+ return L;
+}
+
+/**
+ * Solve the linear system Ax = b via Cholesky: factor A = L L^T, then
+ * perform two triangular solves. A must be symmetric positive definite.
+ */
+export function choleskyLinsolve(
+  A: Float64Array[],
+  b: Float64Array,
+): Float64Array {
+  const lower = cholesky(A);
+  // Forward solve L y = b, then back solve L^T x = y.
+  const intermediate = forwardSubstitution(lower, b);
+  return backSubstitution(transpose(lower), intermediate);
+}
+
+/** Compute the Euclidean distance between two vectors. */
+export function euclideanDistance(a: Float64Array, b: Float64Array): number {
+  let sumSq = 0;
+  a.forEach((av, i) => {
+    const d = av - (b[i] ?? 0);
+    sumSq += d * d;
+  });
+  return Math.sqrt(sumSq);
+}
+
+/** Add identity * alpha to a matrix (in-place). */
+export function addDiagonal(A: Float64Array[], alpha: number): Float64Array[] {
+  A.forEach((row, i) => {
+    row[i] = (row[i] ?? 0) + alpha;
+  });
+  return A;
+}
+
+/** Compute X.T @ X (Gram matrix). */
+export function gramMatrix(X: Float64Array[]): Float64Array[] {
+  return matMul(transpose(X), X);
+}
+
+/** Compute X.T @ y. */
+export function xtDotY(X: Float64Array[], y: Float64Array): Float64Array {
+  const p = (X[0] ?? new Float64Array(0)).length;
+  const acc = new Float64Array(p);
+  // Accumulate each row of X scaled by its matching y entry.
+  X.forEach((row, i) => {
+    const yi = y[i] ?? 0;
+    for (let j = 0; j < p; j++) {
+      acc[j] = (acc[j] ?? 0) + (row[j] ?? 0) * yi;
+    }
+  });
+  return acc;
+}
diff --git a/src/utils/index.ts b/src/utils/index.ts
new file mode 100644
index 0000000..2ea8323
--- /dev/null
+++ b/src/utils/index.ts
@@ -0,0 +1,4 @@
+export * from "./extmath.js";
+export * from "./validation.js";
+export * from "./multiclass.js";
+export * from "./class_weight.js";
diff --git a/src/utils/multiclass.ts b/src/utils/multiclass.ts
new file mode 100644
index 0000000..cd461ad
--- /dev/null
+++ b/src/utils/multiclass.ts
@@ -0,0 +1,68 @@
+/**
+ * Multiclass utilities.
+ * Mirrors sklearn.utils.multiclass.
+ */
+
+import { ValueError } from "../exceptions.js";
+
+// Target kinds distinguished by typeOfTarget (sklearn's type_of_target
+// vocabulary; this port only ever returns binary/multiclass/continuous).
+export type MulticlassType =
+ | "binary"
+ | "multiclass"
+ | "multiclass-multioutput"
+ | "multilabel-indicator"
+ | "continuous"
+ | "continuous-multioutput"
+ | "unknown";
+
+/** Determine the type of target variable. */
+export function typeOfTarget(y: Float64Array | Int32Array): MulticlassType {
+  const distinct = new Set<number>();
+  for (const value of y) distinct.add(value);
+
+  // A single fractional value makes the target regression-like.
+  for (const value of distinct) {
+    if (!Number.isInteger(value)) return "continuous";
+  }
+
+  return distinct.size <= 2 ? "binary" : "multiclass";
+}
+
+/** Return sorted unique class labels pooled across all given arrays. */
+export function uniqueLabels(...ys: (Float64Array | Int32Array)[]): Int32Array {
+  // Typed Set: the bare `new Set()` inferred Set<unknown>, which breaks the
+  // numeric sort comparator and the Int32Array construction under strict tsc.
+  const all = new Set<number>();
+  for (const y of ys) {
+    for (const v of y) all.add(v);
+  }
+  return new Int32Array([...all].sort((a, b) => a - b));
+}
+
+/** Check if classification is binary. */
+export function isBinaryClassification(y: Float64Array | Int32Array): boolean {
+  const distinct = new Set<number>();
+  for (const label of y) distinct.add(label);
+  return distinct.size === 2;
+}
+
+/** Check if classification is multilabel. */
+export function isMultilabel(_y: Float64Array[]): boolean {
+  // Dense 1-D targets only in this simplified port, so never multilabel.
+  return false;
+}
+
+/** Return the number of classes for a label array. */
+export function classCount(y: Float64Array | Int32Array): number {
+  const distinct = new Set<number>();
+  for (const label of y) distinct.add(label);
+  return distinct.size;
+}
+
+/** Validate that y only contains values in the expected classes. */
+export function checkClassificationTargets(y: Float64Array | Int32Array): void {
+  const targetType = typeOfTarget(y);
+  // Only continuous targets are rejected; everything else is classifiable.
+  if (targetType !== "continuous") return;
+  throw new ValueError(
+    `Unknown label type: ${targetType}. Maybe you are trying to fit a classifier, which expects discrete classes.`,
+  );
+}
diff --git a/src/utils/validation.ts b/src/utils/validation.ts
new file mode 100644
index 0000000..e366ffc
--- /dev/null
+++ b/src/utils/validation.ts
@@ -0,0 +1,108 @@
+/**
+ * Input validation utilities.
+ * Mirrors sklearn.utils.validation.
+ */
+
+import { ValueError } from "../exceptions.js";
+
+/** Validate that X is a non-empty 2D array of Float64Arrays. */
+// Checks, in order: container is an array, row count >= minSamples,
+// column count >= minFeatures, and every row is a Float64Array of the
+// same length as row 0. Returns X unchanged on success.
+// NOTE(review): options.allowNd is accepted but never read — dead option.
+export function checkArray(
+ X: Float64Array[],
+ options: {
+ minSamples?: number;
+ minFeatures?: number;
+ allowNd?: boolean;
+ } = {},
+): Float64Array[] {
+ const { minSamples = 1, minFeatures = 1 } = options;
+ if (!Array.isArray(X)) {
+ throw new ValueError("X must be an array of Float64Arrays");
+ }
+ if (X.length < minSamples) {
+ throw new ValueError(
+ `X must have at least ${minSamples} samples, got ${X.length}`,
+ );
+ }
+ // Row 0 defines the expected feature count for all rows.
+ const nFeatures = (X[0] ?? new Float64Array(0)).length;
+ if (nFeatures < minFeatures) {
+ throw new ValueError(
+ `X must have at least ${minFeatures} features, got ${nFeatures}`,
+ );
+ }
+ for (let i = 0; i < X.length; i++) {
+ const row = X[i];
+ if (!(row instanceof Float64Array)) {
+ throw new ValueError(`X[${i}] must be a Float64Array`);
+ }
+ if (row.length !== nFeatures) {
+ throw new ValueError(
+ `X rows must all have the same length. Row 0 has ${nFeatures}, row ${i} has ${row.length}`,
+ );
+ }
+ }
+ return X;
+}
+
+/** Validate that X and y have compatible shapes. */
+export function checkXy(
+  X: Float64Array[],
+  y: Float64Array | Int32Array,
+): [Float64Array[], Float64Array | Int32Array] {
+  checkArray(X);
+  if (X.length === y.length) {
+    return [X, y];
+  }
+  throw new ValueError(
+    `X and y have inconsistent first dimensions: X has ${X.length} samples, y has ${y.length}`,
+  );
+}
+
+/** Return the number of features in X. */
+export function getNumFeatures(X: Float64Array[]): number {
+  const firstRow = X[0];
+  return firstRow === undefined ? 0 : firstRow.length;
+}
+
+/** Validate that test features match training features. */
+export function checkFeaturesConsistency(
+  XTrain: Float64Array[],
+  XTest: Float64Array[],
+): void {
+  const trainFeats = getNumFeatures(XTrain);
+  const testFeats = getNumFeatures(XTest);
+  if (trainFeats === testFeats) return;
+  throw new ValueError(
+    `X has ${testFeats} features, but the estimator was trained with ${trainFeats} features`,
+  );
+}
+
+/** Convert a number array to Float64Array. */
+export function asFloat64Array(arr: number[] | Float64Array): Float64Array {
+  return arr instanceof Float64Array ? arr : new Float64Array(arr);
+}
+
+/** Convert a number array to Int32Array. */
+export function asInt32Array(arr: number[] | Int32Array): Int32Array {
+  return arr instanceof Int32Array ? arr : new Int32Array(arr);
+}
+
+/** Validate sample weights, returning a uniform weight array if null. */
+export function checkSampleWeight(
+  sampleWeight: Float64Array | null | undefined,
+  nSamples: number,
+): Float64Array {
+  if (sampleWeight == null) {
+    // Missing weights mean every sample counts equally.
+    return new Float64Array(nSamples).fill(1.0);
+  }
+  if (sampleWeight.length !== nSamples) {
+    throw new ValueError(
+      `sampleWeight.length (${sampleWeight.length}) != n_samples (${nSamples})`,
+    );
+  }
+  return sampleWeight;
+}
diff --git a/tests/base.test.ts b/tests/base.test.ts
new file mode 100644
index 0000000..550000e
--- /dev/null
+++ b/tests/base.test.ts
@@ -0,0 +1,69 @@
+import { describe, expect, it } from "bun:test";
+import {
+ BaseEstimator,
+ ClassifierMixin,
+ RegressorMixin,
+ check_is_fitted,
+ clone,
+} from "../src/base.ts";
+import { NotFittedError } from "../src/exceptions.ts";
+
+// Minimal concrete estimator used to exercise BaseEstimator behavior:
+// two constructor params (so get_params/set_params have something to see)
+// and a fitted_ flag set by fit() for _check_is_fitted tests.
+class DummyEstimator extends BaseEstimator {
+ alpha: number;
+ beta: string;
+ fitted_?: boolean;
+
+ constructor(alpha = 1.0, beta = "test") {
+ super();
+ this.alpha = alpha;
+ this.beta = beta;
+ }
+
+ fit(): this {
+ this.fitted_ = true;
+ return this;
+ }
+}
+
+// get_params/set_params round-trip and fitted-state checks.
+describe("BaseEstimator", () => {
+ it("get_params returns constructor params", () => {
+ const est = new DummyEstimator(2.0, "hello");
+ const params = est.get_params();
+ expect(params.alpha).toBe(2.0);
+ expect(params.beta).toBe("hello");
+ });
+
+ it("set_params updates params", () => {
+ const est = new DummyEstimator();
+ est.set_params({ alpha: 5.0 });
+ expect(est.alpha).toBe(5.0);
+ });
+
+ it("check_is_fitted throws NotFittedError when not fitted", () => {
+ const est = new DummyEstimator();
+ expect(() => est.fit()._check_is_fitted(["fitted_"])).not.toThrow();
+ const est2 = new DummyEstimator();
+ expect(() => est2._check_is_fitted(["fitted_"])).toThrow(NotFittedError);
+ });
+});
+
+// clone must produce a distinct instance carrying the same params.
+describe("clone", () => {
+ it("creates a new instance with same params", () => {
+ const est = new DummyEstimator(3.0, "foo");
+ const cloned = clone(est);
+ expect(cloned).not.toBe(est);
+ expect(cloned.alpha).toBe(3.0);
+ expect(cloned.beta).toBe("foo");
+ });
+});
+
+// Exception classes: name and Error inheritance.
+describe("Exceptions", () => {
+ it("NotFittedError has correct name", () => {
+ const err = new NotFittedError();
+ expect(err.name).toBe("NotFittedError");
+ });
+
+ it("NotFittedError is an Error", () => {
+ expect(new NotFittedError()).toBeInstanceOf(Error);
+ });
+});
diff --git a/tests/linear_model.test.ts b/tests/linear_model.test.ts
new file mode 100644
index 0000000..02dd9b0
--- /dev/null
+++ b/tests/linear_model.test.ts
@@ -0,0 +1,192 @@
+import { describe, expect, it } from "bun:test";
+import { LinearRegression } from "../src/linear_model/linear_regression.ts";
+import { Ridge } from "../src/linear_model/ridge.ts";
+
+describe("LinearRegression", () => {
+ it("fits a simple 1D linear relationship", () => {
+ const X = [
+ new Float64Array([1]),
+ new Float64Array([2]),
+ new Float64Array([3]),
+ new Float64Array([4]),
+ new Float64Array([5]),
+ ];
+ const y = new Float64Array([2, 4, 6, 8, 10]);
+ const reg = new LinearRegression();
+ reg.fit(X, y);
+
+ expect(reg.coef_).toBeDefined();
+ expect(Math.abs(((reg.coef_ as Float64Array)[0] ?? 0) - 2)).toBeLessThan(
+ 1e-6,
+ );
+ expect(Math.abs(reg.intercept_ as number)).toBeLessThan(1e-6);
+ });
+
+ it("fits with intercept", () => {
+ const X = [
+ new Float64Array([0]),
+ new Float64Array([1]),
+ new Float64Array([2]),
+ ];
+ const y = new Float64Array([1, 3, 5]); // y = 2x + 1
+ const reg = new LinearRegression();
+ reg.fit(X, y);
+
+ expect(Math.abs(((reg.coef_ as Float64Array)[0] ?? 0) - 2)).toBeLessThan(
+ 1e-6,
+ );
+ expect(Math.abs((reg.intercept_ as number) - 1)).toBeLessThan(1e-6);
+ });
+
+ it("fits without intercept", () => {
+ const X = [
+ new Float64Array([1]),
+ new Float64Array([2]),
+ new Float64Array([3]),
+ ];
+ const y = new Float64Array([3, 6, 9]); // y = 3x
+ const reg = new LinearRegression({ fit_intercept: false });
+ reg.fit(X, y);
+
+ expect(Math.abs(((reg.coef_ as Float64Array)[0] ?? 0) - 3)).toBeLessThan(
+ 1e-6,
+ );
+ expect(reg.intercept_).toBe(0);
+ });
+
+ it("predicts correctly", () => {
+ const X = [new Float64Array([1]), new Float64Array([2])];
+ const y = new Float64Array([1, 2]);
+ const reg = new LinearRegression();
+ reg.fit(X, y);
+
+ const pred = reg.predict([new Float64Array([3])]);
+ expect(Math.abs((pred[0] ?? 0) - 3)).toBeLessThan(1e-4);
+ });
+
+ it("fits multiple features", () => {
+ // y = 1*x1 + 2*x2
+ const X = [
+ new Float64Array([1, 2]),
+ new Float64Array([2, 1]),
+ new Float64Array([3, 3]),
+ new Float64Array([4, 2]),
+ ];
+ const y = new Float64Array([5, 4, 9, 8]);
+ const reg = new LinearRegression({ fit_intercept: false });
+ reg.fit(X, y);
+
+ const pred = reg.predict([new Float64Array([1, 2])]);
+ expect(Math.abs((pred[0] ?? 0) - 5)).toBeLessThan(0.1);
+ });
+
+ it("computes R² score", () => {
+ const X = [
+ new Float64Array([1]),
+ new Float64Array([2]),
+ new Float64Array([3]),
+ new Float64Array([4]),
+ ];
+ const y = new Float64Array([2, 4, 6, 8]);
+ const reg = new LinearRegression();
+ reg.fit(X, y);
+
+ const score = reg.score(X, y);
+ expect(score).toBeCloseTo(1.0, 5);
+ });
+
+ it("returns R² close to 1 for perfect linear data", () => {
+ const X = Array.from(
+ { length: 20 },
+ (_, i) => new Float64Array([i, i * 2]),
+ );
+ const y = new Float64Array(Array.from({ length: 20 }, (_, i) => i * 3 + 1));
+ const reg = new LinearRegression();
+ reg.fit(X, y);
+ expect(reg.score(X, y)).toBeGreaterThan(0.999);
+ });
+
+ it("throws NotFittedError when predicting before fit", () => {
+ const reg = new LinearRegression();
+ expect(() => reg.predict([new Float64Array([1])])).toThrow();
+ });
+
+ it("get_params returns all params", () => {
+ const reg = new LinearRegression();
+ const params = reg.get_params();
+ expect("fit_intercept" in params).toBe(true);
+ });
+});
+
+describe("Ridge", () => {
+ it("fits a simple linear relationship with regularization", () => {
+ const X = [
+ new Float64Array([1]),
+ new Float64Array([2]),
+ new Float64Array([3]),
+ new Float64Array([4]),
+ new Float64Array([5]),
+ ];
+ const y = new Float64Array([2, 4, 6, 8, 10]);
+ const reg = new Ridge({ alpha: 0.0001 });
+ reg.fit(X, y);
+
+ // With tiny alpha, should be close to OLS
+ expect(Math.abs(((reg.coef_ as Float64Array)[0] ?? 0) - 2)).toBeLessThan(
+ 0.01,
+ );
+ });
+
+ it("shrinks coefficients with large alpha", () => {
+ const X = [
+ new Float64Array([1, 0]),
+ new Float64Array([0, 1]),
+ new Float64Array([1, 1]),
+ ];
+ const y = new Float64Array([2, 3, 5]);
+
+ const regLowAlpha = new Ridge({ alpha: 0.001 });
+ const regHighAlpha = new Ridge({ alpha: 100.0 });
+ regLowAlpha.fit(X, y);
+ regHighAlpha.fit(X, y);
+
+ const normLow = Array.from(regLowAlpha.coef_ as Float64Array).reduce(
+ (a, b) => a + b * b,
+ 0,
+ );
+ const normHigh = Array.from(regHighAlpha.coef_ as Float64Array).reduce(
+ (a, b) => a + b * b,
+ 0,
+ );
+
+ // Higher alpha → smaller coefficients
+ expect(normHigh).toBeLessThan(normLow);
+ });
+
+ it("predicts correctly", () => {
+ const X = [
+ new Float64Array([1]),
+ new Float64Array([2]),
+ new Float64Array([3]),
+ ];
+ const y = new Float64Array([1, 2, 3]);
+ const reg = new Ridge({ alpha: 0.001 });
+ reg.fit(X, y);
+
+ const pred = reg.predict([new Float64Array([4])]);
+ expect(Math.abs((pred[0] ?? 0) - 4)).toBeLessThan(0.1);
+ });
+
+ it("score is R²", () => {
+ const X = Array.from({ length: 20 }, (_, i) => new Float64Array([i]));
+ const y = new Float64Array(Array.from({ length: 20 }, (_, i) => i * 2 + 1));
+ const reg = new Ridge({ alpha: 0.001 });
+ reg.fit(X, y);
+ expect(reg.score(X, y)).toBeGreaterThan(0.99);
+ });
+
+ it("throws NotFittedError when predicting before fit", () => {
+ const reg = new Ridge();
+ expect(() => reg.predict([new Float64Array([1])])).toThrow();
+ });
+});
diff --git a/tests/metrics_model_selection.test.ts b/tests/metrics_model_selection.test.ts
new file mode 100644
index 0000000..1dfb861
--- /dev/null
+++ b/tests/metrics_model_selection.test.ts
@@ -0,0 +1,111 @@
+import { describe, expect, it } from "bun:test";
+import {
+ accuracy_score,
+ confusion_matrix,
+ f1_score,
+ precision_score,
+ recall_score,
+} from "../src/metrics/classification.ts";
+import {
+ mean_absolute_error,
+ mean_squared_error,
+ r2_score,
+} from "../src/metrics/regression.ts";
+import { KFold, train_test_split } from "../src/model_selection/split.ts";
+
+describe("Regression metrics", () => {
+ it("MSE is 0 for perfect prediction", () => {
+ const y = new Float64Array([1, 2, 3]);
+ expect(mean_squared_error(y, y)).toBe(0);
+ });
+
+ it("MAE is 0 for perfect prediction", () => {
+ const y = new Float64Array([1, 2, 3]);
+ expect(mean_absolute_error(y, y)).toBe(0);
+ });
+
+ it("R² is 1 for perfect prediction", () => {
+ const y = new Float64Array([1, 2, 3]);
+ expect(r2_score(y, y)).toBe(1);
+ });
+
+ it("MSE is correct", () => {
+ const yTrue = new Float64Array([1, 2, 3]);
+ const yPred = new Float64Array([2, 3, 4]); // all off by 1
+ expect(mean_squared_error(yTrue, yPred)).toBe(1);
+ });
+});
+
+describe("Classification metrics", () => {
+ it("accuracy is 1 for perfect prediction", () => {
+ const y = new Int32Array([0, 1, 2]);
+ expect(accuracy_score(y, y)).toBe(1);
+ });
+
+ it("accuracy counts correct predictions", () => {
+ const yTrue = new Int32Array([0, 1, 1, 0]);
+ const yPred = new Int32Array([0, 1, 0, 0]);
+ expect(accuracy_score(yTrue, yPred)).toBe(0.75);
+ });
+
+ it("confusion matrix is correct for binary", () => {
+ const yTrue = new Int32Array([0, 1, 0, 1, 0]);
+ const yPred = new Int32Array([0, 1, 1, 1, 0]);
+ const cm = confusion_matrix(yTrue, yPred);
+ // [[TN, FP], [FN, TP]]
+ expect((cm[0] as number[])[0]).toBe(2); // TN
+ expect((cm[0] as number[])[1]).toBe(1); // FP
+ expect((cm[1] as number[])[0]).toBe(0); // FN
+ expect((cm[1] as number[])[1]).toBe(2); // TP
+ });
+
+ it("f1 is 1 for perfect predictions", () => {
+ const y = new Int32Array([0, 1, 0, 1]);
+ expect(f1_score(y, y)).toBeCloseTo(1);
+ });
+});
+
+describe("train_test_split", () => {
+ it("splits data correctly", () => {
+ const X = Array.from({ length: 100 }, (_, i) => new Float64Array([i]));
+ const y = new Float64Array(Array.from({ length: 100 }, (_, i) => i));
+ const { XTrain, XTest, yTrain, yTest } = train_test_split(X, y, {
+ testSize: 0.2,
+ });
+ expect(XTrain.length).toBe(80);
+ expect(XTest.length).toBe(20);
+ expect(yTrain.length).toBe(80);
+ expect(yTest.length).toBe(20);
+ });
+
+ it("is reproducible with randomState", () => {
+ const X = Array.from({ length: 20 }, (_, i) => new Float64Array([i]));
+ const y = new Float64Array(Array.from({ length: 20 }, (_, i) => i));
+ const r1 = train_test_split(X, y, { randomState: 42 });
+ const r2 = train_test_split(X, y, { randomState: 42 });
+ expect(Array.from(r1.yTest)).toEqual(Array.from(r2.yTest));
+ });
+});
+
+describe("KFold", () => {
+ it("generates k folds", () => {
+ const X = Array.from({ length: 10 }, (_, i) => new Float64Array([i]));
+ const kf = new KFold({ nSplits: 5 });
+ const folds = [...kf.split(X)];
+ expect(folds.length).toBe(5);
+ for (const fold of folds) {
+ expect(fold.trainIndex.length).toBe(8);
+ expect(fold.testIndex.length).toBe(2);
+ }
+ });
+
+ it("covers all samples exactly once", () => {
+ const X = Array.from({ length: 9 }, (_, i) => new Float64Array([i]));
+ const kf = new KFold({ nSplits: 3 });
+ const allTest = new Set();
+ for (const fold of kf.split(X)) {
+ for (const idx of fold.testIndex) allTest.add(idx);
+ }
+ expect(allTest.size).toBe(9);
+ });
+});
diff --git a/tests/preprocessing.test.ts b/tests/preprocessing.test.ts
new file mode 100644
index 0000000..cc11d17
--- /dev/null
+++ b/tests/preprocessing.test.ts
@@ -0,0 +1,133 @@
+import { describe, expect, it } from "bun:test";
+import { NotFittedError } from "../src/exceptions.ts";
+import { LabelEncoder } from "../src/preprocessing/label_encoder.ts";
+import { MinMaxScaler } from "../src/preprocessing/minmax_scaler.ts";
+import { Normalizer } from "../src/preprocessing/normalizer.ts";
+import { StandardScaler } from "../src/preprocessing/standard_scaler.ts";
+
+describe("StandardScaler", () => {
+ const X = [
+ new Float64Array([1, 2]),
+ new Float64Array([3, 4]),
+ new Float64Array([5, 6]),
+ ];
+
+ it("computes mean and std correctly", () => {
+ const scaler = new StandardScaler();
+ scaler.fit(X);
+ expect(scaler.mean_).toBeDefined();
+ expect(Math.abs(((scaler.mean_ as Float64Array)[0] ?? 0) - 3)).toBeLessThan(
+ 1e-10,
+ );
+ expect(Math.abs(((scaler.mean_ as Float64Array)[1] ?? 0) - 4)).toBeLessThan(
+ 1e-10,
+ );
+ });
+
+ it("transforms to zero mean", () => {
+ const scaler = new StandardScaler();
+ const Xt = scaler.fit_transform(X);
+ const mean0 = Xt.reduce((a, r) => a + (r[0] ?? 0), 0) / Xt.length;
+ expect(Math.abs(mean0)).toBeLessThan(1e-10);
+ });
+
+ it("inverse_transform recovers original", () => {
+ const scaler = new StandardScaler();
+ const Xt = scaler.fit_transform(X);
+ const Xr = scaler.inverse_transform(Xt);
+ for (let i = 0; i < X.length; i++) {
+ for (let j = 0; j < (X[i] as Float64Array).length; j++) {
+ expect(
+ Math.abs(
+ ((Xr[i] as Float64Array)[j] ?? 0) -
+ ((X[i] as Float64Array)[j] ?? 0),
+ ),
+ ).toBeLessThan(1e-8);
+ }
+ }
+ });
+
+ it("throws when not fitted", () => {
+ const scaler = new StandardScaler();
+ expect(() => scaler.transform(X)).toThrow(NotFittedError);
+ });
+});
+
+describe("MinMaxScaler", () => {
+ const X = [
+ new Float64Array([0, 2]),
+ new Float64Array([5, 4]),
+ new Float64Array([10, 6]),
+ ];
+
+ it("scales to [0, 1] by default", () => {
+ const scaler = new MinMaxScaler();
+ const Xt = scaler.fit_transform(X);
+ expect((Xt[0] as Float64Array)[0]).toBeCloseTo(0, 8);
+ expect((Xt[2] as Float64Array)[0]).toBeCloseTo(1, 8);
+ });
+
+ it("scales to custom range", () => {
+ const scaler = new MinMaxScaler({ feature_range: [-1, 1] });
+ const Xt = scaler.fit_transform(X);
+ expect((Xt[0] as Float64Array)[0]).toBeCloseTo(-1, 6);
+ expect((Xt[2] as Float64Array)[0]).toBeCloseTo(1, 6);
+ });
+
+ it("inverse_transform recovers original", () => {
+ const scaler = new MinMaxScaler();
+ const Xt = scaler.fit_transform(X);
+ const Xr = scaler.inverse_transform(Xt);
+ for (let i = 0; i < X.length; i++) {
+ for (let j = 0; j < (X[i] as Float64Array).length; j++) {
+ expect(
+ Math.abs(
+ ((Xr[i] as Float64Array)[j] ?? 0) -
+ ((X[i] as Float64Array)[j] ?? 0),
+ ),
+ ).toBeLessThan(1e-8);
+ }
+ }
+ });
+});
+
+describe("LabelEncoder", () => {
+ it("encodes labels", () => {
+ const le = new LabelEncoder();
+ const y = new Int32Array([3, 1, 2, 1, 3]);
+ const encoded = le.fit_transform(y);
+ expect(Array.from(encoded)).toEqual([2, 0, 1, 0, 2]);
+ });
+
+ it("inverse_transform recovers original", () => {
+ const le = new LabelEncoder();
+ const y = new Int32Array([10, 20, 30]);
+ const encoded = le.fit_transform(y);
+ const decoded = le.inverse_transform(encoded);
+ expect(Array.from(decoded)).toEqual([10, 20, 30]);
+ });
+
+ it("throws on unseen labels", () => {
+ const le = new LabelEncoder();
+ le.fit(new Int32Array([1, 2, 3]));
+ expect(() => le.transform(new Int32Array([4]))).toThrow();
+ });
+});
+
+describe("Normalizer", () => {
+ it("normalizes to unit L2 norm", () => {
+ const norm = new Normalizer({ norm: "l2" });
+ const X = [new Float64Array([3, 4])]; // 3² + 4² = 25, norm = 5
+ const Xt = norm.transform(X);
+ expect((Xt[0] as Float64Array)[0]).toBeCloseTo(0.6, 8);
+ expect((Xt[0] as Float64Array)[1]).toBeCloseTo(0.8, 8);
+ });
+
+ it("normalizes to unit L1 norm", () => {
+ const norm = new Normalizer({ norm: "l1" });
+ const X = [new Float64Array([1, 3])]; // sum = 4
+ const Xt = norm.transform(X);
+ expect((Xt[0] as Float64Array)[0]).toBeCloseTo(0.25, 8);
+ expect((Xt[0] as Float64Array)[1]).toBeCloseTo(0.75, 8);
+ });
+});
diff --git a/tsconfig.json b/tsconfig.json
new file mode 100644
index 0000000..989d8ca
--- /dev/null
+++ b/tsconfig.json
@@ -0,0 +1,24 @@
+{
+ "compilerOptions": {
+ "target": "ES2022",
+ "module": "ESNext",
+ "moduleResolution": "bundler",
+ "lib": ["ES2022", "DOM"],
+ "strict": true,
+ "noUncheckedIndexedAccess": true,
+ "exactOptionalPropertyTypes": true,
+ "noImplicitOverride": true,
+ "noImplicitReturns": true,
+ "noPropertyAccessFromIndexSignature": true,
+ "noFallthroughCasesInSwitch": true,
+ "verbatimModuleSyntax": true,
+ "declaration": true,
+ "declarationMap": true,
+ "sourceMap": true,
+ "outDir": "./dist",
+ "rootDir": "./src",
+ "skipLibCheck": true
+ },
+ "include": ["src/**/*"],
+ "exclude": ["node_modules", "dist", "tests", "playground"]
+}