diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 114a7a4..58d023d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,3 +67,5 @@ jobs: lua examples/poisson_arrivals.lua lua examples/binomial_coin_flips.lua lua examples/bootstrap_mean.lua + lua spec/test_bivariate.lua + lua examples/covariance_correlation.lua diff --git a/CHANGELOG.md b/CHANGELOG.md index 36b57ea..5d6b8c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,44 @@ This project follows a lightweight changelog format inspired by [Keep a Changelo ### Added +* Added modular internal source layout under `src/luasf/`. +* Added `src/luasf/core.lua`. +* Added `src/luasf/validation.lua`. +* Added `src/luasf/rng.lua`. +* Added `src/luasf/descriptive.lua`. +* Added `src/luasf/sampling.lua`. +* Added `src/luasf/distributions.lua`. +* Added `src/luasf/bivariate.lua`. +* Added `src/luasf/probability.lua` as a placeholder for future probability helpers. +* Added `covariance(x, y)`. +* Added `correlation(x, y)`. +* Added `pearson(x, y)` as an alias for `correlation(x, y)`. +* Added `spec/test_bivariate.lua`. +* Added `examples/covariance_correlation.lua`. +* Added `rockspec/luasf-0.5.0-1.rockspec` as the next LuaRocks release draft. + +### Changed + +* Kept `src/luasf.lua` as the public facade module. +* Preserved the existing public API while moving implementation details into smaller internal modules. +* Moved LuaRocks specification files into the `rockspec/` directory. +* Updated documentation to describe the modular layout and bivariate statistics helpers. +* Updated CI expectations to include bivariate tests and the covariance/correlation example. + +### Planned + +* Add shape statistics helpers such as `skewness(array)` and `kurtosis(array)`. +* Explore future probability helpers such as `factorial(n)`, `combinations(n, r)`, and `permutations(n, r)`. +* Explore a lightweight cross-reference with LuaHMF as a related pure-Lua math helper project. 
+* Consider simple formula-based regression summaries later, without turning LuaSF into a machine learning framework. +* Add more distribution examples and simulation-oriented examples. + +--- + +## [0.4.0] - 2026-06-04 + +### Added + * Added `mode(array)`. * Added `range(array)`. * Added `iqr(array)`. @@ -20,14 +58,13 @@ This project follows a lightweight changelog format inspired by [Keep a Changelo * Added `examples/poisson_arrivals.lua`. * Added `examples/binomial_coin_flips.lua`. * Added `examples/bootstrap_mean.lua`. +* Added `luasf-0.4.0-1.rockspec`. -### Planned +### Documentation -* Improve GitHub Actions CI with optional automatic checks for pull requests. -* Improve LuaRocks validation and publishing workflows. -* Add more distribution examples and simulation-oriented examples. -* Explore a lightweight cross-reference with LuaHMF as a related pure-Lua math helper project. -* Evaluate future combinatorics helpers such as `factorial`, `combinations`, and `permutations`. +* Updated `README.md`. +* Updated `docs/api.md`. +* Updated `CHANGELOG.md`. --- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ef0a78a..d9d166d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -51,6 +51,45 @@ Modern aliases can be added, but legacy names should not be removed. --- +## Source layout + +LuaSF exposes a stable public facade: + +```lua +local stats = require("luasf") +``` + +The implementation is modularized under `src/luasf/`: + +```text +src/ + luasf.lua + luasf/ + core.lua + descriptive.lua + sampling.lua + distributions.lua + bivariate.lua + probability.lua + validation.lua + rng.lua +``` + +When adding new functionality, prefer placing it in the most relevant internal module instead of growing `src/luasf.lua`. 
+ +Recommended module ownership: + +* `descriptive.lua`: univariate descriptive statistics +* `bivariate.lua`: two-variable statistics such as covariance and correlation +* `sampling.lua`: sampling helpers +* `distributions.lua`: random variable generators +* `probability.lua`: future probability/combinatorics helpers +* `validation.lua`: reusable input validation helpers +* `rng.lua`: random generator and seed helpers +* `core.lua`: small reusable internal utilities + +--- + ## Development setup Clone the repository: @@ -72,6 +111,8 @@ Run tests: ```bash lua spec/test_stats.lua lua spec/test_distributions.lua +lua spec/test_sampling.lua +lua spec/test_bivariate.lua ``` Run examples: @@ -80,10 +121,37 @@ Run examples: lua examples/dice_simulation.lua lua examples/normal_quality_control.lua lua examples/gamma_distribution.lua +lua examples/weighted_loot_drop.lua +lua examples/monte_carlo_pi.lua +lua examples/poisson_arrivals.lua +lua examples/binomial_coin_flips.lua +lua examples/bootstrap_mean.lua +lua examples/covariance_correlation.lua ``` --- +## LuaRocks packaging + +Rockspec files are kept under: + +```text +rockspec/ +``` + +When adding new internal modules, update the next rockspec draft so LuaRocks knows how to package them. + +Before publishing, validate locally or through GitHub Actions: + +```bash +luarocks lint rockspec/luasf-0.5.0-1.rockspec +luarocks make rockspec/luasf-0.5.0-1.rockspec +``` + +Publishing should remain manual and intentional. + +--- + ## Branch naming Recommended branch names: @@ -98,7 +166,8 @@ test/short-description Examples: ```text -feature/add-median +feature/modular-bivariate-stats +feature/add-skewness-kurtosis fix/triangular-random-variable docs/improve-api test/add-distribution-ranges @@ -113,6 +182,10 @@ Use clear and direct commit messages. 
Examples: ```text +Modularize LuaSF source layout +Add bivariate statistics helpers +Add bivariate statistics tests +Add covariance and correlation example Fix triangular random variable implementation Add frequency table tests Improve README examples @@ -130,6 +203,7 @@ Before opening a pull request, please check: * Examples still run. * New functions include simple documentation. * New behavior includes at least one test. +* New modules are included in the rockspec draft when needed. * Code remains readable and dependency-light. --- @@ -143,6 +217,7 @@ Prefer: * Small functions * Minimal dependencies * Compatibility with Lua 5.1+ +* Formula-based helpers when appropriate Avoid: @@ -150,6 +225,21 @@ Avoid: * Breaking legacy names * Adding native dependencies * Overcomplicating the API +* Turning LuaSF into a machine learning framework + +--- + +## Future scope + +Potential future additions include: + +* `skewness(array)` +* `kurtosis(array)` +* `factorial(n)` +* `combinations(n, r)` +* `permutations(n, r)` + +Simple formula-based regression summaries may be considered later, but optimization-based models and ML workflows are outside the current scope. --- diff --git a/README.md b/README.md index 93ae53b..122c23e 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,11 @@ The project started around 2014 and was later published under the MIT License. 
I * Pure Lua implementation * No native dependencies * Lua 5.1+ friendly -* Single-file friendly +* Single-file friendly public API +* Modular internal source layout * Basic descriptive statistics * Summary statistics helpers +* Bivariate statistics helpers * Sampling utilities * Discrete and continuous pseudo-random variables * Compatible with the existing public LuaSF API @@ -153,6 +155,14 @@ print(stats.stvF(values)) -- sample standard deviation | `percentile(array, p)` | Percentile where `p` is between `0` and `100` | | `summary(array)` | Summary table with count, min, max, mean, median, variance, and stddev | +### Bivariate statistics + +| Function | Description | +|---|---| +| `covariance(x, y)` | Sample covariance using `n - 1` | +| `correlation(x, y)` | Pearson correlation coefficient | +| `pearson(x, y)` | Alias for `correlation(x, y)` | + ### Sampling utilities | Function | Description | @@ -191,6 +201,36 @@ print(stats.stvF(values)) -- sample standard deviation ## Examples +### Summary statistics + +```lua +local stats = require("luasf") + +local values = {10, 12, 14, 15, 18, 20} +local result = stats.summary(values) + +print("Count:", result.count) +print("Min:", result.min) +print("Max:", result.max) +print("Mean:", result.mean) +print("Median:", result.median) +print("Variance:", result.variance) +print("Stddev:", result.stddev) +``` + +### Covariance and correlation + +```lua +local stats = require("luasf") + +local study_hours = {1, 2, 3, 4, 5} +local exam_scores = {50, 55, 65, 70, 80} + +print(stats.covariance(study_hours, exam_scores)) +print(stats.correlation(study_hours, exam_scores)) +``` + + ### Twice two dice simulation ```lua @@ -209,23 +249,6 @@ for i = 1, #frequencies.counts do end ``` -### Summary statistics - -```lua -local stats = require("luasf") - -local values = {10, 12, 14, 15, 18, 20} -local result = stats.summary(values) - -print("Count:", result.count) -print("Min:", result.min) -print("Max:", result.max) -print("Mean:", 
result.mean) -print("Median:", result.median) -print("Variance:", result.variance) -print("Stddev:", result.stddev) -``` - ### Normal distribution quality control sample ```lua @@ -293,10 +316,20 @@ stats.reset_rng() LuaSF/ src/ luasf.lua + luasf/ + core.lua + descriptive.lua + sampling.lua + distributions.lua + bivariate.lua + probability.lua + validation.lua + rng.lua spec/ test_stats.lua test_distributions.lua test_sampling.lua + test_bivariate.lua examples/ dice_simulation.lua normal_quality_control.lua @@ -306,20 +339,24 @@ LuaSF/ poisson_arrivals.lua binomial_coin_flips.lua bootstrap_mean.lua + covariance_correlation.lua docs/ api.md .github/ workflows/ ci.yml publish-luarocks.yml + rockspec/ + luasf-0.2.0-1.rockspec + luasf-0.3.0-1.rockspec + luasf-0.4.0-1.rockspec + luasf-0.5.0-1.rockspec LuaSF.lua LuaStat.lua README.md CHANGELOG.md CONTRIBUTING.md LICENSE - luasf-0.2.0-1.rockspec - luasf-0.3.0-1.rockspec ``` --- @@ -339,6 +376,7 @@ Run tests: lua spec/test_stats.lua lua spec/test_distributions.lua lua spec/test_sampling.lua +lua spec/test_bivariate.lua ``` --- @@ -354,6 +392,7 @@ lua examples/monte_carlo_pi.lua lua examples/poisson_arrivals.lua lua examples/binomial_coin_flips.lua lua examples/bootstrap_mean.lua +lua examples/covariance_correlation.lua ``` --- @@ -363,25 +402,34 @@ lua examples/bootstrap_mean.lua ### Completed * Compatibility-safe project revival -* Cleaner module structure +* Cleaner modular source structure * Legacy API preservation * Modern aliases * Basic tests * Examples * API documentation * Additional statistics helpers +* Summary statistics helpers +* Bivariate statistics helpers * Sampling utilities * Deterministic simulation support * LuaRocks publishing ### Planned -* Improve GitHub Actions CI with optional automatic checks for pull requests -* Improve LuaRocks validation and publishing workflows -* More examples -* More statistical helpers +* Shape statistics helpers such as `skewness(array)` and `kurtosis(array)` +* Future 
probability helpers such as `factorial`, `combinations`, and `permutations` * Lightweight cross-reference with LuaHMF -* Future combinatorics helpers such as `factorial`, `combinations`, and `permutations` +* More distribution and simulation examples +* Optional simple formula-based regression summaries, without turning LuaSF into a machine learning framework + +--- + +## Scope + +LuaSF is focused on lightweight statistics, probability, random variables, and simulation helpers. + +Optimization-based modeling, machine learning workflows, model training pipelines, and non-linear regression are intentionally outside the current scope of LuaSF. --- diff --git a/docs/api.md b/docs/api.md index 1452fe8..77cb917 100644 --- a/docs/api.md +++ b/docs/api.md @@ -36,6 +36,34 @@ local stats = require("src.luasf") --- +## Internal module layout + +LuaSF exposes a single public facade through: + +```lua +local stats = require("luasf") +``` + +Internally, the implementation is split into smaller modules: + +```text +src/ + luasf.lua + luasf/ + core.lua + descriptive.lua + sampling.lua + distributions.lua + bivariate.lua + probability.lua + validation.lua + rng.lua +``` + +This keeps the public API stable while making the source code easier to maintain. + +--- + ## Descriptive statistics ### `sumF(array)` @@ -288,6 +316,60 @@ print(result.median) -- 3 print(result.variance) -- 2.5 ``` + +--- + +## Bivariate statistics + +### `covariance(x, y)` + +Returns the sample covariance between two numeric arrays. + +LuaSF uses the sample covariance formula with `n - 1`. + +Both arrays must: + +* be tables +* contain numeric values +* have the same length +* contain at least two values + +Example: + +```lua +local stats = require("luasf") + +local x = {1, 2, 3, 4, 5} +local y = {2, 4, 6, 8, 10} + +print(stats.covariance(x, y)) -- 5 +``` + +### `correlation(x, y)` + +Returns the Pearson correlation coefficient between two numeric arrays. 
+ +Both arrays must have the same length and non-zero sample standard deviation. + +Example: + +```lua +local stats = require("luasf") + +local x = {1, 2, 3, 4, 5} +local y = {2, 4, 6, 8, 10} + +print(stats.correlation(x, y)) -- 1 +``` + +### `pearson(x, y)` + +Alias for: + +```lua +stats.correlation(x, y) +``` + --- ## Sampling utilities @@ -395,6 +477,26 @@ stats.reset_rng() LuaSF provides functions for discrete and continuous pseudo-random variables. +| Legacy name | Modern alias | Description | +|---|---|---| +| `nomalVA(mu, sig)` | `normal(mu, sig)` | Normal random variable | +| `normalVA(mu, sig)` | `normal(mu, sig)` | Normal random variable | +| `normal_inv_D(p, mu, sig)` | `inverse_normal(p, mu, sig)` | Approximate inverse normal value | +| `bernoulliVA(p)` | `bernoulli(p)` | Bernoulli random variable | +| `unifVA(min, max)` | `uniform(min, max)` | Uniform random variable | +| `expoVA(beta)` | `exponential(beta)` | Exponential random variable | +| `weibullVA(alpha, beta)` | `weibull(alpha, beta)` | Weibull random variable | +| `erlangVA(n, lambda)` | `erlang(n, lambda)` | Erlang random variable | +| `trianVA(a, b, c)` | `triangular(a, b, c)` | Triangular random variable | +| `binomialVA(n, p)` | `binomial(n, p)` | Binomial random variable | +| `geometricVA(p)` | `geometric(p)` | Geometric random variable | +| `poissonVA(lambda)` | `poisson(lambda)` | Poisson random variable | +| `chiSquareVA(n)` | `chi_square(n)` | Chi-square random variable | +| `gamVA(alpha, lambda)` | `gamma(alpha, lambda)` | Gamma random variable | +| `lognoVA(m, s)` | `lognormal(m, s)` | Log-normal random variable | +| `lognoRandVA(m, s)` | `lognormal(m, s)` | Log-normal random variable | + + ### `normalVA(mu, sig)` Returns a normally distributed random value. 
diff --git a/examples/covariance_correlation.lua b/examples/covariance_correlation.lua new file mode 100644 index 0000000..8949d7f --- /dev/null +++ b/examples/covariance_correlation.lua @@ -0,0 +1,19 @@ +local stats = require("luasf") + +local study_hours = {1, 2, 3, 4, 5} +local exam_scores = {50, 55, 65, 70, 80} + +local cov = stats.covariance(study_hours, exam_scores) +local corr = stats.correlation(study_hours, exam_scores) + +print("Study hours and exam scores") +print("Covariance:", cov) +print("Correlation:", corr) + +if corr > 0 then + print("Interpretation: positive relationship") +elseif corr < 0 then + print("Interpretation: negative relationship") +else + print("Interpretation: no linear relationship") +end \ No newline at end of file diff --git a/luasf-0.2.0-1.rockspec b/rockspec/luasf-0.2.0-1.rockspec similarity index 100% rename from luasf-0.2.0-1.rockspec rename to rockspec/luasf-0.2.0-1.rockspec diff --git a/luasf-0.3.0-1.rockspec b/rockspec/luasf-0.3.0-1.rockspec similarity index 100% rename from luasf-0.3.0-1.rockspec rename to rockspec/luasf-0.3.0-1.rockspec diff --git a/luasf-0.4.0-1.rockspec b/rockspec/luasf-0.4.0-1.rockspec similarity index 100% rename from luasf-0.4.0-1.rockspec rename to rockspec/luasf-0.4.0-1.rockspec diff --git a/rockspec/luasf-0.5.0-1.rockspec b/rockspec/luasf-0.5.0-1.rockspec new file mode 100644 index 0000000..1a6bae8 --- /dev/null +++ b/rockspec/luasf-0.5.0-1.rockspec @@ -0,0 +1,40 @@ +package = "luasf" +version = "0.5.0-1" + +source = { + url = "git+https://github.com/HubertRonald/LuaSF.git", + tag = "v0.5.0" +} + +description = { + summary = "Lua Statistics Functions", + detailed = [[ +LuaSF is a lightweight, pure-Lua library for descriptive statistics, +summary statistics, sampling utilities, simulation examples, and random +variable generation. 
+ ]], + homepage = "https://github.com/HubertRonald/LuaSF", + license = "MIT", + maintainer = "Hubert Ronald" +} + +dependencies = { + "lua >= 5.1" +} + +build = { + type = "builtin", + modules = { + luasf = "src/luasf.lua", + ["luasf.core"] = "src/luasf/core.lua", + ["luasf.validation"] = "src/luasf/validation.lua", + ["luasf.rng"] = "src/luasf/rng.lua", + ["luasf.descriptive"] = "src/luasf/descriptive.lua", + ["luasf.sampling"] = "src/luasf/sampling.lua", + ["luasf.distributions"] = "src/luasf/distributions.lua", + ["luasf.bivariate"] = "src/luasf/bivariate.lua", + ["luasf.probability"] = "src/luasf/probability.lua", + LuaSF = "LuaSF.lua", + LuaStat = "LuaStat.lua" + } +} \ No newline at end of file diff --git a/spec/test_bivariate.lua b/spec/test_bivariate.lua new file mode 100644 index 0000000..07aa3d4 --- /dev/null +++ b/spec/test_bivariate.lua @@ -0,0 +1,52 @@ +local luaunit = require("luaunit") +local stats = require("luasf") + +TestBivariate = {} + +function TestBivariate:test_covariance_positive_relationship() + local x = {1, 2, 3, 4, 5} + local y = {2, 4, 6, 8, 10} + + luaunit.assertAlmostEquals(stats.covariance(x, y), 5, 0.000001) +end + +function TestBivariate:test_correlation_positive_relationship() + local x = {1, 2, 3, 4, 5} + local y = {2, 4, 6, 8, 10} + + luaunit.assertAlmostEquals(stats.correlation(x, y), 1, 0.000001) +end + +function TestBivariate:test_correlation_negative_relationship() + local x = {1, 2, 3, 4, 5} + local y = {10, 8, 6, 4, 2} + + luaunit.assertAlmostEquals(stats.correlation(x, y), -1, 0.000001) +end + +function TestBivariate:test_pearson_alias() + local x = {1, 2, 3, 4, 5} + local y = {2, 4, 6, 8, 10} + + luaunit.assertAlmostEquals(stats.pearson(x, y), 1, 0.000001) +end + +function TestBivariate:test_covariance_requires_same_length() + luaunit.assertError(function() + stats.covariance({1, 2, 3}, {1, 2}) + end) +end + +function TestBivariate:test_correlation_requires_non_constant_x() + luaunit.assertError(function() + 
stats.correlation({1, 1, 1}, {1, 2, 3}) + end) +end + +function TestBivariate:test_correlation_requires_non_constant_y() + luaunit.assertError(function() + stats.correlation({1, 2, 3}, {1, 1, 1}) + end) +end + +os.exit(luaunit.LuaUnit.run()) \ No newline at end of file diff --git a/spec/test_distributions.lua b/spec/test_distributions.lua index e67ba7a..27668d5 100644 --- a/spec/test_distributions.lua +++ b/spec/test_distributions.lua @@ -1,5 +1,5 @@ local luaunit = require("luaunit") -local stats = require("src.luasf") +local stats = require("luasf") TestDistributions = {} diff --git a/spec/test_sampling.lua b/spec/test_sampling.lua index 82fa08e..3bfaf03 100644 --- a/spec/test_sampling.lua +++ b/spec/test_sampling.lua @@ -1,5 +1,5 @@ local luaunit = require("luaunit") -local stats = require("src.luasf") +local stats = require("luasf") TestSampling = {} diff --git a/spec/test_stats.lua b/spec/test_stats.lua index 6bcade2..d8c016f 100644 --- a/spec/test_stats.lua +++ b/spec/test_stats.lua @@ -1,5 +1,5 @@ local luaunit = require("luaunit") -local stats = require("src.luasf") +local stats = require("luasf") TestStats = {} diff --git a/src/luasf.lua b/src/luasf.lua index 2bc3d55..7be91bf 100644 --- a/src/luasf.lua +++ b/src/luasf.lua @@ -13,667 +13,30 @@ without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, subject to the conditions in the LICENSE file. -Phase 1 compatibility notes: -- Keeps legacy function names from the original LuaSF API. -- Adds modern aliases without breaking existing references. -- Keeps require("LuaSF") as the main compatibility entry point. 
-]] - -local M = {} - -local default_rand = math.random -local rand = default_rand -local randomseed = math.randomseed -local ln = math.log -local sqrt = math.sqrt -local exp = math.exp -local floor = math.floor -local ceil = math.ceil -local ipairs = ipairs -local table_sort = table.sort -local os_time = os.time - -local function assert_number(value, name) - assert(type(value) == "number", name .. " must be a number") -end - -local function assert_probability(p, name) - name = name or "p" - assert_number(p, name) - assert(p >= 0 and p <= 1, name .. " must be between 0 and 1") -end - -local function assert_non_empty_array(array, name) - name = name or "array" - assert(type(array) == "table", name .. " must be a table") - assert(#array > 0, name .. " must not be empty") -end - -local function safe_random_open() - -- Avoid exact 0 or 1 because logarithms and inverse transforms can fail there. - local r = rand() - while r <= 0 or r >= 1 do - r = rand() - end - return r -end - --- Seed helper -local function seed(value) - randomseed(value or os_time()) -end - -local function set_rng(rng_function) - assert(type(rng_function) == "function", "rng_function must be a function") - rand = rng_function -end - -local function reset_rng() - rand = default_rand -end - --- Legacy-compatible integer/random helper. 
--- Supports: --- rand() --- rand(max) --- rand(min, max) -local function randF(a, b) - local r = rand() - - if a == nil and b == nil then - return r - elseif b == nil then - assert_number(a, "max") - assert(a >= 1, "max must be greater than or equal to 1") - return floor(r * a) + 1 - else - assert_number(a, "min") - assert_number(b, "max") - assert(b >= a, "max must be greater than or equal to min") - return floor(r * (b - a + 1)) + a - end -end - --- Sum array -local function sumF(array) - assert(type(array) == "table", "array must be a table") - - local s = 0 - for _, value in ipairs(array) do - assert_number(value, "array value") - s = s + value - end - - return s -end - --- Average array -local function avF(array) - assert_non_empty_array(array) - return sumF(array) / #array -end - --- Sample standard deviation, using n - 1 -local function stvF(array) - assert_non_empty_array(array) - assert(#array >= 2, "array must contain at least two values") - - local average = avF(array) - local squared_sum = 0 - - for _, value in ipairs(array) do - squared_sum = squared_sum + (value - average) ^ 2 - end - - return sqrt(squared_sum / (#array - 1)) -end - -local function variance(array) - assert_non_empty_array(array) - assert(#array >= 2, "array must contain at least two values") - - local average = avF(array) - local squared_sum = 0 - - for _, value in ipairs(array) do - assert_number(value, "array value") - squared_sum = squared_sum + (value - average) ^ 2 - end - - return squared_sum / (#array - 1) -end - -local function min_value(array) - assert_non_empty_array(array) - - local current_min = array[1] - assert_number(current_min, "array value") - - for i = 2, #array do - assert_number(array[i], "array value") - if array[i] < current_min then - current_min = array[i] - end - end - - return current_min -end - -local function max_value(array) - assert_non_empty_array(array) - - local current_max = array[1] - assert_number(current_max, "array value") - - for i = 2, #array 
do - assert_number(array[i], "array value") - if array[i] > current_max then - current_max = array[i] - end - end - - return current_max -end - -local function copy_array(array) - assert_non_empty_array(array) - - local copied = {} - - for i = 1, #array do - copied[i] = array[i] - end - - return copied -end - -local function quantile(array, q) - assert_non_empty_array(array) - assert_number(q, "q") - assert(q >= 0 and q <= 1, "q must be between 0 and 1") - - local list = copy_array(array) - table_sort(list) - - if #list == 1 then - return list[1] - end - - local position = 1 + (#list - 1) * q - local lower_index = floor(position) - local upper_index = ceil(position) - - if lower_index == upper_index then - return list[lower_index] - end - - local weight = position - lower_index - return list[lower_index] * (1 - weight) + list[upper_index] * weight -end - -local function median(array) - return quantile(array, 0.5) -end - --- Frequency distribution. --- Keeps legacy fields: --- g = groups --- c = counts --- Adds readable aliases: --- values = groups --- counts = counts -local function frecuencyF(array) - assert_non_empty_array(array) - - local list = {} - local groups = {} - local counts = {} - - for index, value in ipairs(array) do - list[index] = value - end - - table_sort(list) - - groups[1] = list[1] - counts[1] = 1 - - for i = 2, #list do - if groups[#groups] == list[i] then - counts[#counts] = counts[#counts] + 1 - else - groups[#groups + 1] = list[i] - counts[#counts + 1] = 1 - end - end - - return { - g = groups, - c = counts, - values = groups, - counts = counts - } -end - -local function mode(array) - assert_non_empty_array(array) - - local freq = frecuencyF(array) - local best_value = freq.g[1] - local best_count = freq.c[1] - - for i = 2, #freq.g do - if freq.c[i] > best_count then - best_value = freq.g[i] - best_count = freq.c[i] - end - end - - return best_value -end - -local function range_value(array) - return max_value(array) - min_value(array) -end 
- -local function iqr(array) - return quantile(array, 0.75) - quantile(array, 0.25) -end - -local function percentile(array, p) - assert_number(p, "p") - assert(p >= 0 and p <= 100, "p must be between 0 and 100") - - return quantile(array, p / 100) -end - -local function summary(array) - assert_non_empty_array(array) - - local result = { - count = #array, - min = min_value(array), - max = max_value(array), - mean = avF(array), - median = median(array) - } - - if #array >= 2 then - result.variance = variance(array) - result.stddev = stvF(array) - else - result.variance = nil - result.stddev = nil - end - - return result -end - --- Normal random variable. --- Original approximation method kept for compatibility. -local function normalVA(mu, sig) - mu = mu or 0 - sig = sig or 1 - - assert_number(mu, "mu") - assert_number(sig, "sig") - assert(sig > 0, "sig must be greater than 0") - - local r = safe_random_open() - local z = (r ^ 0.135 - (1 - r) ^ 0.135) / 0.1975 - - return z * sig + mu -end --- Approximate inverse normal distribution. -local function normal_inv_D(p, mu, sig) - p = p or 0.5 - mu = mu or 0 - sig = sig or 1 +Public facade module. - assert_number(p, "p") - assert_number(mu, "mu") - assert_number(sig, "sig") - assert(p > 0 and p < 1, "p must be greater than 0 and less than 1") - assert(sig > 0, "sig must be greater than 0") - - local z = (p ^ 0.135 - (1 - p) ^ 0.135) / 0.1975 - - return z * sig + mu -end - -local function bernoulliVA(p) - p = p or 0.5 - assert_probability(p, "p") - - if rand() <= p then - return 1 - end - - return 0 -end - -local function unifVA(min, max) - min = min or 0 - max = max or 1 - - assert_number(min, "min") - assert_number(max, "max") - assert(max >= min, "max must be greater than or equal to min") - - return (max - min) * rand() + min -end - --- Exponential random variable. --- beta is treated as the rate parameter. 
-local function expoVA(beta) - beta = beta or 1 - - assert_number(beta, "beta") - assert(beta > 0, "beta must be greater than 0") - - return (-1 / beta) * ln(1 - safe_random_open()) -end - -local function weibullVA(alpha, beta) - alpha = alpha or 1 - beta = beta or 1 - - assert_number(alpha, "alpha") - assert_number(beta, "beta") - assert(alpha > 0, "alpha must be greater than 0") - assert(beta > 0, "beta must be greater than 0") - - return alpha * (-ln(1 - safe_random_open())) ^ (1 / beta) -end - -local function erlangVA(n, lambda) - n = n or 1 - lambda = lambda or 1 - - assert_number(n, "n") - assert_number(lambda, "lambda") - assert(n >= 1, "n must be greater than or equal to 1") - assert(lambda > 0, "lambda must be greater than 0") - - local value = 0 - - for _ = 1, floor(n) do - value = value + expoVA(lambda) - end - - return value -end - -local function trianVA(a, b, c) - a = a or 0 - b = b or 0.5 - c = c or 1 - - assert_number(a, "a") - assert_number(b, "b") - assert_number(c, "c") - assert(a <= b and b <= c, "parameters must satisfy a <= b <= c") - assert(c > a, "c must be greater than a") - - local r = safe_random_open() - local threshold = (b - a) / (c - a) - - if r <= threshold then - return a + sqrt(r * (b - a) * (c - a)) - end - - return c - sqrt((1 - r) * (c - b) * (c - a)) -end - -local function binomialVA(n, p) - n = n or 1 - p = p or 0.5 - - assert_number(n, "n") - assert_probability(p, "p") - assert(n >= 0, "n must be greater than or equal to 0") - - local value = 0 - - for _ = 1, floor(n) do - value = value + bernoulliVA(p) - end - - return value -end - -local function geometricVA(p) - p = p or 0.5 - - assert_probability(p, "p") - assert(p > 0 and p < 1, "p must be greater than 0 and less than 1") - - local u = safe_random_open() - - -- Number of failures before first success. 
- return floor(ln(u) / ln(1 - p)) -end - -local function poissonVA(lambda) - lambda = lambda or 1 - - assert_number(lambda, "lambda") - assert(lambda > 0, "lambda must be greater than 0") - - local elapsed = 0 - local value = 0 - - while true do - elapsed = elapsed + expoVA(lambda) - - if elapsed <= 1 then - value = value + 1 - else - break - end - end - - return value -end - -local function chiSquareVA(n) - n = n or 1 - - assert_number(n, "n") - assert(n >= 1, "n must be greater than or equal to 1") - - local value = 0 - - for _ = 1, floor(n) do - local z = normalVA() - value = value + z * z - end - - return value -end - --- Gamma random variable. --- alpha = shape --- lambda = rate -local function gamVA(alpha, lambda) - alpha = alpha or 1 - lambda = lambda or 1 - - assert_number(alpha, "alpha") - assert_number(lambda, "lambda") - assert(alpha > 0, "alpha must be greater than 0") - assert(lambda > 0, "lambda must be greater than 0") - - local value - - if alpha >= 1 then - local d = alpha - 1 / 3 - local c = 1 / sqrt(9 * d) - - while true do - local z = normalVA() - local v = 1 + c * z - - if v > 0 then - v = v ^ 3 - - local u = safe_random_open() - - if ln(u) < 0.5 * z * z + d - d * v + d * ln(v) then - value = d * v / lambda - break - end - end - end - else - value = gamVA(alpha + 1, lambda) * safe_random_open() ^ (1 / alpha) - end - - return value -end - --- Log-normal random variable from arithmetic mean and standard deviation. 
-local function lognoVA(m, s) - m = m or 1 - s = s or 1 - - assert_number(m, "m") - assert_number(s, "s") - assert(m > 0, "m must be greater than 0") - assert(s > 0, "s must be greater than 0") - - local mean = ln((m * m) / sqrt((m * m) + (s * s))) - local sd = sqrt(ln(1 + ((s * s) / (m * m)))) - - return exp(normalVA(mean, sd)) -end - -local function choice(array) - assert_non_empty_array(array) - return array[randF(1, #array)] -end - -local function shuffle(array) - local shuffled = copy_array(array) - - for i = #shuffled, 2, -1 do - local j = randF(1, i) - shuffled[i], shuffled[j] = shuffled[j], shuffled[i] - end - - return shuffled -end - -local function sample(array, n) - assert_non_empty_array(array) - assert_number(n, "n") - assert(n >= 0, "n must be greater than or equal to 0") - assert(n <= #array, "n must be less than or equal to array length") - - local shuffled = shuffle(array) - local result = {} - - for i = 1, floor(n) do - result[i] = shuffled[i] - end - - return result -end - -local function weighted_choice(items, weights) - assert_non_empty_array(items, "items") - assert_non_empty_array(weights, "weights") - assert(#items == #weights, "items and weights must have the same length") - - local total_weight = 0 - - for _, weight in ipairs(weights) do - assert_number(weight, "weight") - assert(weight >= 0, "weights must be greater than or equal to 0") - total_weight = total_weight + weight - end - - assert(total_weight > 0, "total weight must be greater than 0") - - local threshold = rand() * total_weight - local cumulative = 0 +This file keeps the public LuaSF API stable while delegating implementation +to smaller internal modules under src/luasf/. +]] - for i = 1, #items do - cumulative = cumulative + weights[i] +local module_name = ... or "luasf" +local prefix = module_name:match("^src%.") and "src.luasf." or "luasf." 
- if threshold <= cumulative then - return items[i] - end +local function merge(target, source) + for key, value in pairs(source) do + target[key] = value end - - return items[#items] end --- Legacy/public API -M.rand = randF -M.sumF = sumF -M.avF = avF -M.stvF = stvF -M.frecuencyF = frecuencyF -M.nomalVA = normalVA -M.normalVA = normalVA -M.normal_inv_D = normal_inv_D -M.bernoulliVA = bernoulliVA -M.unifVA = unifVA -M.expoVA = expoVA -M.weibullVA = weibullVA -M.erlangVA = erlangVA -M.trianVA = trianVA -M.binomialVA = binomialVA -M.geometricVA = geometricVA -M.poissonVA = poissonVA -M.chiSquareVA = chiSquareVA -M.gamVA = gamVA -M.lognoVA = lognoVA -M.lognoRandVA = lognoVA - --- Modern aliases -M.seed = seed -M.random_integer = randF -M.sum = sumF -M.mean = avF -M.stddev = stvF -M.frequency = frecuencyF - -M.mode = mode -M.range = range_value -M.iqr = iqr -M.percentile = percentile -M.summary = summary - -M.normal = normalVA -M.inverse_normal = normal_inv_D -M.bernoulli = bernoulliVA -M.uniform = unifVA -M.exponential = expoVA -M.weibull = weibullVA -M.erlang = erlangVA -M.triangular = trianVA -M.binomial = binomialVA -M.geometric = geometricVA -M.poisson = poissonVA -M.chi_square = chiSquareVA -M.gamma = gamVA -M.lognormal = lognoVA - -M.set_rng = set_rng -M.reset_rng = reset_rng - -M.variance = variance -M.median = median -M.min = min_value -M.max = max_value -M.quantile = quantile +local M = {} -M.choice = choice -M.shuffle = shuffle -M.sample = sample -M.weighted_choice = weighted_choice +merge(M, require(prefix .. "rng")) +merge(M, require(prefix .. "core")) +merge(M, require(prefix .. "descriptive")) +merge(M, require(prefix .. "sampling")) +merge(M, require(prefix .. "distributions")) +merge(M, require(prefix .. "bivariate")) +merge(M, require(prefix .. 
"probability")) return M diff --git a/src/luasf/bivariate.lua b/src/luasf/bivariate.lua new file mode 100644 index 0000000..b2c9bf0 --- /dev/null +++ b/src/luasf/bivariate.lua @@ -0,0 +1,42 @@ +local module_name = ... or "luasf.bivariate" +local prefix = module_name:match("^src%.") and "src.luasf." or "luasf." + +local validation = require(prefix .. "validation") +local descriptive = require(prefix .. "descriptive") + +local M = {} + +local function covariance(x, y) + validation.assert_same_length_numeric_arrays(x, y, "x", "y") + assert(#x >= 2, "x and y must contain at least two values") + + local mean_x = descriptive.mean(x) + local mean_y = descriptive.mean(y) + local total = 0 + + for i = 1, #x do + total = total + (x[i] - mean_x) * (y[i] - mean_y) + end + + return total / (#x - 1) +end + +local function pearson_correlation(x, y) + validation.assert_same_length_numeric_arrays(x, y, "x", "y") + assert(#x >= 2, "x and y must contain at least two values") + + local std_x = descriptive.stddev(x) + local std_y = descriptive.stddev(y) + + assert(std_x > 0, "x standard deviation must be greater than 0") + assert(std_y > 0, "y standard deviation must be greater than 0") + + return covariance(x, y) / (std_x * std_y) +end + +M.covariance = covariance +M.correlation = pearson_correlation +M.pearson = pearson_correlation +M.pearson_correlation = pearson_correlation + +return M \ No newline at end of file diff --git a/src/luasf/core.lua b/src/luasf/core.lua new file mode 100644 index 0000000..3cdad03 --- /dev/null +++ b/src/luasf/core.lua @@ -0,0 +1,32 @@ +local module_name = ... or "luasf.core" +local prefix = module_name:match("^src%.") and "src.luasf." or "luasf." + +local validation = require(prefix .. 
"validation") + +local M = {} + +function M.copy_array(array) + validation.assert_non_empty_array(array) + + local copied = {} + + for i = 1, #array do + copied[i] = array[i] + end + + return copied +end + +function M.copy_table(source) + assert(type(source) == "table", "source must be a table") + + local copied = {} + + for key, value in pairs(source) do + copied[key] = value + end + + return copied +end + +return M \ No newline at end of file diff --git a/src/luasf/descriptive.lua b/src/luasf/descriptive.lua new file mode 100644 index 0000000..49c7007 --- /dev/null +++ b/src/luasf/descriptive.lua @@ -0,0 +1,232 @@ +local module_name = ... or "luasf.descriptive" +local prefix = module_name:match("^src%.") and "src.luasf." or "luasf." + +local validation = require(prefix .. "validation") +local core = require(prefix .. "core") + +local sqrt = math.sqrt +local floor = math.floor +local ceil = math.ceil +local table_sort = table.sort + +local M = {} + +local function sumF(array) + assert(type(array) == "table", "array must be a table") + + local s = 0 + + for _, value in ipairs(array) do + validation.assert_number(value, "array value") + s = s + value + end + + return s +end + +local function avF(array) + validation.assert_non_empty_array(array) + return sumF(array) / #array +end + +local function stvF(array) + validation.assert_min_length(array, 2) + + local average = avF(array) + local squared_sum = 0 + + for _, value in ipairs(array) do + validation.assert_number(value, "array value") + squared_sum = squared_sum + (value - average) ^ 2 + end + + return sqrt(squared_sum / (#array - 1)) +end + +local function variance(array) + validation.assert_min_length(array, 2) + + local average = avF(array) + local squared_sum = 0 + + for _, value in ipairs(array) do + validation.assert_number(value, "array value") + squared_sum = squared_sum + (value - average) ^ 2 + end + + return squared_sum / (#array - 1) +end + +local function min_value(array) + 
validation.assert_non_empty_array(array) + + local current_min = array[1] + validation.assert_number(current_min, "array value") + + for i = 2, #array do + validation.assert_number(array[i], "array value") + + if array[i] < current_min then + current_min = array[i] + end + end + + return current_min +end + +local function max_value(array) + validation.assert_non_empty_array(array) + + local current_max = array[1] + validation.assert_number(current_max, "array value") + + for i = 2, #array do + validation.assert_number(array[i], "array value") + + if array[i] > current_max then + current_max = array[i] + end + end + + return current_max +end + +local function quantile(array, q) + validation.assert_non_empty_array(array) + validation.assert_number(q, "q") + assert(q >= 0 and q <= 1, "q must be between 0 and 1") + + local list = core.copy_array(array) + table_sort(list) + + if #list == 1 then + return list[1] + end + + local position = 1 + (#list - 1) * q + local lower_index = floor(position) + local upper_index = ceil(position) + + if lower_index == upper_index then + return list[lower_index] + end + + local weight = position - lower_index + + return list[lower_index] * (1 - weight) + list[upper_index] * weight +end + +local function median(array) + return quantile(array, 0.5) +end + +local function frecuencyF(array) + validation.assert_non_empty_array(array) + + local list = {} + local groups = {} + local counts = {} + + for index, value in ipairs(array) do + list[index] = value + end + + table_sort(list) + + groups[1] = list[1] + counts[1] = 1 + + for i = 2, #list do + if groups[#groups] == list[i] then + counts[#counts] = counts[#counts] + 1 + else + groups[#groups + 1] = list[i] + counts[#counts + 1] = 1 + end + end + + return { + g = groups, + c = counts, + values = groups, + counts = counts + } +end + +local function mode(array) + validation.assert_non_empty_array(array) + + local freq = frecuencyF(array) + local best_value = freq.g[1] + local best_count = 
freq.c[1] + + for i = 2, #freq.g do + if freq.c[i] > best_count then + best_value = freq.g[i] + best_count = freq.c[i] + end + end + + return best_value +end + +local function range_value(array) + return max_value(array) - min_value(array) +end + +local function iqr(array) + return quantile(array, 0.75) - quantile(array, 0.25) +end + +local function percentile(array, p) + validation.assert_number(p, "p") + assert(p >= 0 and p <= 100, "p must be between 0 and 100") + + return quantile(array, p / 100) +end + +local function summary(array) + validation.assert_non_empty_array(array) + + local result = { + count = #array, + min = min_value(array), + max = max_value(array), + mean = avF(array), + median = median(array) + } + + if #array >= 2 then + result.variance = variance(array) + result.stddev = stvF(array) + else + result.variance = nil + result.stddev = nil + end + + return result +end + +M.sumF = sumF +M.avF = avF +M.stvF = stvF +M.frecuencyF = frecuencyF + +M.sum = sumF +M.mean = avF +M.stddev = stvF +M.frequency = frecuencyF + +M.variance = variance +M.median = median +M.min = min_value +M.max = max_value +M.quantile = quantile + +M.mode = mode +M.range = range_value +M.iqr = iqr +M.percentile = percentile +M.summary = summary + +return M \ No newline at end of file diff --git a/src/luasf/distributions.lua b/src/luasf/distributions.lua new file mode 100644 index 0000000..0282d81 --- /dev/null +++ b/src/luasf/distributions.lua @@ -0,0 +1,276 @@ +local module_name = ... or "luasf.distributions" +local prefix = module_name:match("^src%.") and "src.luasf." or "luasf." + +local validation = require(prefix .. "validation") +local rng = require(prefix .. 
"rng") + +local ln = math.log +local sqrt = math.sqrt +local exp = math.exp +local floor = math.floor + +local M = {} + +local function normalVA(mu, sig) + mu = mu or 0 + sig = sig or 1 + + validation.assert_number(mu, "mu") + validation.assert_number(sig, "sig") + assert(sig > 0, "sig must be greater than 0") + + local r = rng.safe_random_open() + local z = (r ^ 0.135 - (1 - r) ^ 0.135) / 0.1975 + + return z * sig + mu +end + +local function normal_inv_D(p, mu, sig) + p = p or 0.5 + mu = mu or 0 + sig = sig or 1 + + validation.assert_number(p, "p") + validation.assert_number(mu, "mu") + validation.assert_number(sig, "sig") + assert(p > 0 and p < 1, "p must be greater than 0 and less than 1") + assert(sig > 0, "sig must be greater than 0") + + local z = (p ^ 0.135 - (1 - p) ^ 0.135) / 0.1975 + + return z * sig + mu +end + +local function bernoulliVA(p) + p = p or 0.5 + validation.assert_probability(p, "p") + + if rng.random() <= p then + return 1 + end + + return 0 +end + +local function unifVA(min, max) + min = min or 0 + max = max or 1 + + validation.assert_number(min, "min") + validation.assert_number(max, "max") + assert(max >= min, "max must be greater than or equal to min") + + return (max - min) * rng.random() + min +end + +local function expoVA(beta) + beta = beta or 1 + + validation.assert_number(beta, "beta") + assert(beta > 0, "beta must be greater than 0") + + return (-1 / beta) * ln(1 - rng.safe_random_open()) +end + +local function weibullVA(alpha, beta) + alpha = alpha or 1 + beta = beta or 1 + + validation.assert_number(alpha, "alpha") + validation.assert_number(beta, "beta") + assert(alpha > 0, "alpha must be greater than 0") + assert(beta > 0, "beta must be greater than 0") + + return alpha * (-ln(1 - rng.safe_random_open())) ^ (1 / beta) +end + +local function erlangVA(n, lambda) + n = n or 1 + lambda = lambda or 1 + + validation.assert_number(n, "n") + validation.assert_number(lambda, "lambda") + assert(n >= 1, "n must be greater than or equal 
to 1") + assert(lambda > 0, "lambda must be greater than 0") + + local value = 0 + + for _ = 1, floor(n) do + value = value + expoVA(lambda) + end + + return value +end + +local function trianVA(a, b, c) + a = a or 0 + b = b or 0.5 + c = c or 1 + + validation.assert_number(a, "a") + validation.assert_number(b, "b") + validation.assert_number(c, "c") + assert(a <= b and b <= c, "parameters must satisfy a <= b <= c") + assert(c > a, "c must be greater than a") + + local r = rng.safe_random_open() + local threshold = (b - a) / (c - a) + + if r <= threshold then + return a + sqrt(r * (b - a) * (c - a)) + end + + return c - sqrt((1 - r) * (c - b) * (c - a)) +end + +local function binomialVA(n, p) + n = n or 1 + p = p or 0.5 + + validation.assert_number(n, "n") + validation.assert_probability(p, "p") + assert(n >= 0, "n must be greater than or equal to 0") + + local value = 0 + + for _ = 1, floor(n) do + value = value + bernoulliVA(p) + end + + return value +end + +local function geometricVA(p) + p = p or 0.5 + + validation.assert_probability(p, "p") + assert(p > 0 and p < 1, "p must be greater than 0 and less than 1") + + local u = rng.safe_random_open() + + return floor(ln(u) / ln(1 - p)) +end + +local function poissonVA(lambda) + lambda = lambda or 1 + + validation.assert_number(lambda, "lambda") + assert(lambda > 0, "lambda must be greater than 0") + + local elapsed = 0 + local value = 0 + + while true do + elapsed = elapsed + expoVA(lambda) + + if elapsed <= 1 then + value = value + 1 + else + break + end + end + + return value +end + +local function chiSquareVA(n) + n = n or 1 + + validation.assert_number(n, "n") + assert(n >= 1, "n must be greater than or equal to 1") + + local value = 0 + + for _ = 1, floor(n) do + local z = normalVA() + value = value + z * z + end + + return value +end + +local function gamVA(alpha, lambda) + alpha = alpha or 1 + lambda = lambda or 1 + + validation.assert_number(alpha, "alpha") + validation.assert_number(lambda, "lambda") + 
assert(alpha > 0, "alpha must be greater than 0") + assert(lambda > 0, "lambda must be greater than 0") + + local value + + if alpha >= 1 then + local d = alpha - 1 / 3 + local c = 1 / sqrt(9 * d) + + while true do + local z = normalVA() + local v = 1 + c * z + + if v > 0 then + v = v ^ 3 + + local u = rng.safe_random_open() + + if ln(u) < 0.5 * z * z + d - d * v + d * ln(v) then + value = d * v / lambda + break + end + end + end + else + value = gamVA(alpha + 1, lambda) * rng.safe_random_open() ^ (1 / alpha) + end + + return value +end + +local function lognoVA(m, s) + m = m or 1 + s = s or 1 + + validation.assert_number(m, "m") + validation.assert_number(s, "s") + assert(m > 0, "m must be greater than 0") + assert(s > 0, "s must be greater than 0") + + local mean = ln((m * m) / sqrt((m * m) + (s * s))) + local sd = sqrt(ln(1 + ((s * s) / (m * m)))) + + return exp(normalVA(mean, sd)) +end + +M.nomalVA = normalVA +M.normalVA = normalVA +M.normal_inv_D = normal_inv_D +M.bernoulliVA = bernoulliVA +M.unifVA = unifVA +M.expoVA = expoVA +M.weibullVA = weibullVA +M.erlangVA = erlangVA +M.trianVA = trianVA +M.binomialVA = binomialVA +M.geometricVA = geometricVA +M.poissonVA = poissonVA +M.chiSquareVA = chiSquareVA +M.gamVA = gamVA +M.lognoVA = lognoVA +M.lognoRandVA = lognoVA + +M.normal = normalVA +M.inverse_normal = normal_inv_D +M.bernoulli = bernoulliVA +M.uniform = unifVA +M.exponential = expoVA +M.weibull = weibullVA +M.erlang = erlangVA +M.triangular = trianVA +M.binomial = binomialVA +M.geometric = geometricVA +M.poisson = poissonVA +M.chi_square = chiSquareVA +M.gamma = gamVA +M.lognormal = lognoVA + +return M \ No newline at end of file diff --git a/src/luasf/probability.lua b/src/luasf/probability.lua new file mode 100644 index 0000000..790f982 --- /dev/null +++ b/src/luasf/probability.lua @@ -0,0 +1,13 @@ +--[[ +Probability helpers placeholder. + +This module is intentionally small for now. 
Future versions may include +factorial, combinations, permutations, and related probability helpers. + +Those helpers should be added carefully to avoid overflow and to keep LuaSF +focused on lightweight statistics, probability, and simulation utilities. +]] + +local M = {} + +return M \ No newline at end of file diff --git a/src/luasf/rng.lua b/src/luasf/rng.lua new file mode 100644 index 0000000..0c0743e --- /dev/null +++ b/src/luasf/rng.lua @@ -0,0 +1,69 @@ +local module_name = ... or "luasf.rng" +local prefix = module_name:match("^src%.") and "src.luasf." or "luasf." + +local validation = require(prefix .. "validation") + +local floor = math.floor +local os_time = os.time + +local default_rand = math.random +local rand = default_rand +local randomseed = math.randomseed + +local M = {} + +local function seed(value) + randomseed(value or os_time()) +end + +local function set_rng(rng_function) + assert(type(rng_function) == "function", "rng_function must be a function") + rand = rng_function +end + +local function reset_rng() + rand = default_rand +end + +local function random() + return rand() +end + +local function safe_random_open() + local r = rand() + + while r <= 0 or r >= 1 do + r = rand() + end + + return r +end + +local function randF(a, b) + local r = rand() + + if a == nil and b == nil then + return r + elseif b == nil then + validation.assert_number(a, "max") + assert(a >= 1, "max must be greater than or equal to 1") + return floor(r * a) + 1 + else + validation.assert_number(a, "min") + validation.assert_number(b, "max") + assert(b >= a, "max must be greater than or equal to min") + return floor(r * (b - a + 1)) + a + end +end + +M.seed = seed +M.set_rng = set_rng +M.reset_rng = reset_rng + +M.random = random +M.safe_random_open = safe_random_open + +M.rand = randF +M.random_integer = randF + +return M \ No newline at end of file diff --git a/src/luasf/sampling.lua b/src/luasf/sampling.lua new file mode 100644 index 0000000..8ee20ea --- /dev/null +++ 
--- Sampling helpers for LuaSF: uniform choice, shuffling, sampling without
-- replacement and weighted choice. All randomness is drawn through the
-- sibling `rng` module.

local module_name = ... or "luasf.sampling"
-- Sibling modules are required relative to how this module itself was loaded
-- ("src.luasf.*" versus "luasf.*").
local prefix = module_name:match("^src%.") and "src.luasf." or "luasf."

local validation = require(prefix .. "validation")
local core = require(prefix .. "core")
local rng = require(prefix .. "rng")

local floor = math.floor

local M = {}

--- Return one element of `array`, picked with `rng.rand(1, #array)`.
-- @param array non-empty table
-- @return the selected element
-- Raises "array must be a table" / "array must not be empty" otherwise.
local function choice(array)
    validation.assert_non_empty_array(array)
    return array[rng.rand(1, #array)]
end

--- Return a shuffled copy of `array` (Fisher-Yates); the input is untouched.
-- @param array non-empty table (the copy is made by `core.copy_array`, which
--        rejects empty input)
-- @return a new table holding the same elements in random order
local function shuffle(array)
    local shuffled = core.copy_array(array)

    -- Walk from the back, swapping each slot with a random slot at or before it.
    for i = #shuffled, 2, -1 do
        local j = rng.rand(1, i)
        shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
    end

    return shuffled
end

--- Draw elements from `array` without replacement.
-- @param array non-empty table
-- @param n number of elements to draw, 0 <= n <= #array; a fractional `n`
--        is floored
-- @return a new table with `floor(n)` elements taken from a shuffled copy
local function sample(array, n)
    validation.assert_non_empty_array(array)
    validation.assert_number(n, "n")
    assert(n >= 0, "n must be greater than or equal to 0")
    assert(n <= #array, "n must be less than or equal to array length")

    local shuffled = shuffle(array)
    local result = {}

    for i = 1, floor(n) do
        result[i] = shuffled[i]
    end

    return result
end

--- Pick one of `items` with probability proportional to its weight.
-- @param items non-empty table of candidates
-- @param weights table of non-negative numbers, same length as `items`,
--        with a strictly positive sum
-- @return the selected item; an item whose weight is 0 is never returned
local function weighted_choice(items, weights)
    validation.assert_non_empty_array(items, "items")
    validation.assert_non_empty_array(weights, "weights")
    assert(#items == #weights, "items and weights must have the same length")

    local total_weight = 0
    local last_positive = nil

    -- Validate by index up to #weights (not ipairs) so a nil hole is reported
    -- as a bad weight instead of surfacing later as an arithmetic error.
    for i = 1, #weights do
        local weight = weights[i]

        validation.assert_number(weight, "weight")
        assert(weight >= 0, "weights must be greater than or equal to 0")

        if weight > 0 then
            last_positive = i
        end

        total_weight = total_weight + weight
    end

    assert(total_weight > 0, "total weight must be greater than 0")

    local threshold = rng.random() * total_weight
    local cumulative = 0

    for i = 1, #items do
        local weight = weights[i]
        cumulative = cumulative + weight

        -- Skip zero-weight entries: when the RNG yields exactly 0 the
        -- threshold is 0 and a leading zero-weight item would otherwise
        -- satisfy `threshold <= cumulative`.
        if weight > 0 and threshold <= cumulative then
            return items[i]
        end
    end

    -- Reached only if rounding (or an RNG returning >= 1) pushes the
    -- threshold past the final cumulative sum; fall back to the last item
    -- that can legitimately be chosen.
    return items[last_positive]
end

M.choice = choice
M.shuffle = shuffle
M.sample = sample
M.weighted_choice = weighted_choice

return M
--- Argument validation helpers shared by the LuaSF modules.
-- Each helper returns nothing when the check passes and raises an error
-- whose message is exactly "<name> <requirement>" (no source-position
-- prefix) when it fails.

local M = {}

-- Raise `name .. requirement` when `ok` is falsy.
-- The message is concatenated only on failure, so passing checks inside
-- per-element loops do no string work. Level 0 keeps the message free of
-- position information, identical to what assert(cond, message) raises.
local function ensure(ok, name, requirement)
    if not ok then
        error(name .. requirement, 0)
    end
end

--- Require `value` to have type "number".
-- @param value value to check
-- @param name label used in the error message (default "value")
function M.assert_number(value, name)
    ensure(type(value) == "number", name or "value", " must be a number")
end

--- Require `p` to be a number in the closed interval [0, 1].
-- @param p value to check
-- @param name label used in the error message (default "p")
function M.assert_probability(p, name)
    name = name or "p"
    M.assert_number(p, name)
    ensure(p >= 0 and p <= 1, name, " must be between 0 and 1")
end

--- Require `array` to be a table with `#array > 0`.
-- @param array value to check
-- @param name label used in the error message (default "array")
function M.assert_non_empty_array(array, name)
    name = name or "array"
    ensure(type(array) == "table", name, " must be a table")
    ensure(#array > 0, name, " must not be empty")
end

--- Require `array` to be a non-empty table with at least `minimum` entries.
-- @param array value to check
-- @param minimum smallest accepted `#array`
-- @param name label used in the error message (default "array")
function M.assert_min_length(array, minimum, name)
    name = name or "array"
    M.assert_non_empty_array(array, name)

    if not (#array >= minimum) then
        error(name .. " must contain at least " .. minimum .. " values", 0)
    end
end

--- Require every slot 1..#array of a non-empty table to hold a number.
-- @param array value to check
-- @param name label used in the error message (default "array"); element
--        failures are reported as "<name> value must be a number"
function M.assert_numeric_array(array, name)
    name = name or "array"
    M.assert_non_empty_array(array, name)

    local element_name = name .. " value"

    -- Index up to #array rather than using ipairs: ipairs stops at the first
    -- nil, so a hole below the length border would slip through and only fail
    -- later in callers that loop over 1..#array.
    for i = 1, #array do
        M.assert_number(array[i], element_name)
    end
end

--- Require `x` and `y` to be non-empty tables of equal length.
-- @param x first table
-- @param y second table
-- @param x_name label for `x` in error messages (default "x")
-- @param y_name label for `y` in error messages (default "y")
function M.assert_same_length_arrays(x, y, x_name, y_name)
    x_name = x_name or "x"
    y_name = y_name or "y"

    M.assert_non_empty_array(x, x_name)
    M.assert_non_empty_array(y, y_name)

    if #x ~= #y then
        error(x_name .. " and " .. y_name .. " must have the same length", 0)
    end
end

--- Require `x` and `y` to be non-empty, equal-length tables of numbers.
-- @param x first table
-- @param y second table
-- @param x_name label for `x` in error messages (default "x")
-- @param y_name label for `y` in error messages (default "y")
function M.assert_same_length_numeric_arrays(x, y, x_name, y_name)
    x_name = x_name or "x"
    y_name = y_name or "y"

    M.assert_same_length_arrays(x, y, x_name, y_name)
    M.assert_numeric_array(x, x_name)
    M.assert_numeric_array(y, y_name)
end

return M