diff --git a/doc/benchmarks.rst b/doc/benchmarks.rst index 54c5e69c..c43977c5 100644 --- a/doc/benchmarks.rst +++ b/doc/benchmarks.rst @@ -76,6 +76,32 @@ These benchmarks also have an "eager" flavor that uses asyncio eager task factor if available. +base64 +------ + +Benchmark the ``base64`` module's encoding and decoding functions. Each +algorithm has ``_small`` and ``_large`` variants that test both encode and +decode in a single benchmark: + +* ``_small``: Balanced iterations across tiny (20B), small (127B), medium (3KB), + and 9KB data sizes +* ``_large``: Large data focus with 100KB x 10 iterations plus 1MB x 1 iteration + +Available benchmarks: + +* ``base64_small``, ``base64_large``: Standard Base64 encoding and decoding + (includes ``validate=True`` code path) +* ``urlsafe_base64_small``: URL-safe Base64 (small only, as URLs shouldn't + contain huge data) +* ``base32_small``, ``base32_large``: Base32 encoding and decoding +* ``base16_small``, ``base16_large``: Base16/hex encoding and decoding +* ``ascii85_small``, ``ascii85_large``: Ascii85 encoding and decoding + (includes ``wrapcol=76`` code path) +* ``base85_small``, ``base85_large``: Base85 encoding and decoding + +See the `base64 module <https://docs.python.org/3/library/base64.html>`_. + + chameleon --------- diff --git a/doc/changelog.rst b/doc/changelog.rst index 767927a9..f15bbdc9 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,6 +1,10 @@ Changelog ========= +Version 1.14.0 +-------------- +* Add base64 module benchmark (b64, b32, b16, a85, b85) + Version 1.13.0 (2025-10-27) -------------- * Re-enable xdsl benchmark diff --git a/doc/conf.py b/doc/conf.py index fabfb643..202335f6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -55,7 +55,7 @@ # built documents. # # The short X.Y version. -version = release = "1.0.6" +version = release = "1.14.0" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/pyperformance/__init__.py b/pyperformance/__init__.py index 32d7dea9..44cefed2 100644 --- a/pyperformance/__init__.py +++ b/pyperformance/__init__.py @@ -3,7 +3,7 @@ import sys from importlib.metadata import distribution -VERSION = (1, 13, 0) +VERSION = (1, 14, 0) __version__ = ".".join(map(str, VERSION)) diff --git a/pyperformance/data-files/benchmarks/MANIFEST b/pyperformance/data-files/benchmarks/MANIFEST index dd22b77a..8b28b9db 100644 --- a/pyperformance/data-files/benchmarks/MANIFEST +++ b/pyperformance/data-files/benchmarks/MANIFEST @@ -24,6 +24,7 @@ async_tree_eager_memoization_tg asyncio_tcp asyncio_tcp_ssl asyncio_websockets +base64 bpe_tokeniser concurrent_imap coroutines diff --git a/pyperformance/data-files/benchmarks/bm_base64/pyproject.toml b/pyperformance/data-files/benchmarks/bm_base64/pyproject.toml new file mode 100644 index 00000000..982d8328 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_base64/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "pyperformance_bm_base64" +requires-python = ">=3.8" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/python/pyperformance"} +dynamic = ["version"] + +[tool.pyperformance] +name = "base64" +tags = "serialize" diff --git a/pyperformance/data-files/benchmarks/bm_base64/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_base64/run_benchmark.py new file mode 100644 index 00000000..d82d4c49 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_base64/run_benchmark.py @@ -0,0 +1,274 @@ +"""Benchmark for the base64 module's primary public APIs. + +Tests encoding and decoding performance across various variants +and data sizes, split into _small (balanced small data) and _large variants. 
+""" + +import base64 +import random +import pyperf + + +# Generate test data with fixed seed for reproducibility +random.seed(12345) +DATA_TINY = bytes(random.randrange(256) for _ in range(20)) +DATA_SMALL = bytes(random.randrange(256) for _ in range(127)) # odd on purpose +DATA_MEDIUM = bytes(random.randrange(256) for _ in range(3072)) +DATA_9K = bytes(random.randrange(256) for _ in range(9000)) +DATA_LARGE = bytes(random.randrange(256) for _ in range(102400)) +DATA_HUGE = bytes(random.randrange(256) for _ in range(1048576)) + +# Pre-encoded data for decode benchmarks +B64_TINY = base64.b64encode(DATA_TINY) +B64_SMALL = base64.b64encode(DATA_SMALL) +B64_MEDIUM = base64.b64encode(DATA_MEDIUM) +B64_9K = base64.b64encode(DATA_9K) +B64_LARGE = base64.b64encode(DATA_LARGE) +B64_HUGE = base64.b64encode(DATA_HUGE) + +B64_URLSAFE_TINY = base64.urlsafe_b64encode(DATA_TINY) +B64_URLSAFE_SMALL = base64.urlsafe_b64encode(DATA_SMALL) +B64_URLSAFE_MEDIUM = base64.urlsafe_b64encode(DATA_MEDIUM) +B64_URLSAFE_9K = base64.urlsafe_b64encode(DATA_9K) + +B32_TINY = base64.b32encode(DATA_TINY) +B32_SMALL = base64.b32encode(DATA_SMALL) +B32_MEDIUM = base64.b32encode(DATA_MEDIUM) +B32_9K = base64.b32encode(DATA_9K) +B32_LARGE = base64.b32encode(DATA_LARGE) +B32_HUGE = base64.b32encode(DATA_HUGE) + +B16_TINY = base64.b16encode(DATA_TINY) +B16_SMALL = base64.b16encode(DATA_SMALL) +B16_MEDIUM = base64.b16encode(DATA_MEDIUM) +B16_9K = base64.b16encode(DATA_9K) +B16_LARGE = base64.b16encode(DATA_LARGE) +B16_HUGE = base64.b16encode(DATA_HUGE) + +A85_TINY = base64.a85encode(DATA_TINY) +A85_SMALL = base64.a85encode(DATA_SMALL) +A85_MEDIUM = base64.a85encode(DATA_MEDIUM) +A85_9K = base64.a85encode(DATA_9K) +A85_LARGE = base64.a85encode(DATA_LARGE) +A85_HUGE = base64.a85encode(DATA_HUGE) + +B85_TINY = base64.b85encode(DATA_TINY) +B85_SMALL = base64.b85encode(DATA_SMALL) +B85_MEDIUM = base64.b85encode(DATA_MEDIUM) +B85_9K = base64.b85encode(DATA_9K) +B85_LARGE = base64.b85encode(DATA_LARGE) 
+B85_HUGE = base64.b85encode(DATA_HUGE) + + +# --- Base64 (includes validate=True) --- + +def bench_b64_small(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(450): + base64.b64encode(DATA_TINY) + base64.b64decode(B64_TINY) + base64.b64decode(B64_TINY, validate=True) + for _ in range(71): + base64.b64encode(DATA_SMALL) + base64.b64decode(B64_SMALL) + base64.b64decode(B64_SMALL, validate=True) + for _ in range(3): + base64.b64encode(DATA_MEDIUM) + base64.b64decode(B64_MEDIUM) + base64.b64decode(B64_MEDIUM, validate=True) + base64.b64encode(DATA_9K) + base64.b64decode(B64_9K) + base64.b64decode(B64_9K, validate=True) + return pyperf.perf_counter() - t0 + + +def bench_b64_large(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(10): + base64.b64encode(DATA_LARGE) + base64.b64decode(B64_LARGE) + base64.b64decode(B64_LARGE, validate=True) + base64.b64encode(DATA_HUGE) + base64.b64decode(B64_HUGE) + base64.b64decode(B64_HUGE, validate=True) + return pyperf.perf_counter() - t0 + + +# --- URL-safe Base64 (small only) --- + +def bench_urlsafe_b64_small(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(450): + base64.urlsafe_b64encode(DATA_TINY) + base64.urlsafe_b64decode(B64_URLSAFE_TINY) + for _ in range(71): + base64.urlsafe_b64encode(DATA_SMALL) + base64.urlsafe_b64decode(B64_URLSAFE_SMALL) + for _ in range(3): + base64.urlsafe_b64encode(DATA_MEDIUM) + base64.urlsafe_b64decode(B64_URLSAFE_MEDIUM) + base64.urlsafe_b64encode(DATA_9K) + base64.urlsafe_b64decode(B64_URLSAFE_9K) + return pyperf.perf_counter() - t0 + + +# --- Base32 --- + +def bench_b32_small(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(450): + base64.b32encode(DATA_TINY) + base64.b32decode(B32_TINY) + for _ in range(71): + base64.b32encode(DATA_SMALL) + base64.b32decode(B32_SMALL) + for _ in range(3): + 
base64.b32encode(DATA_MEDIUM) + base64.b32decode(B32_MEDIUM) + base64.b32encode(DATA_9K) + base64.b32decode(B32_9K) + return pyperf.perf_counter() - t0 + + +def bench_b32_large(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(10): + base64.b32encode(DATA_LARGE) + base64.b32decode(B32_LARGE) + base64.b32encode(DATA_HUGE) + base64.b32decode(B32_HUGE) + return pyperf.perf_counter() - t0 + + +# --- Base16 --- + +def bench_b16_small(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(450): + base64.b16encode(DATA_TINY) + base64.b16decode(B16_TINY) + for _ in range(71): + base64.b16encode(DATA_SMALL) + base64.b16decode(B16_SMALL) + for _ in range(3): + base64.b16encode(DATA_MEDIUM) + base64.b16decode(B16_MEDIUM) + base64.b16encode(DATA_9K) + base64.b16decode(B16_9K) + return pyperf.perf_counter() - t0 + + +def bench_b16_large(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(10): + base64.b16encode(DATA_LARGE) + base64.b16decode(B16_LARGE) + base64.b16encode(DATA_HUGE) + base64.b16decode(B16_HUGE) + return pyperf.perf_counter() - t0 + + +# --- Ascii85 (includes wrapcol=76) --- + +def bench_a85_small(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(450): + base64.a85encode(DATA_TINY) + base64.a85encode(DATA_TINY, wrapcol=76) + base64.a85decode(A85_TINY) + for _ in range(71): + base64.a85encode(DATA_SMALL) + base64.a85encode(DATA_SMALL, wrapcol=76) + base64.a85decode(A85_SMALL) + for _ in range(3): + base64.a85encode(DATA_MEDIUM) + base64.a85encode(DATA_MEDIUM, wrapcol=76) + base64.a85decode(A85_MEDIUM) + base64.a85encode(DATA_9K) + base64.a85encode(DATA_9K, wrapcol=76) + base64.a85decode(A85_9K) + return pyperf.perf_counter() - t0 + + +def bench_a85_large(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(10): + 
base64.a85encode(DATA_LARGE) + base64.a85encode(DATA_LARGE, wrapcol=76) + base64.a85decode(A85_LARGE) + base64.a85encode(DATA_HUGE) + base64.a85encode(DATA_HUGE, wrapcol=76) + base64.a85decode(A85_HUGE) + return pyperf.perf_counter() - t0 + + +# --- Base85 --- + +def bench_b85_small(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(450): + base64.b85encode(DATA_TINY) + base64.b85decode(B85_TINY) + for _ in range(71): + base64.b85encode(DATA_SMALL) + base64.b85decode(B85_SMALL) + for _ in range(3): + base64.b85encode(DATA_MEDIUM) + base64.b85decode(B85_MEDIUM) + base64.b85encode(DATA_9K) + base64.b85decode(B85_9K) + return pyperf.perf_counter() - t0 + + +def bench_b85_large(loops): + range_it = range(loops) + t0 = pyperf.perf_counter() + for _ in range_it: + for _ in range(10): + base64.b85encode(DATA_LARGE) + base64.b85decode(B85_LARGE) + base64.b85encode(DATA_HUGE) + base64.b85decode(B85_HUGE) + return pyperf.perf_counter() - t0 + + +if __name__ == "__main__": + runner = pyperf.Runner() + runner.metadata['description'] = "Benchmark base64 module encoding/decoding" + + runner.bench_time_func('base64_small', bench_b64_small) + runner.bench_time_func('base64_large', bench_b64_large) + + runner.bench_time_func('urlsafe_base64_small', bench_urlsafe_b64_small) + + runner.bench_time_func('base32_small', bench_b32_small) + runner.bench_time_func('base32_large', bench_b32_large) + + runner.bench_time_func('base16_small', bench_b16_small) + runner.bench_time_func('base16_large', bench_b16_large) + + runner.bench_time_func('ascii85_small', bench_a85_small) + runner.bench_time_func('ascii85_large', bench_a85_large) + + runner.bench_time_func('base85_small', bench_b85_small) + runner.bench_time_func('base85_large', bench_b85_large)