#!/usr/bin/env python3
# testsuite/perftest.py -- added as a new file (mode 100755).
"""Compare the transfer performance of two rsync binaries (local <-> local).

This is a standalone dev tool (run it directly, not via runtests.py) for
spotting performance regressions between rsync releases.  Given two rsync
binaries it builds one test tree, then runs the two binaries ALTERNATELY for a
number of loops, timing each transfer, and reports the mean and standard
deviation of the transfer time for each binary.

Two transfers are timed each loop (see --mode):
  * full -- a fresh copy into an emptied destination (end-to-end read+write).
  * noop -- a re-run against an already-synced destination (rsync's own
            scan / file-list / stat overhead, where many regressions hide).

The first measured run of each binary is dropped (see --warmup) because it
cold-loads the source into the page cache and is an outlier.

The test tree's shape (heavy-tailed file sizes, a directory spine, symlinks,
hard links and a spread of permission modes) follows the gentestdata.py
generator; it is deterministic for a given --seed.

Examples:
  # Quick smoke run, same binary twice (means should match, no regression).
  ./perftest.py --files 200 --total-size 5M -n 3 ./rsync ./rsync

  # Compare a released binary against a fresh build over 8 loops.
  ./perftest.py -n 8 ../old_versions/rsync_3.4.0 ./rsync

  # Heavier tree, no-op (scan-overhead) timing only.
  ./perftest.py --files 50000 --total-size 2G --mode noop OLD/rsync NEW/rsync
"""

import argparse
import dataclasses
import math
import os
import random
import shlex
import shutil
import statistics
import struct
import subprocess
import sys
import tempfile
import time

# ---------------------------------------------------------------------------
# Test-tree generation (ported from gentestdata.py, kept self-contained).
# ---------------------------------------------------------------------------

# Marker file written at the root of every generated tree so the tree can be
# recognised as perftest output.  NOTE: safe_rmtree() does not check for it.
MARKER = ".perftest"

# Permission modes drawn at random for regular files (execs + read-only).
FILE_MODES = [0o644, 0o644, 0o600, 0o640, 0o664, 0o444, 0o755, 0o750, 0o700]
# Directory modes; owner always keeps r-x so the tree stays traversable.
DIR_MODES = [0o755, 0o755, 0o775, 0o750, 0o700, 0o555]

SIZE_SIGMA = 1.8          # sigma of the underlying lognormal size distribution
BASE_BUF_SIZE = 1 << 20   # 1 MiB shared random buffer for file content

# Multipliers for parse_size(): bare/"iB" suffixes are binary, "B" suffixes
# (KB, MB, ...) are decimal.  Keys are upper-case; lookups upper-case first.
_SIZE_UNITS = {
    "": 1, "B": 1,
    "K": 1024, "KIB": 1024, "KB": 1000,
    "M": 1024**2, "MIB": 1024**2, "MB": 1000**2,
    "G": 1024**3, "GIB": 1024**3, "GB": 1000**3,
    "T": 1024**4, "TIB": 1024**4, "TB": 1000**4,
}


def parse_size(s):
    """Parse a human size like 500M, 1.5GiB, 200KB, or a bare byte count.

    Returns the size as an int number of bytes (fractions are truncated).
    Raises argparse.ArgumentTypeError for an unknown suffix, an unparsable
    number, or a negative / non-finite size, so it can be used directly as an
    argparse ``type=``.
    """
    text = s.strip()
    # Peel the unit suffix off the right-hand end, one character at a time,
    # until we reach something that can end a number.
    num, suffix = text, ""
    while num and not (num[-1].isdigit() or num[-1] == "."):
        suffix = num[-1] + suffix
        num = num[:-1]
    suffix = suffix.strip().upper()   # tolerate "500 M"
    if suffix not in _SIZE_UNITS:
        raise argparse.ArgumentTypeError(f"unknown size suffix in {text!r}")
    try:
        value = float(num)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid size {text!r}") from None
    scaled = value * _SIZE_UNITS[suffix]
    # A negative total makes no sense, and int(inf) would raise OverflowError,
    # which argparse does not turn into a usage error.
    if scaled < 0 or not math.isfinite(scaled):
        raise argparse.ArgumentTypeError(f"size out of range: {text!r}")
    return int(scaled)


def human(n):
    """Format a byte count for the summary output (binary units, 1 decimal)."""
    for unit in ("B", "KiB", "MiB", "GiB", "TiB"):
        if abs(n) < 1024 or unit == "TiB":
            # Plain bytes are printed as given; scaled units get one decimal.
            return f"{n:.1f}{unit}" if unit != "B" else f"{n}B"
        n /= 1024


def gen_sizes(n, total, rng):
    """Return n heavy-tailed file sizes (bytes) summing to exactly `total`.

    Weights are drawn as exp(gauss(0, SIZE_SIGMA)) from `rng` and scaled to
    `total`; the rounding shortfall is added to the largest file.
    """
    if n == 0:
        return []
    weights = [math.exp(rng.gauss(0.0, SIZE_SIGMA)) for _ in range(n)]
    wsum = sum(weights)
    sizes = [int(w / wsum * total) for w in weights]
    drift = total - sum(sizes)
    if drift and sizes:
        i = max(range(n), key=lambda k: sizes[k])
        sizes[i] += drift
    return sizes


def build_dirs(root, num_dirs, max_depth, rng):
    """Create `num_dirs` dirs under root, up to `max_depth` deep; return them.

    `root` itself is created (it must not exist yet) and is the first entry
    of the returned list.  A spine d1/d2/.../d<max_depth> is always created,
    so the result can hold more than `num_dirs` entries when `num_dirs` is
    smaller than the spine.
    """
    os.makedirs(root)
    dirs = [root]
    depth_of = {root: 0}
    # Directories that may still receive children (depth < max_depth).
    candidates = [root] if max_depth > 0 else []
    counter = 0

    # Spine: guarantees the tree really reaches max_depth.
    cur = root
    for d in range(1, max_depth + 1):
        cur = os.path.join(cur, f"d{d}")
        os.mkdir(cur)
        dirs.append(cur)
        depth_of[cur] = d
        if d < max_depth:
            candidates.append(cur)

    # Fill up with randomly placed directories.
    while len(dirs) < num_dirs and candidates:
        parent = rng.choice(candidates)
        counter += 1
        child = os.path.join(parent, f"dir{counter}")
        os.mkdir(child)
        d = depth_of[parent] + 1
        dirs.append(child)
        depth_of[child] = d
        if d < max_depth:
            candidates.append(child)

    return dirs


def write_file(path, size, index, base):
    """Write a regular file of exactly `size` bytes (index/size in first 16).

    Files of at least 16 bytes start with `index` and `size` packed as two
    little-endian unsigned 64-bit integers; the rest is filled by repeating
    the shared buffer `base`, truncated to fit.
    """
    with open(path, "wb") as f:
        remaining = size
        if remaining >= 16:
            f.write(struct.pack("<QQ", index, size))
            remaining -= 16
        blen = len(base)
        while remaining > 0:
            chunk = base if remaining >= blen else base[:remaining]
            f.write(chunk)
            remaining -= len(chunk)


def rel_symlink(target, link_path):
    """Create a relative symlink at link_path pointing at target."""
    rel = os.path.relpath(target, os.path.dirname(link_path))
    os.symlink(rel, link_path)


def safe_rmtree(path):
    """Remove a tree, even one containing read-only directories."""
    # Make every walked directory owner-writable first, otherwise rmtree
    # cannot unlink the entries inside read-only directories (e.g. 0o555).
    for dirpath, _dirnames, _filenames in os.walk(path):
        try:
            os.chmod(dirpath, 0o700)
        except OSError:
            pass   # best effort; rmtree below reports any real failure
    shutil.rmtree(path)


def generate_tree(root, args):
    """Build the deterministic source tree at `root`; return a summary string.

    Reads files, dirs, symlinks, hardlinks, depth, seed and total_size from
    `args`.  All randomness comes from random.Random(args.seed), so the order
    of the rng calls below is part of the tree's definition.
    """
    n = args.files
    num_dirs = args.dirs if args.dirs is not None else max(args.depth, n // 20, 1)
    n_sym = args.symlinks if args.symlinks is not None else (max(1, n // 20) if n else 0)
    n_hard = args.hardlinks if args.hardlinks is not None else (max(1, n // 20) if n else 0)

    rng = random.Random(args.seed)
    base = rng.randbytes(BASE_BUF_SIZE)

    dirs = build_dirs(root, num_dirs, args.depth, rng)
    with open(os.path.join(root, MARKER), "w") as f:
        f.write(f"generated by perftest.py seed={args.seed} files={n} "
                f"total={args.total_size}\n")

    sizes = gen_sizes(n, args.total_size, rng)
    files = []
    for i, size in enumerate(sizes):
        path = os.path.join(rng.choice(dirs), f"file{i}.dat")
        write_file(path, size, i, base)
        files.append(path)

    hard_made = 0
    if files:
        for i in range(n_hard):
            tgt = rng.choice(files)
            link = os.path.join(rng.choice(dirs), f"hlink{i}_{os.path.basename(tgt)}")
            try:
                os.link(tgt, link)
                hard_made += 1
            except OSError:
                pass   # a failed hard link is skipped, not fatal

    sym_made = 0
    for i in range(n_sym):
        link = os.path.join(rng.choice(dirs), f"sym{i}")
        roll = rng.random()
        try:
            if roll < 0.15 or not files:
                # Deliberately dangling link.
                os.symlink(f"../broken-target-{i}", link)
            elif roll < 0.30:
                rel_symlink(rng.choice(dirs), link)
            else:
                rel_symlink(rng.choice(files), link)
            sym_made += 1
        except OSError:
            pass   # a failed symlink is skipped, not fatal

    # Permissions go on last, once nothing else needs to be created.
    # Deepest directories first, so a parent is never restricted before its
    # children have been handled.
    for path in files:
        os.chmod(path, rng.choice(FILE_MODES))
    for path in sorted((d for d in dirs if d != root),
                       key=lambda p: p.count(os.sep), reverse=True):
        os.chmod(path, rng.choice(DIR_MODES))

    return (f"files={n} dirs={len(dirs)} symlinks={sym_made} hardlinks={hard_made} "
            f"total={human(sum(sizes))} biggest={human(max(sizes) if sizes else 0)} "
            f"seed={args.seed}")


# ---------------------------------------------------------------------------
# Benchmark.
# ---------------------------------------------------------------------------

@dataclasses.dataclass
class Binary:
    """One rsync binary under test."""
    label: str      # "A" / "B"
    path: str       # absolute path to the rsync binary
    version: str    # first line of `rsync --version`


def rsync_version(path):
    """Return the first line of `PATH --version`, or a placeholder.

    Never raises for a binary that cannot be run or that hangs; the
    placeholder string then describes the problem.
    """
    try:
        # errors="replace": odd bytes in the output must not abort the run.
        r = subprocess.run([path, "--version"], capture_output=True, text=True,
                           errors="replace", timeout=15)
        line = (r.stdout or r.stderr or "").splitlines()
        return line[0].strip() if line else "(no --version output)"
    except (OSError, subprocess.TimeoutExpired) as e:
        return f"(version unavailable: {e})"


def drop_caches():
    """Best-effort: flush dirty pages and drop the page/dentry/inode caches.

    Needs root to write /proc/sys/vm/drop_caches; returns True on success.
    """
    subprocess.run(["sync"], check=False)
    try:
        with open("/proc/sys/vm/drop_caches", "w") as f:
            f.write("3\n")
        return True
    except OSError:
        return False


def time_transfer(binary, rsync_args, src, dest, timeout):
    """Run one `rsync src/ dest/` and return its wall-clock seconds.

    Raises RuntimeError if rsync exits non-zero, cannot be started, or does
    not finish within `timeout` seconds (a failed transfer can't be timed
    meaningfully).  Folding all three into RuntimeError gives callers a
    single failure type to handle.
    """
    argv = [binary.path, *rsync_args, src + "/", dest + "/"]
    who = f"{binary.label} ({binary.path})"
    t0 = time.monotonic()
    try:
        # errors="replace": rsync may echo file names that are not valid
        # UTF-8; that must not surface as a UnicodeDecodeError here.
        r = subprocess.run(argv, capture_output=True, text=True,
                           errors="replace", timeout=timeout)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError(
            f"{who} rsync timed out after {timeout}s:\n"
            f" cmd: {shlex.join(argv)}") from e
    except OSError as e:
        raise RuntimeError(
            f"{who} rsync could not be run: {e}\n"
            f" cmd: {shlex.join(argv)}") from e
    elapsed = time.monotonic() - t0
    if r.returncode != 0:
        raise RuntimeError(
            f"{who} rsync exited {r.returncode}:\n"
            f" cmd: {shlex.join(argv)}\n"
            f" {(r.stderr or r.stdout).strip()}")
    return elapsed


def run_benchmark(binaries, args, src, dest_full, dest_noop):
    """Run the alternating loops; return {label: {mode: [all samples]}}.

    The returned lists include the warm-up samples (the first args.warmup
    entries of each list).  Reads mode, rsync_args, timeout, warmup, runs and
    drop_caches from `args`.  Propagates RuntimeError from time_transfer().
    """
    do_full = args.mode in ("both", "full")
    do_noop = args.mode in ("both", "noop")

    cache_warned = False

    def maybe_drop_caches():
        """Honour --drop-caches; warn once if the kernel refuses."""
        nonlocal cache_warned
        if not args.drop_caches:
            return
        if not drop_caches() and not cache_warned:
            # Without this the user would silently get warm-cache numbers.
            print("warning: --drop-caches requested but the caches could not "
                  "be dropped (needs root); timings are warm-cache",
                  file=sys.stderr)
            cache_warned = True

    # Pre-populate the shared no-op destination so every timed no-op run finds
    # nothing to do.  Use binary A; its content is identical for B.
    if do_noop:
        time_transfer(binaries[0], args.rsync_args, src, dest_noop, args.timeout)

    samples = {b.label: {m: [] for m in ("full", "noop")} for b in binaries}
    total_loops = args.warmup + args.runs

    for loop in range(total_loops):
        tag = "warmup" if loop < args.warmup else f"run {loop - args.warmup + 1}/{args.runs}"
        # Alternate which binary goes first to cancel first-mover/thermal drift.
        order = binaries if loop % 2 == 0 else list(reversed(binaries))
        for b in order:
            if do_full:
                # Every full run starts from an empty destination.
                if os.path.exists(dest_full):
                    safe_rmtree(dest_full)
                os.mkdir(dest_full)
                maybe_drop_caches()
                t = time_transfer(b, args.rsync_args, src, dest_full, args.timeout)
                samples[b.label]["full"].append(t)
                _progress(b, "full", tag, t)
            if do_noop:
                maybe_drop_caches()
                t = time_transfer(b, args.rsync_args, src, dest_noop, args.timeout)
                samples[b.label]["noop"].append(t)
                _progress(b, "noop", tag, t)
    return samples


def _progress(binary, mode, tag, t):
    """Print one progress line for a finished transfer."""
    excl = " (warmup, excluded)" if tag == "warmup" else ""
    print(f" [{tag:>10}] {binary.label} {mode:<4} {t:8.3f}s{excl}")


# ---------------------------------------------------------------------------
# Reporting.
# ---------------------------------------------------------------------------

def _stats(times):
    """(n, mean, stddev, min, median) over the timing samples.

    Returns all zeros for an empty list; the (sample) standard deviation is
    reported as 0.0 when there is only one sample.
    """
    n = len(times)
    if n == 0:
        return (0, 0.0, 0.0, 0.0, 0.0)
    return (n, statistics.mean(times),
            statistics.stdev(times) if n > 1 else 0.0,
            min(times), statistics.median(times))


def report(binaries, samples, args):
    """Print the per-binary tables and the A-vs-B comparison; return exit code.

    `samples` is {label: {mode: [seconds, ...]}} including warm-up runs; the
    first args.warmup entries of each list are excluded from the statistics.
    Also writes the raw timings to args.csv when that is set.  The exit code
    is always 0 (a flagged regression is reported, not turned into a
    failure).
    """
    print("\n" + "=" * 72)
    for b in binaries:
        print(f"{b.label}: {b.path}\n {b.version}")
    # shlex.join shows the arguments exactly as they would be typed.
    print(f"rsync args: {shlex.join(args.rsync_args)} "
          f"(note: a full copy is not fsync'd unless you add --fsync)")
    print("=" * 72)

    # Only report modes that were actually measured.
    modes = [m for m in ("full", "noop") if any(samples[b.label][m] for b in binaries)]
    hdr = f"{'binary':<7}{'mode':<6}{'runs':>5}{'mean':>11}{'stddev':>11}{'min':>11}{'median':>11}"

    for mode in modes:
        print(f"\n{hdr}\n{'-' * len(hdr)}")
        st = {}
        for b in binaries:
            # Drop the leading warm-up samples before computing statistics.
            kept = samples[b.label][mode][args.warmup:]
            st[b.label] = _stats(kept)
            n, mean, sd, mn, md = st[b.label]
            print(f"{b.label:<7}{mode:<6}{n:>5}{mean:>10.3f}s{sd:>10.3f}s"
                  f"{mn:>10.3f}s{md:>10.3f}s")

        a, c = binaries[0].label, binaries[1].label
        (na, ma, sda, *_), (nc, mc, sdc, *_) = st[a], st[c]
        if na and nc and ma > 0:
            delta = mc - ma
            pct = delta / ma * 100.0
            noise = max(sda, sdc)
            # Flag only when B is slower beyond the run-to-run noise and a small
            # relative threshold, so jitter doesn't cry "regression".
            if delta > noise and pct > args.threshold:
                verdict = f"REGRESSION (slower): {c} is {pct:+.1f}% vs {a}"
            elif delta < -noise and -pct > args.threshold:
                verdict = f"faster: {c} is {pct:+.1f}% vs {a}"
            else:
                verdict = f"no significant change: {pct:+.1f}% (within noise)"
            print(f" {mode}: {a} {ma:.3f}s vs {c} {mc:.3f}s -> {verdict}")

    if args.csv:
        _write_csv(args.csv, binaries, samples, args.warmup)
        print(f"\nraw per-run timings written to {args.csv}")
    return 0


def _write_csv(path, binaries, samples, warmup=1):
    """Write every raw sample (warm-up runs included) to `path` as CSV.

    Columns: binary,path,mode,run,warmup,seconds.  The `warmup` column is 1
    for the first `warmup` runs of each binary/mode -- the runs left out of
    the statistics -- and 0 otherwise.
    """
    import csv   # local import: only needed when --csv is given

    # newline="" plus an explicit "\n" terminator keeps plain LF line ends,
    # while csv.writer quotes any field (e.g. a path) that contains a comma.
    with open(path, "w", newline="") as f:
        out = csv.writer(f, lineterminator="\n")
        out.writerow(["binary", "path", "mode", "run", "warmup", "seconds"])
        for b in binaries:
            for mode in ("full", "noop"):
                for i, t in enumerate(samples[b.label][mode]):
                    out.writerow([b.label, b.path, mode, i, int(i < warmup), f"{t:.6f}"])


# ---------------------------------------------------------------------------
# Main.
# ---------------------------------------------------------------------------

def main():
    """Parse the command line, build or locate the tree, benchmark, report.

    Exits with the report's code on success, 2 if a transfer failed or timed
    out, and 130 on Ctrl-C.  Unless --keep is given, the scratch
    directories are removed however the run ends.
    """
    ap = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("rsync_a", help="path to the first rsync binary (labelled A)")
    ap.add_argument("rsync_b", help="path to the second rsync binary (labelled B)")
    ap.add_argument("-n", "--runs", type=int, default=10,
                    help="measured loops per binary (default: 10)")
    ap.add_argument("--warmup", type=int, default=1,
                    help="leading runs per binary dropped from the stats to "
                         "reduce cache impact (default: 1)")
    ap.add_argument("--mode", choices=("both", "full", "noop"), default="both",
                    help="full=clean-dest copy, noop=re-sync scan overhead, "
                         "both (default)")
    ap.add_argument("--rsync-args", default="-aH",
                    help="rsync flags for the timed transfer (default: -aH); "
                         "write it as --rsync-args=... when the value starts "
                         "with '-'")
    ap.add_argument("--threshold", type=float, default=2.0,
                    help="percent slowdown above run-to-run noise before a "
                         "regression is flagged (default: 2.0)")
    # Tree-generation knobs (mirror gentestdata.py).
    ap.add_argument("--src", default=None,
                    help="benchmark this existing tree instead of generating one")
    ap.add_argument("-f", "--files", type=int, default=10000,
                    help="number of regular files to generate (default: 10000)")
    ap.add_argument("-s", "--total-size", type=parse_size, default="500M",
                    help="total size of all regular files (default: 500M)")
    ap.add_argument("-d", "--depth", type=int, default=10,
                    help="maximum directory tree depth (default: 10)")
    ap.add_argument("--dirs", type=int, default=None,
                    help="number of directories (default: max(depth, files/20))")
    ap.add_argument("--symlinks", type=int, default=None,
                    help="number of symlinks (default: files/20)")
    ap.add_argument("--hardlinks", type=int, default=None,
                    help="number of hard links (default: files/20)")
    ap.add_argument("--seed", type=int, default=1,
                    help="PRNG seed for a reproducible tree (default: 1)")
    ap.add_argument("--workdir", default=None,
                    help="scratch root for src/dest dirs (default: a tempdir)")
    ap.add_argument("--drop-caches", action="store_true",
                    help="sync + drop page/dentry/inode caches before each timed "
                         "run (needs root; cold-cache measurement)")
    ap.add_argument("--timeout", type=float, default=3600.0,
                    help="seconds before a single rsync run is abandoned "
                         "(default: 3600)")
    ap.add_argument("--keep", action="store_true",
                    help="keep the scratch tree on exit (default: remove it)")
    ap.add_argument("--csv", default=None,
                    help="write raw per-run timings to this CSV file")
    args = ap.parse_args()

    if args.runs < 2:
        ap.error("--runs must be >= 2 (need >=2 samples for a stddev)")
    if args.warmup < 0:
        # A negative count would make the [warmup:] slices keep the wrong runs.
        ap.error("--warmup must be >= 0")
    args.rsync_args = shlex.split(args.rsync_args)

    binaries = []
    for label, p in (("A", args.rsync_a), ("B", args.rsync_b)):
        path = os.path.abspath(p)
        if not (os.path.isfile(path) and os.access(path, os.X_OK)):
            ap.error(f"rsync {label} is not an executable file: {p}")
        binaries.append(Binary(label, path, rsync_version(path)))

    # Validate --src before any scratch directory exists, so a usage error
    # leaves nothing behind.
    src = None
    if args.src:
        src = os.path.abspath(args.src)
        if not os.path.isdir(src):
            ap.error(f"--src is not a directory: {args.src}")

    # --keep together with --workdir uses a fixed, reusable location; every
    # other combination gets a fresh unique temporary directory.
    if args.workdir:
        os.makedirs(args.workdir, exist_ok=True)
    if args.keep and args.workdir:
        workdir = os.path.join(args.workdir, "rsync-perftest")
    else:
        workdir = tempfile.mkdtemp(prefix="rsync-perftest-", dir=args.workdir)
    os.makedirs(workdir, exist_ok=True)
    dest_full = os.path.join(workdir, "dest_full")
    dest_noop = os.path.join(workdir, "dest_noop")
    os.makedirs(dest_noop, exist_ok=True)

    generate = src is None
    if generate:
        src = os.path.join(workdir, "src")
        if os.path.exists(src):
            # Left over from an earlier --keep run in the same --workdir.
            # Only replace it if it carries our marker file.
            if not os.path.isfile(os.path.join(src, MARKER)):
                ap.error(f"refusing to overwrite {src}: no {MARKER} marker in it")
            safe_rmtree(src)

    generated = None
    rc = 1
    try:
        if generate:
            print(f"generating source tree in {src} ...")
            t0 = time.monotonic()
            generated = src   # set first so a half-built tree is cleaned up
            summary = generate_tree(src, args)
            print(f" {summary} ({time.monotonic() - t0:.1f}s)")
        else:
            print(f"using existing source tree {src}")

        print(f"\nbenchmarking: warmup={args.warmup} runs={args.runs} mode={args.mode} "
              f"drop_caches={args.drop_caches}\n")
        samples = run_benchmark(binaries, args, src, dest_full, dest_noop)
        rc = report(binaries, samples, args)
    except (RuntimeError, subprocess.TimeoutExpired) as e:
        print(f"\nbenchmark aborted: {e}", file=sys.stderr)
        rc = 2
    except KeyboardInterrupt:
        print("\ninterrupted", file=sys.stderr)
        rc = 130
    finally:
        if args.keep:
            print(f"\nkept scratch tree: {workdir}")
        else:
            for d in (dest_full, dest_noop, generated):
                if d and os.path.exists(d):
                    safe_rmtree(d)
            # Remove the workdir itself if it is now empty (i.e. we made it).
            try:
                os.rmdir(workdir)
            except OSError:
                pass
    # Deliberately outside the finally block: exiting from inside it would
    # replace (and hide) any unexpected exception still propagating.
    sys.exit(rc)


if __name__ == "__main__":
    main()

# vim: sw=4 et ft=python