From 7b6e1140b8354dae78eca0c4e6544813e5de3c21 Mon Sep 17 00:00:00 2001
From: Tom Herbert <18316812+taherbert@users.noreply.github.com>
Date: Mon, 23 Feb 2026 11:23:41 -0800
Subject: [PATCH] [Build] Add LTO, PGO, and -march=native CMake options

The legacy engine/Makefile supports LTO, PGO, and -march=native but the
CMake build has no equivalent. This adds opt-in CMake options that bring
those capabilities to cmake users.

New options (all OFF by default, existing builds unaffected):
- SC_LTO: thin LTO on Clang, full LTO on GCC
- SC_MARCH_NATIVE: tune for the host CPU
- SC_PGO_GENERATE / SC_PGO_USE: LLVM/GCC profile-guided optimization

Bumps cmake_minimum_required to 3.13 for target_link_options().

Also adds scripts/benchmark.sh which automates a three-way comparison
(baseline vs LTO+march vs PGO+LTO+march) and prints a results table.

Tested on Apple Silicon (M4 Max, AppleClang 17):

  Variant                    Time(s)  Speedup
  Release (baseline)            0.64    1.00x
  LTO + march=native            0.57    1.12x
  PGO + LTO + march=native      0.55    1.16x

(best of 3 runs, 1000 iterations, 4 threads, MID1_Priest_Shadow)
---
 CMakeLists.txt       |  65 ++++++++++++++++++-
 scripts/benchmark.sh | 145 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100755 scripts/benchmark.sh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8662ba4b15e..11a5900191d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 # Projects Settings
-cmake_minimum_required (VERSION 3.10...3.22)
+cmake_minimum_required (VERSION 3.13...3.22)

 # Set OSX minimum version parity with Qt project
 set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
@@ -18,6 +18,29 @@ option(SC_USE_FLAT_INSTALL "Install files into a flat folder structure" ${WIN32}

 option(SC_EVENT_QUEUE_DEBUG "Enable Event Queue Debug Info" OFF)

+# Opt-in optimization switches, all disabled by default. They are applied per
+# target by sc_common_compiler_options() and only affect non-MSVC builds.
+option(SC_LTO "Enable Link-Time Optimization" OFF)
+option(SC_MARCH_NATIVE "Tune for the host CPU (-march=native)" OFF)
+option(SC_PGO_GENERATE "Instrument for PGO profile collection" OFF)
+set(SC_PGO_USE "" CACHE STRING "Absolute path to .profdata for PGO-optimized build")
+
+# Instrumenting and consuming a profile are two separate build steps.
+if(SC_PGO_GENERATE AND SC_PGO_USE)
+  message(FATAL_ERROR "SC_PGO_GENERATE and SC_PGO_USE are mutually exclusive.")
+endif()
+
+# Catch an unusable profile path at configure time rather than as compiler
+# diagnostics in the middle of the build.
+if(SC_PGO_USE)
+  if(NOT IS_ABSOLUTE "${SC_PGO_USE}")
+    message(FATAL_ERROR "SC_PGO_USE must be an absolute path: ${SC_PGO_USE}")
+  endif()
+  if(NOT EXISTS "${SC_PGO_USE}")
+    message(FATAL_ERROR "SC_PGO_USE does not exist: ${SC_PGO_USE}")
+  endif()
+endif()
+
 set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to conform to")
 set(CMAKE_CXX_STANDARD_REQUIRED YES)
 set(CMAKE_CXX_EXTENSIONS OFF)
@@ -51,6 +74,46 @@ function(sc_common_compiler_options target)
     $<$<CXX_COMPILER_ID:MSVC>:/utf-8> # required for fmt 11.2
     $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -Wpedantic>
   )
+  # Optional GCC/Clang optimization flags selected by the SC_LTO,
+  # SC_MARCH_NATIVE, SC_PGO_GENERATE and SC_PGO_USE options. MSVC takes none
+  # of these flag spellings, so the whole section is skipped there.
+  if(NOT MSVC)
+    # Compiler predicates are spelled with nested $<OR:...> because the
+    # comma-separated $<CXX_COMPILER_ID:a,b> form requires CMake 3.15, which
+    # is newer than this project's minimum.
+    set(_sc_is_clang "$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>")
+    set(_sc_is_gnu "$<CXX_COMPILER_ID:GNU>")
+
+    set(_sc_extra_flags "")
+    if(SC_LTO)
+      # Thin LTO on Clang, full LTO on GCC.
+      list(APPEND _sc_extra_flags
+        "$<${_sc_is_clang}:-flto=thin>"
+        "$<${_sc_is_gnu}:-flto>"
+      )
+    endif()
+    if(SC_MARCH_NATIVE)
+      list(APPEND _sc_extra_flags -march=native)
+    endif()
+    if(SC_PGO_GENERATE)
+      list(APPEND _sc_extra_flags
+        "$<${_sc_is_clang}:-fprofile-instr-generate>"
+        "$<${_sc_is_gnu}:-fprofile-generate>"
+      )
+    endif()
+    if(SC_PGO_USE)
+      list(APPEND _sc_extra_flags
+        "$<${_sc_is_clang}:-fprofile-instr-use=${SC_PGO_USE}>"
+        "$<${_sc_is_gnu}:-fprofile-use=${SC_PGO_USE}>"
+      )
+    endif()
+    if(_sc_extra_flags)
+      # The same flags go to the link step: LTO code generation and the
+      # profiling runtime are both resolved at link time.
+      target_compile_options(${target} PRIVATE ${_sc_extra_flags})
+      target_link_options(${target} PRIVATE ${_sc_extra_flags})
+    endif()
+  endif()
 endfunction()


diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh
new file mode 100755
index 00000000000..a9e46ffaae8
--- /dev/null
+++ b/scripts/benchmark.sh
@@ -0,0 +1,145 @@
+#!/usr/bin/env bash
+#
+# Builds simc three ways (plain Release, LTO + -march=native, and
+# PGO + LTO + -march=native), times each binary on one profile and prints a
+# comparison table. The PGO step merges profiles with llvm-profdata, so it
+# expects a Clang/LLVM toolchain.
+set -euo pipefail
+
+# ---------- Config ----------
+ITERATIONS=1000
+THREADS=4
+RUNS=3
+PROFILE="profiles/MID1/MID1_Priest_Shadow.simc"
+WORKDIR="${TMPDIR:-/tmp}/simc-bench"
+SRCDIR="$(cd "$(dirname "$0")/.." && pwd)"
+
+# ---------- Helpers ----------
+# Print a fatal message to stderr and abort.
+die() { echo "FATAL: $*" >&2; exit 1; }
+# Print a progress message to stderr, keeping stdout free for captured values.
+info() { echo "==> $*" >&2; }
+
+# Print the numerically smallest of the given values.
+best_of() {
+  printf '%s\n' "$@" | sort -g | head -1
+}
+
+# bench_variant BINARY LABEL
+# Runs BINARY $RUNS times on $PROFILE and prints the best "real" time reported
+# by `time -p`. LABEL is only used in diagnostics.
+bench_variant() {
+  local binary="$1" label="$2"
+  local best=999999 t i
+  for i in $(seq 1 "$RUNS"); do
+    info " run $i/$RUNS"
+    # This function runs inside $(...), where bash does not apply errexit, so
+    # a failed run is checked explicitly instead of being timed as a result.
+    t=$( { /usr/bin/time -p "$binary" \
+      "$SRCDIR/$PROFILE" \
+      iterations="$ITERATIONS" \
+      threads="$THREADS" \
+      output=/dev/null \
+      html=/dev/null ; } 2>&1 | awk '/^real/ {print $2}' ) \
+      || die "benchmark run failed for $label run $i"
+    [[ -n "$t" ]] || die "failed to capture time for $label run $i"
+    best=$(best_of "$best" "$t")
+  done
+  echo "$best"
+}
+
+# cmake_build BUILDDIR [CMAKE_ARGS...]
+# Configures a Release build of $SRCDIR in BUILDDIR with the GUI and tests
+# switched off plus any extra arguments, then builds the simc target.
+cmake_build() {
+  local builddir="$1"; shift
+  cmake -S "$SRCDIR" -B "$builddir" \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DBUILD_GUI=OFF \
+    -DBUILD_TESTING=OFF \
+    "$@"
+  cmake --build "$builddir" --target simc -j "$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)"
+}
+
+# ---------- Pre-checks ----------
+command -v cmake >/dev/null || die "cmake not found"
+[[ -x /usr/bin/time ]] || die "/usr/bin/time not found"
+
+# llvm-profdata: try PATH first, then xcrun (macOS Xcode toolchain)
+if command -v llvm-profdata >/dev/null 2>&1; then
+  LLVM_PROFDATA="llvm-profdata"
+elif xcrun -f llvm-profdata >/dev/null 2>&1; then
+  LLVM_PROFDATA="$(xcrun -f llvm-profdata)"
+else
+  die "llvm-profdata not found (install llvm toolchain)"
+fi
+[[ -f "$SRCDIR/CMakeLists.txt" ]] || die "cannot find $SRCDIR/CMakeLists.txt"
+[[ -f "$SRCDIR/$PROFILE" ]] || die "cannot find $SRCDIR/$PROFILE"
+
+rm -rf "$WORKDIR"
+mkdir -p "$WORKDIR"
+
+# ---------- 1. Baseline ----------
+info "Building baseline (Release, no extras)"
+cmake_build "$WORKDIR/build-baseline"
+info "Benchmarking baseline"
+TIME_BASELINE=$(bench_variant "$WORKDIR/build-baseline/simc" "baseline")
+info " best: ${TIME_BASELINE}s"
+
+# ---------- 2. LTO + march=native ----------
+info "Building LTO + march=native"
+cmake_build "$WORKDIR/build-lto" -DSC_LTO=ON -DSC_MARCH_NATIVE=ON
+info "Benchmarking LTO + march=native"
+TIME_LTO=$(bench_variant "$WORKDIR/build-lto/simc" "lto+march")
+info " best: ${TIME_LTO}s"
+
+# ---------- 3. PGO ----------
+info "Building PGO instrumented"
+cmake_build "$WORKDIR/build-pgo-gen" -DSC_PGO_GENERATE=ON
+
+info "Collecting PGO profile data"
+# %p expands to the process id, giving each instrumented process its own file.
+LLVM_PROFILE_FILE="$WORKDIR/pgo-%p.profraw" \
+  "$WORKDIR/build-pgo-gen/simc" \
+  "$SRCDIR/$PROFILE" \
+  iterations="$ITERATIONS" \
+  threads="$THREADS" \
+  output=/dev/null \
+  html=/dev/null
+
+info "Merging profile data"
+PROFDATA="$WORKDIR/merged.profdata"
+"$LLVM_PROFDATA" merge -output="$PROFDATA" "$WORKDIR"/pgo-*.profraw
+
+info "Building PGO optimized (+ LTO + march=native)"
+cmake_build "$WORKDIR/build-pgo-use" \
+  -DSC_LTO=ON \
+  -DSC_MARCH_NATIVE=ON \
+  -DSC_PGO_USE="$PROFDATA"
+
+info "Benchmarking PGO optimized"
+TIME_PGO=$(bench_variant "$WORKDIR/build-pgo-use/simc" "pgo")
+info " best: ${TIME_PGO}s"
+
+# ---------- Results ----------
+echo ""
+echo "=============================================="
+echo " SimC build benchmark (best of $RUNS runs)"
+echo " iterations=$ITERATIONS threads=$THREADS"
+echo "=============================================="
+printf "%-28s %8s %8s\n" "Variant" "Time(s)" "Speedup"
+echo "----------------------------------------------"
+
+# Each entry is "label:time"; the labels contain no colon, so the split is safe.
+for label_key in "Release (baseline):$TIME_BASELINE" "LTO + march=native:$TIME_LTO" "PGO + LTO + march=native:$TIME_PGO"; do
+  label="${label_key%%:*}"
+  t="${label_key#*:}"
+  # Values are passed with -v rather than spliced into the awk program, and a
+  # zero time (a run below the timer's resolution) prints n/a instead of
+  # aborting on division by zero.
+  speedup=$(awk -v base="$TIME_BASELINE" -v cur="$t" \
+    'BEGIN { if (cur > 0) printf "%.2fx", base / cur; else printf "n/a" }')
+  printf "%-28s %8s %8s\n" "$label" "$t" "$speedup"
+done
+echo "=============================================="
+echo "Build artifacts in: $WORKDIR"