diff --git a/.github/scripts/prebuild-case-optimization.sh b/.github/scripts/prebuild-case-optimization.sh
index 130f523c07..581630f742 100755
--- a/.github/scripts/prebuild-case-optimization.sh
+++ b/.github/scripts/prebuild-case-optimization.sh
@@ -1,14 +1,15 @@
 #!/bin/bash
 
 # Pre-builds all benchmark cases with --case-optimization.
+# No GPU hardware needed — compilation only.
 # Can run in two modes:
 #   1. Direct (Frontier login nodes): pass cluster/device/interface as args
-#   2. Inside SLURM (Phoenix): uses $job_device/$job_interface from submit.sh
+#   2. Inside SLURM (Phoenix): uses $job_device/$job_interface from submit-slurm-job.sh
 # Usage: bash prebuild-case-optimization.sh [<cluster> <device> <interface>]
 
 set -e
 
-# Support both positional args (direct invocation) and env vars (SLURM via submit.sh)
+# Support both positional args (direct invocation) and env vars (SLURM)
 cluster="${1:-${job_cluster:-phoenix}}"
 job_device="${2:-$job_device}"
 job_interface="${3:-$job_interface}"
@@ -24,7 +25,15 @@ esac
 rm -rf build
 
 . ./mfc.sh load -c "$flag" -m g
-source .github/scripts/gpu-opts.sh
+
+# Set GPU build flags from interface — this is always a GPU build.
+# Don't use gpu-opts.sh since $job_device may be "cpu" when submitted
+# to a CPU SLURM partition (no GPU hardware needed for compilation).
+case "$job_interface" in
+    acc) gpu_opts="--gpu acc" ;;
+    omp) gpu_opts="--gpu mp" ;;
+    *)   echo "ERROR: prebuild requires gpu interface (acc or omp)"; exit 1 ;;
+esac
 
 for case in benchmarks/*/case.py; do
     echo "=== Pre-building: $case ==="
diff --git a/.github/scripts/retry-build.sh b/.github/scripts/retry-build.sh
index 38ac08b217..a0b6ce8cfe 100755
--- a/.github/scripts/retry-build.sh
+++ b/.github/scripts/retry-build.sh
@@ -1,13 +1,16 @@
 #!/bin/bash
 # Provides retry_build(): 2-attempt loop.
 # On failure of attempt 1, nukes the entire build directory before attempt 2.
-# Set RETRY_VALIDATE_CMD to run a post-build validation; failure triggers a retry.
+# If RETRY_VALIDATE_CMD is set, runs it after a successful build; a non-zero
+# exit triggers the same nuke-and-retry, catching e.g. SIGILL from binaries
+# compiled on a different CPU architecture.
 # Usage: source .github/scripts/retry-build.sh
 #        retry_build ./mfc.sh build -j 8 --gpu acc
+#        RETRY_VALIDATE_CMD='./syscheck' retry_build ./mfc.sh build -j 8
 
 retry_build() {
-    local validate_cmd="${RETRY_VALIDATE_CMD:-}"
     local max_attempts=2
+    local validate_cmd="${RETRY_VALIDATE_CMD:-}"
     local attempt=1
     while [ $attempt -le $max_attempts ]; do
         echo "Build attempt $attempt of $max_attempts..."
diff --git a/.github/scripts/run-tests-with-retry.sh b/.github/scripts/run-tests-with-retry.sh
deleted file mode 100755
index 18f1d05d0b..0000000000
--- a/.github/scripts/run-tests-with-retry.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-# Runs ./mfc.sh test with all provided arguments, then retries a small number
-# of sporadic failures (up to 5). Exits non-zero on real failures.
-# Usage: bash .github/scripts/run-tests-with-retry.sh [mfc test args...]
-
-# Extract flags that should carry over to retries (retries build their own
-# argument list with --only, so we capture passthrough flags here).
-PASSTHROUGH=""
-for arg in "$@"; do
-    case "$arg" in
-        --test-all) PASSTHROUGH="$PASSTHROUGH --test-all" ;;
-    esac
-done
-
-rm -f tests/failed_uuids.txt
-TEST_EXIT=0
-/bin/bash mfc.sh test "$@" || TEST_EXIT=$?
-
-# Retry only if a small number of tests failed (sporadic failures)
-if [ -s tests/failed_uuids.txt ]; then
-    NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
-    if [ "$NUM_FAILED" -le 5 ]; then
-        FAILED=$(tr '\n' ' ' < tests/failed_uuids.txt)
-        echo ""
-        echo "=== Retrying $NUM_FAILED failed test(s): $FAILED ==="
-        echo ""
-        /bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" --only $FAILED $PASSTHROUGH || exit $?
-    else
-        echo "Too many failures ($NUM_FAILED) to retry — likely a real issue."
-        exit 1
-    fi
-elif [ "$TEST_EXIT" -ne 0 ]; then
-    exit $TEST_EXIT
-fi
diff --git a/.github/scripts/run_case_optimization.sh b/.github/scripts/run_case_optimization.sh
index 167505ece3..922d0a9012 100755
--- a/.github/scripts/run_case_optimization.sh
+++ b/.github/scripts/run_case_optimization.sh
@@ -44,7 +44,7 @@ for case in "${benchmarks[@]}"; do
     rm -rf "$case_dir/D" "$case_dir/p_all" "$case_dir/restart_data"
 
     # Build + run with --case-optimization, small grid, 10 timesteps
-    if ./mfc.sh run "$case" --case-optimization $gpu_opts -n "$ngpus" -j "$(nproc)" -- --gbpp 1 --steps 10; then
+    if ./mfc.sh run "$case" --case-optimization $gpu_opts -n "$ngpus" -j 8 -- --gbpp 1 --steps 10; then
         # Validate output
         if build/venv/bin/python3 .github/scripts/check_case_optimization_output.py "$case_dir"; then
             echo "PASS: $case_name"
diff --git a/.github/scripts/run_parallel_benchmarks.sh b/.github/scripts/run_parallel_benchmarks.sh
index 8c562b911e..b6a6034c3c 100755
--- a/.github/scripts/run_parallel_benchmarks.sh
+++ b/.github/scripts/run_parallel_benchmarks.sh
@@ -24,24 +24,9 @@ echo "=========================================="
 # both parallel jobs so PR and master always land on the same GPU type.
 if [ "$device" = "gpu" ] && [ "$cluster" = "phoenix" ]; then
     echo "Selecting Phoenix GPU partition for benchmark consistency..."
-    # Prefer older/smaller partitions first (rtx6000, l40s, v100) to leave
-    # large modern nodes (h200, h100, a100) free for production workloads.
-    # rtx6000 has the most nodes and gives the most consistent baselines.
-    BENCH_GPU_PARTITION=""
-    for part in gpu-rtx6000 gpu-l40s gpu-v100 gpu-h200 gpu-h100 gpu-a100; do
-        # || true: grep -c exits 1 on zero matches (or when sinfo returns no output
-        # for an unknown partition); suppress so set -euo pipefail doesn't abort.
-        idle=$(sinfo -p "$part" --noheader -o "%t" 2>/dev/null | grep -cE "^(idle|mix)" || true)
-        if [ "${idle:-0}" -gt 0 ]; then
-            BENCH_GPU_PARTITION="$part"
-            echo "Selected GPU partition: $BENCH_GPU_PARTITION ($idle idle/mix nodes)"
-            break
-        fi
-    done
-    if [ -z "$BENCH_GPU_PARTITION" ]; then
-        echo "WARNING: No idle GPU partition found; falling back to gpu-rtx6000 (may queue)"
-        BENCH_GPU_PARTITION="gpu-rtx6000"
-    fi
+    # Require 2 nodes so both PR and master jobs can run concurrently.
+    GPU_PARTITION_MIN_NODES=2 source "${SCRIPT_DIR}/select-gpu-partition.sh"
+    BENCH_GPU_PARTITION="$SELECTED_GPU_PARTITION"
     export BENCH_GPU_PARTITION
 fi
 
@@ -57,12 +42,13 @@ echo "Master job started in background (PID: $master_pid)"
 
 echo "Waiting for both jobs to complete..."
 
-# Wait and capture exit codes reliably
+# Wait and capture exit codes reliably.
+# Use `wait ... || exit=$?` to avoid set -e aborting on the first failure
+# (which would orphan the second job).
 pr_exit=0
 master_exit=0
 
-wait "$pr_pid"
-pr_exit=$?
+wait "$pr_pid" || pr_exit=$?
 if [ "$pr_exit" -ne 0 ]; then
   echo "PR job exited with code: $pr_exit"
   echo "Last 50 lines of PR job log:"
@@ -71,8 +57,7 @@ else
   echo "PR job completed successfully"
 fi
 
-wait "$master_pid"
-master_exit=$?
+wait "$master_pid" || master_exit=$?
 if [ "$master_exit" -ne 0 ]; then
   echo "Master job exited with code: $master_exit"
   echo "Last 50 lines of master job log:"
diff --git a/.github/scripts/select-gpu-partition.sh b/.github/scripts/select-gpu-partition.sh
new file mode 100644
index 0000000000..c812c000a9
--- /dev/null
+++ b/.github/scripts/select-gpu-partition.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Select the best available Phoenix GPU partition using sinfo.
+# Sources into caller: exports SELECTED_GPU_PARTITION.
+#
+# Priority order prefers partitions most likely to have availability.
+# V100 is last due to slower performance near the test time limit.
+# Falls back to gpu-l40s if no partition meets the idle node threshold.
+# RTX 6000 nodes are excluded (too slow for the test suite time limit).
+#
+# Optional: set GPU_PARTITION_MIN_NODES before sourcing to require a minimum
+# number of idle/mix nodes (e.g. GPU_PARTITION_MIN_NODES=2 for parallel bench jobs).
+#
+# Usage: source .github/scripts/select-gpu-partition.sh
+
+_GPU_PARTITION_PRIORITY="gpu-l40s gpu-h200 gpu-h100 gpu-a100 gpu-v100"
+_GPU_PARTITION_FALLBACK="gpu-l40s"
+_GPU_PARTITION_MIN_NODES="${GPU_PARTITION_MIN_NODES:-1}"
+# sinfo -N prints one line per node (not per state), so grep -c below counts nodes.
+SELECTED_GPU_PARTITION=""
+for _part in $_GPU_PARTITION_PRIORITY; do
+    _idle=$(sinfo -N -p "$_part" --noheader -o "%t" 2>/dev/null | grep -cE "^(idle|mix)" || true)
+    if [ "${_idle:-0}" -ge "$_GPU_PARTITION_MIN_NODES" ]; then
+        SELECTED_GPU_PARTITION="$_part"
+        echo "Selected GPU partition: $SELECTED_GPU_PARTITION ($_idle idle/mix nodes)"
+        break
+    fi
+done
+
+if [ -z "$SELECTED_GPU_PARTITION" ]; then
+    echo "WARNING: No idle GPU partition found; falling back to $_GPU_PARTITION_FALLBACK (may queue)"
+    SELECTED_GPU_PARTITION="$_GPU_PARTITION_FALLBACK"
+fi
+
+export SELECTED_GPU_PARTITION
+unset _GPU_PARTITION_PRIORITY _GPU_PARTITION_FALLBACK _GPU_PARTITION_MIN_NODES _part _idle
diff --git a/.github/scripts/submit-slurm-job.sh b/.github/scripts/submit-slurm-job.sh
new file mode 100755
index 0000000000..eb6702cfbe
--- /dev/null
+++ b/.github/scripts/submit-slurm-job.sh
@@ -0,0 +1,207 @@
+#!/bin/bash
+# Unified SLURM job submission and monitoring for all clusters.
+# Submits a script as a SLURM batch job, then monitors it until completion.
+# Rerun-safe: cancels stale jobs from previous runs before resubmission.
+#
+# Usage: submit-slurm-job.sh <script.sh> <cpu|gpu> <none|acc|omp> <cluster> [shard]
+
+set -euo pipefail
+
+# Ignore SIGHUP to survive login node session drops
+trap '' HUP
+
+usage() {
+    echo "Usage: $0 <script.sh> <cpu|gpu> <none|acc|omp> <cluster> [shard]"
+}
+
+script_path="${1:-}"
+device="${2:-}"
+interface="${3:-}"
+cluster="${4:-}"
+shard="${5:-}"
+
+if [ -z "$script_path" ] || [ -z "$device" ] || [ -z "$interface" ] || [ -z "$cluster" ]; then
+    usage
+    exit 1
+fi
+
+sbatch_script_contents=$(cat "$script_path")
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Detect job type from submitted script basename
+script_basename="$(basename "$script_path" .sh)"
+case "$script_basename" in
+    bench*) job_type="bench" ;;
+    *)      job_type="test"  ;;
+esac
+
+# --- Cluster configuration ---
+case "$cluster" in
+    phoenix)
+        compiler_flag="p"
+        account="gts-sbryngelson3"
+        job_prefix="shb"
+        qos="embers"
+        extra_sbatch="#SBATCH --requeue"
+        test_time="03:00:00"
+        bench_time="04:00:00"
+        gpu_partition_dynamic=true
+        ;;
+    frontier)
+        compiler_flag="f"
+        account="CFD154"
+        job_prefix="MFC"
+        qos="develop"
+        extra_sbatch=""
+        test_time="01:59:00"
+        bench_time="01:59:00"
+        gpu_partition_dynamic=false
+        ;;
+    frontier_amd)
+        compiler_flag="famd"
+        account="CFD154"
+        job_prefix="MFC"
+        qos="develop"
+        extra_sbatch=""
+        test_time="01:59:00"
+        bench_time="01:59:00"
+        gpu_partition_dynamic=false
+        ;;
+    *)
+        echo "ERROR: Unknown cluster '$cluster'"
+        exit 1
+        ;;
+esac
+
+# --- Time limit ---
+if [ "$job_type" = "bench" ]; then
+    sbatch_time="#SBATCH -t $bench_time"
+else
+    sbatch_time="#SBATCH -t $test_time"
+fi
+
+# --- Device-specific SBATCH options ---
+if [ "$device" = "cpu" ]; then
+    case "$cluster" in
+        phoenix)
+            sbatch_device_opts="\
+#SBATCH -p cpu-small
+#SBATCH --ntasks-per-node=24
+#SBATCH --mem-per-cpu=2G"
+            ;;
+        frontier|frontier_amd)
+            sbatch_device_opts="\
+#SBATCH -n 32
+#SBATCH -p service"
+            ;;
+    esac
+elif [ "$device" = "gpu" ]; then
+    # Determine GPU partition
+    gpu_partition="batch"
+    if [ "$gpu_partition_dynamic" = "true" ]; then
+        # Use pre-selected bench partition if available, otherwise query sinfo
+        if [ -n "${BENCH_GPU_PARTITION:-}" ]; then
+            gpu_partition="$BENCH_GPU_PARTITION"
+            echo "Using pre-selected bench partition: $gpu_partition (PR/master consistency)"
+        else
+            source "${SCRIPT_DIR}/select-gpu-partition.sh"
+            gpu_partition="$SELECTED_GPU_PARTITION"
+        fi
+    fi
+
+    case "$cluster" in
+        phoenix)
+            sbatch_device_opts="\
+#SBATCH -p $gpu_partition
+#SBATCH --ntasks-per-node=4
+#SBATCH -G2
+#SBATCH --exclude=atl1-1-03-002-29-0"
+            ;;
+        frontier|frontier_amd)
+            sbatch_device_opts="\
+#SBATCH -n 8
+#SBATCH -p service"
+            ;;
+    esac
+else
+    usage
+    exit 1
+fi
+
+# --- Job slug ---
+shard_suffix=""
+if [ -n "$shard" ]; then
+    shard_suffix="-$(echo "$shard" | sed 's|/|-of-|')"
+fi
+job_slug="$(basename "$script_path" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g')-${device}-${interface}${shard_suffix}"
+output_file="$job_slug.out"
+id_file="${job_slug}.slurm_job_id"
+
+# --- Idempotency: cancel stale jobs from previous runs ---
+if [ -f "$id_file" ]; then
+    existing_id=$(cat "$id_file")
+    state=$(sacct -j "$existing_id" -n -X -P -o State 2>/dev/null | head -n1 | cut -d'|' -f1 | tr -d ' ' || true)
+    case "${state:-UNKNOWN}" in
+        RUNNING|PENDING|REQUEUED|COMPLETING)
+            echo "Cancelling stale SLURM job $existing_id (state=$state) before resubmission"
+            scancel "$existing_id" 2>/dev/null || true
+            ;;
+        *)
+            echo "Stale job $existing_id (state=${state:-UNKNOWN}) — submitting fresh"
+            ;;
+    esac
+    rm -f "$id_file"
+fi
+
+# Remove stale output file so the monitor doesn't pick up old content
+# (a previous SLURM job's epilog can write to the .out file after our
+# stale-job check, polluting the new job's output stream).
+rm -f "$output_file"
+
+# --- Module load mode (short form) ---
+module_mode=$([ "$device" = "gpu" ] && echo "g" || echo "c")
+
+# --- Submit ---
+submit_output=$(sbatch <<EOT
+#!/bin/bash
+#SBATCH -J ${job_prefix}-${job_slug}
+#SBATCH --account=${account}
+#SBATCH -N 1
+${sbatch_device_opts}
+${sbatch_time}
+#SBATCH --qos=${qos}
+${extra_sbatch}
+#SBATCH -o ${output_file}
+
+set -e
+set -x
+
+cd "\$SLURM_SUBMIT_DIR"
+echo "Running in \$(pwd):"
+
+job_slug="$job_slug"
+job_device="$device"
+job_interface="$interface"
+job_shard="$shard"
+job_cluster="$cluster"
+
+. ./mfc.sh load -c $compiler_flag -m $module_mode
+
+$sbatch_script_contents
+
+EOT
+)
+
+# '|| true': a no-match grep must not trip errexit/pipefail before the check below.
+job_id=$(echo "$submit_output" | grep -oE '[0-9]+' | head -n1 || true)
+if [ -z "$job_id" ]; then
+    printf 'ERROR: Failed to submit job. sbatch output:\n%s\n' "$submit_output"
+    exit 1
+fi
+
+echo "Submitted batch job $job_id"
+echo "$job_id" > "$id_file"
+echo "Job ID written to $id_file"
+
+# --- Monitor ---
+bash "$SCRIPT_DIR/run_monitored_slurm_job.sh" "$job_id" "$output_file"
diff --git a/.github/scripts/submit_and_monitor_bench.sh b/.github/scripts/submit_and_monitor_bench.sh
index e0a6eb7384..62a377bb26 100755
--- a/.github/scripts/submit_and_monitor_bench.sh
+++ b/.github/scripts/submit_and_monitor_bench.sh
@@ -19,13 +19,12 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 echo "[$dir] Submitting benchmark for $device-$interface on $cluster..."
 cd "$dir"
 
-# Always use the PR's submit.sh so both master and PR builds benefit from the
-# run_monitored_slurm_job.sh SIGKILL recovery wrapper.  The bench script is
-# still resolved relative to the current directory (master/ or pr/) so the
-# correct branch code is benchmarked.  SLURM_SUBMIT_DIR ensures the job runs
-# in the right directory regardless of which submit.sh is invoked.
-PR_SUBMIT="${SCRIPT_DIR}/../workflows/${cluster}/submit.sh"
-bash "$PR_SUBMIT" .github/workflows/$cluster/bench.sh "$device" "$interface"
+# Use the PR's submit-slurm-job.sh and bench script for both master and PR jobs.
+# The bench script must come from the PR tree (master may not have common/bench.sh
+# yet), and the script only orchestrates build+bench — the actual MFC code under
+# test is the cwd's checkout (master/ or pr/).
+PR_BENCH_SCRIPT="$(cd "${SCRIPT_DIR}/../workflows/common" && pwd)/bench.sh"
+bash "${SCRIPT_DIR}/submit-slurm-job.sh" "$PR_BENCH_SCRIPT" "$device" "$interface" "$cluster"
 
 # Verify the YAML output file was created
 job_slug="bench-$device-$interface"
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 8a1c848493..7ce02c1e3f 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -105,21 +105,19 @@ jobs:
 
       - name: Setup & Build
         if: matrix.build_script != ''
-        uses: nick-fields/retry@v3
-        with:
-          max_attempts: 2
-          retry_wait_seconds: 60
-          timeout_minutes: 150
-          command: |
-            (cd pr     && ${{ matrix.build_script }}) &
-            pid1=$!
-            (cd master && ${{ matrix.build_script }}) &
-            pid2=$!
-            wait $pid1; e1=$?
-            wait $pid2; e2=$?
-            [ $e1 -eq 0 ] && [ $e2 -eq 0 ]
-          on_retry_command: |
-            rm -rf pr/build master/build
+        timeout-minutes: 150
+        run: |
+          (cd pr     && ${{ matrix.build_script }}) &
+          pid1=$!
+          (cd master && ${{ matrix.build_script }}) &
+          pid2=$!
+          e1=0; e2=0
+          wait $pid1 || e1=$?
+          wait $pid2 || e2=$?
+          if [ $e1 -ne 0 ] || [ $e2 -ne 0 ]; then
+            echo "Build failures: pr=$e1 master=$e2"
+            exit 1
+          fi
 
       - name: Bench (Master v. PR)
         run: bash pr/.github/scripts/run_parallel_benchmarks.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}
diff --git a/.github/workflows/common/bench.sh b/.github/workflows/common/bench.sh
new file mode 100644
index 0000000000..3251f7baca
--- /dev/null
+++ b/.github/workflows/common/bench.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Unified benchmark script for all clusters.
+# Runs inside a SLURM job via submit-slurm-job.sh.
+# Expects env vars: $job_device, $job_interface, $job_slug, $job_cluster
+
+set -euo pipefail
+
+source .github/scripts/bench-preamble.sh
+
+# Cap parallel jobs at 64 to avoid overwhelming MPI daemons on large nodes
+# (GNR nodes have 192 cores but nproc is too aggressive for build).
+n_jobs=$(( $(nproc) > 64 ? 64 : $(nproc) ))
+
+# --- Phoenix TMPDIR setup ---
+if [ "$job_cluster" = "phoenix" ]; then
+    tmpbuild=/storage/project/r-sbryngelson3-0/sbryngelson3/mytmp_build
+    mkdir -p "$tmpbuild"
+    # mktemp -d yields a unique dir; a name clash would let one job's EXIT trap rm another's TMPDIR.
+    currentdir=$(mktemp -d "$tmpbuild/run-XXXXXX")
+    export TMPDIR="$currentdir"
+    trap 'rm -rf "$currentdir" || true' EXIT
+fi
+
+# --- Build (if not pre-built on login node) ---
+# Phoenix builds inside SLURM; Frontier pre-builds via build.sh on the login node.
+# Phoenix: always nuke stale builds (heterogeneous compute nodes → ISA mismatch risk).
+if [ "$job_cluster" = "phoenix" ]; then
+    rm -rf build
+fi
+
+if [ ! -d "build" ]; then
+    source .github/scripts/retry-build.sh
+    retry_build ./mfc.sh build -j $n_jobs $build_opts || exit 1
+fi
+
+# --- Bench cluster flag ---
+if [ "$job_cluster" = "phoenix" ]; then
+    bench_cluster="phoenix-bench"
+else
+    bench_cluster="$job_cluster"
+fi
+
+# --- Run benchmark ---
+if [ "$job_device" = "gpu" ]; then
+    ./mfc.sh bench --mem 4 -o "$job_slug.yaml" -- -c $bench_cluster $device_opts -n $n_ranks
+else
+    ./mfc.sh bench --mem 1 -o "$job_slug.yaml" -- -c $bench_cluster $device_opts -n $n_ranks
+fi
+
+# --- Phoenix cleanup (trap EXIT handles rm -rf "$currentdir") ---
+if [ "$job_cluster" = "phoenix" ]; then
+    sleep 10
+    unset TMPDIR
+fi
diff --git a/.github/workflows/common/test.sh b/.github/workflows/common/test.sh
new file mode 100644
index 0000000000..746c54f5d1
--- /dev/null
+++ b/.github/workflows/common/test.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Unified test script for all clusters.
+# Runs inside a SLURM job via submit-slurm-job.sh.
+# Expects env vars: $job_device, $job_interface, $job_shard, $job_cluster
+
+set -euo pipefail
+
+source .github/scripts/gpu-opts.sh
+build_opts="$gpu_opts"
+
+# --- Build (if not pre-built on login node) ---
+# Frontier pre-builds via build.sh on the login node, so an existing build/ is reused.
+# Phoenix builds inside SLURM on heterogeneous compute nodes — always start fresh
+# to avoid SIGILL from stale binaries compiled on a different microarchitecture.
+if [ "$job_cluster" = "phoenix" ]; then
+    rm -rf build
+fi
+
+if [ ! -d "build" ]; then
+    source .github/scripts/retry-build.sh
+
+    # Phoenix: smoke-test the syscheck binary to catch architecture mismatches
+    # (SIGILL from binaries compiled on a different compute node).
+    validate_cmd=""
+    if [ "$job_cluster" = "phoenix" ]; then
+        validate_cmd='syscheck_bin=$(find build/install -name syscheck -type f 2>/dev/null | head -1); [ -z "$syscheck_bin" ] || "$syscheck_bin" > /dev/null 2>&1'
+    fi
+
+    RETRY_VALIDATE_CMD="$validate_cmd" \
+        retry_build ./mfc.sh test -v --dry-run -j 8 $build_opts || exit 1
+fi
+
+# --- GPU detection and thread count ---
+device_opts=""
+rdma_opts=""
+shard_opts=""
+
+case "$job_cluster" in
+    phoenix)      n_test_threads=8 ;;
+    *)            n_test_threads=32 ;;
+esac
+
+if [ "$job_device" = "gpu" ]; then
+    source .github/scripts/detect-gpus.sh
+
+    case "$job_cluster" in
+        phoenix)
+            device_opts="-g $gpu_ids"
+            n_test_threads=$((ngpus * 2))
+            ;;
+        *)
+            # Frontier: --gpu flag is already in $build_opts; no extra device opts needed
+            device_opts=""
+            n_test_threads=$ngpus
+            ;;
+    esac
+
+    # RDMA for Frontier CCE (not frontier_amd)
+    if [ "$job_cluster" = "frontier" ]; then
+        rdma_opts="--rdma-mpi"
+    fi
+else
+    device_opts="--no-gpu"
+fi
+
+# --- Sharding (Frontier only) ---
+if [ -n "${job_shard:-}" ]; then
+    shard_opts="--shard $job_shard"
+fi
+
+./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $rdma_opts $device_opts $build_opts $shard_opts -- -c $job_cluster
diff --git a/.github/workflows/frontier/bench.sh b/.github/workflows/frontier/bench.sh
deleted file mode 100644
index b896feb17c..0000000000
--- a/.github/workflows/frontier/bench.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-source .github/scripts/bench-preamble.sh
-
-# Cap parallel jobs at 64 to avoid overwhelming MPI daemons on large nodes.
-n_jobs=$(( $(nproc) > 64 ? 64 : $(nproc) ))
-
-if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh bench --mem 4 -j $n_ranks -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks
-else
-    ./mfc.sh bench --mem 1 -j $n_jobs -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks
-fi
diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh
deleted file mode 100644
index 4b472cd433..0000000000
--- a/.github/workflows/frontier/submit.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# Ignore SIGHUP to survive login node session drops
-trap '' HUP
-
-# Determine compiler flag from directory name
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-cluster_name="$(basename "$SCRIPT_DIR")"
-case "$cluster_name" in
-    frontier)     compiler_flag="f" ;;
-    frontier_amd) compiler_flag="famd" ;;
-    *) echo "ERROR: Unknown cluster '$cluster_name'"; exit 1 ;;
-esac
-
-usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp] [shard]"
-}
-
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
-    usage
-    exit 1
-fi
-
-# Detect job type from submitted script basename
-script_basename="$(basename "$1" .sh)"
-case "$script_basename" in
-    bench*) job_type="bench" ;;
-    *)      job_type="test"  ;;
-esac
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 32                       # Number of cores required"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="\
-#SBATCH -n 8                       # Number of cores required"
-else
-    usage
-    exit 1
-fi
-
-# Select SBATCH params based on job type
-if [ "$job_type" = "bench" ]; then
-    sbatch_account="#SBATCH -A CFD154"
-    sbatch_time="#SBATCH -t 01:59:00"
-    sbatch_partition="#SBATCH -p batch"
-    sbatch_extra="#SBATCH --qos=normal"
-else
-    sbatch_account="#SBATCH -A CFD154"
-    sbatch_time="#SBATCH -t 01:59:00"
-    sbatch_partition="#SBATCH -p batch"
-    sbatch_extra="#SBATCH --qos=normal"
-fi
-
-shard_suffix=""
-if [ -n "$4" ]; then
-    shard_suffix="-$(echo "$4" | sed 's|/|-of-|')"
-fi
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3${shard_suffix}"
-output_file="$job_slug.out"
-
-submit_output=$(sbatch <<EOT
-#!/bin/bash
-#SBATCH -J MFC-$job_slug            # Job name
-$sbatch_account
-#SBATCH -N 1                       # Number of nodes required
-$sbatch_device_opts
-$sbatch_time
-#SBATCH -o$output_file             # Combined output and error messages file
-$sbatch_partition
-$sbatch_extra
-
-set -e
-set -x
-
-cd "\$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-job_slug="$job_slug"
-job_device="$2"
-job_interface="$3"
-job_shard="$4"
-job_cluster="$cluster_name"
-
-. ./mfc.sh load -c $compiler_flag -m $([ "$2" = "gpu" ] && echo "g" || echo "c")
-
-$sbatch_script_contents
-
-EOT
-)
-
-job_id=$(echo "$submit_output" | grep -oE '[0-9]+')
-if [ -z "$job_id" ]; then
-    echo "ERROR: Failed to submit job. sbatch output:"
-    echo "$submit_output"
-    exit 1
-fi
-
-echo "Submitted batch job $job_id"
-
-bash "$SCRIPT_DIR/../../scripts/run_monitored_slurm_job.sh" "$job_id" "$output_file"
diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
deleted file mode 100644
index 78797ab8ec..0000000000
--- a/.github/workflows/frontier/test.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-source .github/scripts/detect-gpus.sh
-source .github/scripts/gpu-opts.sh
-device_opts="$gpu_opts"
-
-shard_opts=""
-if [ -n "$job_shard" ]; then
-    shard_opts="--shard $job_shard"
-fi
-
-if [ "$job_device" = "gpu" ]; then
-    rdma_opts=""
-    if [ "$job_cluster" = "frontier" ]; then
-        rdma_opts="--rdma-mpi"
-    fi
-    ./mfc.sh test -v -a $rdma_opts --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c $job_cluster
-else
-    ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu $shard_opts -- -c $job_cluster
-fi
diff --git a/.github/workflows/frontier_amd/bench.sh b/.github/workflows/frontier_amd/bench.sh
deleted file mode 120000
index 2ac24c7604..0000000000
--- a/.github/workflows/frontier_amd/bench.sh
+++ /dev/null
@@ -1 +0,0 @@
-../frontier/bench.sh
\ No newline at end of file
diff --git a/.github/workflows/frontier_amd/submit.sh b/.github/workflows/frontier_amd/submit.sh
deleted file mode 120000
index 11890c4fcd..0000000000
--- a/.github/workflows/frontier_amd/submit.sh
+++ /dev/null
@@ -1 +0,0 @@
-../frontier/submit.sh
\ No newline at end of file
diff --git a/.github/workflows/frontier_amd/test.sh b/.github/workflows/frontier_amd/test.sh
deleted file mode 120000
index 8878e823b2..0000000000
--- a/.github/workflows/frontier_amd/test.sh
+++ /dev/null
@@ -1 +0,0 @@
-../frontier/test.sh
\ No newline at end of file
diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
deleted file mode 100644
index abaf76f33d..0000000000
--- a/.github/workflows/phoenix/bench.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-
-source .github/scripts/bench-preamble.sh
-
-# Cap parallel jobs at 64 to avoid overwhelming MPI daemons on large nodes
-# (GNR nodes have 192 cores but nproc is too aggressive for build/bench).
-n_jobs=$(( $(nproc) > 64 ? 64 : $(nproc) ))
-
-tmpbuild=/storage/project/r-sbryngelson3-0/sbryngelson3/mytmp_build
-currentdir=$tmpbuild/run-$(( RANDOM % 900 ))
-mkdir -p $tmpbuild
-mkdir -p $currentdir
-
-export TMPDIR=$currentdir
-
-if [ "$job_device" = "gpu" ]; then
-    bench_opts="--mem 4"
-else
-    bench_opts="--mem 1"
-fi
-
-rm -rf build
-
-source .github/scripts/retry-build.sh
-retry_build ./mfc.sh build -j $n_jobs $build_opts || exit 1
-
-./mfc.sh bench $bench_opts -j $n_jobs -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
-
-sleep 10
-rm -rf "$currentdir" || true
-
-unset TMPDIR
diff --git a/.github/workflows/phoenix/submit-job.sh b/.github/workflows/phoenix/submit-job.sh
deleted file mode 100755
index caa6bd2175..0000000000
--- a/.github/workflows/phoenix/submit-job.sh
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/bin/bash
-# Submit a SLURM job without waiting for it to complete.
-# Writes the job ID to <job_slug>.slurm_job_id so a separate monitor step can wait.
-# Idempotent: if a job for this slug is still RUNNING or PENDING, skip resubmission.
-#
-# Usage: submit-job.sh [script.sh] [cpu|gpu] [none|acc|omp]
-
-set -euo pipefail
-
-# Ignore SIGHUP to survive login node session drops
-trap '' HUP
-
-usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp]"
-}
-
-if [ -z "${1:-}" ]; then
-    usage
-    exit 1
-fi
-
-sbatch_script_contents=$(cat "$1")
-
-# Detect job type from submitted script basename
-script_basename="$(basename "$1" .sh)"
-case "$script_basename" in
-    bench*) job_type="bench" ;;
-    *)      job_type="test"  ;;
-esac
-
-sbatch_cpu_opts="\
-#SBATCH -p cpu-small               # partition
-#SBATCH --ntasks-per-node=24       # Number of cores per node required
-#SBATCH --mem-per-cpu=2G           # Memory per core\
-"
-
-if [ "$job_type" = "bench" ]; then
-    bench_partition="${BENCH_GPU_PARTITION:-gpu-rtx6000}"
-    echo "Submitting bench GPU job to partition: $bench_partition (BENCH_GPU_PARTITION=${BENCH_GPU_PARTITION:-<unset, using default>})"
-    sbatch_gpu_opts="\
-#SBATCH -p $bench_partition
-#SBATCH --ntasks-per-node=4       # Number of cores per node required
-#SBATCH -G2\
-"
-    sbatch_time="#SBATCH -t 04:00:00"
-else
-    sbatch_gpu_opts="\
-#SBATCH -p gpu-v100,gpu-a100,gpu-h100,gpu-l40s,gpu-h200
-#SBATCH --ntasks-per-node=4       # Number of cores per node required
-#SBATCH -G2\
-"
-    sbatch_time="#SBATCH -t 03:00:00"
-fi
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="$sbatch_cpu_opts"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="$sbatch_gpu_opts"
-else
-    usage
-    exit 1
-fi
-
-job_slug="$(basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g')-$2-$3"
-output_file="$job_slug.out"
-id_file="${job_slug}.slurm_job_id"
-
-# On rerun, cancel any existing job for this slug and submit a fresh one.
-# If the job is still live (RUNNING/PENDING), scancel it first as a safety net
-# in case the "Cancel SLURM Jobs" step did not fire (e.g. runner was SIGKILL'd).
-if [ -f "$id_file" ]; then
-    existing_id=$(cat "$id_file")
-    state=$(sacct -j "$existing_id" -n -X -P -o State 2>/dev/null | head -n1 | cut -d'|' -f1 | tr -d ' ' || true)
-    case "${state:-UNKNOWN}" in
-        RUNNING|PENDING|REQUEUED|COMPLETING)
-            echo "Cancelling stale SLURM job $existing_id (state=$state) before resubmission"
-            scancel "$existing_id" 2>/dev/null || true
-            ;;
-        *)
-            echo "Stale job $existing_id (state=${state:-UNKNOWN}) — submitting fresh"
-            ;;
-    esac
-    rm -f "$id_file"
-fi
-
-submit_output=$(sbatch <<EOT
-#!/bin/bash
-#SBATCH -Jshb-$job_slug            # Job name
-#SBATCH --account=gts-sbryngelson3 # charge account
-#SBATCH -N1                        # Number of nodes required
-$sbatch_device_opts
-$sbatch_time
-#SBATCH -q embers                  # QOS Name
-#SBATCH --requeue                  # Auto-requeue on preemption
-#SBATCH -o$output_file             # Combined output and error messages file
-
-set -e
-set -x
-
-cd "\$SLURM_SUBMIT_DIR"
-echo "Running in \$(pwd):"
-
-job_slug="$job_slug"
-job_device="$2"
-job_interface="$3"
-
-. ./mfc.sh load -c p -m $2
-
-$sbatch_script_contents
-
-EOT
-)
-
-job_id=$(echo "$submit_output" | grep -oE '[0-9]+')
-if [ -z "$job_id" ]; then
-    echo "ERROR: Failed to submit job. sbatch output:"
-    echo "$submit_output"
-    exit 1
-fi
-
-echo "Submitted batch job $job_id"
-echo "$job_id" > "$id_file"
-echo "Job ID written to $id_file"
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
deleted file mode 100755
index 0c009bd001..0000000000
--- a/.github/workflows/phoenix/submit.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-# Submit a SLURM job and wait for it to complete.
-# Delegates submission (with idempotency) to submit-job.sh, then monitors.
-#
-# Usage: submit.sh [script.sh] [cpu|gpu] [none|acc|omp]
-
-set -euo pipefail
-
-# Ignore SIGHUP to survive login node session drops
-trap '' HUP
-
-usage() {
-    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp]"
-}
-
-if [ -z "${1:-}" ]; then
-    usage
-    exit 1
-fi
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-# Submit (idempotent — skips resubmission if a live job already exists)
-bash "$SCRIPT_DIR/submit-job.sh" "$@"
-
-# Derive the same job slug and file paths as submit-job.sh.
-# NOTE: this sed pipeline must stay identical to the one in submit-job.sh —
-# if they diverge the id-file will not be found and the monitor will fail.
-job_slug="$(basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g')-$2-$3"
-output_file="$job_slug.out"
-id_file="${job_slug}.slurm_job_id"
-
-job_id=$(cat "$id_file")
-bash "$SCRIPT_DIR/../../scripts/run_monitored_slurm_job.sh" "$job_id" "$output_file"
diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
deleted file mode 100644
index d073c54bde..0000000000
--- a/.github/workflows/phoenix/test.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-source .github/scripts/gpu-opts.sh
-build_opts="$gpu_opts"
-
-rm -rf build
-
-# Build with retry; smoke-test the freshly built syscheck binary to catch
-# architecture mismatches (SIGILL from binaries compiled on a different compute node).
-source .github/scripts/retry-build.sh
-RETRY_VALIDATE_CMD='syscheck_bin=$(find build/install -name syscheck -type f 2>/dev/null | head -1); [ -z "$syscheck_bin" ] || "$syscheck_bin" > /dev/null 2>&1' \
-    retry_build ./mfc.sh test -v --dry-run -j 8 $build_opts || exit 1
-
-n_test_threads=8
-
-if [ "$job_device" = "gpu" ]; then
-    source .github/scripts/detect-gpus.sh
-    device_opts="-g $gpu_ids"
-    n_test_threads=$((ngpus * 2))
-fi
-
-./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $device_opts ${build_opts:---no-gpu} -- -c phoenix
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9ce6dda24c..a52a5967d1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -98,12 +98,6 @@ jobs:
       - name: Clone
         uses: actions/checkout@v4
 
-      - name: Restore Build Cache
-        uses: actions/cache@v4
-        with:
-          path: build
-          key: mfc-build-${{ matrix.os }}-${{ matrix.mpi }}-${{ matrix.debug }}-${{ matrix.precision }}-${{ matrix.intel }}-${{ hashFiles('CMakeLists.txt', 'toolchain/dependencies/**', 'toolchain/cmake/**', 'src/**/*.fpp', 'src/**/*.f90') }}
-
       - name: Setup MacOS
         if:   matrix.os == 'macos'
         run:  |
@@ -237,32 +231,16 @@ jobs:
         uses: actions/checkout@v4
         with:
           # clean: false preserves .slurm_job_id files across reruns so
-          # submit-job.sh can detect and cancel stale SLURM jobs on retry.
+          # submit-slurm-job.sh can detect and cancel stale SLURM jobs on retry.
           clean: false
 
-      - name: Build
+      - name: Build (login node)
         if:   matrix.cluster != 'phoenix'
-        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3
-        with:
-          max_attempts: 2
-          retry_wait_seconds: 60
-          timeout_minutes: 60
-          command: bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
-          on_retry_command: rm -rf build
-
-      - name: Submit SLURM Test Job
-        if:   matrix.cluster == 'phoenix'
-        run:  bash .github/workflows/phoenix/submit-job.sh .github/workflows/phoenix/test.sh ${{ matrix.device }} ${{ matrix.interface }}
-
-      - name: Monitor SLURM Test Job
-        if:   matrix.cluster == 'phoenix'
-        run: |
-          slug="test-${{ matrix.device }}-${{ matrix.interface }}"
-          bash .github/scripts/run_monitored_slurm_job.sh "$(cat ${slug}.slurm_job_id)" "${slug}.out"
+        timeout-minutes: 60
+        run:  bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
 
       - name: Test
-        if:   matrix.cluster != 'phoenix'
-        run:  bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.shard }}
+        run:  bash .github/scripts/submit-slurm-job.sh .github/workflows/common/test.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }} ${{ matrix.shard }}
 
       - name: Cancel SLURM Jobs
         if: cancelled()
@@ -341,25 +319,14 @@ jobs:
 
       - name: Pre-Build (SLURM)
         if:   matrix.cluster == 'phoenix'
-        run:  bash .github/workflows/phoenix/submit.sh .github/scripts/prebuild-case-optimization.sh ${{ matrix.device }} ${{ matrix.interface }}
+        run:  bash .github/scripts/submit-slurm-job.sh .github/scripts/prebuild-case-optimization.sh cpu ${{ matrix.interface }} ${{ matrix.cluster }}
 
       - name: Pre-Build (login node)
         if:   matrix.cluster != 'phoenix'
         run:  bash .github/scripts/prebuild-case-optimization.sh ${{ matrix.cluster }} ${{ matrix.device }} ${{ matrix.interface }}
 
-      - name: Submit Case-Optimization Tests
-        if:   matrix.cluster == 'phoenix'
-        run:  bash .github/workflows/phoenix/submit-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }}
-
-      - name: Monitor Case-Optimization Tests
-        if:   matrix.cluster == 'phoenix'
-        run: |
-          slug="run-case-optimization-${{ matrix.device }}-${{ matrix.interface }}"
-          bash .github/scripts/run_monitored_slurm_job.sh "$(cat ${slug}.slurm_job_id)" "${slug}.out"
-
       - name: Run Case-Optimization Tests
-        if:   matrix.cluster != 'phoenix'
-        run:  bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }}
+        run:  bash .github/scripts/submit-slurm-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}
 
       - name: Cancel SLURM Jobs
         if: cancelled()