From e70b58b22becf499b3155ca02325f958b4b1aa01 Mon Sep 17 00:00:00 2001 From: bolunz Date: Wed, 11 Mar 2026 05:57:54 +0000 Subject: [PATCH 1/5] feat: organize test cases by group --- scripts/compare_loss.py | 34 +- scripts/compare_tps.py | 34 +- scripts/run_models_and_profile.bash | 141 +++++-- scripts/test_config.json | 549 ++++++++++++++-------------- scripts/write_to_feishu_sheet.py | 149 +++++--- 5 files changed, 560 insertions(+), 347 deletions(-) diff --git a/scripts/compare_loss.py b/scripts/compare_loss.py index 8b581266..255104cb 100755 --- a/scripts/compare_loss.py +++ b/scripts/compare_loss.py @@ -10,6 +10,24 @@ from pathlib import Path from argparse import ArgumentParser + +def collect_log_files(base_dir): + """Collect comparable training logs keyed by basename.""" + files = {} + duplicates = {} + + for path in base_dir.rglob('*.log'): + if path.name.startswith('build') or path.name.endswith('_profile.log'): + continue + + key = path.name + if key in files: + duplicates.setdefault(key, [files[key]]).append(path) + continue + files[key] = path + + return files, duplicates + def get_dtype_from_filename(filename): """Determine dtype from filename. Returns 'bfloat16' or 'fp32'.""" return 'bfloat16' if '_bfloat16' in filename else 'fp32' @@ -62,8 +80,20 @@ def main(): args.threshold_fp32 = args.threshold args.threshold_bf16 = args.threshold - files1 = {f.name: f for f in args.dir1.glob('*.log') if not f.name.startswith('build')} - files2 = {f.name: f for f in args.dir2.glob('*.log') if not f.name.startswith('build')} + files1, duplicates1 = collect_log_files(args.dir1) + files2, duplicates2 = collect_log_files(args.dir2) + + if duplicates1: + print(f"Found duplicate log basenames in {args.dir1.resolve()}, cannot compare safely:") + for name, paths in sorted(duplicates1.items()): + print(f" {name}: {', '.join(str(p.relative_to(args.dir1)) for p in paths)}") + sys.exit(1) + + if duplicates2: + print(f"Found duplicate log basenames in {args.dir2.resolve()}, cannot compare safely:") + for name, paths in sorted(duplicates2.items()): + print(f" {name}: {', '.join(str(p.relative_to(args.dir2)) for p in paths)}") + sys.exit(1) only_in_1 = set(files1.keys()) - set(files2.keys()) only_in_2 = set(files2.keys()) - set(files1.keys()) diff --git a/scripts/compare_tps.py b/scripts/compare_tps.py index 270b1ddd..ac2189c6 100755 --- a/scripts/compare_tps.py +++ b/scripts/compare_tps.py @@ -10,6 +10,24 @@ from pathlib import Path from argparse import ArgumentParser + +def collect_log_files(base_dir): + """Collect comparable training logs keyed by basename.""" + files = {} + duplicates = {} + + for path in base_dir.rglob('*.log'): + if path.name.startswith('build') or path.name.endswith('_profile.log'): + continue + + key = path.name + if key in files: + duplicates.setdefault(key, [files[key]]).append(path) + continue + files[key] = path + + return files, duplicates + def parse_log(file_path): """Extract step -> tok/s mapping from log file.""" pattern = re.compile(r'step\s+(\d+)/\d+.*?\|\s+(\d+)\s+tok/s') @@ -55,8 +73,20 @@ def main(): parser.add_argument('--verbose', action='store_true', help='Print detailed output for all files, including passed ones') args = parser.parse_args() - files1 = {f.name: f for f in args.dir1.glob('*.log') if not f.name.startswith('build')} - files2 = {f.name: f for f in args.dir2.glob('*.log') if not f.name.startswith('build')} + files1, duplicates1 = collect_log_files(args.dir1) + files2, duplicates2 = collect_log_files(args.dir2) + + if duplicates1: + print(f"Found duplicate log basenames in {args.dir1.resolve()}, cannot compare safely:") + for name, paths in sorted(duplicates1.items()): + print(f" {name}: {', '.join(str(p.relative_to(args.dir1)) for p in paths)}") + sys.exit(1) + + if duplicates2: + print(f"Found duplicate log basenames in {args.dir2.resolve()}, cannot compare safely:") + for name, paths in sorted(duplicates2.items()): + print(f" {name}: {', '.join(str(p.relative_to(args.dir2)) for p in paths)}") + sys.exit(1) only_in_1 = set(files1.keys()) - set(files2.keys()) only_in_2 = set(files2.keys()) - set(files1.keys()) diff --git a/scripts/run_models_and_profile.bash b/scripts/run_models_and_profile.bash index 1cf27935..93146232 100755 --- a/scripts/run_models_and_profile.bash +++ b/scripts/run_models_and_profile.bash @@ -3,7 +3,52 @@ set -e set -o pipefail -CONFIG_FILE="${1:-test_config.json}" +usage() { + cat <<'EOF' +Usage: run_models_and_profile.bash [config_file] [--only-run tag1,tag2] + +Options: + --only-run TAGS Only run the specified tag groups, separated by commas. + -h, --help Show this help message. +EOF +} + +CONFIG_FILE="test_config.json" +ONLY_RUN_TAGS="" +CONFIG_FILE_SET="no" + +while [[ $# -gt 0 ]]; do + case "$1" in + --only-run) + [[ $# -lt 2 ]] && { echo "Error: --only-run requires a comma-separated tag list."; exit 1; } + ONLY_RUN_TAGS="$2" + shift 2 + ;; + --only-run=*) + ONLY_RUN_TAGS="${1#*=}" + shift + ;; + -h|--help) + usage + exit 0 + ;; + -*) + echo "Error: Unknown option: $1" + usage + exit 1 + ;; + *) + if [[ "$CONFIG_FILE_SET" == "yes" ]]; then + echo "Error: Multiple config files provided." + usage + exit 1 + fi + CONFIG_FILE="$1" + CONFIG_FILE_SET="yes" + shift + ;; + esac +done # Dependencies check if ! command -v jq >/dev/null 2>&1; then @@ -33,6 +78,28 @@ done < <(jq -r '.variables | to_entries[] | "\(.key)=\(.value)"' "$CONFIG_FILE") # Global variable to save the last cmake command LAST_CMAKE_CMD="" +declare -A SELECTED_TAGS=() + +normalize_tag() { + local raw="$1" + raw="${raw#"${raw%%[![:space:]]*}"}" + raw="${raw%"${raw##*[![:space:]]}"}" + printf '%s' "$raw" +} + +if [[ -n "$ONLY_RUN_TAGS" ]]; then + IFS=',' read -r -a requested_tags <<< "$ONLY_RUN_TAGS" + for raw_tag in "${requested_tags[@]}"; do + tag="$(normalize_tag "$raw_tag")" + [[ -z "$tag" ]] && continue + SELECTED_TAGS["$tag"]=1 + done + + if [[ ${#SELECTED_TAGS[@]} -eq 0 ]]; then + echo "Error: --only-run did not contain any valid tags." + exit 1 + fi +fi # Clean the build directory clean_build_dir() { @@ -46,9 +113,12 @@ run_and_log() { local cmd="$1" local log_name="$2" local is_profile="$3" + local tag="${4:-basic}" local timestamp timestamp=$(date '+%Y-%m-%d %H:%M:%S') - local log_path="$(realpath "${LOG_DIR}/${log_name}.log")" + local tag_log_dir="${LOG_DIR}/${tag}" + mkdir -p "$tag_log_dir" + local log_path="$(realpath "${tag_log_dir}/${log_name}.log")" echo -e "\033[1;32m============================================================\033[0m" echo -e "\033[1;36m[$timestamp] [Running] ${log_name}\033[0m" @@ -99,7 +169,7 @@ run_and_log() { # If profiling is enabled, move profiling files to the target directory if [[ "$is_profile" == "yes" ]]; then - move_profile_logs "$log_name" + move_profile_logs "$log_name" "$tag" fi } @@ -107,14 +177,17 @@ run_and_log() { # Move profiling output logs move_profile_logs() { local prefix="$1" + local tag="${2:-basic}" + local tag_profile_dir="${PROFILE_LOG_DIR}/${tag}" + mkdir -p "$tag_profile_dir" # Move *.report.rankN files for report_file in "${BUILD_DIR}"/*.report.rank*; do if [[ -f "$report_file" ]]; then local base_name base_name=$(basename "$report_file") - mv "$report_file" "${PROFILE_LOG_DIR}/${prefix}_${base_name}" - echo "Moved $base_name to ${PROFILE_LOG_DIR}/${prefix}_${base_name}" + mv "$report_file" "${tag_profile_dir}/${prefix}_${base_name}" + echo "Moved $base_name to ${tag_profile_dir}/${prefix}_${base_name}" fi done @@ -123,17 +196,18 @@ move_profile_logs() { if [[ -f "$record_file" ]]; then local base_name base_name=$(basename "$record_file") - mv "$record_file" "${PROFILE_LOG_DIR}/${prefix}_${base_name}" - echo "Moved $base_name to ${PROFILE_LOG_DIR}/${prefix}_${base_name}" + mv "$record_file" "${tag_profile_dir}/${prefix}_${base_name}" + echo "Moved $base_name to ${tag_profile_dir}/${prefix}_${base_name}" fi done } -# Build "--key value" arg string from tests[i].args (shell-escaped) +# Build "--key value" arg string from test_groups[gi].tests[ti].args (shell-escaped) args_string_for_test() { - local idx="$1" - jq -r --argjson i "$idx" ' - .tests[$i].args + local group_idx="$1" + local test_idx="$2" + jq -r --argjson g "$group_idx" --argjson t "$test_idx" ' + .test_groups[$g].tests[$t].args | to_entries[] | "--\(.key) \(.value|tostring)" ' "$CONFIG_FILE" | paste -sd' ' - @@ -141,7 +215,20 @@ args_string_for_test() { # Run tests num_builds=$(jq '.builds | length' "$CONFIG_FILE") -num_tests=$(jq '.tests | length' "$CONFIG_FILE") +num_groups=$(jq '.test_groups | length' "$CONFIG_FILE") + +selected_group_count=0 +for ((gi=0; gi sheet_id={sheet_id}") + + cmd_args, sheet_data = get_model_data(model_name=model_name, sheet_title=testcase, tag=tag) + + if not sheet_data: + print("No valid data generated, skipping") continue - remote_by_title[testcase] = sheet_id - sort_sheets = True - write_cmd = True - print(f"Created sheet '{testcase}' with id={sheet_id}") - - print(f"Processing testcase '{testcase}' -> sheet_id={sheet_id}") - - cmd_args, sheet_data = get_model_data(model_name=model_name, sheet_title=testcase) - - if not sheet_data: - print("No valid data generated, skipping") - continue - if write_cmd and cmd_args: - handler.write_cmd_args_to_header(spreadsheet_token, cmd_args, sheet_id) + if write_cmd and cmd_args: + handler.write_cmd_args_to_header(spreadsheet_token, cmd_args, sheet_id) - if handler.prepend_data(spreadsheet_token, sheet_id, sheet_data): - handler.post_process(spreadsheet_token, sheet_id) + if handler.prepend_data(spreadsheet_token, sheet_id, sheet_data): + handler.post_process(spreadsheet_token, sheet_id) - if sort_sheets: - handler.sort_sheets_by_title(spreadsheet_token, "模板") + if sort_sheets: + handler.sort_sheets_by_title(spreadsheet_token, "模板") print("\n=== All models and sheets processed ===") From 5fede3dc72669364292706595b31181f478b9810 Mon Sep 17 00:00:00 2001 From: bolunz Date: Wed, 11 Mar 2026 06:25:33 +0000 Subject: [PATCH 2/5] fix: add retry logic in feishu writer --- scripts/write_to_feishu_sheet.py | 37 ++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/scripts/write_to_feishu_sheet.py b/scripts/write_to_feishu_sheet.py index 9fca18d2..670e9012 100644 --- a/scripts/write_to_feishu_sheet.py +++ b/scripts/write_to_feishu_sheet.py @@ -2,6 +2,7 @@ import json import time import os +import sys import argparse import glob import re @@ -14,6 +15,10 @@ HEADER_ROWS=5 HEADER_COLS="W" +# Retry settings +REQUEST_RETRY_TIMES=3 +REQUEST_RETRY_DELAY=10 + class FeishuSheetHandler: """Feishu Sheet Handler for retrieving and writing sheet data""" @@ -25,13 +30,36 @@ def __init__(self, app_id, app_secret): self.token_expire_time = 0 self.get_access_token() + def _request_with_timeout_retry(self, request_func, request_name): + """Retry request when ReadTimeout happens.""" + for attempt in range(REQUEST_RETRY_TIMES): + try: + return request_func() + except requests.exceptions.ReadTimeout: + if attempt == REQUEST_RETRY_TIMES - 1: + print( + f"FATAL: HTTP timeout after {REQUEST_RETRY_TIMES} attempts while handling " + f"{request_name}. Please manually revert the Feishu sheet to a previous version." + ) + sys.exit(1) + print( + f"{request_name} timed out on attempt " + f"{attempt + 1}/{REQUEST_RETRY_TIMES}, retry after {REQUEST_RETRY_DELAY}s" + ) + time.sleep(REQUEST_RETRY_DELAY) + def get_access_token(self): """Get and cache tenant_access_token""" if self.access_token and time.time() < self.token_expire_time: return self.access_token url = f"{self.base_url}/auth/v3/tenant_access_token/internal" - resp = requests.post(url, json={"app_id": self.app_id, "app_secret": self.app_secret}, timeout=10) + resp = self._request_with_timeout_retry( + lambda: requests.post(url, json={"app_id": self.app_id, "app_secret": self.app_secret}, timeout=10), + "Get access token" + ) + if resp is None: + return None if resp.status_code != 200: print("Failed to get token: HTTP error", resp.status_code) return None @@ -57,7 +85,12 @@ def _feishu_request(self, method, endpoint, **kwargs): } url = f"{self.base_url}{endpoint}" - resp = requests.request(method, url, headers=headers, timeout=15, **kwargs) + resp = self._request_with_timeout_retry( + lambda: requests.request(method, url, headers=headers, timeout=15, **kwargs), + f"{method} {endpoint}" + ) + if resp is None: + return None if resp.status_code != 200: print(f"Request failed: HTTP {resp.status_code}") From 712a908eed709e26b94637961ff642bd98c52966 Mon Sep 17 00:00:00 2001 From: bolunz Date: Wed, 11 Mar 2026 07:07:05 +0000 Subject: [PATCH 3/5] fix: remove redundant and duplicate codes --- scripts/compare_loss.py | 33 +++-------------------------- scripts/compare_tps.py | 33 +++-------------------------- scripts/run_models_and_profile.bash | 24 ++++++++++++--------- 3 files changed, 20 insertions(+), 70 deletions(-) diff --git a/scripts/compare_loss.py b/scripts/compare_loss.py index 255104cb..31b2a009 100755 --- a/scripts/compare_loss.py +++ b/scripts/compare_loss.py @@ -9,24 +9,7 @@ import sys from pathlib import Path from argparse import ArgumentParser - - -def collect_log_files(base_dir): - """Collect comparable training logs keyed by basename.""" - files = {} - duplicates = {} - - for path in base_dir.rglob('*.log'): - if path.name.startswith('build') or path.name.endswith('_profile.log'): - continue - - key = path.name - if key in files: - duplicates.setdefault(key, [files[key]]).append(path) - continue - files[key] = path - - return files, duplicates +from compare_utils import collect_log_files, exit_if_duplicate_logs def get_dtype_from_filename(filename): """Determine dtype from filename. Returns 'bfloat16' or 'fp32'.""" @@ -82,18 +65,8 @@ def main(): files1, duplicates1 = collect_log_files(args.dir1) files2, duplicates2 = collect_log_files(args.dir2) - - if duplicates1: - print(f"Found duplicate log basenames in {args.dir1.resolve()}, cannot compare safely:") - for name, paths in sorted(duplicates1.items()): - print(f" {name}: {', '.join(str(p.relative_to(args.dir1)) for p in paths)}") - sys.exit(1) - - if duplicates2: - print(f"Found duplicate log basenames in {args.dir2.resolve()}, cannot compare safely:") - for name, paths in sorted(duplicates2.items()): - print(f" {name}: {', '.join(str(p.relative_to(args.dir2)) for p in paths)}") - sys.exit(1) + exit_if_duplicate_logs(args.dir1, duplicates1) + exit_if_duplicate_logs(args.dir2, duplicates2) only_in_1 = set(files1.keys()) - set(files2.keys()) only_in_2 = set(files2.keys()) - set(files1.keys()) diff --git a/scripts/compare_tps.py b/scripts/compare_tps.py index ac2189c6..de6327de 100755 --- a/scripts/compare_tps.py +++ b/scripts/compare_tps.py @@ -9,24 +9,7 @@ import sys from pathlib import Path from argparse import ArgumentParser - - -def collect_log_files(base_dir): - """Collect comparable training logs keyed by basename.""" - files = {} - duplicates = {} - - for path in base_dir.rglob('*.log'): - if path.name.startswith('build') or path.name.endswith('_profile.log'): - continue - - key = path.name - if key in files: - duplicates.setdefault(key, [files[key]]).append(path) - continue - files[key] = path - - return files, duplicates +from compare_utils import collect_log_files, exit_if_duplicate_logs def parse_log(file_path): """Extract step -> tok/s mapping from log file.""" @@ -75,18 +58,8 @@ def main(): files1, duplicates1 = collect_log_files(args.dir1) files2, duplicates2 = collect_log_files(args.dir2) - - if duplicates1: - print(f"Found duplicate log basenames in {args.dir1.resolve()}, cannot compare safely:") - for name, paths in sorted(duplicates1.items()): - print(f" {name}: {', '.join(str(p.relative_to(args.dir1)) for p in paths)}") - sys.exit(1) - - if duplicates2: - print(f"Found duplicate log basenames in {args.dir2.resolve()}, cannot compare safely:") - for name, paths in sorted(duplicates2.items()): - print(f" {name}: {', '.join(str(p.relative_to(args.dir2)) for p in paths)}") - sys.exit(1) + exit_if_duplicate_logs(args.dir1, duplicates1) + exit_if_duplicate_logs(args.dir2, duplicates2) only_in_1 = set(files1.keys()) - set(files2.keys()) only_in_2 = set(files2.keys()) - set(files1.keys()) diff --git a/scripts/run_models_and_profile.bash b/scripts/run_models_and_profile.bash index 93146232..1b356534 100755 --- a/scripts/run_models_and_profile.bash +++ b/scripts/run_models_and_profile.bash @@ -5,9 +5,10 @@ set -o pipefail usage() { cat <<'EOF' -Usage: run_models_and_profile.bash [config_file] [--only-run tag1,tag2] +Usage: run_models_and_profile.bash [--test-config path] [--only-run tag1,tag2] Options: + --test-config PATH Path to test config JSON. Default: test_config.json. --only-run TAGS Only run the specified tag groups, separated by commas. -h, --help Show this help message. EOF @@ -15,10 +16,18 @@ EOF CONFIG_FILE="test_config.json" ONLY_RUN_TAGS="" -CONFIG_FILE_SET="no" while [[ $# -gt 0 ]]; do case "$1" in + --test-config) + [[ $# -lt 2 ]] && { echo "Error: --test-config requires a file path."; exit 1; } + CONFIG_FILE="$2" + shift 2 + ;; + --test-config=*) + CONFIG_FILE="${1#*=}" + shift + ;; --only-run) [[ $# -lt 2 ]] && { echo "Error: --only-run requires a comma-separated tag list."; exit 1; } ONLY_RUN_TAGS="$2" @@ -38,14 +47,9 @@ while [[ $# -gt 0 ]]; do exit 1 ;; *) - if [[ "$CONFIG_FILE_SET" == "yes" ]]; then - echo "Error: Multiple config files provided." - usage - exit 1 - fi - CONFIG_FILE="$1" - CONFIG_FILE_SET="yes" - shift + echo "Error: Unknown positional argument: $1" + usage + exit 1 ;; esac done From 3f653378466652cb300dab2632592fac27e5bba0 Mon Sep 17 00:00:00 2001 From: bolunz Date: Wed, 11 Mar 2026 07:08:52 +0000 Subject: [PATCH 4/5] fix: add compare utils --- scripts/compare_utils.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 scripts/compare_utils.py diff --git a/scripts/compare_utils.py b/scripts/compare_utils.py new file mode 100644 index 00000000..0831f7be --- /dev/null +++ b/scripts/compare_utils.py @@ -0,0 +1,31 @@ +from pathlib import Path +import sys + + +def collect_log_files(base_dir: Path): + """Collect comparable training logs keyed by basename.""" + files = {} + duplicates = {} + + for path in base_dir.rglob("*.log"): + if path.name.startswith("build") or path.name.endswith("_profile.log"): + continue + + key = path.name + if key in files: + duplicates.setdefault(key, [files[key]]).append(path) + continue + files[key] = path + + return files, duplicates + + +def exit_if_duplicate_logs(base_dir: Path, duplicates): + """Abort when duplicate basenames make comparison ambiguous.""" + if not duplicates: + return + + print(f"Found duplicate log basenames in {base_dir.resolve()}, cannot compare safely:") + for name, paths in sorted(duplicates.items()): + print(f" {name}: {', '.join(str(p.relative_to(base_dir)) for p in paths)}") + sys.exit(1) From 9dfc2e598a891f6d9a9051fdaaf2f9b3ba27024d Mon Sep 17 00:00:00 2001 From: bolunz Date: Wed, 11 Mar 2026 15:58:12 +0800 Subject: [PATCH 5/5] fix: add end of test cleanup --- scripts/run_models_and_profile.bash | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/run_models_and_profile.bash b/scripts/run_models_and_profile.bash index 1b356534..b183a936 100755 --- a/scripts/run_models_and_profile.bash +++ b/scripts/run_models_and_profile.bash @@ -303,3 +303,6 @@ else echo -e "\033[1;33m or export COMPARE_LOG_DIR=/path/to/baseline_logs before running.\033[0m" echo -e "\033[1;33m============================================================\033[0m" fi + +echo -e "\n\033[1;36m[END OF TEST] Cleaning build directory after all tests\033[0m" +clean_build_dir