Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 36 additions & 10 deletions scripts/compare-benchmark-jsons.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,27 @@ def extract_dataset_key(df):
# Convert the percentage threshold into ratio cut-offs on either side of 1.0.
_threshold_fraction = threshold_pct / 100.0
improvement_threshold = 1.0 - _threshold_fraction  # e.g., 0.7 for 30%, 0.9 for 10%
regression_threshold = 1.0 + _threshold_fraction  # e.g., 1.3 for 30%, 1.1 for 10%


def compute_cv_pct(runtimes):
    """Compute the coefficient of variation (std_dev / mean * 100) as a percentage.

    The standard deviation is the sample standard deviation (divisor n - 1).

    Args:
        runtimes: A list or tuple of numeric samples. Any other value
            (for example a NaN placeholder in a DataFrame cell) is treated
            as "no data".

    Returns:
        The CV as a float percentage, or NaN when it cannot be computed:
        the input is not a list/tuple, it holds fewer than two samples,
        the mean is zero, or a sample is not numeric.
    """
    nan = float("nan")
    if not isinstance(runtimes, (list, tuple)) or len(runtimes) < 2:
        return nan
    n = len(runtimes)
    try:
        mean = sum(runtimes) / n
        if mean == 0:
            # CV is undefined for a zero mean; avoid dividing by zero.
            return nan
        variance = sum((x - mean) ** 2 for x in runtimes) / (n - 1)
    except TypeError:
        # A non-numeric sample (e.g. None) must not abort the whole report;
        # report "no CV" for this row, like the other unusable inputs.
        return nan
    return (variance**0.5 / mean) * 100


# Compute CV% from all_runtimes when available.
# The has_z_* flags record which per-side CV% columns were created; they are
# reused when the output table is built.
has_z_pr = "all_runtimes_pr" in df3.columns
has_z_base = "all_runtimes_base" in df3.columns
if has_z_pr:
    df3["cv_pct_pr"] = df3["all_runtimes_pr"].apply(compute_cv_pct)
if has_z_base:
    df3["cv_pct_base"] = df3["all_runtimes_base"].apply(compute_cv_pct)

# Generate summary statistics
df3["ratio"] = df3["value_pr"] / df3["value_base"]
# Assign a scalar so every row gets an empty remark regardless of df3's index.
# A freshly built Series would be aligned on index and could leave NaN in
# rows whose labels are not 0..len-1.
df3["remark"] = ""
Expand Down Expand Up @@ -183,16 +204,21 @@ def format_performance(ratio, target_name):
)

# Build table
# Columns are collected in display order; the dict's insertion order becomes
# the column order of the resulting DataFrame.
table_dict = {
    "name": df3["name"],
    f"PR {pr_commit_id[:8]}": df3["value_pr"],
    f"base {base_commit_id[:8]}": df3["value_base"],
    "ratio (PR/base)": df3["ratio"],
    "unit": df3["unit_base"],
}

# CV% columns are included only for the sides whose CV% was computed.
if has_z_pr:
    table_dict["CV% PR"] = df3["cv_pct_pr"]
if has_z_base:
    table_dict["CV% base"] = df3["cv_pct_base"]

# "remark" is inserted last so it stays the right-most column.
table_dict["remark"] = df3["remark"]
table_df = pd.DataFrame(table_dict)

# Output complete formatted markdown
print("\n".join(summary_lines))
Expand Down
Loading