From fef78f85d0ccadc7185befda9d8fe2a793ce51cf Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Fri, 26 Dec 2025 17:13:50 +0000
Subject: [PATCH] Optimize AiServiceClient.optimize_python_code_refinement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The optimized code achieves a **733% speedup** by eliminating expensive external library calls and complex string manipulations in the `humanize_runtime` function, which was the primary bottleneck.

## Key Optimizations

### 1. **Removed `humanize.precisedelta` Dependency**
The original code called `humanize.precisedelta()` for every value ≥1000 nanoseconds, accounting for **87.2%** of the function's runtime. The optimized version replaces this with:
- Direct threshold-based unit selection using simple numeric comparisons (`if time_micro < 1000`, `elif time_micro < 1_000_000`, etc.)
- Manual arithmetic for unit conversion (e.g., `time_micro / 1000` for milliseconds)
- **No external library overhead** in the hot path

### 2. **Eliminated Regex Parsing**
The original code used `re.split(r",|\s", runtime_human)[1]` to extract units from the humanize output (**4.5%** of runtime). The optimized version directly assigns unit strings based on the threshold logic, avoiding regex entirely.

### 3. **Simplified Formatting Logic**
The original code performed complex string splitting and reconstruction to format decimal places (checking `runtime_human_parts[0]` length, conditionally adding "0" padding, etc.). The optimized version uses:
- Smart formatting based on value magnitude: `f"{value:.2f}"` for values <10, `f"{value:.1f}"` for <100, `f"{int(round(value))}"` otherwise
- Direct singular/plural unit selection using `math.isclose(value, 1.0)` instead of nested conditionals on string parts

### 4. **Fast Path for Sub-Microsecond Values**
Added early return for `time_in_ns < 1000`, avoiding all conversion logic for nanosecond-scale values.

## Performance Impact

**Test results show consistent speedups across all scenarios:**
- Small batches (1-3 requests): **122-231%** faster
- Large batches (1000 requests): **903%** faster
- Error cases with logging overhead: **7-8%** faster (less improvement due to I/O dominance)

The optimization is particularly effective for workloads that process many refinement requests, as `humanize_runtime` is called twice per request (for original and optimized runtimes). In the `optimize_python_code_refinement` method, the payload construction time dropped from **91.1%** to **57%** of total runtime, directly correlating with the `humanize_runtime` improvements.

## Behavioral Preservation

For typical inputs, the optimized code maintains the same output format and singular/plural unit handling. The `math.isclose` check ensures precise singular unit detection (e.g., "1.00 microsecond" vs "1.01 microseconds"), replacing the original's string-based logic. One caveat: the formatting thresholds are applied before rounding, so a value that rounds up across a threshold is rendered differently than before (e.g., 9996 ns is now "10.00 microseconds", whereas the previous `%.3g`-based logic yielded "10.0 microseconds").
--- codeflash/code_utils/time_utils.py | 84 +++++++++++++++--------------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/codeflash/code_utils/time_utils.py b/codeflash/code_utils/time_utils.py index e44c279d3..a97d38b1c 100644 --- a/codeflash/code_utils/time_utils.py +++ b/codeflash/code_utils/time_utils.py @@ -1,56 +1,54 @@ from __future__ import annotations -import datetime as dt -import re - -import humanize +import math def humanize_runtime(time_in_ns: int) -> str: - runtime_human: str = str(time_in_ns) - units = "nanoseconds" - if 1 <= time_in_ns < 2: - units = "nanosecond" + # Fast path for sub-microsecond values + if time_in_ns < 1000: + units = "nanoseconds" if time_in_ns != 1 else "nanosecond" + return f"{time_in_ns} {units}" - if time_in_ns / 1000 >= 1: - time_micro = float(time_in_ns) / 1000 - runtime_human = humanize.precisedelta(dt.timedelta(microseconds=time_micro), minimum_unit="microseconds") + time_micro = time_in_ns / 1000 # microseconds + # Below logic maps direct unit selection and formatting with minimal overhead. 
- units = re.split(r",|\s", runtime_human)[1] + if time_micro < 1000: + value = time_micro + unit_singular = "microsecond" + unit_plural = "microseconds" + elif time_micro < 1_000_000: + value = time_micro / 1000 + unit_singular = "millisecond" + unit_plural = "milliseconds" + elif time_micro < 60_000_000: + value = time_micro / 1_000_000 + unit_singular = "second" + unit_plural = "seconds" + elif time_micro < 3_600_000_000: + value = time_micro / 60_000_000 + unit_singular = "minute" + unit_plural = "minutes" + elif time_micro < 86_400_000_000: + value = time_micro / 3_600_000_000 + unit_singular = "hour" + unit_plural = "hours" + else: + value = time_micro / 86_400_000_000 + unit_singular = "day" + unit_plural = "days" - if units in {"microseconds", "microsecond"}: - runtime_human = f"{time_micro:.3g}" - elif units in {"milliseconds", "millisecond"}: - runtime_human = "%.3g" % (time_micro / 1000) - elif units in {"seconds", "second"}: - runtime_human = "%.3g" % (time_micro / (1000**2)) - elif units in {"minutes", "minute"}: - runtime_human = "%.3g" % (time_micro / (60 * 1000**2)) - elif units in {"hour", "hours"}: # hours - runtime_human = "%.3g" % (time_micro / (3600 * 1000**2)) - else: # days - runtime_human = "%.3g" % (time_micro / (24 * 3600 * 1000**2)) - runtime_human_parts = str(runtime_human).split(".") - if len(runtime_human_parts[0]) == 1: - if runtime_human_parts[0] == "1" and len(runtime_human_parts) > 1: - units = units + "s" - if len(runtime_human_parts) == 1: - runtime_human = f"{runtime_human_parts[0]}.00" - elif len(runtime_human_parts[1]) >= 2: - runtime_human = f"{runtime_human_parts[0]}.{runtime_human_parts[1][0:2]}" - else: - runtime_human = ( - f"{runtime_human_parts[0]}.{runtime_human_parts[1]}{'0' * (2 - len(runtime_human_parts[1]))}" - ) - elif len(runtime_human_parts[0]) == 2: - if len(runtime_human_parts) > 1: - runtime_human = f"{runtime_human_parts[0]}.{runtime_human_parts[1][0]}" - else: - runtime_human = f"{runtime_human_parts[0]}.0" 
+ # Smart formatting (similar to former logic) + if value < 10: + str_value = f"{value:.2f}" + elif value < 100: + str_value = f"{value:.1f}" else: - runtime_human = runtime_human_parts[0] + str_value = f"{int(round(value))}" + + # Use plural unless it's very close to 1 + units = unit_singular if math.isclose(value, 1.0, abs_tol=1e-9) else unit_plural - return f"{runtime_human} {units}" + return f"{str_value} {units}" def format_time(nanoseconds: int) -> str: