diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index d482debc6..94fbb44ad 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -16,7 +16,6 @@
 from codeflash.code_utils.env_utils import get_codeflash_api_key
 from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
 from codeflash.code_utils.time_utils import humanize_runtime
-from codeflash.lsp.helpers import is_LSP_enabled
 from codeflash.models.ExperimentMetadata import ExperimentMetadata
 from codeflash.models.models import (
     AIServiceRefinerRequest,
@@ -131,6 +130,7 @@ def optimize_python_code(  # noqa: D417
         experiment_metadata: ExperimentMetadata | None = None,
         *,
         is_async: bool = False,
+        n_candidates: int = 5,
     ) -> list[OptimizedCandidate]:
         """Optimize the given python code for performance by making a request to the Django endpoint.
 
@@ -141,6 +141,7 @@ def optimize_python_code(  # noqa: D417
         - trace_id (str): Trace id of optimization run
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
         - is_async (bool): Whether the function being optimized is async
+        - n_candidates (int): Number of candidates to generate
 
         Returns
         -------
@@ -163,10 +164,10 @@ def optimize_python_code(  # noqa: D417
             "repo_owner": git_repo_owner,
             "repo_name": git_repo_name,
             "is_async": is_async,
-            "lsp_mode": is_LSP_enabled(),
             "call_sequence": self.get_next_sequence(),
+            "n_candidates": n_candidates,
         }
-        logger.debug(f"Sending optimize request: trace_id={trace_id}, lsp_mode={payload['lsp_mode']}")
+        logger.debug(f"Sending optimize request: trace_id={trace_id}, n_candidates={payload['n_candidates']}")
 
         try:
             response = self.make_ai_service_request("/optimize", payload=payload, timeout=self.timeout)
@@ -198,6 +199,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
         dependency_code: str,
         trace_id: str,
         line_profiler_results: str,
+        n_candidates: int,
         experiment_metadata: ExperimentMetadata | None = None,
     ) -> list[OptimizedCandidate]:
         """Optimize the given python code for performance using line profiler results.
@@ -209,6 +211,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
         - trace_id (str): Trace id of optimization run
         - line_profiler_results (str): Line profiler output to guide optimization
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
+        - n_candidates (int): Number of candidates to generate
 
         Returns
         -------
@@ -225,12 +228,12 @@ def optimize_python_code_line_profiler(  # noqa: D417
         payload = {
             "source_code": source_code,
             "dependency_code": dependency_code,
+            "n_candidates": n_candidates,
             "line_profiler_results": line_profiler_results,
             "trace_id": trace_id,
             "python_version": platform.python_version(),
             "experiment_metadata": experiment_metadata,
             "codeflash_version": codeflash_version,
-            "lsp_mode": is_LSP_enabled(),
             "call_sequence": self.get_next_sequence(),
         }
 
diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
index 271eb06bc..a3687fcc4 100644
--- a/codeflash/cli_cmds/cli.py
+++ b/codeflash/cli_cmds/cli.py
@@ -107,6 +107,9 @@ def parse_args() -> Namespace:
         action="store_true",
         help="(Deprecated) Async function optimization is now enabled by default. "
         "This flag is ignored.",
     )
+    parser.add_argument(
+        "--effort", type=str, help="Effort level for optimization", choices=["low", "medium", "high"], default="medium"
+    )
     args, unknown_args = parser.parse_known_args()
     sys.argv[:] = [sys.argv[0], *unknown_args]
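
Reviewer note (illustrative, not part of the patch): the new --effort flag parses as shown in this minimal standalone sketch; only the argument added above is reproduced.

    from argparse import ArgumentParser

    parser = ArgumentParser()
    # Same flag as added in cli.py: constrained choices, "medium" default.
    parser.add_argument("--effort", type=str, choices=["low", "medium", "high"], default="medium")

    print(parser.parse_args([]).effort)                     # "medium"
    print(parser.parse_args(["--effort", "high"]).effort)   # "high"
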
diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
index 73d09a127..96d6b8e14 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -1,18 +1,20 @@
+from __future__ import annotations
+
+from enum import Enum
+from typing import Any, Union
+
 MAX_TEST_RUN_ITERATIONS = 5
 INDIVIDUAL_TESTCASE_TIMEOUT = 15
 MAX_FUNCTION_TEST_SECONDS = 60
-N_CANDIDATES = 5
 MIN_IMPROVEMENT_THRESHOLD = 0.05
 MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10  # 10% minimum improvement for async throughput
 MAX_TEST_FUNCTION_RUNS = 50
 MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6  # 100ms
-N_TESTS_TO_GENERATE = 2
 TOTAL_LOOPING_TIME = 10.0  # 10 second candidate benchmarking budget
 COVERAGE_THRESHOLD = 60.0
 MIN_TESTCASE_PASSED_THRESHOLD = 6
 REPEAT_OPTIMIZATION_PROBABILITY = 0.1
 DEFAULT_IMPORTANCE_THRESHOLD = 0.001
-N_CANDIDATES_LP = 6
 
 # pytest loop stability
 # For now, we use strict thresholds (large windows and low tolerances), since this is still experimental.
@@ -21,34 +23,14 @@
 STABILITY_SPREAD_TOLERANCE = 0.0025  # 0.25% window spread
 
 # Refinement
-REFINE_ALL_THRESHOLD = 2  # when valid optimizations count is 2 or less, refine all optimizations
 REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1)  # (runtime, diff), runtime is more important than diff by a factor of 2
-TOP_N_REFINEMENTS = 0.45  # top 45% of valid optimizations (based on the weighted score) are refined
 
 # LSP-specific
-N_CANDIDATES_LSP = 3
-N_TESTS_TO_GENERATE_LSP = 2
 TOTAL_LOOPING_TIME_LSP = 10.0  # Kept same timing for LSP mode to avoid in increase in performance reporting
-N_CANDIDATES_LP_LSP = 3
 
 # setting this value to 1 will disable repair if there is at least one correct candidate
 MIN_CORRECT_CANDIDATES = 2
 
-# Code repair
-REPAIR_UNMATCHED_PERCENTAGE_LIMIT = 0.4  # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more stricted)
-MAX_REPAIRS_PER_TRACE = 4  # maximum number of repairs we will do for each function
-
-# Adaptive optimization
-# TODO (ali): make this configurable with effort arg once the PR is merged
-ADAPTIVE_OPTIMIZATION_THRESHOLD = 2  # Max adaptive optimizations per single candidate tree (for example : optimize -> refine -> adaptive -> another adaptive).
-# MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = 4  # maximum number of adaptive optimizations we will do for each function (this can be 2 adaptive optimizations for 2 candidates for example)
-MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = (
-    0  # disable adaptive optimizations until we have this value controlled by the effort arg
-)
-
-MAX_N_CANDIDATES = 5
-MAX_N_CANDIDATES_LP = 6
-
 
 try:
     from codeflash.lsp.helpers import is_LSP_enabled
@@ -56,9 +38,67 @@
 except ImportError:
     _IS_LSP_ENABLED = False
 
-N_CANDIDATES_EFFECTIVE = min(N_CANDIDATES_LSP if _IS_LSP_ENABLED else N_CANDIDATES, MAX_N_CANDIDATES)
-N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP)
-N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE
 TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME
 
 MAX_CONTEXT_LEN_REVIEW = 1000
+
+
+class EffortLevel(str, Enum):
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+
+
+class EffortKeys(str, Enum):
+    N_OPTIMIZER_CANDIDATES = "N_OPTIMIZER_CANDIDATES"
+    N_OPTIMIZER_LP_CANDIDATES = "N_OPTIMIZER_LP_CANDIDATES"
+    N_GENERATED_TESTS = "N_GENERATED_TESTS"
+    MAX_CODE_REPAIRS_PER_TRACE = "MAX_CODE_REPAIRS_PER_TRACE"
+    REPAIR_UNMATCHED_PERCENTAGE_LIMIT = "REPAIR_UNMATCHED_PERCENTAGE_LIMIT"
+    TOP_VALID_CANDIDATES_FOR_REFINEMENT = "TOP_VALID_CANDIDATES_FOR_REFINEMENT"
+    ADAPTIVE_OPTIMIZATION_THRESHOLD = "ADAPTIVE_OPTIMIZATION_THRESHOLD"
+    MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = "MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE"
+
+
+EFFORT_VALUES: dict[str, dict[EffortLevel, Any]] = {
+    EffortKeys.N_OPTIMIZER_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 5, EffortLevel.HIGH: 6},
+    EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 4, EffortLevel.MEDIUM: 6, EffortLevel.HIGH: 7},
+    # effort does not affect the number of generated tests for now
+    EffortKeys.N_GENERATED_TESTS.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 2, EffortLevel.HIGH: 2},
+    # maximum number of repairs we will do for each function (in case the number of valid candidates is less than MIN_CORRECT_CANDIDATES)
+    EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 5},
+    # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair stricter)
+    # at low effort we lower the limit to 20% to be stricter (fewer repairs, less time)
+    EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT.value: {
+        EffortLevel.LOW: 0.2,
+        EffortLevel.MEDIUM: 0.3,
+        EffortLevel.HIGH: 0.4,
+    },
+    # number of top valid candidates selected for refinement
+    EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4},
+    # max number of adaptive optimization calls to make per single candidate tree
+    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 0, EffortLevel.HIGH: 2},
+    # max number of adaptive optimization calls to make per trace
+    EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE.value: {
+        EffortLevel.LOW: 0,
+        EffortLevel.MEDIUM: 0,
+        EffortLevel.HIGH: 4,
+    },
+}
+
+
+def get_effort_value(key: EffortKeys, effort: Union[EffortLevel, str]) -> Any:  # noqa: ANN401
+    key_str = key.value
+
+    if isinstance(effort, str):
+        try:
+            effort = EffortLevel(effort)
+        except ValueError:
+            msg = f"Invalid effort level: {effort}"
+            raise ValueError(msg) from None
+
+    if key_str not in EFFORT_VALUES:
+        msg = f"Invalid key: {key_str}"
+        raise ValueError(msg)
+
+    return EFFORT_VALUES[key_str][effort]
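
Reviewer note (illustrative, not part of the patch): a hypothetical usage sketch of the effort lookup introduced above, with values taken from EFFORT_VALUES as defined in config_consts.py.

    from codeflash.code_utils.config_consts import EffortKeys, EffortLevel, get_effort_value

    # Look up a knob for an explicit effort level.
    get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, EffortLevel.LOW)   # 3
    # Plain strings are coerced to EffortLevel before the lookup.
    get_effort_value(EffortKeys.N_OPTIMIZER_LP_CANDIDATES, "high")         # 7
    # Unknown levels raise ValueError.
    get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, "extreme")         # ValueError: Invalid effort level: extreme
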
diff --git a/codeflash/code_utils/git_utils.py b/codeflash/code_utils/git_utils.py
index 6d08c7dcc..ee8b7dbc3 100644
--- a/codeflash/code_utils/git_utils.py
+++ b/codeflash/code_utils/git_utils.py
@@ -1,10 +1,7 @@
 from __future__ import annotations
 
 import os
-import shutil
-import subprocess
 import sys
-import tempfile
 import time
 from functools import cache
 from io import StringIO
@@ -16,7 +13,6 @@
 from unidiff import PatchSet
 
 from codeflash.cli_cmds.console import logger
-from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE
 
 if TYPE_CHECKING:
     from git import Repo
@@ -195,36 +191,6 @@ def check_and_push_branch(repo: git.Repo, git_remote: str | None = "origin", *,
     return True
 
 
-def create_worktree_root_dir(module_root: Path) -> tuple[Path | None, Path | None]:
-    git_root = git_root_dir() if check_running_in_git_repo(module_root) else None
-    worktree_root_dir = Path(tempfile.mkdtemp()) if git_root else None
-    return git_root, worktree_root_dir
-
-
-def create_git_worktrees(
-    git_root: Path | None, worktree_root_dir: Path | None, module_root: Path
-) -> tuple[Path | None, list[Path]]:
-    if git_root and worktree_root_dir:
-        worktree_root = Path(tempfile.mkdtemp(dir=worktree_root_dir))
-        worktrees = [Path(tempfile.mkdtemp(dir=worktree_root)) for _ in range(N_CANDIDATES_EFFECTIVE + 1)]
-        for worktree in worktrees:
-            subprocess.run(["git", "worktree", "add", "-d", worktree], cwd=module_root, check=True)
-    else:
-        worktree_root = None
-        worktrees = []
-    return worktree_root, worktrees
-
-
-def remove_git_worktrees(worktree_root: Path | None, worktrees: list[Path]) -> None:
-    try:
-        for worktree in worktrees:
-            subprocess.run(["git", "worktree", "remove", "-f", worktree], check=True)
-    except subprocess.CalledProcessError as e:
-        logger.warning(f"Error removing worktrees: {e}")
-    if worktree_root:
-        shutil.rmtree(worktree_root)
-
-
 def get_last_commit_author_if_pr_exists(repo: Repo | None = None) -> str | None:
     """Return the author's name of the last commit in the current branch if PR_NUMBER is set.
 
diff --git a/codeflash/lsp/server.py b/codeflash/lsp/server.py
index 582e5033c..6e97a9e15 100644
--- a/codeflash/lsp/server.py
+++ b/codeflash/lsp/server.py
@@ -7,6 +7,7 @@
 from pygls.lsp.server import LanguageServer
 from pygls.protocol import LanguageServerProtocol
 
+from codeflash.code_utils.config_consts import EffortLevel
 from codeflash.either import Result
 from codeflash.models.models import CodeOptimizationContext
 
@@ -37,6 +38,7 @@ def prepare_optimizer_arguments(self, config_file: Path) -> None:
         args.config_file = config_file
         args.no_pr = True  # LSP server should not create PRs
         args.worktree = True
+        args.effort = EffortLevel.LOW.value  # use low effort for faster LSP responses
         self.args = args
 
         # avoid initializing the optimizer during initialization, because it can cause an error if the api key is invalid
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 112298755..f74771da6 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -43,19 +43,15 @@
     unified_diff_strings,
 )
 from codeflash.code_utils.config_consts import (
-    ADAPTIVE_OPTIMIZATION_THRESHOLD,
     COVERAGE_THRESHOLD,
     INDIVIDUAL_TESTCASE_TIMEOUT,
-    MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE,
-    MAX_REPAIRS_PER_TRACE,
     MIN_CORRECT_CANDIDATES,
-    N_TESTS_TO_GENERATE_EFFECTIVE,
-    REFINE_ALL_THRESHOLD,
     REFINED_CANDIDATE_RANKING_WEIGHTS,
-    REPAIR_UNMATCHED_PERCENTAGE_LIMIT,
     REPEAT_OPTIMIZATION_PROBABILITY,
-    TOP_N_REFINEMENTS,
     TOTAL_LOOPING_TIME_EFFECTIVE,
+    EffortKeys,
+    EffortLevel,
+    get_effort_value,
 )
 from codeflash.code_utils.deduplicate_code import normalize_code
 from codeflash.code_utils.edit_generated_tests import (
@@ -187,8 +183,8 @@ def __init__(
         self,
         initial_candidates: list[OptimizedCandidate],
         future_line_profile_results: concurrent.futures.Future,
-        ai_service_client: AiServiceClient,
         eval_ctx: CandidateEvaluationContext,
+        effort: str,
         original_markdown_code: str,
         future_all_refinements: list[concurrent.futures.Future],
         future_all_code_repair: list[concurrent.futures.Future],
@@ -198,11 +194,11 @@
         self.forest = CandidateForest()
         self.line_profiler_done = False
         self.refinement_done = False
+        self.eval_ctx = eval_ctx
+        self.effort = effort
         self.candidate_len = len(initial_candidates)
-        self.ai_service_client = ai_service_client
         self.refinement_calls_count = 0
         self.original_markdown_code = original_markdown_code
-        self.eval_ctx = eval_ctx
 
         # Initialize queue with initial candidates
         for candidate in initial_candidates:
@@ -298,8 +294,14 @@ def _filter_refined_candidates(self, candidates: list[OptimizedCandidate]) -> li
         """We generate a weighted ranking based on the runtime and diff lines and select the best of valid optimizations to be tested."""
         self.refinement_calls_count += len(candidates)
 
-        if len(candidates) <= REFINE_ALL_THRESHOLD:
+        top_n_candidates = int(
+            min(int(get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.effort)), len(candidates))
+        )
+
+        if len(candidates) == top_n_candidates:
+            # no need for ranking since we will return all candidates
             return candidates
+
         diff_lens_list = []
         runtimes_list = []
         for c in candidates:
@@ -326,7 +328,6 @@ def _filter_refined_candidates(self, candidates: list[OptimizedCandidate]) -> li
         diffs_norm = normalize_by_max(diff_lens_list)  # the lower the better
         score_dict = create_score_dictionary_from_metrics(weights, runtime_norm, diffs_norm)
 
-        top_n_candidates = int((TOP_N_REFINEMENTS * len(runtimes_list)) + 0.5)
         top_indecies = sorted(score_dict, key=score_dict.get)[:top_n_candidates]
 
         return [candidates[idx] for idx in top_indecies]
 
@@ -377,6 +378,9 @@
         self.experiment_id = os.getenv("CODEFLASH_EXPERIMENT_ID", None)
         self.local_aiservice_client = LocalAiServiceClient() if self.experiment_id else None
         self.test_files = TestFiles(test_files=[])
+
+        self.effort = getattr(args, "effort", EffortLevel.MEDIUM.value) if args else EffortLevel.MEDIUM.value
+
         self.args = args  # Check defaults for these
         self.function_trace_id: str = str(uuid.uuid4())
         self.original_module_path = module_name_from_file_path(self.function_to_optimize.file_path, self.project_root)
@@ -384,7 +388,7 @@
         self.function_benchmark_timings = function_benchmark_timings if function_benchmark_timings else {}
         self.total_benchmark_timings = total_benchmark_timings if total_benchmark_timings else {}
         self.replay_tests_dir = replay_tests_dir if replay_tests_dir else None
-        n_tests = N_TESTS_TO_GENERATE_EFFECTIVE
+        n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.effort)
         self.executor = concurrent.futures.ThreadPoolExecutor(
             max_workers=n_tests + 3 if self.experiment_id is None else n_tests + 4
         )
@@ -437,7 +441,7 @@ generate_and_instrument_tests(
             str,
         ]:
         """Generate and instrument tests for the function."""
-        n_tests = N_TESTS_TO_GENERATE_EFFECTIVE
+        n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.effort)
         generated_test_paths = [
             get_test_file_path(
                 self.test_cfg.tests_root, self.function_to_optimize.function_name, test_index, test_type="unit"
             )
@@ -1028,6 +1032,7 @@ determine_best_candidate(
                 dependency_code=code_context.read_only_context_code,
                 trace_id=self.get_trace_id(exp_type),
                 line_profiler_results=original_code_baseline.line_profile_results["str_out"],
+                n_candidates=get_effort_value(EffortKeys.N_OPTIMIZER_LP_CANDIDATES, self.effort),
                 experiment_metadata=ExperimentMetadata(
                     id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment"
                 )
@@ -1038,8 +1043,8 @@
             processor = CandidateProcessor(
                 candidates,
                 future_line_profile_results,
-                self.aiservice_client,
                 eval_ctx,
+                self.effort,
                 code_context.read_writable_code.markdown,
                 self.future_all_refinements,
                 self.future_all_code_repair,
@@ -1105,7 +1110,9 @@
         eval_ctx: CandidateEvaluationContext,
         ai_service_client: AiServiceClient,
     ) -> concurrent.futures.Future[OptimizedCandidate | None] | None:
-        if self.adaptive_optimization_counter >= MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE:
+        if self.adaptive_optimization_counter >= get_effort_value(
+            EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE, self.effort
+        ):
             logger.debug(
                 f"Max adaptive optimizations reached for {self.function_to_optimize.qualified_name}: {self.adaptive_optimization_counter}"
             )
@@ -1113,7 +1120,7 @@
 
         adaptive_count = sum(1 for c in prev_candidates if c.source == OptimizedCandidateSource.ADAPTIVE)
 
-        if adaptive_count >= ADAPTIVE_OPTIMIZATION_THRESHOLD:
+        if adaptive_count >= get_effort_value(EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD, self.effort):
             return None
 
         request_candidates = []
@@ -1433,7 +1440,7 @@ generate_tests(
         generated_perf_test_paths: list[Path],
     ) -> Result[tuple[int, GeneratedTestsList, dict[str, set[FunctionCalledInTest]], str], str]:
         """Generate unit tests and concolic tests for the function."""
-        n_tests = N_TESTS_TO_GENERATE_EFFECTIVE
+        n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.effort)
         assert len(generated_test_paths) == n_tests
 
         if not self.args.no_gen_tests:
@@ -1500,6 +1507,7 @@ generate_optimizations(
         run_experiment: bool = False,  # noqa: FBT001, FBT002
     ) -> Result[tuple[OptimizationSet, str], str]:
         """Generate optimization candidates for the function. Backend handles multi-model diversity."""
+        n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.effort)
         future_optimization_candidates = self.executor.submit(
             self.aiservice_client.optimize_python_code,
             read_writable_code.markdown,
@@ -1507,6 +1515,7 @@
             self.function_trace_id[:-4] + "EXP0" if run_experiment else self.function_trace_id,
             ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
             is_async=self.function_to_optimize.is_async,
+            n_candidates=n_candidates,
         )
 
         future_references = self.executor.submit(
@@ -1529,6 +1538,7 @@
                 self.function_trace_id[:-4] + "EXP1",
                 ExperimentMetadata(id=self.experiment_id, group="experiment"),
                 is_async=self.function_to_optimize.is_async,
+                n_candidates=n_candidates,
             )
             futures.append(future_candidates_exp)
 
@@ -2087,8 +2097,9 @@ repair_if_possible(
         test_results_count: int,
         exp_type: str,
     ) -> None:
-        if self.repair_counter >= MAX_REPAIRS_PER_TRACE:
-            logger.debug(f"Repair counter reached {MAX_REPAIRS_PER_TRACE}, skipping repair")
+        max_repairs = get_effort_value(EffortKeys.MAX_CODE_REPAIRS_PER_TRACE, self.effort)
+        if self.repair_counter >= max_repairs:
+            logger.debug(f"Repair counter reached {max_repairs}, skipping repair")
             return
 
         successful_candidates_count = sum(1 for is_correct in eval_ctx.is_correct.values() if is_correct)
@@ -2104,7 +2115,7 @@
             logger.debug("No diffs found, skipping repair")
             return
 
         result_unmatched_perc = len(diffs) / test_results_count
-        if result_unmatched_perc > REPAIR_UNMATCHED_PERCENTAGE_LIMIT:
+        if result_unmatched_perc > get_effort_value(EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT, self.effort):
             logger.debug(f"Result unmatched percentage is {result_unmatched_perc * 100}%, skipping repair")
             return
diff --git a/codeflash/tracer.py b/codeflash/tracer.py
index eb011befa..c26214f77 100644
--- a/codeflash/tracer.py
+++ b/codeflash/tracer.py
@@ -24,6 +24,7 @@
 from codeflash.cli_cmds.console import console
 from codeflash.code_utils.code_utils import get_run_tmp_file
 from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE
+from codeflash.code_utils.config_consts import EffortLevel
 from codeflash.code_utils.config_parser import parse_config_file
 from codeflash.tracing.pytest_parallelization import pytest_split
 
@@ -214,6 +215,7 @@ def main(args: Namespace | None = None) -> ArgumentParser:
 
         from codeflash.optimization import optimizer
 
+        args.effort = EffortLevel.HIGH.value
         optimizer.run_with_args(args)
 
         # Delete the trace file and the replay test file if they exist
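
Reviewer note (illustrative, not part of the patch): a hypothetical end-to-end sketch of how an effort setting flows from the parsed CLI args into the optimizer's knobs; SimpleNamespace stands in for the argparse Namespace.

    from types import SimpleNamespace

    from codeflash.code_utils.config_consts import EffortKeys, EffortLevel, get_effort_value

    # What parse_args() hands over for `--effort low`.
    args = SimpleNamespace(effort="low")

    # Mirrors FunctionOptimizer.__init__: fall back to medium when effort is absent.
    effort = getattr(args, "effort", EffortLevel.MEDIUM.value) if args else EffortLevel.MEDIUM.value

    print(get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, effort))      # 3 candidates at low effort
    print(get_effort_value(EffortKeys.MAX_CODE_REPAIRS_PER_TRACE, effort))  # 2 repairs at low effort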