From 1018a693dd0eeb4e5c3526756767cf32848c7579 Mon Sep 17 00:00:00 2001 From: ali Date: Thu, 18 Dec 2025 17:54:13 +0200 Subject: [PATCH 01/12] optimization effort --- codeflash/api/aiservice.py | 9 ++-- codeflash/cli_cmds/cli.py | 3 ++ codeflash/code_utils/config_consts.py | 49 +++++++++++++++----- codeflash/code_utils/git_utils.py | 34 -------------- codeflash/optimization/function_optimizer.py | 14 +++--- 5 files changed, 49 insertions(+), 60 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 8743ab2ac..217f52167 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -12,7 +12,6 @@ from codeflash.cli_cmds.console import console, logger from codeflash.code_utils.code_replacer import is_zero_diff from codeflash.code_utils.code_utils import unified_diff_strings -from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE, N_CANDIDATES_LP_EFFECTIVE from codeflash.code_utils.env_utils import get_codeflash_api_key from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name from codeflash.code_utils.time_utils import humanize_runtime @@ -130,7 +129,7 @@ def optimize_python_code( # noqa: D417 payload = { "source_code": source_code, "dependency_code": dependency_code, - "num_variants": num_candidates, + "n_candidates": num_candidates, "trace_id": trace_id, "python_version": platform.python_version(), "experiment_metadata": experiment_metadata, @@ -138,7 +137,6 @@ def optimize_python_code( # noqa: D417 "current_username": get_last_commit_author_if_pr_exists(None), "repo_owner": git_repo_owner, "repo_name": git_repo_name, - "n_candidates": N_CANDIDATES_EFFECTIVE, "is_async": is_async, } @@ -172,7 +170,7 @@ def optimize_python_code_line_profiler( # noqa: D417 dependency_code: str, trace_id: str, line_profiler_results: str, - num_candidates: int = 10, + num_candidates: int = 8, experiment_metadata: ExperimentMetadata | None = None, ) -> list[OptimizedCandidate]: """Optimize the given python code for performance by making a request to the Django endpoint. @@ -193,14 +191,13 @@ def optimize_python_code_line_profiler( # noqa: D417 payload = { "source_code": source_code, "dependency_code": dependency_code, - "num_variants": num_candidates, + "n_candidates_lp": num_candidates, "line_profiler_results": line_profiler_results, "trace_id": trace_id, "python_version": platform.python_version(), "experiment_metadata": experiment_metadata, "codeflash_version": codeflash_version, "lsp_mode": is_LSP_enabled(), - "n_candidates_lp": N_CANDIDATES_LP_EFFECTIVE, } console.rule() diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py index a6e28aaaa..139a25d98 100644 --- a/codeflash/cli_cmds/cli.py +++ b/codeflash/cli_cmds/cli.py @@ -104,6 +104,9 @@ def parse_args() -> Namespace: action="store_true", help="(Deprecated) Async function optimization is now enabled by default. 
This flag is ignored.", ) + parser.add_argument( + "--effort", type=str, help="Effort level for optimization", choices=["low", "medium", "high"], default="medium" + ) args, unknown_args = parser.parse_known_args() sys.argv[:] = [sys.argv[0], *unknown_args] diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index 6b2805fbf..31bb78db1 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -1,27 +1,20 @@ +from enum import Enum + MAX_TEST_RUN_ITERATIONS = 5 INDIVIDUAL_TESTCASE_TIMEOUT = 15 MAX_FUNCTION_TEST_SECONDS = 60 -N_CANDIDATES = 5 MIN_IMPROVEMENT_THRESHOLD = 0.05 MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10 # 10% minimum improvement for async throughput MAX_TEST_FUNCTION_RUNS = 50 MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms -N_TESTS_TO_GENERATE = 2 TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget COVERAGE_THRESHOLD = 60.0 MIN_TESTCASE_PASSED_THRESHOLD = 6 REPEAT_OPTIMIZATION_PROBABILITY = 0.1 DEFAULT_IMPORTANCE_THRESHOLD = 0.001 -N_CANDIDATES_LP = 6 # LSP-specific -N_CANDIDATES_LSP = 3 -N_TESTS_TO_GENERATE_LSP = 2 TOTAL_LOOPING_TIME_LSP = 10.0 # Kept same timing for LSP mode to avoid in increase in performance reporting -N_CANDIDATES_LP_LSP = 3 - -MAX_N_CANDIDATES = 5 -MAX_N_CANDIDATES_LP = 6 try: from codeflash.lsp.helpers import is_LSP_enabled @@ -30,9 +23,41 @@ except ImportError: _IS_LSP_ENABLED = False -N_CANDIDATES_EFFECTIVE = min(N_CANDIDATES_LSP if _IS_LSP_ENABLED else N_CANDIDATES, MAX_N_CANDIDATES) -N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP) -N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME MAX_CONTEXT_LEN_REVIEW = 1000 + + +class EffortLevel(str, Enum): + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + + +class Effort: + @staticmethod + def get_number_of_optimizer_candidates(effort: str) -> int: + if effort == EffortLevel.LOW.value: + return 3 + if effort == EffortLevel.MEDIUM.value: + return 4 + if effort == EffortLevel.HIGH.value: + return 5 + msg = f"Invalid effort level: {effort}" + raise ValueError(msg) + + @staticmethod + def get_number_of_optimizer_lp_candidates(effort: str) -> int: + if effort == EffortLevel.LOW.value: + return 3 + if effort == EffortLevel.MEDIUM.value: + return 5 + if effort == EffortLevel.HIGH.value: + return 6 + msg = f"Invalid effort level: {effort}" + raise ValueError(msg) + + @staticmethod + def get_number_of_generated_tests(effort: str) -> int: # noqa: ARG004 + # we don't use effort with generated tests for now + return 2 diff --git a/codeflash/code_utils/git_utils.py b/codeflash/code_utils/git_utils.py index 40a725692..c6501b36e 100644 --- a/codeflash/code_utils/git_utils.py +++ b/codeflash/code_utils/git_utils.py @@ -1,10 +1,7 @@ from __future__ import annotations import os -import shutil -import subprocess import sys -import tempfile import time from functools import cache from io import StringIO @@ -16,7 +13,6 @@ from unidiff import PatchSet from codeflash.cli_cmds.console import logger -from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE if TYPE_CHECKING: from git import Repo @@ -153,36 +149,6 @@ def check_and_push_branch(repo: git.Repo, git_remote: str | None = "origin", *, return True -def create_worktree_root_dir(module_root: Path) -> tuple[Path | None, Path | None]: - git_root = 
git_root_dir() if check_running_in_git_repo(module_root) else None - worktree_root_dir = Path(tempfile.mkdtemp()) if git_root else None - return git_root, worktree_root_dir - - -def create_git_worktrees( - git_root: Path | None, worktree_root_dir: Path | None, module_root: Path -) -> tuple[Path | None, list[Path]]: - if git_root and worktree_root_dir: - worktree_root = Path(tempfile.mkdtemp(dir=worktree_root_dir)) - worktrees = [Path(tempfile.mkdtemp(dir=worktree_root)) for _ in range(N_CANDIDATES_EFFECTIVE + 1)] - for worktree in worktrees: - subprocess.run(["git", "worktree", "add", "-d", worktree], cwd=module_root, check=True) - else: - worktree_root = None - worktrees = [] - return worktree_root, worktrees - - -def remove_git_worktrees(worktree_root: Path | None, worktrees: list[Path]) -> None: - try: - for worktree in worktrees: - subprocess.run(["git", "worktree", "remove", "-f", worktree], check=True) - except subprocess.CalledProcessError as e: - logger.warning(f"Error removing worktrees: {e}") - if worktree_root: - shutil.rmtree(worktree_root) - - def get_last_commit_author_if_pr_exists(repo: Repo | None = None) -> str | None: """Return the author's name of the last commit in the current branch if PR_NUMBER is set. diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 860c2eaf1..b07c008e9 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -42,11 +42,9 @@ from codeflash.code_utils.config_consts import ( COVERAGE_THRESHOLD, INDIVIDUAL_TESTCASE_TIMEOUT, - N_CANDIDATES_EFFECTIVE, - N_CANDIDATES_LP_EFFECTIVE, - N_TESTS_TO_GENERATE_EFFECTIVE, REPEAT_OPTIMIZATION_PROBABILITY, TOTAL_LOOPING_TIME_EFFECTIVE, + Effort, ) from codeflash.code_utils.deduplicate_code import normalize_code from codeflash.code_utils.edit_generated_tests import ( @@ -239,7 +237,7 @@ def __init__( self.function_benchmark_timings = function_benchmark_timings if function_benchmark_timings else {} self.total_benchmark_timings = total_benchmark_timings if total_benchmark_timings else {} self.replay_tests_dir = replay_tests_dir if replay_tests_dir else None - n_tests = N_TESTS_TO_GENERATE_EFFECTIVE + n_tests = Effort.get_number_of_generated_tests(args.effort) self.executor = concurrent.futures.ThreadPoolExecutor( max_workers=n_tests + 3 if self.experiment_id is None else n_tests + 4 ) @@ -287,7 +285,7 @@ def generate_and_instrument_tests( str, ]: """Generate and instrument tests for the function.""" - n_tests = N_TESTS_TO_GENERATE_EFFECTIVE + n_tests = Effort.get_number_of_generated_tests(self.args.effort) generated_test_paths = [ get_test_file_path( self.test_cfg.tests_root, self.function_to_optimize.function_name, test_index, test_type="unit" @@ -842,7 +840,7 @@ def determine_best_candidate( dependency_code=code_context.read_only_context_code, trace_id=self.get_trace_id(exp_type), line_profiler_results=original_code_baseline.line_profile_results["str_out"], - num_candidates=N_CANDIDATES_LP_EFFECTIVE, + num_candidates=Effort.get_number_of_optimizer_lp_candidates(self.args.effort), experiment_metadata=ExperimentMetadata( id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment" ) @@ -1211,7 +1209,7 @@ def generate_tests( generated_perf_test_paths: list[Path], ) -> Result[tuple[int, GeneratedTestsList, dict[str, set[FunctionCalledInTest]], str], str]: """Generate unit tests and concolic tests for the function.""" - n_tests = N_TESTS_TO_GENERATE_EFFECTIVE + n_tests = 
Effort.get_number_of_generated_tests(self.args.effort) assert len(generated_test_paths) == n_tests # Submit test generation tasks @@ -1273,7 +1271,7 @@ def generate_optimizations( run_experiment: bool = False, # noqa: FBT001, FBT002 ) -> Result[tuple[OptimizationSet, str], str]: """Generate optimization candidates for the function.""" - n_candidates = N_CANDIDATES_EFFECTIVE + n_candidates = Effort.get_number_of_optimizer_candidates(self.args.effort) future_optimization_candidates = self.executor.submit( self.aiservice_client.optimize_python_code, From 3e20a37f9426612d34ddaef136a4c4f98337922c Mon Sep 17 00:00:00 2001 From: ali Date: Fri, 19 Dec 2025 16:47:25 +0200 Subject: [PATCH 02/12] more effort values --- codeflash/code_utils/config_consts.py | 78 +++++++++++--------- codeflash/optimization/function_optimizer.py | 35 +++++---- 2 files changed, 62 insertions(+), 51 deletions(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index 7cabe017b..e252fe9d1 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -1,4 +1,4 @@ -from enum import Enum +from enum import StrEnum, auto MAX_TEST_RUN_ITERATIONS = 5 INDIVIDUAL_TESTCASE_TIMEOUT = 15 @@ -13,18 +13,11 @@ REPEAT_OPTIMIZATION_PROBABILITY = 0.1 DEFAULT_IMPORTANCE_THRESHOLD = 0.001 -# Refinement -REFINE_ALL_THRESHOLD = 2 # when valid optimizations count is 2 or less, refine all optimizations REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1) # (runtime, diff), runtime is more important than diff by a factor of 2 -TOP_N_REFINEMENTS = 0.45 # top 45% of valid optimizations (based on the weighted score) are refined # LSP-specific TOTAL_LOOPING_TIME_LSP = 10.0 # Kept same timing for LSP mode to avoid in increase in performance reporting -# Code repair -REPAIR_UNMATCHED_PERCENTAGE_LIMIT = 0.4 # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more stricted) -MAX_REPAIRS_PER_TRACE = 4 # maximum number of repairs we will do for each function - try: from codeflash.lsp.helpers import is_LSP_enabled @@ -37,36 +30,49 @@ MAX_CONTEXT_LEN_REVIEW = 1000 -class EffortLevel(str, Enum): - LOW = "low" - MEDIUM = "medium" - HIGH = "high" +class EffortLevel(StrEnum): + LOW = auto() + MEDIUM = auto() + HIGH = auto() -class Effort: - @staticmethod - def get_number_of_optimizer_candidates(effort: str) -> int: - if effort == EffortLevel.LOW.value: - return 3 - if effort == EffortLevel.MEDIUM.value: - return 4 - if effort == EffortLevel.HIGH.value: - return 5 - msg = f"Invalid effort level: {effort}" - raise ValueError(msg) +class EffortKeys(StrEnum): + N_OPTIMIZER_CANDIDATES = auto() + N_OPTIMIZER_LP_CANDIDATES = auto() + N_GENERATED_TESTS = auto() + MAX_CODE_REPAIRS_PER_TRACE = auto() + REPAIR_UNMATCHED_PERCENTAGE_LIMIT = auto() + REFINE_ALL_THRESHOLD = auto() + TOP_VALID_CANDIDATES_FOR_REFINEMENT = auto() + - @staticmethod - def get_number_of_optimizer_lp_candidates(effort: str) -> int: - if effort == EffortLevel.LOW.value: - return 3 - if effort == EffortLevel.MEDIUM.value: - return 5 - if effort == EffortLevel.HIGH.value: - return 6 +EFFORT_VALUES: dict[str, dict[EffortLevel, any]] = { + EffortKeys.N_OPTIMIZER_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 4, EffortLevel.HIGH: 5}, + EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 5, EffortLevel.HIGH: 6}, + # we don't use effort with generated tests for now + EffortKeys.N_GENERATED_TESTS.value: {EffortLevel.LOW: 2, 
EffortLevel.MEDIUM: 2, EffortLevel.HIGH: 2},
+    # maximum number of repairs we will do for each function
+    EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 4, EffortLevel.HIGH: 5},
+    # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more strict)
+    # on the low effort we lower the limit to 20% to be more strict (less repairs)
+    EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT.value: {
+        EffortLevel.LOW: 0.2,
+        EffortLevel.MEDIUM: 0.4,
+        EffortLevel.HIGH: 0.5,
+    },
+    # when valid optimizations count is N or less, refine all optimizations
+    EffortKeys.REFINE_ALL_THRESHOLD.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4},
+    # Top valid candidates percentage for refinements
+    EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4},
+}
+
+
+def get_effort_value(key: EffortKeys, effort: EffortLevel) -> any:
+    key_str = key.value
+    if key_str in EFFORT_VALUES:
+        if effort in EFFORT_VALUES[key_str]:
+            return EFFORT_VALUES[key_str][effort]
     msg = f"Invalid effort level: {effort}"
     raise ValueError(msg)
-
-    @staticmethod
-    def get_number_of_generated_tests(effort: str) -> int:  # noqa: ARG004
-        # we don't use effort with generated tests for now
-        return 2
+    msg = f"Invalid key: {key_str}"
+    raise ValueError(msg)
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 81c51fc3e..8771ff304 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -45,14 +45,11 @@
 from codeflash.code_utils.config_consts import (
     COVERAGE_THRESHOLD,
     INDIVIDUAL_TESTCASE_TIMEOUT,
-    MAX_REPAIRS_PER_TRACE,
-    REFINE_ALL_THRESHOLD,
     REFINED_CANDIDATE_RANKING_WEIGHTS,
-    REPAIR_UNMATCHED_PERCENTAGE_LIMIT,
     REPEAT_OPTIMIZATION_PROBABILITY,
-    TOP_N_REFINEMENTS,
     TOTAL_LOOPING_TIME_EFFECTIVE,
-    Effort,
+    EffortKeys,
+    get_effort_value,
 )
 from codeflash.code_utils.deduplicate_code import normalize_code
 from codeflash.code_utils.edit_generated_tests import (
@@ -191,8 +188,16 @@ def refine_optimizations(self, request: list[AIServiceRefinerRequest]) -> concur
 
     def _process_refinement_results(self) -> OptimizedCandidate | None:
         """Process refinement results and add to queue. 
We generate a weighted ranking based on the runtime and diff lines and select the best (round of 45%) of valid optimizations to be refined.""" future_refinements: list[concurrent.futures.Future] = [] + top_n_candidates = int( + min( + get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.args.effort), + len(self.all_refinements_data), + ) + ) - if len(self.all_refinements_data) <= REFINE_ALL_THRESHOLD: + if top_n_candidates == len(self.all_refinements_data) or len(self.all_refinements_data) <= get_effort_value( + EffortKeys.REFINE_ALL_THRESHOLD, self.args.effort + ): for data in self.all_refinements_data: future_refinements.append(self.refine_optimizations([data])) # noqa: PERF401 else: @@ -209,7 +214,6 @@ def _process_refinement_results(self) -> OptimizedCandidate | None: diffs_norm = normalize_by_max(diff_lens_list) # the lower the better score_dict = create_score_dictionary_from_metrics(weights, runtime_norm, diffs_norm) - top_n_candidates = int((TOP_N_REFINEMENTS * len(runtimes_list)) + 0.5) top_indecies = sorted(score_dict, key=score_dict.get)[:top_n_candidates] for idx in top_indecies: @@ -310,7 +314,7 @@ def __init__( self.function_benchmark_timings = function_benchmark_timings if function_benchmark_timings else {} self.total_benchmark_timings = total_benchmark_timings if total_benchmark_timings else {} self.replay_tests_dir = replay_tests_dir if replay_tests_dir else None - n_tests = Effort.get_number_of_generated_tests(args.effort) + n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, args.effort) self.executor = concurrent.futures.ThreadPoolExecutor( max_workers=n_tests + 3 if self.experiment_id is None else n_tests + 4 ) @@ -360,7 +364,7 @@ def generate_and_instrument_tests( str, ]: """Generate and instrument tests for the function.""" - n_tests = Effort.get_number_of_generated_tests(self.args.effort) + n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.args.effort) generated_test_paths = [ get_test_file_path( self.test_cfg.tests_root, self.function_to_optimize.function_name, test_index, test_type="unit" @@ -925,7 +929,7 @@ def determine_best_candidate( dependency_code=code_context.read_only_context_code, trace_id=self.get_trace_id(exp_type), line_profiler_results=original_code_baseline.line_profile_results["str_out"], - num_candidates=Effort.get_number_of_optimizer_lp_candidates(self.args.effort), + num_candidates=get_effort_value(EffortKeys.N_OPTIMIZER_LP_CANDIDATES, self.args.effort), experiment_metadata=ExperimentMetadata( id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment" ) @@ -1290,7 +1294,7 @@ def generate_tests( generated_perf_test_paths: list[Path], ) -> Result[tuple[int, GeneratedTestsList, dict[str, set[FunctionCalledInTest]], str], str]: """Generate unit tests and concolic tests for the function.""" - n_tests = Effort.get_number_of_generated_tests(self.args.effort) + n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.args.effort) assert len(generated_test_paths) == n_tests # Submit test generation tasks @@ -1352,7 +1356,7 @@ def generate_optimizations( run_experiment: bool = False, # noqa: FBT001, FBT002 ) -> Result[tuple[OptimizationSet, str], str]: """Generate optimization candidates for the function.""" - n_candidates = Effort.get_number_of_optimizer_candidates(self.args.effort) + n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.args.effort) future_optimization_candidates = self.executor.submit( self.aiservice_client.optimize_python_code, @@ -1919,8 +1923,9 @@ def 
repair_if_possible( test_results_count: int, exp_type: str, ) -> None: - if self.repair_counter >= MAX_REPAIRS_PER_TRACE: - logger.debug(f"Repair counter reached {MAX_REPAIRS_PER_TRACE}, skipping repair") + max_repairs = get_effort_value(EffortKeys.MAX_CODE_REPAIRS_PER_TRACE, self.args.effort) + if self.repair_counter >= max_repairs: + logger.debug(f"Repair counter reached {max_repairs}, skipping repair") return if candidate.source not in (OptimizedCandidateSource.OPTIMIZE, OptimizedCandidateSource.OPTIMIZE_LP): # only repair the first pass of the candidates for now @@ -1930,7 +1935,7 @@ def repair_if_possible( logger.debug("No diffs found, skipping repair") return result_unmatched_perc = len(diffs) / test_results_count - if result_unmatched_perc > REPAIR_UNMATCHED_PERCENTAGE_LIMIT: + if result_unmatched_perc > get_effort_value(EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT, self.args.effort): logger.debug(f"Result unmatched percentage is {result_unmatched_perc * 100}%, skipping repair") return From f4be23b6a95c391ac547c2bb8f4ff8652c324717 Mon Sep 17 00:00:00 2001 From: ali Date: Fri, 19 Dec 2025 17:01:44 +0200 Subject: [PATCH 03/12] fix --- codeflash/code_utils/config_consts.py | 2 +- codeflash/optimization/function_optimizer.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index e252fe9d1..885f48c57 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -62,7 +62,7 @@ class EffortKeys(StrEnum): }, # when valid optimizations count is N or less, refine all optimizations EffortKeys.REFINE_ALL_THRESHOLD.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4}, - # Top valid candidates percentage for refinements + # Top valid candidates for refinements EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4}, } diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 8771ff304..670a83d92 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -134,6 +134,7 @@ def __init__( ai_service_client: AiServiceClient, executor: concurrent.futures.ThreadPoolExecutor, future_all_code_repair: list[concurrent.futures.Future], + effort: str, ) -> None: self.candidate_queue = queue.Queue() self.line_profiler_done = False @@ -141,6 +142,7 @@ def __init__( self.candidate_len = len(initial_candidates) self.ai_service_client = ai_service_client self.executor = executor + self.effort = effort # Initialize queue with initial candidates for candidate in initial_candidates: @@ -190,13 +192,13 @@ def _process_refinement_results(self) -> OptimizedCandidate | None: future_refinements: list[concurrent.futures.Future] = [] top_n_candidates = int( min( - get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.args.effort), + get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.effort), len(self.all_refinements_data), ) ) if top_n_candidates == len(self.all_refinements_data) or len(self.all_refinements_data) <= get_effort_value( - EffortKeys.REFINE_ALL_THRESHOLD, self.args.effort + EffortKeys.REFINE_ALL_THRESHOLD, self.effort ): for data in self.all_refinements_data: future_refinements.append(self.refine_optimizations([data])) # noqa: PERF401 @@ -944,6 +946,7 @@ def determine_best_candidate( self.aiservice_client, self.executor, self.future_all_code_repair, + 
self.args.effort,
         )
         candidate_index = 0
 
From 2f7fc605dc94376775f3fe4e37f189a4ae8c34f1 Mon Sep 17 00:00:00 2001
From: ali
Date: Tue, 30 Dec 2025 23:08:39 +0200
Subject: [PATCH 04/12] set the right effort level for each case

---
 codeflash/code_utils/config_consts.py        | 5 +----
 codeflash/lsp/server.py                      | 2 ++
 codeflash/optimization/function_optimizer.py | 7 +++----
 codeflash/tracer.py                          | 2 ++
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
index 885f48c57..1f191e6d6 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -42,7 +42,6 @@ class EffortKeys(StrEnum):
     N_GENERATED_TESTS = auto()
     MAX_CODE_REPAIRS_PER_TRACE = auto()
     REPAIR_UNMATCHED_PERCENTAGE_LIMIT = auto()
-    REFINE_ALL_THRESHOLD = auto()
     TOP_VALID_CANDIDATES_FOR_REFINEMENT = auto()
 
 
@@ -54,14 +53,12 @@ class EffortKeys(StrEnum):
     # maximum number of repairs we will do for each function
     EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 4, EffortLevel.HIGH: 5},
     # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more strict)
-    # on the low effort we lower the limit to 20% to be more strict (less repairs)
+    # on the low effort we lower the limit to 20% to be more strict (less repairs, less time)
     EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT.value: {
         EffortLevel.LOW: 0.2,
         EffortLevel.MEDIUM: 0.4,
         EffortLevel.HIGH: 0.5,
     },
-    # when valid optimizations count is N or less, refine all optimizations
-    EffortKeys.REFINE_ALL_THRESHOLD.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4},
     # Top valid candidates for refinements
     EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4},
 }
diff --git a/codeflash/lsp/server.py b/codeflash/lsp/server.py
index 582e5033c..6e97a9e15 100644
--- a/codeflash/lsp/server.py
+++ b/codeflash/lsp/server.py
@@ -7,6 +7,7 @@
 from pygls.lsp.server import LanguageServer
 from pygls.protocol import LanguageServerProtocol
 
+from codeflash.code_utils.config_consts import EffortLevel
 from codeflash.either import Result
 from codeflash.models.models import CodeOptimizationContext
 
@@ -37,6 +38,7 @@ def prepare_optimizer_arguments(self, config_file: Path) -> None:
         args.config_file = config_file
         args.no_pr = True  # LSP server should not create PRs
         args.worktree = True
+        args.effort = EffortLevel.LOW.value  # low effort for high speed
         self.args = args
 
         # avoid initializing the optimizer during initialization, because it can cause an error if the api key is invalid
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 670a83d92..28f0998f0 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -192,14 +192,13 @@ def _process_refinement_results(self) -> OptimizedCandidate | None:
         future_refinements: list[concurrent.futures.Future] = []
         top_n_candidates = int(
             min(
-                get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.effort),
+                int(get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.effort)),
                 len(self.all_refinements_data),
             )
         )
 
-        if top_n_candidates == len(self.all_refinements_data) or len(self.all_refinements_data) <= get_effort_value(
-            EffortKeys.REFINE_ALL_THRESHOLD, self.effort
-        ):
+        if top_n_candidates == len(self.all_refinements_data):
+            # if we'll refine all candidates, we can 
skip the ranking and just refine them all for data in self.all_refinements_data: future_refinements.append(self.refine_optimizations([data])) # noqa: PERF401 else: diff --git a/codeflash/tracer.py b/codeflash/tracer.py index eb011befa..c26214f77 100644 --- a/codeflash/tracer.py +++ b/codeflash/tracer.py @@ -24,6 +24,7 @@ from codeflash.cli_cmds.console import console from codeflash.code_utils.code_utils import get_run_tmp_file from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE +from codeflash.code_utils.config_consts import EffortLevel from codeflash.code_utils.config_parser import parse_config_file from codeflash.tracing.pytest_parallelization import pytest_split @@ -214,6 +215,7 @@ def main(args: Namespace | None = None) -> ArgumentParser: from codeflash.optimization import optimizer + args.effort = EffortLevel.HIGH.value optimizer.run_with_args(args) # Delete the trace file and the replay test file if they exist From a126d9ef8978fca1a79f4701a8efb61f08a74547 Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 6 Jan 2026 02:31:48 +0200 Subject: [PATCH 05/12] number of candidates for model distribution & control adaptive optimization params with effort --- codeflash/api/aiservice.py | 13 +++++++------ codeflash/code_utils/config_consts.py | 20 ++++++++++---------- codeflash/optimization/function_optimizer.py | 14 ++++++++------ 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 38e21f0a7..14e74ee36 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -16,7 +16,6 @@ from codeflash.code_utils.env_utils import get_codeflash_api_key from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name from codeflash.code_utils.time_utils import humanize_runtime -from codeflash.lsp.helpers import is_LSP_enabled from codeflash.models.ExperimentMetadata import ExperimentMetadata from codeflash.models.models import ( AIServiceRefinerRequest, @@ -128,6 +127,7 @@ def optimize_python_code( # noqa: D417 experiment_metadata: ExperimentMetadata | None = None, *, is_async: bool = False, + n_candidates: int = 5, ) -> list[OptimizedCandidate]: """Optimize the given python code for performance by making a request to the Django endpoint. @@ -138,6 +138,7 @@ def optimize_python_code( # noqa: D417 - trace_id (str): Trace id of optimization run - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization - is_async (bool): Whether the function being optimized is async + - n_candidates (int): Number of candidates to generate Returns ------- @@ -160,10 +161,10 @@ def optimize_python_code( # noqa: D417 "repo_owner": git_repo_owner, "repo_name": git_repo_name, "is_async": is_async, - "lsp_mode": is_LSP_enabled(), "call_sequence": self.get_next_sequence(), + "n_candidates": n_candidates, } - logger.debug(f"Sending optimize request: trace_id={trace_id}, lsp_mode={payload['lsp_mode']}") + logger.debug(f"Sending optimize request: trace_id={trace_id}, n_candidates={payload['n_candidates']}") try: response = self.make_ai_service_request("/optimize", payload=payload, timeout=60) @@ -195,7 +196,7 @@ def optimize_python_code_line_profiler( # noqa: D417 dependency_code: str, trace_id: str, line_profiler_results: str, - num_candidates: int = 8, + n_candidates: int, experiment_metadata: ExperimentMetadata | None = None, ) -> list[OptimizedCandidate]: """Optimize the given python code for performance using line profiler results. 
@@ -207,6 +208,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
     - trace_id (str): Trace id of optimization run
     - line_profiler_results (str): Line profiler output to guide optimization
     - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
+    - n_candidates (int): Number of candidates to generate
 
     Returns
     -------
@@ -223,13 +225,12 @@ def optimize_python_code_line_profiler(  # noqa: D417
     payload = {
         "source_code": source_code,
         "dependency_code": dependency_code,
-        "n_candidates_lp": num_candidates,
+        "n_candidates": n_candidates,
         "line_profiler_results": line_profiler_results,
         "trace_id": trace_id,
         "python_version": platform.python_version(),
         "experiment_metadata": experiment_metadata,
         "codeflash_version": codeflash_version,
-        "lsp_mode": is_LSP_enabled(),
         "call_sequence": self.get_next_sequence(),
     }
 
diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
index 75949af92..19cdc56c4 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -25,14 +25,6 @@
 # LSP-specific
 TOTAL_LOOPING_TIME_LSP = 10.0  # Kept same timing for LSP mode to avoid in increase in performance reporting
 
-# Adaptive optimization
-# TODO (ali): make this configurable with effort arg once the PR is merged
-ADAPTIVE_OPTIMIZATION_THRESHOLD = 2  # Max adaptive optimizations per single candidate tree (for example : optimize -> refine -> adaptive -> another adaptive).
-# MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = 4  # maximum number of adaptive optimizations we will do for each function (this can be 2 adaptive optimizations for 2 candidates for example)
-MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = (
-    0  # disable adaptive optimizations until we have this value controlled by the effort arg
-)
-
 try:
     from codeflash.lsp.helpers import is_LSP_enabled
 
@@ -58,11 +50,13 @@ class EffortKeys(StrEnum):
     MAX_CODE_REPAIRS_PER_TRACE = auto()
     REPAIR_UNMATCHED_PERCENTAGE_LIMIT = auto()
     TOP_VALID_CANDIDATES_FOR_REFINEMENT = auto()
+    ADAPTIVE_OPTIMIZATION_THRESHOLD = auto()
+    MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = auto()
 
 
 EFFORT_VALUES: dict[str, dict[EffortLevel, any]] = {
-    EffortKeys.N_OPTIMIZER_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 4, EffortLevel.HIGH: 5},
-    EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 5, EffortLevel.HIGH: 6},
+    EffortKeys.N_OPTIMIZER_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 5, EffortLevel.HIGH: 6},
+    EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 4, EffortLevel.MEDIUM: 6, EffortLevel.HIGH: 7},
     # we don't use effort with generated tests for now
     EffortKeys.N_GENERATED_TESTS.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 2, EffortLevel.HIGH: 2},
     # maximum number of repairs we will do for each function
     EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 4, EffortLevel.HIGH: 5},
     # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more strict)
     # on the low effort we lower the limit to 20% to be more strict (less repairs, less time)
     EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT.value: {
         EffortLevel.LOW: 0.2,
         EffortLevel.MEDIUM: 0.4,
         EffortLevel.HIGH: 0.5,
     },
     # Top valid candidates for refinements
     EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4},
+    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 1, EffortLevel.HIGH: 3},
+    EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE.value: {
+        EffortLevel.LOW: 0,
+        EffortLevel.MEDIUM: 3,
+        EffortLevel.HIGH: 10,
+    },
 }
 
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 34db058ef..8cfed2614 100644
@@ -43,10 +43,8 
@@ unified_diff_strings, ) from codeflash.code_utils.config_consts import ( - ADAPTIVE_OPTIMIZATION_THRESHOLD, COVERAGE_THRESHOLD, INDIVIDUAL_TESTCASE_TIMEOUT, - MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE, REFINED_CANDIDATE_RANKING_WEIGHTS, REPEAT_OPTIMIZATION_PROBABILITY, TOTAL_LOOPING_TIME_EFFECTIVE, @@ -1018,7 +1016,7 @@ def determine_best_candidate( dependency_code=code_context.read_only_context_code, trace_id=self.get_trace_id(exp_type), line_profiler_results=original_code_baseline.line_profile_results["str_out"], - num_candidates=get_effort_value(EffortKeys.N_OPTIMIZER_LP_CANDIDATES, self.args.effort), + n_candidates=get_effort_value(EffortKeys.N_OPTIMIZER_LP_CANDIDATES, self.args.effort), experiment_metadata=ExperimentMetadata( id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment" ) @@ -1097,7 +1095,9 @@ def call_adaptive_optimize( eval_ctx: CandidateEvaluationContext, ai_service_client: AiServiceClient, ) -> concurrent.futures.Future[OptimizedCandidate | None] | None: - if self.adaptive_optimization_counter >= MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE: + if self.adaptive_optimization_counter >= get_effort_value( + EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE, self.args.effort + ): logger.debug( f"Max adaptive optimizations reached for {self.function_to_optimize.qualified_name}: {self.adaptive_optimization_counter}" ) @@ -1105,7 +1105,7 @@ def call_adaptive_optimize( adaptive_count = sum(1 for c in prev_candidates if c.source == OptimizedCandidateSource.ADAPTIVE) - if adaptive_count >= ADAPTIVE_OPTIMIZATION_THRESHOLD: + if adaptive_count >= get_effort_value(EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD, self.args.effort): return None request_candidates = [] @@ -1492,7 +1492,7 @@ def generate_optimizations( run_experiment: bool = False, # noqa: FBT001, FBT002 ) -> Result[tuple[OptimizationSet, str], str]: """Generate optimization candidates for the function. 
Backend handles multi-model diversity.""" - # n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.args.effort) + n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.args.effort) future_optimization_candidates = self.executor.submit( self.aiservice_client.optimize_python_code, read_writable_code.markdown, @@ -1500,6 +1500,7 @@ def generate_optimizations( self.function_trace_id[:-4] + "EXP0" if run_experiment else self.function_trace_id, ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None, is_async=self.function_to_optimize.is_async, + n_candidates=n_candidates, ) future_references = self.executor.submit( @@ -1522,6 +1523,7 @@ def generate_optimizations( self.function_trace_id[:-4] + "EXP1", ExperimentMetadata(id=self.experiment_id, group="experiment"), is_async=self.function_to_optimize.is_async, + n_candidates=n_candidates, ) futures.append(future_candidates_exp) From 18e0b249c522aa83cce6caf92189d658594d3c40 Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 6 Jan 2026 03:50:43 +0200 Subject: [PATCH 06/12] default effort value for function optimizer --- codeflash/optimization/function_optimizer.py | 24 ++++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 8cfed2614..a253acefb 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -49,6 +49,7 @@ REPEAT_OPTIMIZATION_PROBABILITY, TOTAL_LOOPING_TIME_EFFECTIVE, EffortKeys, + EffortLevel, get_effort_value, ) from codeflash.code_utils.deduplicate_code import normalize_code @@ -375,6 +376,9 @@ def __init__( self.experiment_id = os.getenv("CODEFLASH_EXPERIMENT_ID", None) self.local_aiservice_client = LocalAiServiceClient() if self.experiment_id else None self.test_files = TestFiles(test_files=[]) + + self.effort = getattr(args, "effort", EffortLevel.MEDIUM.value) if args else EffortLevel.MEDIUM.value + self.args = args # Check defaults for these self.function_trace_id: str = str(uuid.uuid4()) self.original_module_path = module_name_from_file_path(self.function_to_optimize.file_path, self.project_root) @@ -382,7 +386,7 @@ def __init__( self.function_benchmark_timings = function_benchmark_timings if function_benchmark_timings else {} self.total_benchmark_timings = total_benchmark_timings if total_benchmark_timings else {} self.replay_tests_dir = replay_tests_dir if replay_tests_dir else None - n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, args.effort) + n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.effort) self.executor = concurrent.futures.ThreadPoolExecutor( max_workers=n_tests + 3 if self.experiment_id is None else n_tests + 4 ) @@ -434,7 +438,7 @@ def generate_and_instrument_tests( str, ]: """Generate and instrument tests for the function.""" - n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.args.effort) + n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.effort) generated_test_paths = [ get_test_file_path( self.test_cfg.tests_root, self.function_to_optimize.function_name, test_index, test_type="unit" @@ -1016,7 +1020,7 @@ def determine_best_candidate( dependency_code=code_context.read_only_context_code, trace_id=self.get_trace_id(exp_type), line_profiler_results=original_code_baseline.line_profile_results["str_out"], - n_candidates=get_effort_value(EffortKeys.N_OPTIMIZER_LP_CANDIDATES, self.args.effort), + 
n_candidates=get_effort_value(EffortKeys.N_OPTIMIZER_LP_CANDIDATES, self.effort), experiment_metadata=ExperimentMetadata( id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment" ) @@ -1031,7 +1035,7 @@ def determine_best_candidate( self.aiservice_client, self.executor, self.future_all_code_repair, - self.args.effort, + self.effort, self.future_adaptive_optimizations, ) candidate_index = 0 @@ -1096,7 +1100,7 @@ def call_adaptive_optimize( ai_service_client: AiServiceClient, ) -> concurrent.futures.Future[OptimizedCandidate | None] | None: if self.adaptive_optimization_counter >= get_effort_value( - EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE, self.args.effort + EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE, self.effort ): logger.debug( f"Max adaptive optimizations reached for {self.function_to_optimize.qualified_name}: {self.adaptive_optimization_counter}" @@ -1105,7 +1109,7 @@ def call_adaptive_optimize( adaptive_count = sum(1 for c in prev_candidates if c.source == OptimizedCandidateSource.ADAPTIVE) - if adaptive_count >= get_effort_value(EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD, self.args.effort): + if adaptive_count >= get_effort_value(EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD, self.effort): return None request_candidates = [] @@ -1425,7 +1429,7 @@ def generate_tests( generated_perf_test_paths: list[Path], ) -> Result[tuple[int, GeneratedTestsList, dict[str, set[FunctionCalledInTest]], str], str]: """Generate unit tests and concolic tests for the function.""" - n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.args.effort) + n_tests = get_effort_value(EffortKeys.N_GENERATED_TESTS, self.effort) assert len(generated_test_paths) == n_tests if not self.args.no_gen_tests: @@ -1492,7 +1496,7 @@ def generate_optimizations( run_experiment: bool = False, # noqa: FBT001, FBT002 ) -> Result[tuple[OptimizationSet, str], str]: """Generate optimization candidates for the function. 
Backend handles multi-model diversity.""" - n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.args.effort) + n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.effort) future_optimization_candidates = self.executor.submit( self.aiservice_client.optimize_python_code, read_writable_code.markdown, @@ -2059,7 +2063,7 @@ def repair_if_possible( test_results_count: int, exp_type: str, ) -> None: - max_repairs = get_effort_value(EffortKeys.MAX_CODE_REPAIRS_PER_TRACE, self.args.effort) + max_repairs = get_effort_value(EffortKeys.MAX_CODE_REPAIRS_PER_TRACE, self.effort) if self.repair_counter >= max_repairs: logger.debug(f"Repair counter reached {max_repairs}, skipping repair") return @@ -2071,7 +2075,7 @@ def repair_if_possible( logger.debug("No diffs found, skipping repair") return result_unmatched_perc = len(diffs) / test_results_count - if result_unmatched_perc > get_effort_value(EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT, self.args.effort): + if result_unmatched_perc > get_effort_value(EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT, self.effort): logger.debug(f"Result unmatched percentage is {result_unmatched_perc * 100}%, skipping repair") return From 8afe34fc59a62d5321449ef2dfdbf31686341472 Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 6 Jan 2026 04:07:04 +0200 Subject: [PATCH 07/12] fix enum python issue --- codeflash/code_utils/config_consts.py | 33 ++++++++++++++------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index 19cdc56c4..9257cf040 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -1,4 +1,5 @@ -from enum import StrEnum, auto +from enum import Enum +from typing import Any MAX_TEST_RUN_ITERATIONS = 5 INDIVIDUAL_TESTCASE_TIMEOUT = 15 @@ -37,24 +38,24 @@ MAX_CONTEXT_LEN_REVIEW = 1000 -class EffortLevel(StrEnum): - LOW = auto() - MEDIUM = auto() - HIGH = auto() +class EffortLevel(str, Enum): + LOW = "low" + MEDIUM = "medium" + HIGH = "high" -class EffortKeys(StrEnum): - N_OPTIMIZER_CANDIDATES = auto() - N_OPTIMIZER_LP_CANDIDATES = auto() - N_GENERATED_TESTS = auto() - MAX_CODE_REPAIRS_PER_TRACE = auto() - REPAIR_UNMATCHED_PERCENTAGE_LIMIT = auto() - TOP_VALID_CANDIDATES_FOR_REFINEMENT = auto() - ADAPTIVE_OPTIMIZATION_THRESHOLD = auto() - MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = auto() +class EffortKeys(str, Enum): + N_OPTIMIZER_CANDIDATES = "N_OPTIMIZER_CANDIDATES" + N_OPTIMIZER_LP_CANDIDATES = "N_OPTIMIZER_LP_CANDIDATES" + N_GENERATED_TESTS = "N_GENERATED_TESTS" + MAX_CODE_REPAIRS_PER_TRACE = "MAX_CODE_REPAIRS_PER_TRACE" + REPAIR_UNMATCHED_PERCENTAGE_LIMIT = "REPAIR_UNMATCHED_PERCENTAGE_LIMIT" + TOP_VALID_CANDIDATES_FOR_REFINEMENT = "TOP_VALID_CANDIDATES_FOR_REFINEMENT" + ADAPTIVE_OPTIMIZATION_THRESHOLD = "ADAPTIVE_OPTIMIZATION_THRESHOLD" + MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = "MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE" -EFFORT_VALUES: dict[str, dict[EffortLevel, any]] = { +EFFORT_VALUES: dict[str, dict[EffortLevel, Any]] = { EffortKeys.N_OPTIMIZER_CANDIDATES.value: {EffortLevel.LOW: 3, EffortLevel.MEDIUM: 5, EffortLevel.HIGH: 6}, EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 4, EffortLevel.MEDIUM: 6, EffortLevel.HIGH: 7}, # we don't use effort with generated tests for now @@ -79,7 +80,7 @@ class EffortKeys(StrEnum): } -def get_effort_value(key: EffortKeys, effort: EffortLevel) -> any: +def get_effort_value(key: EffortKeys, effort: EffortLevel) -> Any: # noqa: ANN401 key_str = 
key.value if key_str in EFFORT_VALUES: if effort in EFFORT_VALUES[key_str]: From 54cf458f950f5fdc2b054d5235d3f672d5681a1d Mon Sep 17 00:00:00 2001 From: mohammed ahmed <64513301+mohammedahmed18@users.noreply.github.com> Date: Tue, 6 Jan 2026 23:42:34 +0000 Subject: [PATCH 08/12] merge main into optimization-effort --- codeflash/code_utils/config_consts.py | 3 + codeflash/context/code_context_extractor.py | 57 ++++-- .../context/unused_definition_remover.py | 12 ++ codeflash/models/models.py | 3 + codeflash/optimization/function_optimizer.py | 164 +++++++++-------- tests/test_code_context_extractor.py | 170 +++++++++++++++++- tests/test_instrument_line_profiler.py | 6 +- tests/test_remove_unused_definitions.py | 59 ++++++ 8 files changed, 382 insertions(+), 92 deletions(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index 9257cf040..f3a881e1b 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -26,6 +26,9 @@ # LSP-specific TOTAL_LOOPING_TIME_LSP = 10.0 # Kept same timing for LSP mode to avoid in increase in performance reporting +# setting this value to 1 will disable repair if there is at least one correct candidate +MIN_CORRECT_CANDIDATES = 2 + try: from codeflash.lsp.helpers import is_LSP_enabled diff --git a/codeflash/context/code_context_extractor.py b/codeflash/context/code_context_extractor.py index 14d549633..a411bafac 100644 --- a/codeflash/context/code_context_extractor.py +++ b/codeflash/context/code_context_extractor.py @@ -446,31 +446,45 @@ def get_function_sources_from_jedi( definition_path = definition.module_path # The definition is part of this project and not defined within the original function - if ( + is_valid_definition = ( str(definition_path).startswith(str(project_root_path) + os.sep) and not path_belongs_to_site_packages(definition_path) and definition.full_name - and definition.type == "function" and not belongs_to_function_qualified(definition, qualified_function_name) and definition.full_name.startswith(definition.module_name) + ) + if is_valid_definition and definition.type == "function": + qualified_name = get_qualified_name(definition.module_name, definition.full_name) # Avoid nested functions or classes. Only class.function is allowed - and len( - (qualified_name := get_qualified_name(definition.module_name, definition.full_name)).split( - "." 
+ if len(qualified_name.split(".")) <= 2: + function_source = FunctionSource( + file_path=definition_path, + qualified_name=qualified_name, + fully_qualified_name=definition.full_name, + only_function_name=definition.name, + source_code=definition.get_line_code(), + jedi_definition=definition, ) + file_path_to_function_source[definition_path].add(function_source) + function_source_list.append(function_source) + # When a class is instantiated (e.g., MyClass()), track its __init__ as a helper + # This ensures the class definition with constructor is included in testgen context + elif is_valid_definition and definition.type == "class": + init_qualified_name = get_qualified_name( + definition.module_name, f"{definition.full_name}.__init__" ) - <= 2 - ): - function_source = FunctionSource( - file_path=definition_path, - qualified_name=qualified_name, - fully_qualified_name=definition.full_name, - only_function_name=definition.name, - source_code=definition.get_line_code(), - jedi_definition=definition, - ) - file_path_to_function_source[definition_path].add(function_source) - function_source_list.append(function_source) + # Only include if it's a top-level class (not nested) + if len(init_qualified_name.split(".")) <= 2: + function_source = FunctionSource( + file_path=definition_path, + qualified_name=init_qualified_name, + fully_qualified_name=f"{definition.full_name}.__init__", + only_function_name="__init__", + source_code=definition.get_line_code(), + jedi_definition=definition, + ) + file_path_to_function_source[definition_path].add(function_source) + function_source_list.append(function_source) return file_path_to_function_source, function_source_list @@ -647,7 +661,10 @@ def prune_cst_for_code_hashing( # noqa: PLR0911 if isinstance(node, cst.FunctionDef): qualified_name = f"{prefix}.{node.name.value}" if prefix else node.name.value - if qualified_name in target_functions: + # For hashing, exclude __init__ methods even if in target_functions + # because they don't affect the semantic behavior being hashed + # But include other dunder methods like __call__ which do affect behavior + if qualified_name in target_functions and node.name.value != "__init__": new_body = remove_docstring_from_body(node.body) if isinstance(node.body, cst.IndentedBlock) else node.body return node.with_changes(body=new_body), True return None, False @@ -666,7 +683,9 @@ def prune_cst_for_code_hashing( # noqa: PLR0911 for stmt in node.body.body: if isinstance(stmt, cst.FunctionDef): qualified_name = f"{class_prefix}.{stmt.name.value}" - if qualified_name in target_functions: + # For hashing, exclude __init__ methods even if in target_functions + # but include other methods like __call__ which affect behavior + if qualified_name in target_functions and stmt.name.value != "__init__": stmt_with_changes = stmt.with_changes( body=remove_docstring_from_body(cast("cst.IndentedBlock", stmt.body)) ) diff --git a/codeflash/context/unused_definition_remover.py b/codeflash/context/unused_definition_remover.py index 8e6ea057c..823cb735b 100644 --- a/codeflash/context/unused_definition_remover.py +++ b/codeflash/context/unused_definition_remover.py @@ -223,6 +223,18 @@ def visit_ClassDef(self, node: cst.ClassDef) -> None: self.current_class = class_name self.current_top_level_name = class_name + # Track base classes as dependencies + for base in node.bases: + if isinstance(base.value, cst.Name): + base_name = base.value.value + if base_name in self.definitions and class_name in self.definitions: + 
self.definitions[class_name].dependencies.add(base_name) + elif isinstance(base.value, cst.Attribute): + # Handle cases like module.ClassName + attr_name = base.value.attr.value + if attr_name in self.definitions and class_name in self.definitions: + self.definitions[class_name].dependencies.add(attr_name) + self.class_depth += 1 def leave_ClassDef(self, original_node: cst.ClassDef) -> None: # noqa: ARG002 diff --git a/codeflash/models/models.py b/codeflash/models/models.py index 1af946da4..844ff9603 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -463,6 +463,9 @@ def register_new_candidate( def get_speedup_ratio(self, optimization_id: str) -> float | None: return self.speedup_ratios.get(optimization_id) + def get_optimized_runtime(self, optimization_id: str) -> float | None: + return self.optimized_runtimes.get(optimization_id) + @dataclass(frozen=True) class TestsInFile: diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index a253acefb..0b525e3e7 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -45,6 +45,7 @@ from codeflash.code_utils.config_consts import ( COVERAGE_THRESHOLD, INDIVIDUAL_TESTCASE_TIMEOUT, + MIN_CORRECT_CANDIDATES, REFINED_CANDIDATE_RANKING_WEIGHTS, REPEAT_OPTIMIZATION_PROBABILITY, TOTAL_LOOPING_TIME_EFFECTIVE, @@ -181,11 +182,11 @@ def __init__( self, initial_candidates: list[OptimizedCandidate], future_line_profile_results: concurrent.futures.Future, - all_refinements_data: list[AIServiceRefinerRequest], ai_service_client: AiServiceClient, - executor: concurrent.futures.ThreadPoolExecutor, + eval_ctx: CandidateEvaluationContext, + original_markdown_code: str, + future_all_refinements: list[concurrent.futures.Future], future_all_code_repair: list[concurrent.futures.Future], - effort: str, future_adaptive_optimizations: list[concurrent.futures.Future], ) -> None: self.candidate_queue = queue.Queue() @@ -194,9 +195,9 @@ def __init__( self.refinement_done = False self.candidate_len = len(initial_candidates) self.ai_service_client = ai_service_client - self.executor = executor - self.effort = effort self.refinement_calls_count = 0 + self.original_markdown_code = original_markdown_code + self.eval_ctx = eval_ctx # Initialize queue with initial candidates for candidate in initial_candidates: @@ -204,7 +205,7 @@ def __init__( self.candidate_queue.put(candidate) self.future_line_profile_results = future_line_profile_results - self.all_refinements_data = all_refinements_data + self.future_all_refinements = future_all_refinements self.future_all_code_repair = future_all_code_repair self.future_adaptive_optimizations = future_adaptive_optimizations @@ -235,7 +236,13 @@ def _handle_empty_queue(self) -> CandidateNode | None: lambda: self.future_all_code_repair.clear(), ) if self.line_profiler_done and not self.refinement_done: - return self._process_refinement_results() + return self._process_candidates( + self.future_all_refinements, + "Refining generated code for improved quality and performance...", + "Added {0} candidates from refinement, total candidates now: {1}", + lambda: setattr(self, "refinement_done", True), + filter_candidates_func=self._filter_refined_candidates, + ) if len(self.future_adaptive_optimizations) > 0: return self._process_candidates( self.future_adaptive_optimizations, @@ -251,6 +258,7 @@ def _process_candidates( loading_msg: str, success_msg: str, callback: Callable[[], None], + filter_candidates_func: 
Callable[[list[OptimizedCandidate]], list[OptimizedCandidate]] | None = None,
     ) -> CandidateNode | None:
         if len(future_candidates) == 0:
             return None
@@ -269,6 +277,7 @@
         else:
             candidates.append(candidate_result)
 
+        candidates = filter_candidates_func(candidates) if filter_candidates_func else candidates
         for candidate in candidates:
             self.forest.add(candidate)
             self.candidate_queue.put(candidate)
@@ -280,55 +289,50 @@
             callback()
         return self.get_next_candidate()
 
-    def refine_optimizations(self, request: list[AIServiceRefinerRequest]) -> concurrent.futures.Future:
-        return self.executor.submit(self.ai_service_client.optimize_python_code_refinement, request=request)
+    def _filter_refined_candidates(self, candidates: list[OptimizedCandidate]) -> list[OptimizedCandidate]:
+        """We generate a weighted ranking based on the runtime and diff lines and select the best of the valid optimizations to be tested."""
+        self.refinement_calls_count += len(candidates)
 
-    def _process_refinement_results(self) -> CandidateNode | None:
-        """Process refinement results and add to queue. We generate a weighted ranking based on the runtime and diff lines and select the best (round of 45%) of valid optimizations to be refined."""
-        future_refinements: list[concurrent.futures.Future] = []
         top_n_candidates = int(
             min(
                 int(get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.effort)),
                 len(self.all_refinements_data),
             )
         )
-        refinement_call_index = 0
 
-        if top_n_candidates == len(self.all_refinements_data):
-            # if we'll refine all candidates, we can skip the ranking and just refine them all
-            for data in self.all_refinements_data:
-                refinement_call_index += 1
-                future_refinements.append(self.refine_optimizations([data]))
-        else:
-            diff_lens_list = []
-            runtimes_list = []
-            for c in self.all_refinements_data:
-                diff_lens_list.append(diff_length(c.original_source_code, c.optimized_source_code))
-                runtimes_list.append(c.optimized_code_runtime)
+        if len(candidates) == top_n_candidates:
+            # no need for ranking since we will return all candidates
+            return candidates
+
+        diff_lens_list = []
+        runtimes_list = []
+        for c in candidates:
+            # the current refined candidate has not been benchmarked yet; the parent candidate's runtime is the closest estimate we have
+            parent_id = c.parent_id
+            parent_candidate_node = self.forest.get_node(parent_id)
+            parent_optimized_runtime = self.eval_ctx.get_optimized_runtime(parent_id)
+            if not parent_optimized_runtime or not parent_candidate_node:
+                continue
+            diff_lens_list.append(
+                diff_length(self.original_markdown_code, 
parent_candidate_node.candidate.source_code.markdown)
+            )
+            runtimes_list.append(parent_optimized_runtime)
+
+        if not runtimes_list or not diff_lens_list:
+            # should not happen
+            logger.warning("No valid candidates for refinement while filtering")
+            return candidates
+
+        runtime_w, diff_w = REFINED_CANDIDATE_RANKING_WEIGHTS
+        weights = choose_weights(runtime=runtime_w, diff=diff_w)
+
+        runtime_norm = normalize_by_max(runtimes_list)
+        diffs_norm = normalize_by_max(diff_lens_list)
+        # lower scores are better
+        score_dict = create_score_dictionary_from_metrics(weights, runtime_norm, diffs_norm)
+        top_indices = sorted(score_dict, key=score_dict.get)[:top_n_candidates]
+
+        return [candidates[idx] for idx in top_indices]
 
     def is_done(self) -> bool:
         """Check if processing is complete."""
@@ -392,6 +396,7 @@ def __init__(
         )
         self.optimization_review = ""
         self.future_all_code_repair: list[concurrent.futures.Future] = []
+        self.future_all_refinements: list[concurrent.futures.Future] = []
         self.future_adaptive_optimizations: list[concurrent.futures.Future] = []
         self.repair_counter = 0  # track how many repairs we did for each function
         self.adaptive_optimization_counter = 0  # track how many adaptive optimizations we did for each function
@@ -838,7 +843,6 @@ def process_single_candidate(
         original_helper_code: dict[Path, str],
         file_path_to_helper_classes: dict[Path, set[str]],
         eval_ctx: CandidateEvaluationContext,
-        all_refinements_data: list[AIServiceRefinerRequest],
         exp_type: str,
         function_references: str,
     ) -> BestOptimization | None:
@@ -894,6 +898,7 @@ def process_single_candidate(
             baseline_results=original_code_baseline,
             original_helper_code=original_helper_code,
             file_path_to_helper_classes=file_path_to_helper_classes,
+            eval_ctx=eval_ctx,
             code_context=code_context,
             candidate=candidate,
             exp_type=exp_type,
@@ -947,33 +952,40 @@ def process_single_candidate(
             c.source == OptimizedCandidateSource.REFINE for c in current_tree_candidates
         )
 
+        aiservice_client = self.aiservice_client if exp_type == "EXP0" else self.local_aiservice_client
+
         if is_candidate_refined_before:
             future_adaptive_optimization = self.call_adaptive_optimize(
                 trace_id=self.get_trace_id(exp_type),
                 original_source_code=code_context.read_writable_code.markdown,
                 prev_candidates=current_tree_candidates,
                 eval_ctx=eval_ctx,
-                ai_service_client=self.aiservice_client if exp_type == "EXP0" else self.local_aiservice_client,
+                ai_service_client=aiservice_client,
             )
             if future_adaptive_optimization:
                 self.future_adaptive_optimizations.append(future_adaptive_optimization)
         else:
-            all_refinements_data.append(
-                AIServiceRefinerRequest(
-                    optimization_id=best_optimization.candidate.optimization_id,
-                    original_source_code=code_context.read_writable_code.markdown,
-                    read_only_dependency_code=code_context.read_only_context_code,
-                    original_code_runtime=original_code_baseline.runtime,
-                    optimized_source_code=best_optimization.candidate.source_code.markdown,
-                    optimized_explanation=best_optimization.candidate.explanation,
-                    optimized_code_runtime=best_optimization.runtime,
-                    speedup=f"{int(performance_gain(original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=best_optimization.runtime) * 100)}%",
-                    trace_id=self.get_trace_id(exp_type),
-                    original_line_profiler_results=original_code_baseline.line_profile_results["str_out"],
-                    optimized_line_profiler_results=best_optimization.line_profiler_test_results["str_out"],
-                    function_references=function_references,
-                )
+            future_refinement = self.executor.submit(
aiservice_client.optimize_python_code_refinement, + request=[ + AIServiceRefinerRequest( + optimization_id=best_optimization.candidate.optimization_id, + original_source_code=code_context.read_writable_code.markdown, + read_only_dependency_code=code_context.read_only_context_code, + original_code_runtime=original_code_baseline.runtime, + optimized_source_code=best_optimization.candidate.source_code.markdown, + optimized_explanation=best_optimization.candidate.explanation, + optimized_code_runtime=best_optimization.runtime, + speedup=f"{int(performance_gain(original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=best_optimization.runtime) * 100)}%", + trace_id=self.get_trace_id(exp_type), + original_line_profiler_results=original_code_baseline.line_profile_results["str_out"], + optimized_line_profiler_results=best_optimization.line_profiler_test_results["str_out"], + function_references=function_references, + ) + ], ) + self.future_all_refinements.append(future_refinement) + # Display runtime information if is_LSP_enabled(): lsp_log(LspMarkdownMessage(markdown=tree_to_markdown(tree))) @@ -1005,9 +1017,11 @@ def determine_best_candidate( # Initialize evaluation context and async tasks eval_ctx = CandidateEvaluationContext() - all_refinements_data: list[AIServiceRefinerRequest] = [] + + self.future_all_refinements.clear() self.future_all_code_repair.clear() self.future_adaptive_optimizations.clear() + self.repair_counter = 0 self.adaptive_optimization_counter = 0 @@ -1031,9 +1045,10 @@ def determine_best_candidate( processor = CandidateProcessor( candidates, future_line_profile_results, - all_refinements_data, self.aiservice_client, - self.executor, + eval_ctx, + code_context.read_writable_code.markdown, + self.future_all_refinements, self.future_all_code_repair, self.effort, self.future_adaptive_optimizations, @@ -1058,7 +1073,6 @@ def determine_best_candidate( original_helper_code=original_helper_code, file_path_to_helper_classes=file_path_to_helper_classes, eval_ctx=eval_ctx, - all_refinements_data=all_refinements_data, exp_type=exp_type, function_references=function_references, ) @@ -2059,6 +2073,7 @@ def repair_if_possible( self, candidate: OptimizedCandidate, diffs: list[TestDiff], + eval_ctx: CandidateEvaluationContext, code_context: CodeOptimizationContext, test_results_count: int, exp_type: str, @@ -2067,6 +2082,12 @@ def repair_if_possible( if self.repair_counter >= max_repairs: logger.debug(f"Repair counter reached {max_repairs}, skipping repair") return + + successful_candidates_count = sum(1 for is_correct in eval_ctx.is_correct.values() if is_correct) + if successful_candidates_count >= MIN_CORRECT_CANDIDATES: + logger.debug(f"{successful_candidates_count} of the candidates were correct, no need to repair") + return + if candidate.source not in (OptimizedCandidateSource.OPTIMIZE, OptimizedCandidateSource.OPTIMIZE_LP): # only repair the first pass of the candidates for now logger.debug(f"Candidate is a result of {candidate.source.value}, skipping repair") @@ -2104,6 +2125,7 @@ def run_optimized_candidate( baseline_results: OriginalCodeBaseline, original_helper_code: dict[Path, str], file_path_to_helper_classes: dict[Path, set[str]], + eval_ctx: CandidateEvaluationContext, code_context: CodeOptimizationContext, candidate: OptimizedCandidate, exp_type: str, @@ -2159,7 +2181,9 @@ def run_optimized_candidate( logger.info("h3|Test results matched ✅") console.rule() else: - self.repair_if_possible(candidate, diffs, code_context, len(candidate_behavior_results), 
exp_type) + self.repair_if_possible( + candidate, diffs, eval_ctx, code_context, len(candidate_behavior_results), exp_type + ) return self.get_results_not_matched_error() logger.info(f"loading|Running performance tests for candidate {optimization_candidate_index}...") diff --git a/tests/test_code_context_extractor.py b/tests/test_code_context_extractor.py index aa4e2880f..b7cce0869 100644 --- a/tests/test_code_context_extractor.py +++ b/tests/test_code_context_extractor.py @@ -84,7 +84,8 @@ def test_code_replacement10() -> None: code_ctx = get_code_optimization_context(function_to_optimize=func_top_optimize, project_root_path=file_path.parent) qualified_names = {func.qualified_name for func in code_ctx.helper_functions} - assert qualified_names == {"HelperClass.helper_method"} # Nested method should not be in here + # HelperClass.__init__ is now tracked because HelperClass(self.name) instantiates the class + assert qualified_names == {"HelperClass.helper_method", "HelperClass.__init__"} # Nested method should not be in here read_write_context, read_only_context = code_ctx.read_writable_code, code_ctx.read_only_context_code hashing_context = code_ctx.hashing_code_context @@ -570,6 +571,8 @@ def __call__(self, *args: _P.args, **kwargs: _P.kwargs) -> _R: class AbstractCacheBackend(CacheBackend, Protocol[_KEY_T, _STORE_T]): """Interface for cache backends used by the persistent cache decorator.""" + def __init__(self) -> None: ... + def hash_key( self, *, @@ -1296,6 +1299,8 @@ def __repr__(self) -> str: ``` ```python:{path_to_transform_utils.relative_to(project_root)} class DataTransformer: + def __init__(self): + self.data = None def transform(self, data): self.data = data @@ -1599,7 +1604,11 @@ def __repr__(self) -> str: \"\"\"Return a string representation of the DataProcessor.\"\"\" return f"DataProcessor(default_prefix={{self.default_prefix!r}})" ``` - +```python:{path_to_transform_utils.relative_to(project_root)} +class DataTransformer: + def __init__(self): + self.data = None +``` """ expected_hashing_context = f""" ```python:utils.py @@ -1705,6 +1714,7 @@ def test_direct_module_import() -> None: expected_read_only_context = """ ```python:utils.py +import math from transform_utils import DataTransformer class DataProcessor: @@ -1712,6 +1722,11 @@ class DataProcessor: number = 1 + def __init__(self, default_prefix: str = "PREFIX_"): + \"\"\"Initialize the DataProcessor with a default prefix.\"\"\" + self.default_prefix = default_prefix + self.number += math.log(self.number) + def __repr__(self) -> str: \"\"\"Return a string representation of the DataProcessor.\"\"\" return f"DataProcessor(default_prefix={self.default_prefix!r})" @@ -2727,3 +2742,154 @@ async def async_function(): # Verify correct order expected_order = ["GLOBAL_CONSTANT", "ANOTHER_CONSTANT", "FINAL_ASSIGNMENT"] assert collector.assignment_order == expected_order + + +def test_class_instantiation_includes_init_as_helper(tmp_path: Path) -> None: + """Test that when a class is instantiated, its __init__ method is tracked as a helper. + + This test verifies the fix for the bug where class constructors were not + included in the context when only the class instantiation was called + (not any other methods). This caused LLMs to not know the constructor + signatures when generating tests. 
+ """ + code = ''' +class DataDumper: + """A class that dumps data.""" + + def __init__(self, data): + """Initialize with data.""" + self.data = data + + def dump(self): + """Dump the data.""" + return self.data + + +def target_function(): + # Only instantiates DataDumper, doesn't call any other methods + dumper = DataDumper({"key": "value"}) + return dumper +''' + file_path = tmp_path / "test_code.py" + file_path.write_text(code, encoding="utf-8") + opt = Optimizer( + Namespace( + project_root=file_path.parent.resolve(), + disable_telemetry=True, + tests_root="tests", + test_framework="pytest", + pytest_cmd="pytest", + experiment_id=None, + test_project_root=Path().resolve(), + ) + ) + function_to_optimize = FunctionToOptimize( + function_name="target_function", + file_path=file_path, + parents=[], + starting_line=None, + ending_line=None, + ) + + code_ctx = get_code_optimization_context(function_to_optimize, opt.args.project_root) + + # The __init__ method should be tracked as a helper since DataDumper() instantiates the class + qualified_names = {func.qualified_name for func in code_ctx.helper_functions} + assert "DataDumper.__init__" in qualified_names, ( + "DataDumper.__init__ should be tracked as a helper when the class is instantiated" + ) + + # The testgen context should contain the class with __init__ (critical for LLM to know constructor) + testgen_context = code_ctx.testgen_context.markdown + assert "class DataDumper:" in testgen_context, "DataDumper class should be in testgen context" + assert "def __init__(self, data):" in testgen_context, ( + "__init__ method should be included in testgen context" + ) + + # The hashing context should NOT contain __init__ (excluded for stability) + hashing_context = code_ctx.hashing_code_context + assert "__init__" not in hashing_context, ( + "__init__ should NOT be in hashing context (excluded for hash stability)" + ) + + +def test_class_instantiation_preserves_full_class_in_testgen(tmp_path: Path) -> None: + """Test that instantiated classes are fully preserved in testgen context. + + This is specifically for the unstructured LayoutDumper bug where helper classes + that were instantiated but had no other methods called were being excluded + from the testgen context. 
+ """ + code = ''' +class LayoutDumper: + """Base class for layout dumpers.""" + layout_source: str = "unknown" + + def __init__(self, layout): + self._layout = layout + + def dump(self) -> dict: + raise NotImplementedError() + + +class ObjectDetectionLayoutDumper(LayoutDumper): + """Specific dumper for object detection layouts.""" + + def __init__(self, layout): + super().__init__(layout) + + def dump(self) -> dict: + return {"type": "object_detection", "layout": self._layout} + + +def dump_layout(layout_type, layout): + """Dump a layout based on its type.""" + if layout_type == "object_detection": + dumper = ObjectDetectionLayoutDumper(layout) + else: + dumper = LayoutDumper(layout) + return dumper.dump() +''' + file_path = tmp_path / "test_code.py" + file_path.write_text(code, encoding="utf-8") + opt = Optimizer( + Namespace( + project_root=file_path.parent.resolve(), + disable_telemetry=True, + tests_root="tests", + test_framework="pytest", + pytest_cmd="pytest", + experiment_id=None, + test_project_root=Path().resolve(), + ) + ) + function_to_optimize = FunctionToOptimize( + function_name="dump_layout", + file_path=file_path, + parents=[], + starting_line=None, + ending_line=None, + ) + + code_ctx = get_code_optimization_context(function_to_optimize, opt.args.project_root) + qualified_names = {func.qualified_name for func in code_ctx.helper_functions} + + # Both class __init__ methods should be tracked as helpers + assert "ObjectDetectionLayoutDumper.__init__" in qualified_names, ( + "ObjectDetectionLayoutDumper.__init__ should be tracked" + ) + assert "LayoutDumper.__init__" in qualified_names, ( + "LayoutDumper.__init__ should be tracked" + ) + + # The testgen context should include both classes with their __init__ methods + testgen_context = code_ctx.testgen_context.markdown + assert "class LayoutDumper:" in testgen_context, "LayoutDumper should be in testgen context" + assert "class ObjectDetectionLayoutDumper" in testgen_context, ( + "ObjectDetectionLayoutDumper should be in testgen context" + ) + + # Both __init__ methods should be in the testgen context (so LLM knows constructor signatures) + assert testgen_context.count("def __init__") >= 2, ( + "Both __init__ methods should be in testgen context" + ) diff --git a/tests/test_instrument_line_profiler.py b/tests/test_instrument_line_profiler.py index 71d1005c0..675db5944 100644 --- a/tests/test_instrument_line_profiler.py +++ b/tests/test_instrument_line_profiler.py @@ -55,6 +55,7 @@ def hi(): class BubbleSortClass: + @codeflash_line_profile def __init__(self): pass @@ -117,7 +118,9 @@ def sort_classmethod(x): return y.sorter(x) """ assert code_path.read_text("utf-8") == expected_code_main - assert code_context.helper_functions.__len__() == 0 + # WrapperClass.__init__ is now detected as a helper since WrapperClass.BubbleSortClass() instantiates it + assert len(code_context.helper_functions) == 1 + assert code_context.helper_functions[0].qualified_name == "WrapperClass.__init__" finally: func_optimizer.write_code_and_helpers( func_optimizer.function_to_optimize_source_code, original_helper_code, func_optimizer.function_to_optimize.file_path @@ -283,6 +286,7 @@ def sorter(arr): ans = helper(arr) return ans class helper: + @codeflash_line_profile def __init__(self, arr): return arr.sort() """ diff --git a/tests/test_remove_unused_definitions.py b/tests/test_remove_unused_definitions.py index 86a57bb6d..8d09a95e1 100644 --- a/tests/test_remove_unused_definitions.py +++ b/tests/test_remove_unused_definitions.py @@ -337,6 +337,65 @@ 
def unused_function(): result = remove_unused_definitions_by_function_names(code, qualified_functions) assert result.strip() == expected.strip() +def test_base_class_inheritance() -> None: + """Test that base classes used only for inheritance are preserved.""" + code = """ +class LayoutDumper: + def dump(self): + raise NotImplementedError + +class ObjectDetectionLayoutDumper(LayoutDumper): + def __init__(self, data): + self.data = data + def dump(self): + return self.data + +class ExtractedLayoutDumper(LayoutDumper): + def __init__(self, data): + self.data = data + def dump(self): + return self.data + +class UnusedClass: + pass + +def test_function(): + dumper = ObjectDetectionLayoutDumper({}) + return dumper.dump() +""" + + expected = """ +class LayoutDumper: + def dump(self): + raise NotImplementedError + +class ObjectDetectionLayoutDumper(LayoutDumper): + def __init__(self, data): + self.data = data + def dump(self): + return self.data + +class ExtractedLayoutDumper(LayoutDumper): + def __init__(self, data): + self.data = data + def dump(self): + return self.data + +class UnusedClass: + pass + +def test_function(): + dumper = ObjectDetectionLayoutDumper({}) + return dumper.dump() +""" + + qualified_functions = {"test_function"} + result = remove_unused_definitions_by_function_names(code, qualified_functions) + # LayoutDumper should be preserved because ObjectDetectionLayoutDumper inherits from it + assert "class LayoutDumper" in result + assert "class ObjectDetectionLayoutDumper" in result + + def test_conditional_and_loop_variables() -> None: """Test handling of variables defined in if-else and while loops.""" code = """ From c18af789c945421b101644ec506bbd673d6d5cfa Mon Sep 17 00:00:00 2001 From: mohammed ahmed <64513301+mohammedahmed18@users.noreply.github.com> Date: Wed, 7 Jan 2026 00:24:49 +0000 Subject: [PATCH 09/12] fixes --- codeflash/code_utils/config_consts.py | 7 +++++-- codeflash/optimization/function_optimizer.py | 11 +++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index f3a881e1b..97be8808c 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -1,5 +1,7 @@ +from __future__ import annotations + from enum import Enum -from typing import Any +from typing import Any, Union MAX_TEST_RUN_ITERATIONS = 5 INDIVIDUAL_TESTCASE_TIMEOUT = 15 @@ -83,8 +85,9 @@ class EffortKeys(str, Enum): } -def get_effort_value(key: EffortKeys, effort: EffortLevel) -> Any: # noqa: ANN401 +def get_effort_value(key: EffortKeys, effort: Union[EffortLevel,str]) -> Any: # noqa: ANN401 key_str = key.value + effort = effort.value if isinstance(effort, EffortLevel) else effort if key_str in EFFORT_VALUES: if effort in EFFORT_VALUES[key_str]: return EFFORT_VALUES[key_str][effort] diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 0b525e3e7..05f6965c9 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -182,8 +182,8 @@ def __init__( self, initial_candidates: list[OptimizedCandidate], future_line_profile_results: concurrent.futures.Future, - ai_service_client: AiServiceClient, eval_ctx: CandidateEvaluationContext, + effort: str, original_markdown_code: str, future_all_refinements: list[concurrent.futures.Future], future_all_code_repair: list[concurrent.futures.Future], @@ -193,11 +193,11 @@ def __init__( self.forest = CandidateForest() 
self.line_profiler_done = False self.refinement_done = False + self.eval_ctx = eval_ctx + self.effort = effort self.candidate_len = len(initial_candidates) - self.ai_service_client = ai_service_client self.refinement_calls_count = 0 self.original_markdown_code = original_markdown_code - self.eval_ctx = eval_ctx # Initialize queue with initial candidates for candidate in initial_candidates: @@ -296,7 +296,7 @@ def _filter_refined_candidates(self, candidates: list[OptimizedCandidate]) -> li top_n_candidates = int( min( int(get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.effort)), - len(self.all_refinements_data), + len(candidates), ) ) @@ -1045,12 +1045,11 @@ def determine_best_candidate( processor = CandidateProcessor( candidates, future_line_profile_results, - self.aiservice_client, eval_ctx, + self.effort, code_context.read_writable_code.markdown, self.future_all_refinements, self.future_all_code_repair, - self.effort, self.future_adaptive_optimizations, ) candidate_index = 0 From 2a86446fdcc61c96171574595d1b84bef39bfc1e Mon Sep 17 00:00:00 2001 From: mohammed ahmed <64513301+mohammedahmed18@users.noreply.github.com> Date: Wed, 7 Jan 2026 00:41:39 +0000 Subject: [PATCH 10/12] formatting and linting --- codeflash/code_utils/config_consts.py | 21 ++++++++++++-------- codeflash/optimization/function_optimizer.py | 5 +---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index 97be8808c..1ee373579 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -85,13 +85,18 @@ class EffortKeys(str, Enum): } -def get_effort_value(key: EffortKeys, effort: Union[EffortLevel,str]) -> Any: # noqa: ANN401 +def get_effort_value(key: EffortKeys, effort: Union[EffortLevel, str]) -> Any: # noqa: ANN401 key_str = key.value - effort = effort.value if isinstance(effort, EffortLevel) else effort - if key_str in EFFORT_VALUES: - if effort in EFFORT_VALUES[key_str]: - return EFFORT_VALUES[key_str][effort] - msg = f"Invalid effort level: {effort}" + + if isinstance(effort, str): + try: + effort = EffortLevel(effort) + except ValueError: + msg = f"Invalid effort level: {effort}" + raise ValueError(msg) from None + + if key_str not in EFFORT_VALUES: + msg = f"Invalid key: {key_str}" raise ValueError(msg) - msg = f"Invalid key: {key_str}" - raise ValueError(msg) + + return EFFORT_VALUES[key_str][effort] diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 05f6965c9..79878d991 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -294,10 +294,7 @@ def _filter_refined_candidates(self, candidates: list[OptimizedCandidate]) -> li self.refinement_calls_count += len(candidates) top_n_candidates = int( - min( - int(get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.effort)), - len(candidates), - ) + min(int(get_effort_value(EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT, self.effort)), len(candidates)) ) if len(candidates) == top_n_candidates: From ca9769c7689f29e0a9ec4414684a390820a8058e Mon Sep 17 00:00:00 2001 From: mohammed ahmed <64513301+mohammedahmed18@users.noreply.github.com> Date: Wed, 7 Jan 2026 00:53:08 +0000 Subject: [PATCH 11/12] modify effort values --- codeflash/code_utils/config_consts.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/codeflash/code_utils/config_consts.py 
b/codeflash/code_utils/config_consts.py
index 1ee373579..ca79ebbd9 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -65,22 +65,24 @@ class EffortKeys(str, Enum):
     EffortKeys.N_OPTIMIZER_LP_CANDIDATES.value: {EffortLevel.LOW: 4, EffortLevel.MEDIUM: 6, EffortLevel.HIGH: 7},
     # we don't use effort with generated tests for now
     EffortKeys.N_GENERATED_TESTS.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 2, EffortLevel.HIGH: 2},
-    # maximum number of repairs we will do for each function
-    EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 4, EffortLevel.HIGH: 5},
+    # maximum number of repairs we will do for each function (in case the number of valid candidates is less than MIN_CORRECT_CANDIDATES)
+    EffortKeys.MAX_CODE_REPAIRS_PER_TRACE.value: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 5},
     # if the percentage of unmatched tests is greater than this, we won't fix it (lowering this value makes the repair more strict)
     # on low effort we lower the limit to 20% to be more strict (fewer repairs, less time)
     EffortKeys.REPAIR_UNMATCHED_PERCENTAGE_LIMIT.value: {
         EffortLevel.LOW: 0.2,
-        EffortLevel.MEDIUM: 0.4,
-        EffortLevel.HIGH: 0.5,
+        EffortLevel.MEDIUM: 0.3,
+        EffortLevel.HIGH: 0.4,
     },
     # Top valid candidates for refinements
     EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4},
-    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 1, EffortLevel.HIGH: 3},
+    # max number of adaptive optimization calls to make per a single candidate tree
+    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 1, EffortLevel.HIGH: 2},
+    # max number of adaptive optimization calls to make per a single trace
     EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE.value: {
         EffortLevel.LOW: 0,
-        EffortLevel.MEDIUM: 3,
-        EffortLevel.HIGH: 10,
+        EffortLevel.MEDIUM: 2,
+        EffortLevel.HIGH: 4,
     },
 }

From 0a33bc10ef93c511d6fa1bb2ff24380f11a0d45e Mon Sep 17 00:00:00 2001
From: ali
Date: Wed, 7 Jan 2026 18:36:54 +0200
Subject: [PATCH 12/12] disable adaptive optimization for medium effort

---
 codeflash/code_utils/config_consts.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
index ca79ebbd9..96d6b8e14 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -77,11 +77,11 @@ class EffortKeys(str, Enum):
     # Top valid candidates for refinements
     EffortKeys.TOP_VALID_CANDIDATES_FOR_REFINEMENT: {EffortLevel.LOW: 2, EffortLevel.MEDIUM: 3, EffortLevel.HIGH: 4},
     # max number of adaptive optimization calls to make per a single candidate tree
-    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 1, EffortLevel.HIGH: 2},
+    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 0, EffortLevel.HIGH: 2},
     # max number of adaptive optimization calls to make per a single trace
     EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE.value: {
         EffortLevel.LOW: 0,
-        EffortLevel.MEDIUM: 2,
+        EffortLevel.MEDIUM: 0,
         EffortLevel.HIGH: 4,
     },
 }
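After PATCH 12/12, the effort table resolves as in the minimal, self-contained sketch below. The EffortLevel/EffortKeys member names, the get_effort_value logic, and the final numbers are taken from the diffs above; the string values assigned to the two EffortKeys members are illustrative assumptions, since the series never shows them, and the table is trimmed to the two keys PATCH 12/12 touches.

```python
# Sketch of the final effort lookup (PATCH 10/12 logic, PATCH 12/12 values).
from __future__ import annotations

from enum import Enum
from typing import Any, Union


class EffortLevel(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"


class EffortKeys(str, Enum):
    # Member names appear in the diffs; the string values here are assumed for illustration.
    ADAPTIVE_OPTIMIZATION_THRESHOLD = "adaptive_optimization_threshold"
    MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE = "max_adaptive_optimizations_per_trace"


# Trimmed to the two keys changed by PATCH 12/12; numbers match the final diff.
EFFORT_VALUES: dict[str, dict[EffortLevel, Any]] = {
    EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD.value: {EffortLevel.LOW: 0, EffortLevel.MEDIUM: 0, EffortLevel.HIGH: 2},
    EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE.value: {
        EffortLevel.LOW: 0,
        EffortLevel.MEDIUM: 0,
        EffortLevel.HIGH: 4,
    },
}


def get_effort_value(key: EffortKeys, effort: Union[EffortLevel, str]) -> Any:
    # Accept either the enum member or its string form ("low"/"medium"/"high"),
    # normalizing strings to EffortLevel as PATCH 10/12 does.
    if isinstance(effort, str):
        try:
            effort = EffortLevel(effort)
        except ValueError:
            msg = f"Invalid effort level: {effort}"
            raise ValueError(msg) from None
    key_str = key.value
    if key_str not in EFFORT_VALUES:
        msg = f"Invalid key: {key_str}"
        raise ValueError(msg)
    return EFFORT_VALUES[key_str][effort]


if __name__ == "__main__":
    # Medium effort now disables adaptive optimization entirely.
    assert get_effort_value(EffortKeys.MAX_ADAPTIVE_OPTIMIZATIONS_PER_TRACE, "medium") == 0
    assert get_effort_value(EffortKeys.ADAPTIVE_OPTIMIZATION_THRESHOLD, EffortLevel.HIGH) == 2
```

Note that because EffortLevel is a str-backed Enum, passing an enum member still satisfies the isinstance(effort, str) branch; EffortLevel(EffortLevel.HIGH) simply returns the member, so both call styles shown in the demo behave identically.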