From d3b7b12d791abbd6c74be33abe30eb04b2ae528a Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 2 Mar 2026 10:55:23 -0800 Subject: [PATCH 01/30] Take advantage of hyper sparsity in dual push --- cpp/src/dual_simplex/crossover.cpp | 40 +++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 988c9c50ad..16f503e893 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -331,6 +331,7 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, template i_t dual_push(const lp_problem_t& lp, + const csr_matrix_t& Arow, const simplex_solver_settings_t& settings, f_t start_time, lp_solution_t& solution, @@ -401,11 +402,9 @@ i_t dual_push(const lp_problem_t& lp, es_sparse.x[0] = -delta_zs; // B^T delta_y = -delta_zs*es - std::vector delta_y(m); sparse_vector_t delta_y_sparse(m, 1); sparse_vector_t UTsol_sparse(m, 1); ft.b_transpose_solve(es_sparse, delta_y_sparse, UTsol_sparse); - delta_y_sparse.scatter(delta_y); // We solved B^T delta_y = -delta_zs*es, but for the update we need // U^T*etilde = es. @@ -417,15 +416,23 @@ i_t dual_push(const lp_problem_t& lp, // delta_zN = -N^T delta_y std::vector delta_zN(n - m); - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + std::vector delta_expanded(n, 0.); + + // Iterate directly over sparse delta_y instead of checking zeros + for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { + const i_t row = delta_y_sparse.i[nnz_idx]; + const f_t val = delta_y_sparse.x[nnz_idx]; + + // Accumulate contributions from this row to all columns + const i_t row_start = Arow.row_start[row]; + const i_t row_end = Arow.row_start[row + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t col = Arow.j[p]; + delta_expanded[col] += Arow.x[p] * val; } - delta_zN[k] = -dot; + } + for (i_t k = 0; k < n - m; ++k) { + delta_zN[k] = -delta_expanded[nonbasic_list[k]]; } i_t entering_index = -1; @@ -435,8 +442,10 @@ i_t dual_push(const lp_problem_t& lp, assert(step_length >= -1e-6); // y <- y + step_length * delta_y - for (i_t i = 0; i < m; ++i) { - y[i] += step_length * delta_y[i]; + // Optimized: Only update non-zero elements from sparse representation + for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { + const i_t i = delta_y_sparse.i[nnz_idx]; + y[i] += step_length * delta_y_sparse.x[nnz_idx]; } // z <- z + step_length * delta z @@ -725,7 +734,6 @@ i_t primal_push(const lp_problem_t& lp, { const i_t m = lp.num_rows; const i_t n = lp.num_cols; - settings.log.debug("Primal push: superbasic %ld\n", superbasic_list.size()); std::vector& x = solution.x; @@ -1002,6 +1010,7 @@ i_t primal_push(const lp_problem_t& lp, } solution.x = x_compare; solution.iterations += num_pushes; + return 0; } @@ -1190,6 +1199,9 @@ crossover_status_t crossover(const lp_problem_t& lp, f_t crossover_start = tic(); f_t work_estimate = 0; + csr_matrix_t Arow(m, n, 1); + lp.A.to_compressed_row(Arow); + settings.log.printf("\n"); settings.log.printf("Starting crossover\n"); @@ -1332,7 +1344,7 @@ crossover_status_t crossover(const lp_problem_t& lp, verify_basis(m, n, vstatus); compare_vstatus_with_lists(m, n, basic_list, nonbasic_list, vstatus); i_t dual_push_status = dual_push( - lp, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus); + lp, Arow, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus); if (dual_push_status < 0) { return return_to_status(dual_push_status); } settings.log.debug("basic list size %ld m %d\n", basic_list.size(), m); settings.log.debug("nonbasic list size %ld n - m %d\n", nonbasic_list.size(), n - m); From 6aae07d21fff1f45d4c71c67392a24adaa2ce0fe Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Thu, 5 Mar 2026 16:22:51 -0800 Subject: [PATCH 02/30] Initial stab at moving root solves to B&B --- cpp/src/branch_and_bound/branch_and_bound.cpp | 89 ++++++++++++++++--- cpp/src/branch_and_bound/branch_and_bound.hpp | 15 +++- .../dual_simplex/simplex_solver_settings.hpp | 3 + cpp/src/dual_simplex/types.hpp | 13 +++ cpp/src/mip_heuristics/CMakeLists.txt | 3 +- .../diversity/diversity_manager.cu | 70 ++++++++------- .../diversity/diversity_manager.cuh | 15 ++++ cpp/src/mip_heuristics/root_lp.cu | 74 +++++++++++++++ cpp/src/mip_heuristics/root_lp.cuh | 31 +++++++ cpp/src/mip_heuristics/solver.cu | 18 +++- 10 files changed, 281 insertions(+), 50 deletions(-) create mode 100644 cpp/src/mip_heuristics/root_lp.cu create mode 100644 cpp/src/mip_heuristics/root_lp.cuh diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 6ce9a4f4d0..1fe020667d 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include @@ -28,6 +30,7 @@ #include #include +#include #include #include #include @@ -241,7 +244,9 @@ template branch_and_bound_t::branch_and_bound_t( const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings, - f_t start_time) + f_t start_time, + cuopt::linear_programming::detail::problem_t* mip_problem_ptr, + i_t num_gpus) : original_problem_(user_problem), settings_(solver_settings), original_lp_(user_problem.handle_ptr, 1, 1, 1), @@ -250,7 +255,9 @@ branch_and_bound_t::branch_and_bound_t( root_relax_soln_(1, 1), root_crossover_soln_(1, 1), pc_(1), - solver_status_(mip_status_t::UNSET) + solver_status_(mip_status_t::UNSET), + mip_problem_ptr_(mip_problem_ptr), + pdlp_root_num_gpus_(num_gpus) { exploration_stats_.start_time = start_time; #ifdef PRINT_CONSTRAINT_MATRIX @@ -1811,15 +1818,65 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( std::ref(root_vstatus), std::ref(edge_norms), nullptr); - // Wait for the root relaxation solution to be sent by the diversity manager or dual simplex - // to finish - while (!root_crossover_solution_set_.load(std::memory_order_acquire) && - *get_root_concurrent_halt() == 0) { + + std::optional>> pdlp_future_opt; + if (enable_concurrent_lp_root_solve_ && mip_problem_ptr_ != nullptr) { + root_crossover_solution_set_.store(false, std::memory_order_release); + pdlp_future_opt = + std::async(std::launch::async, + &cuopt::linear_programming::detail::run_pdlp_barrier_for_root_lp, + mip_problem_ptr_, + lp_settings.time_limit, + get_root_concurrent_halt(), + pdlp_root_num_gpus_); + } + + // Wait for first completion: PDLP/Barrier future, dual simplex future, or legacy callback + while (*get_root_concurrent_halt() == 0) { + bool pdlp_ready = + pdlp_future_opt && pdlp_future_opt->valid() && + pdlp_future_opt->wait_for(std::chrono::milliseconds(0)) == std::future_status::ready; + bool ds_ready = + root_status_future.wait_for(std::chrono::milliseconds(0)) == std::future_status::ready; + if (root_crossover_solution_set_.load(std::memory_order_acquire) || pdlp_ready || ds_ready) { + break; + } std::this_thread::sleep_for(std::chrono::milliseconds(1)); - continue; } - if (root_crossover_solution_set_.load(std::memory_order_acquire)) { + bool use_pdlp_path = false; + if (pdlp_future_opt && pdlp_future_opt->valid() && + pdlp_future_opt->wait_for(std::chrono::milliseconds(0)) == std::future_status::ready) { + auto result = pdlp_future_opt->get(); + root_crossover_soln_.x = result.primal; + root_crossover_soln_.y = result.dual; + root_crossover_soln_.z = result.reduced_costs; + root_crossover_soln_.objective = result.objective; + root_crossover_soln_.user_objective = result.user_objective; + root_crossover_soln_.iterations = result.iterations; + root_objective_ = result.objective; + root_crossover_solution_set_.store(true, std::memory_order_release); + if (lp_settings.on_first_lp_solution_available) { + lp_settings.on_first_lp_solution_available(result); + } + use_pdlp_path = true; + } + + if (!use_pdlp_path && root_crossover_solution_set_.load(std::memory_order_acquire)) { + // Legacy path: set_root_relaxation_solution was invoked + root_relaxation_first_solution_t legacy_result; + legacy_result.primal = root_crossover_soln_.x; + legacy_result.dual = root_crossover_soln_.y; + legacy_result.reduced_costs = root_crossover_soln_.z; + legacy_result.objective = root_crossover_soln_.objective; + legacy_result.user_objective = root_crossover_soln_.user_objective; + legacy_result.iterations = root_crossover_soln_.iterations; + if (lp_settings.on_first_lp_solution_available) { + lp_settings.on_first_lp_solution_available(legacy_result); + } + } + + if (use_pdlp_path || root_crossover_solution_set_.load(std::memory_order_acquire)) { // Crush the root relaxation solution on converted user problem std::vector crushed_root_x; crush_primal_solution( @@ -1909,9 +1966,19 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( solver_name = "Dual Simplex"; } } else { - root_status = root_status_future.get(); - user_objective = root_relax_soln_.user_objective; - iter = root_relax_soln_.iterations; + root_status = root_status_future.get(); + root_relaxation_first_solution_t ds_result; + ds_result.primal = root_relax_soln.x; + ds_result.dual = root_relax_soln.y; + ds_result.reduced_costs = root_relax_soln.z; + ds_result.objective = root_relax_soln.objective; + ds_result.user_objective = root_relax_soln.user_objective; + ds_result.iterations = root_relax_soln.iterations; + if (lp_settings.on_first_lp_solution_available) { + lp_settings.on_first_lp_solution_available(ds_result); + } + user_objective = root_relax_soln.user_objective; + iter = root_relax_soln.iterations; solver_name = "Dual Simplex"; } diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index a13d5cedcf..825d89049d 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -35,6 +35,10 @@ #include #include +namespace cuopt::linear_programming::detail { +template +class problem_t; +} namespace cuopt::linear_programming::dual_simplex { enum class mip_status_t { @@ -66,9 +70,12 @@ struct deterministic_diving_policy_t; template class branch_and_bound_t { public: - branch_and_bound_t(const user_problem_t& user_problem, - const simplex_solver_settings_t& solver_settings, - f_t start_time); + branch_and_bound_t( + const user_problem_t& user_problem, + const simplex_solver_settings_t& solver_settings, + f_t start_time, + cuopt::linear_programming::detail::problem_t* mip_problem_ptr = nullptr, + i_t num_gpus = 1); // Set an initial guess based on the user_problem. This should be called before solve. void set_initial_guess(const std::vector& user_guess) { guess_ = user_guess; } @@ -193,6 +200,8 @@ class branch_and_bound_t { bool enable_concurrent_lp_root_solve_{false}; std::atomic root_concurrent_halt_{0}; bool is_root_solution_set{false}; + cuopt::linear_programming::detail::problem_t* mip_problem_ptr_{nullptr}; + i_t pdlp_root_num_gpus_{1}; // Pseudocosts pseudo_costs_t pc_; diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 815e229232..b014823cda 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -202,6 +202,9 @@ struct simplex_solver_settings_t { std::function&, f_t)> node_processed_callback; std::function heuristic_preemption_callback; std::function&, std::vector&, f_t)> set_simplex_solution_callback; + // Called by B&B when first LP solution is available (PDLP/Barrier or dual simplex). + std::function const&)> + on_first_lp_solution_available; mutable logger_t log; std::atomic* concurrent_halt; // if nullptr ignored, if !nullptr, 0 if solver should // continue, 1 if solver should halt diff --git a/cpp/src/dual_simplex/types.hpp b/cpp/src/dual_simplex/types.hpp index ea46a1f67e..6660a86f0a 100644 --- a/cpp/src/dual_simplex/types.hpp +++ b/cpp/src/dual_simplex/types.hpp @@ -9,6 +9,7 @@ #include #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -19,6 +20,18 @@ using float64_t = double; constexpr float64_t inf = std::numeric_limits::infinity(); +// First LP solution from either PDLP/Barrier or dual simplex; used to notify diversity manager +// without B&B depending on PDLP types. +template +struct root_relaxation_first_solution_t { + std::vector primal; + std::vector dual; + std::vector reduced_costs; + f_t objective{0}; + f_t user_objective{0}; + i_t iterations{0}; +}; + // We return this constant to signal that a concurrent halt has occurred #define CONCURRENT_HALT_RETURN -2 // We return this constant to signal that a time limit has occurred diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index a200d4265b..202ed94bd5 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -41,7 +41,8 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu - ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu) + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu + ${CMAKE_CURRENT_SOURCE_DIR}/root_lp.cu) # Choose which files to include based on build mode if(BUILD_LP_ONLY) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index ed165fe610..7e223bdd98 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -409,7 +409,16 @@ solution_t diversity_manager_t::run_solver() bool bb_thread_solution_exists = simplex_solution_exists.load(); if (bb_thread_solution_exists) { ls.lp_optimal_exists = true; + } else if (branch_and_bound_ptr != nullptr && + branch_and_bound_ptr->enable_concurrent_lp_root_solve()) { + // B&B drives root relaxation; wait for first solution (PDLP/Barrier or dual simplex) + first_solution_ready_.store(false, std::memory_order_release); + std::unique_lock lock(first_solution_mutex_); + first_solution_cv_.wait(lock, [this]() { return first_solution_ready_.load(); }); + lock.unlock(); + clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); } else if (!fj_only_run) { + // Heuristics-only or non-concurrent: diversity manager runs LP solve convert_greater_to_less(*problem_ptr); f_t tolerance_divisor = @@ -481,38 +490,6 @@ solution_t diversity_manager_t::run_solver() // to bring variables within the bounds } - // Send PDLP relaxed solution to branch and bound - if (problem_ptr->set_root_relaxation_solution_callback != nullptr) { - auto& d_primal_solution = lp_result.get_primal_solution(); - auto& d_dual_solution = lp_result.get_dual_solution(); - auto& d_reduced_costs = lp_result.get_reduced_cost(); - - std::vector host_primal(d_primal_solution.size()); - std::vector host_dual(d_dual_solution.size()); - std::vector host_reduced_costs(d_reduced_costs.size()); - raft::copy(host_primal.data(), - d_primal_solution.data(), - d_primal_solution.size(), - problem_ptr->handle_ptr->get_stream()); - raft::copy(host_dual.data(), - d_dual_solution.data(), - d_dual_solution.size(), - problem_ptr->handle_ptr->get_stream()); - raft::copy(host_reduced_costs.data(), - d_reduced_costs.data(), - d_reduced_costs.size(), - problem_ptr->handle_ptr->get_stream()); - problem_ptr->handle_ptr->sync_stream(); - - // PDLP returns user-space objective (it applies objective_scaling_factor internally) - auto user_obj = lp_result.get_objective_value(); - auto solver_obj = problem_ptr->get_solver_obj_from_user_obj(user_obj); - auto iterations = lp_result.get_additional_termination_information().number_of_steps_taken; - // Set for the B&B (param4 expects solver space, param5 expects user space) - problem_ptr->set_root_relaxation_solution_callback( - host_primal, host_dual, host_reduced_costs, solver_obj, user_obj, iterations); - } - // in case the pdlp returned var boudns that are out of bounds clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); } @@ -851,6 +828,35 @@ std::pair, bool> diversity_manager_t::recombine( return std::make_pair(solution_t(a), false); } +template +void diversity_manager_t::on_first_lp_solution( + cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t const& result) +{ + { + std::lock_guard lock(relaxed_solution_mutex); + cuopt_assert(result.primal.size() == lp_optimal_solution.size(), + "First LP solution primal size mismatch"); + cuopt_assert(result.dual.size() == lp_dual_optimal_solution.size(), + "First LP solution dual size mismatch"); + raft::copy(lp_optimal_solution.data(), + result.primal.data(), + result.primal.size(), + problem_ptr->handle_ptr->get_stream()); + raft::copy(lp_dual_optimal_solution.data(), + result.dual.data(), + result.dual.size(), + problem_ptr->handle_ptr->get_stream()); + problem_ptr->handle_ptr->sync_stream(); + ls.lp_optimal_exists = true; + set_new_user_bound(result.user_objective); + } + { + std::lock_guard lock(first_solution_mutex_); + first_solution_ready_.store(true, std::memory_order_release); + first_solution_cv_.notify_all(); + } +} + template void diversity_manager_t::set_simplex_solution(const std::vector& solution, const std::vector& dual_solution, diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh index d4e24bdeaf..6dd53be52b 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh @@ -21,12 +21,17 @@ #include #include +#include + #include #include #include #include #include +#include +#include + namespace cuopt::linear_programming::detail { template @@ -70,6 +75,11 @@ class diversity_manager_t { const std::vector& dual_solution, f_t objective); + // Called by B&B when first LP solution is available (PDLP/Barrier or dual simplex). + void on_first_lp_solution( + cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t const& + result); + mip_solver_context_t& context; dual_simplex::branch_and_bound_t* branch_and_bound_ptr; problem_t* problem_ptr; @@ -97,6 +107,11 @@ class diversity_manager_t { // atomic for signalling pdlp to stop std::atomic global_concurrent_halt{0}; + // First solution from B&B: wait for B&B to call on_first_lp_solution when run_bb and concurrent + std::mutex first_solution_mutex_; + std::condition_variable first_solution_cv_; + std::atomic first_solution_ready_{false}; + rins_t rins; bool run_only_ls_recombiner{false}; diff --git a/cpp/src/mip_heuristics/root_lp.cu b/cpp/src/mip_heuristics/root_lp.cu new file mode 100644 index 0000000000..d11fbda957 --- /dev/null +++ b/cpp/src/mip_heuristics/root_lp.cu @@ -0,0 +1,74 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include "root_lp.cuh" + +#include +#include + +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +template +cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_pdlp_barrier_for_root_lp(problem_t* problem, + f_t time_limit, + std::atomic* concurrent_halt, + i_t num_gpus) +{ + convert_greater_to_less(*problem); + f_t tolerance_divisor = + problem->tolerances.absolute_tolerance / + (problem->tolerances.relative_tolerance > 0 ? problem->tolerances.relative_tolerance : 1); + pdlp_solver_settings_t pdlp_settings{}; + pdlp_settings.tolerances.relative_primal_tolerance = + problem->tolerances.absolute_tolerance / tolerance_divisor; + pdlp_settings.tolerances.relative_dual_tolerance = + problem->tolerances.absolute_tolerance / tolerance_divisor; + pdlp_settings.time_limit = time_limit; + pdlp_settings.first_primal_feasible = false; + pdlp_settings.concurrent_halt = concurrent_halt; + pdlp_settings.method = method_t::Concurrent; + pdlp_settings.inside_mip = true; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + pdlp_settings.num_gpus = num_gpus; + pdlp_settings.presolver = presolver_t::None; + + timer_t lp_timer(time_limit); + auto lp_result = solve_lp_with_method(*problem, pdlp_settings, lp_timer); + + cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t result; + auto stream = problem->handle_ptr->get_stream(); + result.primal.resize(lp_result.get_primal_solution().size()); + result.dual.resize(lp_result.get_dual_solution().size()); + result.reduced_costs.resize(lp_result.get_reduced_cost().size()); + raft::copy( + result.primal.data(), lp_result.get_primal_solution().data(), result.primal.size(), stream); + raft::copy(result.dual.data(), lp_result.get_dual_solution().data(), result.dual.size(), stream); + raft::copy(result.reduced_costs.data(), + lp_result.get_reduced_cost().data(), + result.reduced_costs.size(), + stream); + problem->handle_ptr->sync_stream(); + result.objective = problem->get_solver_obj_from_user_obj(lp_result.get_objective_value()); + result.user_objective = lp_result.get_objective_value(); + result.iterations = lp_result.get_additional_termination_information().number_of_steps_taken; + return result; +} + +template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_pdlp_barrier_for_root_lp(problem_t*, double, std::atomic*, int); + +template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_pdlp_barrier_for_root_lp(problem_t*, float, std::atomic*, int); + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/root_lp.cuh b/cpp/src/mip_heuristics/root_lp.cuh new file mode 100644 index 0000000000..8683ebe820 --- /dev/null +++ b/cpp/src/mip_heuristics/root_lp.cuh @@ -0,0 +1,31 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include +#include + +namespace cuopt::linear_programming::detail { + +template +class problem_t; + +/** + * Run PDLP/Barrier for root LP (used by branch-and-bound when concurrent root solve is enabled). + * Implemented in root_lp.cu so GPU code (convert_greater_to_less, solve_lp_with_method) can run. + */ +template +cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_pdlp_barrier_for_root_lp(problem_t* problem, + f_t time_limit, + std::atomic* concurrent_halt, + i_t num_gpus); + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index e6f6d50b62..42f7995ed6 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -257,17 +257,29 @@ solution_t mip_solver_t::run_solver() &solution_helper, std::placeholders::_1, std::placeholders::_2); + + branch_and_bound_settings.on_first_lp_solution_available = + [&dm](dual_simplex::root_relaxation_first_solution_t const& result) { + dm.on_first_lp_solution(result); + }; } // Create the branch and bound object - branch_and_bound = std::make_unique>( - branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start()); + auto* mip_problem_ptr = (context.settings.determinism_mode == CUOPT_MODE_OPPORTUNISTIC) + ? context.problem_ptr + : nullptr; + i_t num_gpus = context.settings.num_gpus; + branch_and_bound = + std::make_unique>(branch_and_bound_problem, + branch_and_bound_settings, + timer_.get_tic_start(), + mip_problem_ptr, + num_gpus); context.branch_and_bound_ptr = branch_and_bound.get(); auto* stats_ptr = &context.stats; branch_and_bound->set_user_bound_callback( [stats_ptr](f_t user_bound) { stats_ptr->set_solution_bound(user_bound); }); - // Set the primal heuristics -> branch and bound callback if (context.settings.determinism_mode == CUOPT_MODE_OPPORTUNISTIC) { branch_and_bound->set_concurrent_lp_root_solve(true); From 1b6d98a9996ec7d186f76c195cb296f5a58acef9 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Thu, 5 Mar 2026 18:45:06 -0800 Subject: [PATCH 03/30] Move branch and bound problem to inside branch and bound --- cpp/src/branch_and_bound/CMakeLists.txt | 1 + cpp/src/branch_and_bound/branch_and_bound.cpp | 8 +++----- cpp/src/branch_and_bound/branch_and_bound.hpp | 19 ++++++++++++------- cpp/src/mip_heuristics/diversity/lns/rins.cu | 6 ++---- .../diversity/recombiners/sub_mip.cuh | 6 ++---- cpp/src/mip_heuristics/solver.cu | 19 ++++--------------- 6 files changed, 24 insertions(+), 35 deletions(-) diff --git a/cpp/src/branch_and_bound/CMakeLists.txt b/cpp/src/branch_and_bound/CMakeLists.txt index 5bb1017120..9b04014fb7 100644 --- a/cpp/src/branch_and_bound/CMakeLists.txt +++ b/cpp/src/branch_and_bound/CMakeLists.txt @@ -5,6 +5,7 @@ set(BRANCH_AND_BOUND_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound_from_mip.cu ${CMAKE_CURRENT_SOURCE_DIR}/mip_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pseudo_costs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/diving_heuristics.cpp diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 1fe020667d..16a76537aa 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -244,9 +244,7 @@ template branch_and_bound_t::branch_and_bound_t( const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings, - f_t start_time, - cuopt::linear_programming::detail::problem_t* mip_problem_ptr, - i_t num_gpus) + f_t start_time) : original_problem_(user_problem), settings_(solver_settings), original_lp_(user_problem.handle_ptr, 1, 1, 1), @@ -256,8 +254,8 @@ branch_and_bound_t::branch_and_bound_t( root_crossover_soln_(1, 1), pc_(1), solver_status_(mip_status_t::UNSET), - mip_problem_ptr_(mip_problem_ptr), - pdlp_root_num_gpus_(num_gpus) + mip_problem_ptr_(nullptr), + pdlp_root_num_gpus_(1) { exploration_stats_.start_time = start_time; #ifdef PRINT_CONSTRAINT_MATRIX diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 825d89049d..909c57e0c8 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -70,12 +70,16 @@ struct deterministic_diving_policy_t; template class branch_and_bound_t { public: - branch_and_bound_t( - const user_problem_t& user_problem, - const simplex_solver_settings_t& solver_settings, - f_t start_time, - cuopt::linear_programming::detail::problem_t* mip_problem_ptr = nullptr, - i_t num_gpus = 1); + /** Build from MIP problem_t (used by mip_heuristics). Implemented in + * branch_and_bound_from_mip.cu. */ + branch_and_bound_t(cuopt::linear_programming::detail::problem_t* mip_problem_ptr, + const simplex_solver_settings_t& solver_settings, + f_t start_time, + i_t num_gpus = 1); + /** Build from user_problem_t (used by dual_simplex/solve.cpp, RINS, sub_mip). */ + branch_and_bound_t(const user_problem_t& user_problem, + const simplex_solver_settings_t& solver_settings, + f_t start_time); // Set an initial guess based on the user_problem. This should be called before solve. void set_initial_guess(const std::vector& user_guess) { guess_ = user_guess; } @@ -122,6 +126,7 @@ class branch_and_bound_t { std::vector& repaired_solution) const; f_t get_lower_bound(); + i_t get_num_cols() const { return original_problem_.num_cols; } bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; } @@ -146,7 +151,7 @@ class branch_and_bound_t { producer_sync_t& get_producer_sync() { return producer_sync_; } private: - const user_problem_t& original_problem_; + user_problem_t original_problem_; const simplex_solver_settings_t settings_; work_limit_context_t work_unit_context_{"B&B"}; diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index 7fd8533f82..1d009b8fb7 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -248,12 +248,9 @@ void rins_t::run_rins() // run sub-mip namespace dual_simplex = cuopt::linear_programming::dual_simplex; - dual_simplex::user_problem_t branch_and_bound_problem(&rins_handle); dual_simplex::simplex_solver_settings_t branch_and_bound_settings; dual_simplex::mip_solution_t branch_and_bound_solution(1); dual_simplex::mip_status_t branch_and_bound_status = dual_simplex::mip_status_t::UNSET; - fixed_problem.get_host_user_problem(branch_and_bound_problem); - branch_and_bound_solution.resize(branch_and_bound_problem.num_cols); // Fill in the settings for branch and bound branch_and_bound_settings.time_limit = time_limit; // branch_and_bound_settings.node_limit = 5000 + node_count / 100; // try harder as time goes @@ -274,7 +271,8 @@ void rins_t::run_rins() rins_solution_queue.push_back(solution); }; dual_simplex::branch_and_bound_t branch_and_bound( - branch_and_bound_problem, branch_and_bound_settings, dual_simplex::tic()); + &fixed_problem, branch_and_bound_settings, dual_simplex::tic(), 1); + branch_and_bound_solution.resize(branch_and_bound.get_num_cols()); branch_and_bound.set_initial_guess(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution); diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index b2f7f80066..5b9821cc3f 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -95,10 +95,7 @@ class sub_mip_recombiner_t : public recombiner_t { if (run_sub_mip) { // run sub-mip namespace dual_simplex = cuopt::linear_programming::dual_simplex; - dual_simplex::user_problem_t branch_and_bound_problem(offspring.handle_ptr); dual_simplex::simplex_solver_settings_t branch_and_bound_settings; - fixed_problem.get_host_user_problem(branch_and_bound_problem); - branch_and_bound_solution.resize(branch_and_bound_problem.num_cols); // Fill in the settings for branch and bound branch_and_bound_settings.time_limit = sub_mip_recombiner_config_t::sub_mip_time_limit; branch_and_bound_settings.print_presolve_stats = false; @@ -117,7 +114,8 @@ class sub_mip_recombiner_t : public recombiner_t { // disable B&B logs, so that it is not interfering with the main B&B thread branch_and_bound_settings.log.log = false; dual_simplex::branch_and_bound_t branch_and_bound( - branch_and_bound_problem, branch_and_bound_settings, dual_simplex::tic()); + &fixed_problem, branch_and_bound_settings, dual_simplex::tic(), 1); + branch_and_bound_solution.resize(branch_and_bound.get_num_cols()); branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution); if (solution_vector.size() > 0) { cuopt_assert(fixed_assignment.size() == branch_and_bound_solution.x.size(), diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 42f7995ed6..8d5cd813bf 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -180,13 +180,11 @@ solution_t mip_solver_t::run_solver() namespace dual_simplex = cuopt::linear_programming::dual_simplex; std::future branch_and_bound_status_future; - dual_simplex::user_problem_t branch_and_bound_problem(context.problem_ptr->handle_ptr); context.problem_ptr->recompute_objective_integrality(); if (context.problem_ptr->is_objective_integral()) { CUOPT_LOG_INFO("Objective function is integral, scale %g", context.problem_ptr->presolve_data.objective_scaling_factor); } - branch_and_bound_problem.objective_is_integral = context.problem_ptr->is_objective_integral(); dual_simplex::simplex_solver_settings_t branch_and_bound_settings; std::unique_ptr> branch_and_bound; branch_and_bound_solution_helper_t solution_helper(&dm, branch_and_bound_settings); @@ -194,11 +192,6 @@ solution_t mip_solver_t::run_solver() bool run_bb = !context.settings.heuristics_only; if (run_bb) { - // Convert the presolved problem to dual_simplex::user_problem_t - op_problem_.get_host_user_problem(branch_and_bound_problem); - // Resize the solution now that we know the number of columns/variables - branch_and_bound_solution.resize(branch_and_bound_problem.num_cols); - // Fill in the settings for branch and bound branch_and_bound_settings.time_limit = timer_.get_time_limit(); branch_and_bound_settings.node_limit = context.settings.node_limit; @@ -264,17 +257,13 @@ solution_t mip_solver_t::run_solver() }; } - // Create the branch and bound object - auto* mip_problem_ptr = (context.settings.determinism_mode == CUOPT_MODE_OPPORTUNISTIC) - ? context.problem_ptr - : nullptr; - i_t num_gpus = context.settings.num_gpus; + // Create the branch and bound object (builds user_problem from context.problem_ptr) branch_and_bound = - std::make_unique>(branch_and_bound_problem, + std::make_unique>(context.problem_ptr, branch_and_bound_settings, timer_.get_tic_start(), - mip_problem_ptr, - num_gpus); + context.settings.num_gpus); + branch_and_bound_solution.resize(branch_and_bound->get_num_cols()); context.branch_and_bound_ptr = branch_and_bound.get(); auto* stats_ptr = &context.stats; branch_and_bound->set_user_bound_callback( From e311548c0419eaa4df26bc7b96b713431018328c Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Fri, 6 Mar 2026 15:32:21 -0800 Subject: [PATCH 04/30] fix link errors --- .../branch_and_bound_from_mip.cu | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 cpp/src/branch_and_bound/branch_and_bound_from_mip.cu diff --git a/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu b/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu new file mode 100644 index 0000000000..d4e713ebff --- /dev/null +++ b/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu @@ -0,0 +1,106 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +namespace { +template +void full_variable_types(const user_problem_t& original_problem, + const lp_problem_t& original_lp, + std::vector& var_types) +{ + var_types = original_problem.var_types; + if (original_lp.num_cols > original_problem.num_cols) { + var_types.resize(original_lp.num_cols); + for (i_t k = original_problem.num_cols; k < original_lp.num_cols; k++) { + var_types[k] = variable_type_t::CONTINUOUS; + } + } +} +} // anonymous namespace + +template +branch_and_bound_t::branch_and_bound_t( + cuopt::linear_programming::detail::problem_t* mip_problem_ptr, + const simplex_solver_settings_t& solver_settings, + f_t start_time, + i_t num_gpus) + : original_problem_(mip_problem_ptr->handle_ptr), + settings_(solver_settings), + original_lp_(mip_problem_ptr->handle_ptr, 1, 1, 1), + Arow_(1, 1, 0), + incumbent_(1), + root_relax_soln_(1, 1), + root_crossover_soln_(1, 1), + pc_(1), + solver_status_(mip_status_t::UNSET), + mip_problem_ptr_(mip_problem_ptr), + pdlp_root_num_gpus_(num_gpus) +{ + exploration_stats_.start_time = start_time; + mip_problem_ptr->recompute_objective_integrality(); + original_problem_.objective_is_integral = mip_problem_ptr->is_objective_integral(); + mip_problem_ptr->get_host_user_problem(original_problem_); + +#ifdef PRINT_CONSTRAINT_MATRIX + settings_.log.printf("A"); + original_problem_.A.print_matrix(); +#endif + + dualize_info_t dualize_info; + convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); + full_variable_types(original_problem_, original_lp_, var_types_); + +#ifdef CHECK_SLACKS + assert(new_slacks_.size() == original_lp_.num_rows); + for (i_t slack : new_slacks_) { + const i_t col_start = original_lp_.A.col_start[slack]; + const i_t col_end = original_lp_.A.col_start[slack + 1]; + const i_t col_len = col_end - col_start; + if (col_len != 1) { + settings_.log.printf("Slack %d has %d nzs\n", slack, col_len); + assert(col_len == 1); + } + const i_t i = original_lp_.A.i[col_start]; + const f_t x = original_lp_.A.x[col_start]; + if (std::abs(x) != 1.0) { + settings_.log.printf("Slack %d row %d has non-unit coefficient %e\n", slack, i, x); + assert(std::abs(x) == 1.0); + } + } +#endif + + upper_bound_ = inf; + root_objective_ = std::numeric_limits::quiet_NaN(); +} + +template branch_and_bound_t::branch_and_bound_t( + cuopt::linear_programming::detail::problem_t*, + const simplex_solver_settings_t&, + double, + int); + +#ifdef MIP_INSTANTIATION_FLOAT +template branch_and_bound_t::branch_and_bound_t( + cuopt::linear_programming::detail::problem_t*, + const simplex_solver_settings_t&, + float, + int); +#endif + +} // namespace cuopt::linear_programming::dual_simplex From c522274671af93a24c93e1776cab9525f91770ae Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Fri, 6 Mar 2026 15:40:52 -0800 Subject: [PATCH 05/30] Fix link errors --- cpp/src/dual_simplex/presolve.cpp | 60 +++++++++++++++++++++++++++++++ cpp/src/mip_heuristics/root_lp.cu | 3 +- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index b9ee419517..e8af7ba514 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -7,6 +7,8 @@ #include +#include + #include #include #include @@ -1571,4 +1573,62 @@ template void uncrush_solution(const presolve_info_t& #endif +#if CUOPT_INSTANTIATE_FLOAT + +template void convert_user_problem( + const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, + lp_problem_t& problem, + std::vector& new_slacks, + dualize_info_t& dualize_info); + +template void convert_user_lp_with_guess( + const user_problem_t& user_problem, + const lp_solution_t& initial_solution, + const std::vector& initial_slack, + lp_problem_t& lp, + lp_solution_t& converted_solution); + +template int presolve(const lp_problem_t& original, + const simplex_solver_settings_t& settings, + lp_problem_t& presolved, + presolve_info_t& presolve_info); + +template void crush_primal_solution(const user_problem_t& user_problem, + const lp_problem_t& problem, + const std::vector& user_solution, + const std::vector& new_slacks, + std::vector& solution); + +template float crush_dual_solution(const user_problem_t& user_problem, + const lp_problem_t& problem, + const std::vector& new_slacks, + const std::vector& user_y, + const std::vector& user_z, + std::vector& y, + std::vector& z); + +template void uncrush_primal_solution(const user_problem_t& user_problem, + const lp_problem_t& problem, + const std::vector& solution, + std::vector& user_solution); + +template void uncrush_dual_solution(const user_problem_t& user_problem, + const lp_problem_t& problem, + const std::vector& y, + const std::vector& z, + std::vector& user_y, + std::vector& user_z); + +template void uncrush_solution(const presolve_info_t& presolve_info, + const simplex_solver_settings_t& settings, + const std::vector& crushed_x, + const std::vector& crushed_y, + const std::vector& crushed_z, + std::vector& uncrushed_x, + std::vector& uncrushed_y, + std::vector& uncrushed_z); + +#endif + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/mip_heuristics/root_lp.cu b/cpp/src/mip_heuristics/root_lp.cu index d11fbda957..8e3346286b 100644 --- a/cpp/src/mip_heuristics/root_lp.cu +++ b/cpp/src/mip_heuristics/root_lp.cu @@ -68,7 +68,8 @@ run_pdlp_barrier_for_root_lp(problem_t* problem, template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t run_pdlp_barrier_for_root_lp(problem_t*, double, std::atomic*, int); +#ifdef MIP_INSTANTIATION_FLOAT template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t run_pdlp_barrier_for_root_lp(problem_t*, float, std::atomic*, int); - +#endif } // namespace cuopt::linear_programming::detail From 55b2afb2d54a10d1e90facab9702162e77e50307 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 9 Mar 2026 15:21:53 -0700 Subject: [PATCH 06/30] Fix sync issues --- cpp/src/branch_and_bound/branch_and_bound.cpp | 143 ++++++++---------- 1 file changed, 65 insertions(+), 78 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 16a76537aa..1b9f370cc5 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1801,80 +1801,61 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( i_t iter = 0; std::string solver_name = ""; - // Root node path + // Launch dual simplex on a background thread (it may be halted later if PDLP+crossover wins). lp_status_t root_status; - std::future root_status_future; - root_status_future = std::async(std::launch::async, - &solve_linear_program_with_advanced_basis, - std::ref(original_lp_), - exploration_stats_.start_time, - std::ref(lp_settings), - std::ref(root_relax_soln), - std::ref(basis_update), - std::ref(basic_list), - std::ref(nonbasic_list), - std::ref(root_vstatus), - std::ref(edge_norms), - nullptr); - - std::optional>> pdlp_future_opt; - if (enable_concurrent_lp_root_solve_ && mip_problem_ptr_ != nullptr) { - root_crossover_solution_set_.store(false, std::memory_order_release); - pdlp_future_opt = - std::async(std::launch::async, - &cuopt::linear_programming::detail::run_pdlp_barrier_for_root_lp, - mip_problem_ptr_, - lp_settings.time_limit, - get_root_concurrent_halt(), - pdlp_root_num_gpus_); - } - - // Wait for first completion: PDLP/Barrier future, dual simplex future, or legacy callback - while (*get_root_concurrent_halt() == 0) { - bool pdlp_ready = - pdlp_future_opt && pdlp_future_opt->valid() && - pdlp_future_opt->wait_for(std::chrono::milliseconds(0)) == std::future_status::ready; - bool ds_ready = - root_status_future.wait_for(std::chrono::milliseconds(0)) == std::future_status::ready; - if (root_crossover_solution_set_.load(std::memory_order_acquire) || pdlp_ready || ds_ready) { - break; - } - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } + std::future root_status_future = + std::async(std::launch::async, + &solve_linear_program_with_advanced_basis, + std::ref(original_lp_), + exploration_stats_.start_time, + std::ref(lp_settings), + std::ref(root_relax_soln), + std::ref(basis_update), + std::ref(basic_list), + std::ref(nonbasic_list), + std::ref(root_vstatus), + std::ref(edge_norms), + nullptr); + + const auto wait_timeout_s = static_cast(std::max(600.0, 2.0 * lp_settings.time_limit)); + const auto wait_timeout = std::chrono::seconds(wait_timeout_s); bool use_pdlp_path = false; - if (pdlp_future_opt && pdlp_future_opt->valid() && - pdlp_future_opt->wait_for(std::chrono::milliseconds(0)) == std::future_status::ready) { - auto result = pdlp_future_opt->get(); - root_crossover_soln_.x = result.primal; - root_crossover_soln_.y = result.dual; - root_crossover_soln_.z = result.reduced_costs; - root_crossover_soln_.objective = result.objective; - root_crossover_soln_.user_objective = result.user_objective; - root_crossover_soln_.iterations = result.iterations; - root_objective_ = result.objective; - root_crossover_solution_set_.store(true, std::memory_order_release); - if (lp_settings.on_first_lp_solution_available) { - lp_settings.on_first_lp_solution_available(result); - } - use_pdlp_path = true; - } - if (!use_pdlp_path && root_crossover_solution_set_.load(std::memory_order_acquire)) { - // Legacy path: set_root_relaxation_solution was invoked - root_relaxation_first_solution_t legacy_result; - legacy_result.primal = root_crossover_soln_.x; - legacy_result.dual = root_crossover_soln_.y; - legacy_result.reduced_costs = root_crossover_soln_.z; - legacy_result.objective = root_crossover_soln_.objective; - legacy_result.user_objective = root_crossover_soln_.user_objective; - legacy_result.iterations = root_crossover_soln_.iterations; - if (lp_settings.on_first_lp_solution_available) { - lp_settings.on_first_lp_solution_available(legacy_result); + if (enable_concurrent_lp_root_solve_ && mip_problem_ptr_ != nullptr) { + if (root_crossover_solution_set_.load(std::memory_order_acquire)) { + // Legacy path: set_root_relaxation_solution was already invoked (e.g. by diversity manager). + root_relaxation_first_solution_t legacy_result; + legacy_result.primal = root_crossover_soln_.x; + legacy_result.dual = root_crossover_soln_.y; + legacy_result.reduced_costs = root_crossover_soln_.z; + legacy_result.objective = root_crossover_soln_.objective; + legacy_result.user_objective = root_crossover_soln_.user_objective; + legacy_result.iterations = root_crossover_soln_.iterations; + if (lp_settings.on_first_lp_solution_available) { + lp_settings.on_first_lp_solution_available(legacy_result); + } + use_pdlp_path = true; + } else { + // Run PDLP/Barrier on the main thread, then crossover on the main thread. + auto result = cuopt::linear_programming::detail::run_pdlp_barrier_for_root_lp( + mip_problem_ptr_, lp_settings.time_limit, get_root_concurrent_halt(), pdlp_root_num_gpus_); + root_crossover_soln_.x = result.primal; + root_crossover_soln_.y = result.dual; + root_crossover_soln_.z = result.reduced_costs; + root_crossover_soln_.objective = result.objective; + root_crossover_soln_.user_objective = result.user_objective; + root_crossover_soln_.iterations = result.iterations; + root_objective_ = result.objective; + root_crossover_solution_set_.store(true, std::memory_order_release); + if (lp_settings.on_first_lp_solution_available) { + lp_settings.on_first_lp_solution_available(result); + } + use_pdlp_path = true; } } - if (use_pdlp_path || root_crossover_solution_set_.load(std::memory_order_acquire)) { + if (use_pdlp_path) { // Crush the root relaxation solution on converted user problem std::vector crushed_root_x; crush_primal_solution( @@ -1907,9 +1888,13 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( // Check if crossover was stopped by dual simplex if (crossover_status == crossover_status_t::OPTIMAL) { - set_root_concurrent_halt(1); // Stop dual simplex - root_status = root_status_future.get(); // Wait for dual simplex to finish - set_root_concurrent_halt(0); // Clear the concurrent halt flag + set_root_concurrent_halt(1); // Stop dual simplex + if (root_status_future.wait_for(wait_timeout) == std::future_status::ready) { + root_status = root_status_future.get(); + } else { + root_status = lp_status_t::OPTIMAL; + } + set_root_concurrent_halt(0); // Clear the concurrent halt flag // Override the root relaxation solution with the crossover solution root_relax_soln = root_crossover_soln_; root_vstatus = crossover_vstatus_; @@ -1926,14 +1911,9 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( } } if (basic_list.size() != original_lp_.num_rows) { - settings_.log.printf( - "basic_list size %d != m %d\n", basic_list.size(), original_lp_.num_rows); assert(basic_list.size() == original_lp_.num_rows); } if (nonbasic_list.size() != original_lp_.num_cols - original_lp_.num_rows) { - settings_.log.printf("nonbasic_list size %d != n - m %d\n", - nonbasic_list.size(), - original_lp_.num_cols - original_lp_.num_rows); assert(nonbasic_list.size() == original_lp_.num_cols - original_lp_.num_rows); } // Populate the basis_update from the crossover vstatus @@ -1946,7 +1926,6 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( nonbasic_list, crossover_vstatus_); if (refactor_status != 0) { - settings_.log.printf("Failed to refactor basis. %d deficient columns.\n", refactor_status); assert(refactor_status == 0); root_status = lp_status_t::NUMERICAL_ISSUES; } @@ -1958,13 +1937,21 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( iter = root_crossover_soln_.iterations; solver_name = "Barrier/PDLP and Crossover"; } else { - root_status = root_status_future.get(); + if (root_status_future.wait_for(wait_timeout) == std::future_status::ready) { + root_status = root_status_future.get(); + } else { + root_status = lp_status_t::TIME_LIMIT; + } user_objective = root_relax_soln_.user_objective; iter = root_relax_soln_.iterations; solver_name = "Dual Simplex"; } } else { - root_status = root_status_future.get(); + if (root_status_future.wait_for(wait_timeout) == std::future_status::ready) { + root_status = root_status_future.get(); + } else { + root_status = lp_status_t::TIME_LIMIT; + } root_relaxation_first_solution_t ds_result; ds_result.primal = root_relax_soln.x; ds_result.dual = root_relax_soln.y; From 1d213bda8e2e4557aaa121b967ebed05c701fcdf Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Tue, 10 Mar 2026 06:32:20 -0700 Subject: [PATCH 07/30] remove stale code --- cpp/src/branch_and_bound/branch_and_bound.cpp | 43 ++++++------------- cpp/src/branch_and_bound/branch_and_bound.hpp | 22 ---------- cpp/src/mip_heuristics/problem/problem.cu | 5 +-- cpp/src/mip_heuristics/problem/problem.cuh | 3 -- cpp/src/mip_heuristics/solver.cu | 10 ----- 5 files changed, 14 insertions(+), 69 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 1b9f370cc5..ce480c8982 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1823,36 +1823,20 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( bool use_pdlp_path = false; if (enable_concurrent_lp_root_solve_ && mip_problem_ptr_ != nullptr) { - if (root_crossover_solution_set_.load(std::memory_order_acquire)) { - // Legacy path: set_root_relaxation_solution was already invoked (e.g. by diversity manager). - root_relaxation_first_solution_t legacy_result; - legacy_result.primal = root_crossover_soln_.x; - legacy_result.dual = root_crossover_soln_.y; - legacy_result.reduced_costs = root_crossover_soln_.z; - legacy_result.objective = root_crossover_soln_.objective; - legacy_result.user_objective = root_crossover_soln_.user_objective; - legacy_result.iterations = root_crossover_soln_.iterations; - if (lp_settings.on_first_lp_solution_available) { - lp_settings.on_first_lp_solution_available(legacy_result); - } - use_pdlp_path = true; - } else { - // Run PDLP/Barrier on the main thread, then crossover on the main thread. - auto result = cuopt::linear_programming::detail::run_pdlp_barrier_for_root_lp( - mip_problem_ptr_, lp_settings.time_limit, get_root_concurrent_halt(), pdlp_root_num_gpus_); - root_crossover_soln_.x = result.primal; - root_crossover_soln_.y = result.dual; - root_crossover_soln_.z = result.reduced_costs; - root_crossover_soln_.objective = result.objective; - root_crossover_soln_.user_objective = result.user_objective; - root_crossover_soln_.iterations = result.iterations; - root_objective_ = result.objective; - root_crossover_solution_set_.store(true, std::memory_order_release); - if (lp_settings.on_first_lp_solution_available) { - lp_settings.on_first_lp_solution_available(result); - } - use_pdlp_path = true; + // Run PDLP/Barrier on the main thread, then crossover on the main thread. + auto result = cuopt::linear_programming::detail::run_pdlp_barrier_for_root_lp( + mip_problem_ptr_, lp_settings.time_limit, get_root_concurrent_halt(), pdlp_root_num_gpus_); + root_crossover_soln_.x = result.primal; + root_crossover_soln_.y = result.dual; + root_crossover_soln_.z = result.reduced_costs; + root_crossover_soln_.objective = result.objective; + root_crossover_soln_.user_objective = result.user_objective; + root_crossover_soln_.iterations = result.iterations; + root_objective_ = result.objective; + if (lp_settings.on_first_lp_solution_available) { + lp_settings.on_first_lp_solution_available(result); } + use_pdlp_path = true; } if (use_pdlp_path) { @@ -1980,7 +1964,6 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( } settings_.log.printf("\n"); - is_root_solution_set = true; return root_status; } diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 909c57e0c8..de7b8455e9 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -84,26 +84,6 @@ class branch_and_bound_t { // Set an initial guess based on the user_problem. This should be called before solve. void set_initial_guess(const std::vector& user_guess) { guess_ = user_guess; } - // Set the root solution found by PDLP - void set_root_relaxation_solution(const std::vector& primal, - const std::vector& dual, - const std::vector& reduced_costs, - f_t objective, - f_t user_objective, - i_t iterations) - { - if (!is_root_solution_set) { - root_crossover_soln_.x = primal; - root_crossover_soln_.y = dual; - root_crossover_soln_.z = reduced_costs; - root_objective_ = objective; - root_crossover_soln_.objective = objective; - root_crossover_soln_.user_objective = user_objective; - root_crossover_soln_.iterations = iterations; - root_crossover_solution_set_.store(true, std::memory_order_release); - } - } - // Set a solution based on the user problem during the course of the solve void set_new_solution(const std::vector& solution); @@ -201,10 +181,8 @@ class branch_and_bound_t { lp_solution_t root_relax_soln_; lp_solution_t root_crossover_soln_; std::vector edge_norms_; - std::atomic root_crossover_solution_set_{false}; bool enable_concurrent_lp_root_solve_{false}; std::atomic root_concurrent_halt_{0}; - bool is_root_solution_set{false}; cuopt::linear_programming::detail::problem_t* mip_problem_ptr_{nullptr}; i_t pdlp_root_num_gpus_{1}; diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index bc93a9d988..18d8e7ded0 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -148,8 +148,7 @@ problem_t::problem_t( Q_values(problem_.get_quadratic_objective_values()) { op_problem_cstr_body(problem_); - branch_and_bound_callback = nullptr; - set_root_relaxation_solution_callback = nullptr; + branch_and_bound_callback = nullptr; } template @@ -161,7 +160,6 @@ problem_t::problem_t(const problem_t& problem_) integer_fixed_problem(problem_.integer_fixed_problem), integer_fixed_variable_map(problem_.integer_fixed_variable_map, handle_ptr->get_stream()), branch_and_bound_callback(nullptr), - set_root_relaxation_solution_callback(nullptr), n_variables(problem_.n_variables), n_constraints(problem_.n_constraints), n_binary_vars(problem_.n_binary_vars), @@ -217,7 +215,6 @@ problem_t::problem_t(const problem_t& problem_, integer_fixed_problem(problem_.integer_fixed_problem), integer_fixed_variable_map(problem_.integer_fixed_variable_map, handle_ptr->get_stream()), branch_and_bound_callback(nullptr), - set_root_relaxation_solution_callback(nullptr), n_variables(problem_.n_variables), n_constraints(problem_.n_constraints), n_binary_vars(problem_.n_binary_vars), diff --git a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh index b9ca420820..489ad424f4 100644 --- a/cpp/src/mip_heuristics/problem/problem.cuh +++ b/cpp/src/mip_heuristics/problem/problem.cuh @@ -236,9 +236,6 @@ class problem_t { rmm::device_uvector integer_fixed_variable_map; std::function&)> branch_and_bound_callback; - std::function&, const std::vector&, const std::vector&, f_t, f_t, i_t)> - set_root_relaxation_solution_callback; typename mip_solver_settings_t::tolerances_t tolerances{}; i_t n_variables{0}; diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 8d5cd813bf..1257aa6409 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -288,16 +288,6 @@ solution_t mip_solver_t::run_solver() context.work_unit_scheduler_.register_context(branch_and_bound->get_work_unit_context()); // context.work_unit_scheduler_.verbose = true; - context.problem_ptr->set_root_relaxation_solution_callback = - std::bind(&dual_simplex::branch_and_bound_t::set_root_relaxation_solution, - branch_and_bound.get(), - std::placeholders::_1, - std::placeholders::_2, - std::placeholders::_3, - std::placeholders::_4, - std::placeholders::_5, - std::placeholders::_6); - if (timer_.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached during B&B setup"); solution_t sol(*context.problem_ptr); From 9937868924c872e3a4850e74f2eceb25ba55cf4c Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Thu, 19 Mar 2026 07:38:32 -0700 Subject: [PATCH 08/30] Launch 3 threads, one for dual simplex, one for PDLP+crossover, one for Barrier+crossover --- .../pdlp/solver_settings.hpp | 2 + .../pdlp/solver_solution.hpp | 1 + cpp/src/branch_and_bound/branch_and_bound.cpp | 333 ++++++++++++------ cpp/src/branch_and_bound/branch_and_bound.hpp | 22 +- .../branch_and_bound_from_mip.cu | 1 - cpp/src/dual_simplex/phase2.cpp | 4 - cpp/src/mip_heuristics/root_lp.cu | 194 ++++++++-- cpp/src/mip_heuristics/root_lp.cuh | 49 ++- cpp/src/pdlp/pdlp.cu | 6 +- cpp/src/pdlp/solve.cu | 19 +- cpp/src/pdlp/solver_solution.cu | 6 + 11 files changed, 486 insertions(+), 151 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index f6ad4c8619..5de5489576 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -248,6 +248,8 @@ class pdlp_solver_settings_t { bool inside_mip{false}; // For concurrent termination std::atomic* concurrent_halt{nullptr}; + /** If true, solver does not set concurrent_halt; caller sets it after crossover. */ + bool halt_set_by_caller{false}; static constexpr f_t minimal_absolute_tolerance = 1.0e-12; pdlp_hyper_params::pdlp_hyper_params_t hyper_params; // Holds the information of new variable lower and upper bounds for each climber in the format: diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp index 45a47e7401..9bd5796a89 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp @@ -235,6 +235,7 @@ class optimization_problem_solution_t : public base_solution_t { * @return rmm::device_uvector The device memory container for the reduced cost. */ rmm::device_uvector& get_reduced_cost(); + const rmm::device_uvector& get_reduced_cost() const; /** * @brief Get termination reason diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index ce480c8982..3cf273545e 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -32,12 +32,16 @@ #include #include #include +#include #include #include #include +#include #include #include #include +#include +#include #include #include #include @@ -251,7 +255,6 @@ branch_and_bound_t::branch_and_bound_t( Arow_(1, 1, 0), incumbent_(1), root_relax_soln_(1, 1), - root_crossover_soln_(1, 1), pc_(1), solver_status_(mip_status_t::UNSET), mip_problem_ptr_(nullptr), @@ -1786,6 +1789,84 @@ void branch_and_bound_t::single_threaded_solve() } } +template +void branch_and_bound_t::run_concurrent_pdlp_and_barrier_with_crossover( + const simplex_solver_settings_t& lp_settings, + crossover_status_t& crossover_status_out, + lp_solution_t& winner_crossover_soln_out, + std::vector& winner_crossover_vstatus_out, + f_t& winner_root_objective_out, + std::string& winner_solver_name_out, + std::atomic& winner, + std::mutex* first_solver_mutex, + bool* first_solver_callback_done, + std::thread& pdlp_thread_out, + std::thread& barrier_thread_out) +{ + // PDLP+crossover and Barrier+crossover each in a thread. winner: 0=none, 1=dual, 2=PDLP, + // 3=Barrier. + struct concurrent_shared_state_t { + std::mutex first_result_mutex; + }; + auto shared = std::make_shared(); + + auto do_crush_crossover = [this, + &lp_settings, + &crossover_status_out, + &winner_crossover_soln_out, + &winner_crossover_vstatus_out, + &winner_root_objective_out, + &winner_solver_name_out, + &winner, + first_solver_mutex, + first_solver_callback_done, + shared](const root_relaxation_first_solution_t& result, + const char* solver_name, + int winner_id) { + return cuopt::linear_programming::detail::run_crush_crossover_and_maybe_win( + result, + original_problem_, + original_lp_, + new_slacks_, + settings_, + exploration_stats_.start_time, + get_root_concurrent_halt(), + [this]() { set_root_concurrent_halt(1); }, + lp_settings.on_first_lp_solution_available, + first_solver_mutex, + first_solver_callback_done, + &shared->first_result_mutex, + &winner, + winner_id, + &crossover_status_out, + &winner_crossover_soln_out, + &winner_crossover_vstatus_out, + &winner_root_objective_out, + solver_name, + &winner_solver_name_out); + }; + + pdlp_thread_out = std::thread([this, &lp_settings, do_crush_crossover]() { + auto result = cuopt::linear_programming::detail::run_solver_for_root_lp( + mip_problem_ptr_, + lp_settings.time_limit, + get_root_concurrent_halt(), + pdlp_root_num_gpus_, + cuopt::linear_programming::method_t::PDLP); + (void)do_crush_crossover(result, "PDLP", 2); + }); + + barrier_thread_out = std::thread([this, &lp_settings, do_crush_crossover]() { + auto result = cuopt::linear_programming::detail::run_solver_for_root_lp( + mip_problem_ptr_, + lp_settings.time_limit, + get_root_concurrent_halt(), + pdlp_root_num_gpus_, + cuopt::linear_programming::method_t::Barrier); + (void)do_crush_crossover(result, "Barrier", 3); + }); +} + template lp_status_t branch_and_bound_t::solve_root_relaxation( simplex_solver_settings_t const& lp_settings, @@ -1801,94 +1882,148 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( i_t iter = 0; std::string solver_name = ""; - // Launch dual simplex on a background thread (it may be halted later if PDLP+crossover wins). - lp_status_t root_status; - std::future root_status_future = - std::async(std::launch::async, - &solve_linear_program_with_advanced_basis, - std::ref(original_lp_), - exploration_stats_.start_time, - std::ref(lp_settings), - std::ref(root_relax_soln), - std::ref(basis_update), - std::ref(basic_list), - std::ref(nonbasic_list), - std::ref(root_vstatus), - std::ref(edge_norms), - nullptr); - - const auto wait_timeout_s = static_cast(std::max(600.0, 2.0 * lp_settings.time_limit)); - const auto wait_timeout = std::chrono::seconds(wait_timeout_s); - - bool use_pdlp_path = false; + // Dual simplex runs on the main thread when concurrent; otherwise it runs alone on main. + auto dual_simplex_settings = std::make_shared>(lp_settings); + dual_simplex_settings->inside_mip = 1; + + lp_status_t root_status = lp_status_t::UNSET; + lp_status_t root_result_status = + lp_status_t::UNSET; // dual simplex result; set when dual returns, read in else branch + + bool use_pdlp_path = false; + bool dual_simplex_finished_first = false; + + crossover_status_t crossover_status = crossover_status_t::NUMERICAL_ISSUES; + lp_solution_t winner_crossover_soln(original_lp_.num_rows, original_lp_.num_cols); + std::vector winner_crossover_vstatus; + f_t winner_root_objective = 0; + std::string root_winner_solver_name; + + std::thread pdlp_thread; + std::thread barrier_thread; + std::thread dual_simplex_thread; + std::atomic winner{0}; // 0=none, 1=dual, 2=PDLP, 3=Barrier if (enable_concurrent_lp_root_solve_ && mip_problem_ptr_ != nullptr) { - // Run PDLP/Barrier on the main thread, then crossover on the main thread. - auto result = cuopt::linear_programming::detail::run_pdlp_barrier_for_root_lp( - mip_problem_ptr_, lp_settings.time_limit, get_root_concurrent_halt(), pdlp_root_num_gpus_); - root_crossover_soln_.x = result.primal; - root_crossover_soln_.y = result.dual; - root_crossover_soln_.z = result.reduced_costs; - root_crossover_soln_.objective = result.objective; - root_crossover_soln_.user_objective = result.user_objective; - root_crossover_soln_.iterations = result.iterations; - root_objective_ = result.objective; + // All three run in threads; main only starts them and joins. First to finish with OPTIMAL sets + // winner and halt. + std::mutex first_solver_mutex; + bool first_solver_callback_done = false; + run_concurrent_pdlp_and_barrier_with_crossover(lp_settings, + crossover_status, + winner_crossover_soln, + winner_crossover_vstatus, + winner_root_objective, + root_winner_solver_name, + winner, + &first_solver_mutex, + &first_solver_callback_done, + pdlp_thread, + barrier_thread); + + // Dual simplex does not call on_first_lp_solution: diversity manager prefers optimal first; + // only PDLP/Barrier feed first solution when they have one. + dual_simplex_thread = std::thread([this, + dual_simplex_settings, + &root_relax_soln, + &basis_update, + &basic_list, + &nonbasic_list, + &root_vstatus, + &edge_norms, + &root_result_status, + &winner]() { + lp_status_t status = + solve_linear_program_with_advanced_basis(original_lp_, + exploration_stats_.start_time, + *dual_simplex_settings, + root_relax_soln, + basis_update, + basic_list, + nonbasic_list, + root_vstatus, + edge_norms, + nullptr); + root_result_status = status; + int expected = 0; + if (status == lp_status_t::OPTIMAL && + winner.compare_exchange_strong(expected, 1, std::memory_order_acq_rel)) { + set_root_concurrent_halt(1); + } + }); + + struct join_threads_guard_t { + std::thread* a = nullptr; + std::thread* b = nullptr; + std::thread* c = nullptr; + ~join_threads_guard_t() + { + if (a && a->joinable()) { a->join(); } + if (b && b->joinable()) { b->join(); } + if (c && c->joinable()) { c->join(); } + } + } join_guard; + join_guard.a = &pdlp_thread; + join_guard.b = &barrier_thread; + join_guard.c = &dual_simplex_thread; + + pdlp_thread.join(); + barrier_thread.join(); + dual_simplex_thread.join(); + join_guard.a = nullptr; + join_guard.b = nullptr; + join_guard.c = nullptr; + + // Winner may have set concurrent_halt==1 to stop peer solvers. All threads are joined; reset + // the flag for the rest of B&B (subsequent LP solves, etc.). + set_root_concurrent_halt(0); + + const int w = winner.load(std::memory_order_acquire); + use_pdlp_path = (w == 2 || w == 3); + if (w == 1) { dual_simplex_finished_first = true; } + } else { + // Non-concurrent: run dual simplex on main only. + root_status = solve_linear_program_with_advanced_basis(original_lp_, + exploration_stats_.start_time, + *dual_simplex_settings, + root_relax_soln, + basis_update, + basic_list, + nonbasic_list, + root_vstatus, + edge_norms, + nullptr); + root_result_status = root_status; if (lp_settings.on_first_lp_solution_available) { - lp_settings.on_first_lp_solution_available(result); + root_relaxation_first_solution_t ds_result; + ds_result.primal = root_relax_soln.x; + ds_result.dual = root_relax_soln.y; + ds_result.reduced_costs = root_relax_soln.z; + ds_result.objective = root_relax_soln.objective; + ds_result.user_objective = root_relax_soln.user_objective; + ds_result.iterations = root_relax_soln.iterations; + lp_settings.on_first_lp_solution_available(ds_result); } - use_pdlp_path = true; } if (use_pdlp_path) { - // Crush the root relaxation solution on converted user problem - std::vector crushed_root_x; - crush_primal_solution( - original_problem_, original_lp_, root_crossover_soln_.x, new_slacks_, crushed_root_x); - std::vector crushed_root_y; - std::vector crushed_root_z; - - f_t dual_res_inf = crush_dual_solution(original_problem_, - original_lp_, - new_slacks_, - root_crossover_soln_.y, - root_crossover_soln_.z, - crushed_root_y, - crushed_root_z); - - root_crossover_soln_.x = crushed_root_x; - root_crossover_soln_.y = crushed_root_y; - root_crossover_soln_.z = crushed_root_z; - - // Call crossover on the crushed solution - auto root_crossover_settings = settings_; - root_crossover_settings.log.log = false; - root_crossover_settings.concurrent_halt = get_root_concurrent_halt(); - crossover_status_t crossover_status = crossover(original_lp_, - root_crossover_settings, - root_crossover_soln_, - exploration_stats_.start_time, - root_crossover_soln_, - crossover_vstatus_); - - // Check if crossover was stopped by dual simplex + root_objective_ = winner_root_objective; + auto root_crossover_settings = settings_; + root_crossover_settings.log.log = false; + // Single-threaded CPU post-processing (refactor_basis, edge norms); concurrent halt must not + // apply. + root_crossover_settings.concurrent_halt = nullptr; if (crossover_status == crossover_status_t::OPTIMAL) { - set_root_concurrent_halt(1); // Stop dual simplex - if (root_status_future.wait_for(wait_timeout) == std::future_status::ready) { - root_status = root_status_future.get(); - } else { - root_status = lp_status_t::OPTIMAL; - } - set_root_concurrent_halt(0); // Clear the concurrent halt flag - // Override the root relaxation solution with the crossover solution - root_relax_soln = root_crossover_soln_; - root_vstatus = crossover_vstatus_; + // Use winner's crossover solution; no wait. + root_relax_soln = winner_crossover_soln; + root_vstatus = winner_crossover_vstatus; root_status = lp_status_t::OPTIMAL; basic_list.clear(); nonbasic_list.reserve(original_lp_.num_cols - original_lp_.num_rows); nonbasic_list.clear(); // Get the basic list and nonbasic list from the vstatus for (i_t j = 0; j < original_lp_.num_cols; j++) { - if (crossover_vstatus_[j] == variable_status_t::BASIC) { + if (winner_crossover_vstatus[j] == variable_status_t::BASIC) { basic_list.push_back(j); } else { nonbasic_list.push_back(j); @@ -1908,7 +2043,7 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( exploration_stats_.start_time, basic_list, nonbasic_list, - crossover_vstatus_); + winner_crossover_vstatus); if (refactor_status != 0) { assert(refactor_status == 0); root_status = lp_status_t::NUMERICAL_ISSUES; @@ -1917,35 +2052,30 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( // Set the edge norms to a default value edge_norms.resize(original_lp_.num_cols, -1.0); set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms); - user_objective = root_crossover_soln_.user_objective; - iter = root_crossover_soln_.iterations; - solver_name = "Barrier/PDLP and Crossover"; + user_objective = winner_crossover_soln.user_objective; + iter = winner_crossover_soln.iterations; + solver_name = root_winner_solver_name + " and Crossover"; } else { - if (root_status_future.wait_for(wait_timeout) == std::future_status::ready) { - root_status = root_status_future.get(); - } else { - root_status = lp_status_t::TIME_LIMIT; + // Crossover winner path but crossover was not OPTIMAL. Map crossover outcome to lp_status_t. + switch (crossover_status) { + case crossover_status_t::TIME_LIMIT: root_status = lp_status_t::TIME_LIMIT; break; + case crossover_status_t::NUMERICAL_ISSUES: + root_status = lp_status_t::NUMERICAL_ISSUES; + break; + case crossover_status_t::CONCURRENT_LIMIT: root_status = lp_status_t::TIME_LIMIT; break; + case crossover_status_t::PRIMAL_FEASIBLE: + case crossover_status_t::DUAL_FEASIBLE: root_status = lp_status_t::NUMERICAL_ISSUES; break; + default: root_status = lp_status_t::NUMERICAL_ISSUES; break; } - user_objective = root_relax_soln_.user_objective; - iter = root_relax_soln_.iterations; - solver_name = "Dual Simplex"; + user_objective = winner_crossover_soln.user_objective; + iter = winner_crossover_soln.iterations; + solver_name = root_winner_solver_name + " and Crossover"; } } else { - if (root_status_future.wait_for(wait_timeout) == std::future_status::ready) { - root_status = root_status_future.get(); - } else { - root_status = lp_status_t::TIME_LIMIT; - } - root_relaxation_first_solution_t ds_result; - ds_result.primal = root_relax_soln.x; - ds_result.dual = root_relax_soln.y; - ds_result.reduced_costs = root_relax_soln.z; - ds_result.objective = root_relax_soln.objective; - ds_result.user_objective = root_relax_soln.user_objective; - ds_result.iterations = root_relax_soln.iterations; - if (lp_settings.on_first_lp_solution_available) { - lp_settings.on_first_lp_solution_available(ds_result); - } + // Use dual simplex result (root_result_status was set when dual simplex returned). + root_status = root_result_status; + (void)dual_simplex_finished_first; // used only to select path + // Diversity manager was already notified by whoever was first (dual simplex, PDLP, or Barrier). user_objective = root_relax_soln.user_objective; iter = root_relax_soln.iterations; solver_name = "Dual Simplex"; @@ -1965,6 +2095,7 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( settings_.log.printf("\n"); + set_root_concurrent_halt(0); return root_status; } diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index de7b8455e9..eeccb75af3 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -32,6 +33,10 @@ #include +#include +#include +#include + #include #include @@ -118,6 +123,21 @@ class branch_and_bound_t { std::vector& nonbasic_list, std::vector& edge_norms); + /** Starts PDLP+crossover and Barrier+crossover in two threads. winner is 0=none, 1=dual, 2=PDLP, + * 3=Barrier; first OPTIMAL sets it. first_solver_* for diversity manager callback. */ + void run_concurrent_pdlp_and_barrier_with_crossover( + const simplex_solver_settings_t& lp_settings, + crossover_status_t& crossover_status_out, + lp_solution_t& winner_crossover_soln_out, + std::vector& winner_crossover_vstatus_out, + f_t& winner_root_objective_out, + std::string& winner_solver_name_out, + std::atomic& winner, + std::mutex* first_solver_mutex, + bool* first_solver_callback_done, + std::thread& pdlp_thread_out, + std::thread& barrier_thread_out); + i_t find_reduced_cost_fixings(f_t upper_bound, std::vector& lower_bounds, std::vector& upper_bounds); @@ -176,10 +196,8 @@ class branch_and_bound_t { // Variables for the root node in the search tree. std::vector root_vstatus_; - std::vector crossover_vstatus_; f_t root_objective_; lp_solution_t root_relax_soln_; - lp_solution_t root_crossover_soln_; std::vector edge_norms_; bool enable_concurrent_lp_root_solve_{false}; std::atomic root_concurrent_halt_{0}; diff --git a/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu b/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu index d4e713ebff..4e90956f68 100644 --- a/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu +++ b/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu @@ -46,7 +46,6 @@ branch_and_bound_t::branch_and_bound_t( Arow_(1, 1, 0), incumbent_(1), root_relax_soln_(1, 1), - root_crossover_soln_(1, 1), pc_(1), solver_status_(mip_status_t::UNSET), mip_problem_ptr_(mip_problem_ptr), diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 426d9a7535..7cfed77385 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -3551,10 +3551,6 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); ft.print_stats(); } - if (settings.inside_mip && settings.concurrent_halt != nullptr) { - settings.log.debug("Setting concurrent halt in Dual Simplex Phase 2\n"); - *settings.concurrent_halt = 1; - } } return status; } diff --git a/cpp/src/mip_heuristics/root_lp.cu b/cpp/src/mip_heuristics/root_lp.cu index 8e3346286b..b181db43cd 100644 --- a/cpp/src/mip_heuristics/root_lp.cu +++ b/cpp/src/mip_heuristics/root_lp.cu @@ -12,18 +12,48 @@ #include #include +#include +#include #include #include #include namespace cuopt::linear_programming::detail { +namespace { template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t -run_pdlp_barrier_for_root_lp(problem_t* problem, - f_t time_limit, - std::atomic* concurrent_halt, - i_t num_gpus) +copy_lp_result_to_root_solution(problem_t* problem, + const optimization_problem_solution_t& lp_result) +{ + cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t result; + auto stream = problem->handle_ptr->get_stream(); + result.primal.resize(lp_result.get_primal_solution().size()); + result.dual.resize(lp_result.get_dual_solution().size()); + result.reduced_costs.resize(lp_result.get_reduced_cost().size()); + raft::copy( + result.primal.data(), lp_result.get_primal_solution().data(), result.primal.size(), stream); + raft::copy(result.dual.data(), lp_result.get_dual_solution().data(), result.dual.size(), stream); + raft::copy(result.reduced_costs.data(), + lp_result.get_reduced_cost().data(), + result.reduced_costs.size(), + stream); + problem->handle_ptr->sync_stream(); + result.objective = problem->get_solver_obj_from_user_obj(lp_result.get_objective_value()); + result.user_objective = lp_result.get_objective_value(); + result.iterations = lp_result.get_additional_termination_information().number_of_steps_taken; + return result; +} + +} // namespace + +template +cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_solver_for_root_lp(problem_t* problem, + f_t time_limit, + std::atomic* concurrent_halt, + i_t num_gpus, + method_t method) { convert_greater_to_less(*problem); f_t tolerance_divisor = @@ -37,39 +67,153 @@ run_pdlp_barrier_for_root_lp(problem_t* problem, pdlp_settings.time_limit = time_limit; pdlp_settings.first_primal_feasible = false; pdlp_settings.concurrent_halt = concurrent_halt; - pdlp_settings.method = method_t::Concurrent; + pdlp_settings.halt_set_by_caller = true; // B&B sets halt only after crossover + pdlp_settings.method = method; pdlp_settings.inside_mip = true; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; pdlp_settings.num_gpus = num_gpus; pdlp_settings.presolver = presolver_t::None; + pdlp_settings.crossover = false; // B&B does crush + crossover for both paths + if (method == method_t::PDLP) { pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; } timer_t lp_timer(time_limit); auto lp_result = solve_lp_with_method(*problem, pdlp_settings, lp_timer); + return copy_lp_result_to_root_solution(problem, lp_result); +} - cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t result; - auto stream = problem->handle_ptr->get_stream(); - result.primal.resize(lp_result.get_primal_solution().size()); - result.dual.resize(lp_result.get_dual_solution().size()); - result.reduced_costs.resize(lp_result.get_reduced_cost().size()); - raft::copy( - result.primal.data(), lp_result.get_primal_solution().data(), result.primal.size(), stream); - raft::copy(result.dual.data(), lp_result.get_dual_solution().data(), result.dual.size(), stream); - raft::copy(result.reduced_costs.data(), - lp_result.get_reduced_cost().data(), - result.reduced_costs.size(), - stream); - problem->handle_ptr->sync_stream(); - result.objective = problem->get_solver_obj_from_user_obj(lp_result.get_objective_value()); - result.user_objective = lp_result.get_objective_value(); - result.iterations = lp_result.get_additional_termination_information().number_of_steps_taken; - return result; +template +cuopt::linear_programming::dual_simplex::crossover_status_t run_crush_crossover_and_maybe_win( + const cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t& result, + const cuopt::linear_programming::dual_simplex::user_problem_t& original_problem, + const cuopt::linear_programming::dual_simplex::lp_problem_t& original_lp, + const std::vector& new_slacks, + const cuopt::linear_programming::dual_simplex::simplex_solver_settings_t& + crossover_settings, + f_t start_time, + std::atomic* concurrent_halt, + std::function set_halter, + std::function&)> + on_first_lp_solution, + std::mutex* first_solver_mutex, + bool* first_solver_callback_done, + std::mutex* first_result_mutex, + std::atomic* winner, + int winner_id, + cuopt::linear_programming::dual_simplex::crossover_status_t* first_crossover_status_out, + cuopt::linear_programming::dual_simplex::lp_solution_t* winner_crossover_soln, + std::vector* winner_crossover_vstatus, + f_t* winner_root_objective, + const char* this_solver_name, + std::string* winner_solver_name_out) +{ + using namespace cuopt::linear_programming::dual_simplex; + if (on_first_lp_solution) { + std::lock_guard lock(*first_solver_mutex); + if (!*first_solver_callback_done) { + *first_solver_callback_done = true; + on_first_lp_solution(result); + } + } + lp_solution_t soln(original_lp.num_rows, original_lp.num_cols); + soln.x = result.primal; + soln.y = result.dual; + soln.z = result.reduced_costs; + soln.objective = result.objective; + soln.user_objective = result.user_objective; + soln.iterations = result.iterations; + std::vector crushed_x; + crush_primal_solution(original_problem, original_lp, soln.x, new_slacks, crushed_x); + std::vector crushed_y; + std::vector crushed_z; + (void)crush_dual_solution( + original_problem, original_lp, new_slacks, soln.y, soln.z, crushed_y, crushed_z); + soln.x = std::move(crushed_x); + soln.y = std::move(crushed_y); + soln.z = std::move(crushed_z); + lp_solution_t crossover_out(original_lp.num_rows, original_lp.num_cols); + std::vector vstatus_out(original_lp.num_cols); + auto root_crossover_settings = crossover_settings; + root_crossover_settings.inside_mip = + 1; // root LP crossover; dual_phase2 uses this to set concurrent_halt + root_crossover_settings.log.log = false; + root_crossover_settings.concurrent_halt = concurrent_halt; + crossover_status_t status = + crossover(original_lp, root_crossover_settings, soln, start_time, crossover_out, vstatus_out); + { + std::lock_guard lock(*first_result_mutex); + int expected = 0; + if (status == crossover_status_t::OPTIMAL && + winner->compare_exchange_strong(expected, winner_id, std::memory_order_acq_rel)) { + *first_crossover_status_out = status; + if (winner_solver_name_out) { *winner_solver_name_out = this_solver_name; } + winner_crossover_soln->x = std::move(crossover_out.x); + winner_crossover_soln->y = std::move(crossover_out.y); + winner_crossover_soln->z = std::move(crossover_out.z); + winner_crossover_soln->objective = result.objective; + winner_crossover_soln->user_objective = result.user_objective; + winner_crossover_soln->iterations = result.iterations; + *winner_root_objective = result.objective; + *winner_crossover_vstatus = std::move(vstatus_out); + set_halter(); + } else { + if (winner->load(std::memory_order_acquire) != 0) { status = *first_crossover_status_out; } + } + } + return status; } template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t -run_pdlp_barrier_for_root_lp(problem_t*, double, std::atomic*, int); +run_solver_for_root_lp( + problem_t*, double, std::atomic*, int, method_t); +template cuopt::linear_programming::dual_simplex::crossover_status_t +run_crush_crossover_and_maybe_win( + const cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t&, + const cuopt::linear_programming::dual_simplex::user_problem_t&, + const cuopt::linear_programming::dual_simplex::lp_problem_t&, + const std::vector&, + const cuopt::linear_programming::dual_simplex::simplex_solver_settings_t&, + double, + std::atomic*, + std::function, + std::function&)>, + std::mutex*, + bool*, + std::mutex*, + std::atomic*, + int, + cuopt::linear_programming::dual_simplex::crossover_status_t*, + cuopt::linear_programming::dual_simplex::lp_solution_t*, + std::vector*, + double*, + const char*, + std::string*); #ifdef MIP_INSTANTIATION_FLOAT template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t -run_pdlp_barrier_for_root_lp(problem_t*, float, std::atomic*, int); +run_solver_for_root_lp(problem_t*, float, std::atomic*, int, method_t); +template cuopt::linear_programming::dual_simplex::crossover_status_t +run_crush_crossover_and_maybe_win( + const cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t&, + const cuopt::linear_programming::dual_simplex::user_problem_t&, + const cuopt::linear_programming::dual_simplex::lp_problem_t&, + const std::vector&, + const cuopt::linear_programming::dual_simplex::simplex_solver_settings_t&, + float, + std::atomic*, + std::function, + std::function&)>, + std::mutex*, + bool*, + std::mutex*, + std::atomic*, + int, + cuopt::linear_programming::dual_simplex::crossover_status_t*, + cuopt::linear_programming::dual_simplex::lp_solution_t*, + std::vector*, + float*, + const char*, + std::string*); #endif } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/root_lp.cuh b/cpp/src/mip_heuristics/root_lp.cuh index 8683ebe820..2f87884fe9 100644 --- a/cpp/src/mip_heuristics/root_lp.cuh +++ b/cpp/src/mip_heuristics/root_lp.cuh @@ -7,25 +7,60 @@ #pragma once +#include +#include #include #include #include +#include +#include +#include namespace cuopt::linear_programming::detail { template class problem_t; +/** Run PDLP or Barrier for root LP. Uses concurrent_halt to stop; does not set it. Crossover done + * by caller. */ +template +cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_solver_for_root_lp(problem_t* problem, + f_t time_limit, + std::atomic* concurrent_halt, + i_t num_gpus, + method_t method); + /** - * Run PDLP/Barrier for root LP (used by branch-and-bound when concurrent root solve is enabled). - * Implemented in root_lp.cu so GPU code (convert_greater_to_less, solve_lp_with_method) can run. + * Run crush + crossover on a root LP solution and optionally store as winner (first to finish). + * Used by B&B when running PDLP and Barrier concurrently; both paths call this after their solver + * returns. */ template -cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t -run_pdlp_barrier_for_root_lp(problem_t* problem, - f_t time_limit, - std::atomic* concurrent_halt, - i_t num_gpus); +cuopt::linear_programming::dual_simplex::crossover_status_t run_crush_crossover_and_maybe_win( + const cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t& result, + const cuopt::linear_programming::dual_simplex::user_problem_t& original_problem, + const cuopt::linear_programming::dual_simplex::lp_problem_t& original_lp, + const std::vector& new_slacks, + const cuopt::linear_programming::dual_simplex::simplex_solver_settings_t& + crossover_settings, + f_t start_time, + std::atomic* concurrent_halt, + std::function set_halter, + std::function&)> + on_first_lp_solution, + std::mutex* first_solver_mutex, + bool* first_solver_callback_done, + std::mutex* first_result_mutex, + std::atomic* winner, + int winner_id, + cuopt::linear_programming::dual_simplex::crossover_status_t* first_crossover_status_out, + cuopt::linear_programming::dual_simplex::lp_solution_t* winner_crossover_soln, + std::vector* winner_crossover_vstatus, + f_t* winner_root_objective, + const char* this_solver_name, + std::string* winner_solver_name_out); } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index cda60cf5ff..e36ab732a7 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -445,9 +445,9 @@ std::optional> pdlp_solver_t pdlp_termination_status_t::IterationLimit)); } - // Check for concurrent limit - if (settings_.method == method_t::Concurrent && settings_.concurrent_halt != nullptr && - *settings_.concurrent_halt == 1) { + // Check for concurrent limit (whenever caller provides a halt flag, e.g. B&B racing PDLP vs + // Barrier) + if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { #ifdef PDLP_VERBOSE_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); std::cout << "Concurrent Limit reached, returning current solution" << std::endl; diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 5e1e25bbee..39bbda6bd3 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -459,9 +459,10 @@ run_barrier(dual_simplex::user_problem_t& user_problem, CUOPT_LOG_CONDITIONAL_INFO( !settings.inside_mip, "Barrier finished in %.2f seconds", timer.elapsed_time()); - if (settings.concurrent_halt != nullptr && (status == dual_simplex::lp_status_t::OPTIMAL || - status == dual_simplex::lp_status_t::UNBOUNDED || - status == dual_simplex::lp_status_t::INFEASIBLE)) { + if (!settings.halt_set_by_caller && settings.concurrent_halt != nullptr && + (status == dual_simplex::lp_status_t::OPTIMAL || + status == dual_simplex::lp_status_t::UNBOUNDED || + status == dual_simplex::lp_status_t::INFEASIBLE)) { // We finished. Tell PDLP to stop if it is still running. *settings.concurrent_halt = 1; } @@ -531,9 +532,10 @@ run_dual_simplex(dual_simplex::user_problem_t& user_problem, CUOPT_LOG_CONDITIONAL_INFO( !settings.inside_mip, "Dual simplex finished in %.2f seconds", timer.elapsed_time()); - if (settings.concurrent_halt != nullptr && (status == dual_simplex::lp_status_t::OPTIMAL || - status == dual_simplex::lp_status_t::UNBOUNDED || - status == dual_simplex::lp_status_t::INFEASIBLE)) { + if (!settings.halt_set_by_caller && settings.concurrent_halt != nullptr && + (status == dual_simplex::lp_status_t::OPTIMAL || + status == dual_simplex::lp_status_t::UNBOUNDED || + status == dual_simplex::lp_status_t::INFEASIBLE)) { // We finished. Tell PDLP to stop if it is still running. *settings.concurrent_halt = 1; } @@ -677,8 +679,9 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& CUOPT_LOG_CONDITIONAL_INFO( !settings.inside_mip, "Crossover status %s", sol.get_termination_status_string().c_str()); } - if (settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && - crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { + if (!settings.halt_set_by_caller && settings.method == method_t::Concurrent && + settings.concurrent_halt != nullptr && crossover_info == 0 && + sol.get_termination_status() == pdlp_termination_status_t::Optimal) { // We finished. Tell dual simplex to stop if it is still running. CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); *settings.concurrent_halt = 1; diff --git a/cpp/src/pdlp/solver_solution.cu b/cpp/src/pdlp/solver_solution.cu index a8001b91c1..29179a8c21 100644 --- a/cpp/src/pdlp/solver_solution.cu +++ b/cpp/src/pdlp/solver_solution.cu @@ -372,6 +372,12 @@ rmm::device_uvector& optimization_problem_solution_t::get_reduced return reduced_cost_; } +template +const rmm::device_uvector& optimization_problem_solution_t::get_reduced_cost() const +{ + return reduced_cost_; +} + template pdlp_termination_status_t optimization_problem_solution_t::get_termination_status( i_t id) const From 0f1a6f9f4be9eb23e0c63b387e45dbd927f343a6 Mon Sep 17 00:00:00 2001 From: Jake Awe Date: Thu, 19 Mar 2026 11:02:52 -0500 Subject: [PATCH 09/30] Revert "Prepare release/26.04" This reverts commit 4d5f5e530f0e506314562a5ba80530cbb90f056e. --- .github/workflows/build.yaml | 28 ++++++++--------- .github/workflows/pr.yaml | 30 +++++++++---------- .github/workflows/test.yaml | 10 +++---- .../trigger-breaking-change-alert.yaml | 2 +- RAPIDS_BRANCH | 2 +- .../routing/routing-example.ipynb | 2 +- docs/cuopt/source/faq.rst | 2 +- 7 files changed, 38 insertions(+), 38 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3eb1f1f066..593d48bd74 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -45,7 +45,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -65,7 +65,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: wheel-publish-cuopt-mps-parser: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -99,7 +99,7 @@ jobs: wheel-build-libcuopt: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -112,7 +112,7 @@ jobs: wheel-publish-libcuopt: needs: wheel-build-libcuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -123,7 +123,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -135,7 +135,7 @@ jobs: wheel-publish-cuopt: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -145,7 +145,7 @@ jobs: package-type: python wheel-build-cuopt-server: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -160,7 +160,7 @@ jobs: wheel-publish-cuopt-server: needs: wheel-build-cuopt-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -171,7 +171,7 @@ jobs: docs-build: needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} node_type: "gpu-l4-latest-1" @@ -185,7 +185,7 @@ jobs: script: "ci/build_docs.sh" wheel-build-cuopt-sh-client: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -201,7 +201,7 @@ jobs: wheel-publish-cuopt-sh-client: needs: wheel-build-cuopt-sh-client secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 47a3bd9fca..95741c1fb5 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -34,7 +34,7 @@ jobs: - wheel-build-cuopt-sh-client - test-self-hosted-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@python-3.14 if: always() with: needs: ${{ toJSON(needs) }} @@ -111,7 +111,7 @@ jobs: changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@python-3.14 with: files_yaml: | build_docs: @@ -279,20 +279,20 @@ jobs: - '!gemini-extension.json' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.14 with: enable_check_generated_files: false conda-cpp-build: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_cpp.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }} conda-cpp-tests: needs: [conda-cpp-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request @@ -308,14 +308,14 @@ jobs: conda-python-build: needs: [conda-cpp-build, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_python.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-tests: needs: [conda-python-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_conda with: run_codecov: false @@ -332,7 +332,7 @@ jobs: docs-build: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).build_docs with: build_type: pull-request @@ -345,7 +345,7 @@ jobs: wheel-build-cuopt-mps-parser: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt_mps_parser.sh @@ -357,7 +357,7 @@ jobs: wheel-build-libcuopt: needs: [wheel-build-cuopt-mps-parser, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: ${{ needs.compute-matrix-filters.outputs.libcuopt_filter }} @@ -368,7 +368,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt.sh @@ -377,7 +377,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-tests-cuopt: needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request @@ -393,7 +393,7 @@ jobs: wheel-build-cuopt-server: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt_server.sh @@ -405,7 +405,7 @@ jobs: wheel-build-cuopt-sh-client: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt_sh_client.sh @@ -417,7 +417,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_sh_client_filter }} wheel-tests-cuopt-server: needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 9ad7609e8a..e88b7829f5 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -27,7 +27,7 @@ on: jobs: conda-cpp-tests: - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-python-tests: - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main with: run_codecov: false build_type: ${{ inputs.build_type }} @@ -58,7 +58,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt: - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt-server: - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index d394b97db4..57b178740c 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -15,7 +15,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@release/26.04 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@main with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} diff --git a/RAPIDS_BRANCH b/RAPIDS_BRANCH index d5ea6ced53..ba2906d066 100644 --- a/RAPIDS_BRANCH +++ b/RAPIDS_BRANCH @@ -1 +1 @@ -release/26.04 +main diff --git a/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb b/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb index 9df5e2c0c7..9cfc05f9bb 100644 --- a/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb +++ b/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb @@ -147,7 +147,7 @@ "metadata": {}, "source": [ "#### Compressed Sparse Row (CSR) representation of above weighted waypoint graph.\n", - "For details on the CSR encoding of the above graph see the [cost_matrix_and_waypoint_graph_creation.ipynb](https://github.com/NVIDIA/cuopt-examples/blob/release/26.04/intra-factory_transport/cost_matrix_and_waypoint_graph_creation.ipynb) notebook." + "For details on the CSR encoding of the above graph see the [cost_matrix_and_waypoint_graph_creation.ipynb](https://github.com/NVIDIA/cuopt-examples/blob/main/intra-factory_transport/cost_matrix_and_waypoint_graph_creation.ipynb) notebook." ] }, { diff --git a/docs/cuopt/source/faq.rst b/docs/cuopt/source/faq.rst index 1985052531..0c3a0e219f 100644 --- a/docs/cuopt/source/faq.rst +++ b/docs/cuopt/source/faq.rst @@ -283,7 +283,7 @@ Routing FAQ So in either case, task locations are actually integer indices into another structure. - If you have (lat, long) values, then you can generate a cost matrix using a map API. cuOpt does not directly connect to a third-party map engine, but that can be done outside of cuOpt as shown `here `__. + If you have (lat, long) values, then you can generate a cost matrix using a map API. cuOpt does not directly connect to a third-party map engine, but that can be done outside of cuOpt as shown `here `__. .. dropdown:: Is it possible to define constraints such as refrigerated vehicles required for certain orders? From e2ea6872842475d01f95e3ed8f8abaee0534fdc9 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Thu, 19 Mar 2026 11:15:30 -0500 Subject: [PATCH 10/30] Update to 26.06 (#975) This PR updates the repository to version 26.06. This is part of the 26.04 release burndown process. --- .claude-plugin/marketplace.json | 2 +- .cursor-plugin/plugin.json | 2 +- .github/workflows/build.yaml | 30 +++++------ .../workflows/build_test_publish_images.yaml | 2 +- .github/workflows/pr.yaml | 32 ++++++------ .github/workflows/test.yaml | 2 +- README.md | 12 ++--- VERSION | 2 +- .../all_cuda-129_arch-aarch64.yaml | 10 ++-- .../all_cuda-129_arch-x86_64.yaml | 10 ++-- .../all_cuda-131_arch-aarch64.yaml | 10 ++-- .../all_cuda-131_arch-x86_64.yaml | 10 ++-- dependencies.yaml | 50 +++++++++---------- gemini-extension.json | 2 +- helmchart/cuopt-server/Chart.yaml | 4 +- helmchart/cuopt-server/values.yaml | 2 +- python/cuopt/pyproject.toml | 18 +++---- python/cuopt_self_hosted/pyproject.toml | 2 +- python/cuopt_server/pyproject.toml | 2 +- python/libcuopt/pyproject.toml | 8 +-- skills/cuopt-developer/SKILL.md | 2 +- skills/cuopt-installation-api-c/SKILL.md | 2 +- skills/cuopt-installation-api-python/SKILL.md | 2 +- skills/cuopt-installation-common/SKILL.md | 2 +- skills/cuopt-installation-developer/SKILL.md | 2 +- skills/cuopt-lp-milp-api-c/SKILL.md | 2 +- skills/cuopt-lp-milp-api-cli/SKILL.md | 2 +- skills/cuopt-lp-milp-api-python/SKILL.md | 2 +- skills/cuopt-qp-api-c/SKILL.md | 2 +- skills/cuopt-qp-api-cli/SKILL.md | 2 +- skills/cuopt-qp-api-python/SKILL.md | 2 +- skills/cuopt-routing-api-python/SKILL.md | 2 +- skills/cuopt-server-api-python/SKILL.md | 2 +- skills/cuopt-server-common/SKILL.md | 2 +- skills/cuopt-user-rules/SKILL.md | 2 +- skills/lp-milp-formulation/SKILL.md | 2 +- skills/qp-formulation/SKILL.md | 2 +- skills/routing-formulation/SKILL.md | 2 +- skills/skill-evolution/SKILL.md | 4 +- 39 files changed, 126 insertions(+), 126 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 4c5df380f6..6ddf2583c4 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -5,7 +5,7 @@ }, "metadata": { "description": "Agent skills for NVIDIA cuOpt: routing (VRP, TSP, PDP), LP/MILP/QP, installation (Python/C/developer), and REST server.", - "version": "26.04.00" + "version": "26.06.00" }, "plugins": [ { diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json index 5f34873671..e740506140 100644 --- a/.cursor-plugin/plugin.json +++ b/.cursor-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "nvidia-cuopt-skills", "description": "Agent skills for NVIDIA cuOpt: routing (VRP, TSP, PDP), LP/MILP/QP, installation (Python/C/developer), and REST server. Use when building or solving optimization with cuOpt.", - "version": "26.04.00", + "version": "26.06.00", "author": { "name": "NVIDIA" }, diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 593d48bd74..a945cde8ec 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -45,7 +45,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -65,7 +65,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: wheel-publish-cuopt-mps-parser: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -99,7 +99,7 @@ jobs: wheel-build-libcuopt: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -112,7 +112,7 @@ jobs: wheel-publish-libcuopt: needs: wheel-build-libcuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -123,7 +123,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -135,7 +135,7 @@ jobs: wheel-publish-cuopt: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -145,7 +145,7 @@ jobs: package-type: python wheel-build-cuopt-server: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -160,7 +160,7 @@ jobs: wheel-publish-cuopt-server: needs: wheel-build-cuopt-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -171,7 +171,7 @@ jobs: docs-build: needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} node_type: "gpu-l4-latest-1" @@ -181,11 +181,11 @@ jobs: arch: "amd64" file_to_upload: "docs/cuopt/build/html/" artifact-name: "cuopt_docs" - container_image: "rapidsai/ci-conda:26.04-latest" + container_image: "rapidsai/ci-conda:26.06-latest" script: "ci/build_docs.sh" wheel-build-cuopt-sh-client: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -201,7 +201,7 @@ jobs: wheel-publish-cuopt-sh-client: needs: wheel-build-cuopt-sh-client secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/build_test_publish_images.yaml b/.github/workflows/build_test_publish_images.yaml index f8f7366e13..17d4e9ab57 100644 --- a/.github/workflows/build_test_publish_images.yaml +++ b/.github/workflows/build_test_publish_images.yaml @@ -55,7 +55,7 @@ jobs: compute-matrix: runs-on: ubuntu-latest container: - image: rapidsai/ci-conda:26.04-latest + image: rapidsai/ci-conda:26.06-latest outputs: MATRIX: ${{ steps.compute-matrix.outputs.MATRIX }} CUOPT_VER: ${{ steps.compute-cuopt-ver.outputs.CUOPT_VER }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 95741c1fb5..a652c23b9a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -34,7 +34,7 @@ jobs: - wheel-build-cuopt-sh-client - test-self-hosted-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@main if: always() with: needs: ${{ toJSON(needs) }} @@ -111,7 +111,7 @@ jobs: changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@main with: files_yaml: | build_docs: @@ -279,20 +279,20 @@ jobs: - '!gemini-extension.json' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@main with: enable_check_generated_files: false conda-cpp-build: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@main with: build_type: pull-request script: ci/build_cpp.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }} conda-cpp-tests: needs: [conda-cpp-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request @@ -308,14 +308,14 @@ jobs: conda-python-build: needs: [conda-cpp-build, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@main with: build_type: pull-request script: ci/build_python.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-tests: needs: [conda-python-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_conda with: run_codecov: false @@ -332,7 +332,7 @@ jobs: docs-build: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main if: fromJSON(needs.changed-files.outputs.changed_file_groups).build_docs with: build_type: pull-request @@ -340,12 +340,12 @@ jobs: arch: "amd64" file_to_upload: "docs/cuopt/build/html/" artifact-name: "cuopt_docs" - container_image: "rapidsai/ci-conda:26.04-latest" + container_image: "rapidsai/ci-conda:26.06-latest" script: "ci/build_docs.sh" wheel-build-cuopt-mps-parser: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: build_type: pull-request script: ci/build_wheel_cuopt_mps_parser.sh @@ -357,7 +357,7 @@ jobs: wheel-build-libcuopt: needs: [wheel-build-cuopt-mps-parser, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: ${{ needs.compute-matrix-filters.outputs.libcuopt_filter }} @@ -368,7 +368,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: build_type: pull-request script: ci/build_wheel_cuopt.sh @@ -377,7 +377,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-tests-cuopt: needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request @@ -393,7 +393,7 @@ jobs: wheel-build-cuopt-server: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: build_type: pull-request script: ci/build_wheel_cuopt_server.sh @@ -405,7 +405,7 @@ jobs: wheel-build-cuopt-sh-client: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main with: build_type: pull-request script: ci/build_wheel_cuopt_sh_client.sh @@ -417,7 +417,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_sh_client_filter }} wheel-tests-cuopt-server: needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e88b7829f5..a8cc5f2943 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -97,5 +97,5 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-l4-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:26.04-latest" + container_image: "rapidsai/ci-conda:26.06-latest" script: ci/test_notebooks.sh diff --git a/README.md b/README.md index 379a48c350..95c8598d77 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # cuOpt - GPU-accelerated Optimization [![Build Status](https://github.com/NVIDIA/cuopt/actions/workflows/build.yaml/badge.svg)](https://github.com/NVIDIA/cuopt/actions/workflows/build.yaml) -[![Version](https://img.shields.io/badge/version-26.04.00-blue)](https://github.com/NVIDIA/cuopt/releases) +[![Version](https://img.shields.io/badge/version-26.06.00-blue)](https://github.com/NVIDIA/cuopt/releases) [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen)](https://docs.nvidia.com/cuopt/user-guide/latest/introduction.html) [![Docker Hub](https://img.shields.io/badge/docker-nvidia%2Fcuopt-blue?logo=docker)](https://hub.docker.com/r/nvidia/cuopt) [![Examples](https://img.shields.io/badge/examples-cuopt--examples-orange)](https://github.com/NVIDIA/cuopt-examples) @@ -83,7 +83,7 @@ For CUDA 12.x: pip install \ --extra-index-url=https://pypi.nvidia.com \ nvidia-cuda-runtime-cu12==12.9.* \ - cuopt-server-cu12==26.04.* cuopt-sh-client==26.04.* + cuopt-server-cu12==26.06.* cuopt-sh-client==26.06.* ``` Development wheels are available as nightlies, please update `--extra-index-url` to `https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/` to install latest nightly packages. @@ -91,7 +91,7 @@ Development wheels are available as nightlies, please update `--extra-index-url` pip install --pre \ --extra-index-url=https://pypi.nvidia.com \ --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - cuopt-server-cu12==26.04.* cuopt-sh-client==26.04.* + cuopt-server-cu12==26.06.* cuopt-sh-client==26.06.* ``` For CUDA 13.x: @@ -99,7 +99,7 @@ For CUDA 13.x: ```bash pip install \ --extra-index-url=https://pypi.nvidia.com \ - cuopt-server-cu13==26.04.* cuopt-sh-client==26.04.* + cuopt-server-cu13==26.06.* cuopt-sh-client==26.06.* ``` Development wheels are available as nightlies, please update `--extra-index-url` to `https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/` to install latest nightly packages. @@ -107,7 +107,7 @@ Development wheels are available as nightlies, please update `--extra-index-url` pip install --pre \ --extra-index-url=https://pypi.nvidia.com \ --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - cuopt-server-cu13==26.04.* cuopt-sh-client==26.04.* + cuopt-server-cu13==26.06.* cuopt-sh-client==26.06.* ``` @@ -118,7 +118,7 @@ cuOpt can be installed with conda (via [miniforge](https://github.com/conda-forg All other dependencies are installed automatically when `cuopt-server` and `cuopt-sh-client` are installed. ```bash -conda install -c rapidsai -c conda-forge -c nvidia cuopt-server=26.04.* cuopt-sh-client=26.04.* +conda install -c rapidsai -c conda-forge -c nvidia cuopt-server=26.06.* cuopt-sh-client=26.06.* ``` We also provide [nightly conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD diff --git a/VERSION b/VERSION index 0bd0e8a95b..cdb610a24d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -26.04.00 +26.06.00 diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index ecef112dd5..104e7e70d1 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=12.9.2,<13.0 - cuda-sanitizer-api - cuda-version=12.9 -- cudf==26.4.*,>=0.0.0a0 +- cudf==26.6.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -32,8 +32,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==26.4.*,>=0.0.0a0 -- librmm==26.4.*,>=0.0.0a0 +- libraft-headers==26.6.*,>=0.0.0a0 +- librmm==26.6.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.2 @@ -50,7 +50,7 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.6.*,>=0.0.0a0 - pyrsistent - pytest-cov - pytest<9.0 @@ -59,7 +59,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests -- rmm==26.4.*,>=0.0.0a0 +- rmm==26.6.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy>=1.14.1 - sphinx diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 35c825280c..06aa6121f9 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=12.9.2,<13.0 - cuda-sanitizer-api - cuda-version=12.9 -- cudf==26.4.*,>=0.0.0a0 +- cudf==26.6.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -32,8 +32,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==26.4.*,>=0.0.0a0 -- librmm==26.4.*,>=0.0.0a0 +- libraft-headers==26.6.*,>=0.0.0a0 +- librmm==26.6.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.2 @@ -50,7 +50,7 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.6.*,>=0.0.0a0 - pyrsistent - pytest-cov - pytest<9.0 @@ -59,7 +59,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests -- rmm==26.4.*,>=0.0.0a0 +- rmm==26.6.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy>=1.14.1 - sphinx diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index 2b717d4e98..a68ebf1285 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=13.0.1,<14.0 - cuda-sanitizer-api - cuda-version=13.1 -- cudf==26.4.*,>=0.0.0a0 +- cudf==26.6.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -32,8 +32,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==26.4.*,>=0.0.0a0 -- librmm==26.4.*,>=0.0.0a0 +- libraft-headers==26.6.*,>=0.0.0a0 +- librmm==26.6.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.2 @@ -50,7 +50,7 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.6.*,>=0.0.0a0 - pyrsistent - pytest-cov - pytest<9.0 @@ -59,7 +59,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests -- rmm==26.4.*,>=0.0.0a0 +- rmm==26.6.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy>=1.14.1 - sphinx diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index f605a83f3b..043d55e148 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=13.0.1,<14.0 - cuda-sanitizer-api - cuda-version=13.1 -- cudf==26.4.*,>=0.0.0a0 +- cudf==26.6.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -32,8 +32,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==26.4.*,>=0.0.0a0 -- librmm==26.4.*,>=0.0.0a0 +- libraft-headers==26.6.*,>=0.0.0a0 +- librmm==26.6.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.2 @@ -50,7 +50,7 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.6.*,>=0.0.0a0 - pyrsistent - pytest-cov - pytest<9.0 @@ -59,7 +59,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests -- rmm==26.4.*,>=0.0.0a0 +- rmm==26.6.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy>=1.14.1 - sphinx diff --git a/dependencies.yaml b/dependencies.yaml index 014889c7d5..db60f63569 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -311,7 +311,7 @@ dependencies: common: - output_types: [conda] packages: - - libcuopt-tests==26.4.*,>=0.0.0a0 + - libcuopt-tests==26.6.*,>=0.0.0a0 build_wheels: common: - output_types: [requirements, pyproject] @@ -413,7 +413,7 @@ dependencies: common: - output_types: conda packages: - - &libcuopt_unsuffixed libcuopt==26.4.*,>=0.0.0a0 + - &libcuopt_unsuffixed libcuopt==26.6.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -426,18 +426,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libcuopt-cu12==26.4.*,>=0.0.0a0 + - libcuopt-cu12==26.6.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - libcuopt-cu13==26.4.*,>=0.0.0a0 + - libcuopt-cu13==26.6.*,>=0.0.0a0 - {matrix: null, packages: [*libcuopt_unsuffixed]} depends_on_cuopt: common: - output_types: conda packages: - - &cuopt_unsuffixed cuopt==26.4.*,>=0.0.0a0 + - &cuopt_unsuffixed cuopt==26.6.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -450,18 +450,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cuopt-cu12==26.4.*,>=0.0.0a0 + - cuopt-cu12==26.6.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - cuopt-cu13==26.4.*,>=0.0.0a0 + - cuopt-cu13==26.6.*,>=0.0.0a0 - {matrix: null, packages: [*cuopt_unsuffixed]} depends_on_cuopt_server: common: - output_types: conda packages: - - &cuopt_server_unsuffixed cuopt-server==26.4.*,>=0.0.0a0 + - &cuopt_server_unsuffixed cuopt-server==26.6.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -474,18 +474,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cuopt-server-cu12==26.4.*,>=0.0.0a0 + - cuopt-server-cu12==26.6.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - cuopt-server-cu13==26.4.*,>=0.0.0a0 + - cuopt-server-cu13==26.6.*,>=0.0.0a0 - {matrix: null, packages: [*cuopt_server_unsuffixed]} depends_on_cuopt_sh_client: common: - output_types: [conda, requirements, pyproject] packages: - - &cuopt_sh_client_unsuffixed cuopt-sh-client==26.4.*,>=0.0.0a0 + - &cuopt_sh_client_unsuffixed cuopt-sh-client==26.6.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -495,7 +495,7 @@ dependencies: common: - output_types: [requirements, pyproject, conda] packages: - - cuopt-mps-parser==26.4.*,>=0.0.0a0 + - cuopt-mps-parser==26.6.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -505,12 +505,12 @@ dependencies: common: - output_types: conda packages: - - libraft-headers==26.4.*,>=0.0.0a0 + - libraft-headers==26.6.*,>=0.0.0a0 depends_on_librmm: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==26.4.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==26.6.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -522,12 +522,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==26.4.*,>=0.0.0a0 + - librmm-cu12==26.6.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - librmm-cu13==26.4.*,>=0.0.0a0 + - librmm-cu13==26.6.*,>=0.0.0a0 - {matrix: null, packages: [*librmm_unsuffixed]} depends_on_cupy: common: @@ -562,7 +562,7 @@ dependencies: common: - output_types: conda packages: - - &rmm_unsuffixed rmm==26.4.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==26.6.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -574,12 +574,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - rmm-cu12==26.4.*,>=0.0.0a0 + - rmm-cu12==26.6.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - rmm-cu13==26.4.*,>=0.0.0a0 + - rmm-cu13==26.6.*,>=0.0.0a0 - matrix: packages: - *rmm_unsuffixed @@ -588,7 +588,7 @@ dependencies: common: - output_types: conda packages: - - &cudf_unsuffixed cudf==26.4.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==26.6.*,>=0.0.0a0 - output_types: requirements packages: - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple @@ -599,12 +599,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==26.4.*,>=0.0.0a0 + - cudf-cu12==26.6.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - cudf-cu13==26.4.*,>=0.0.0a0 + - cudf-cu13==26.6.*,>=0.0.0a0 - matrix: packages: - *cudf_unsuffixed @@ -613,7 +613,7 @@ dependencies: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==26.4.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==26.6.*,>=0.0.0a0 - output_types: requirements packages: - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple @@ -624,12 +624,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==26.4.*,>=0.0.0a0 + - pylibraft-cu12==26.6.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - pylibraft-cu13==26.4.*,>=0.0.0a0 + - pylibraft-cu13==26.6.*,>=0.0.0a0 - matrix: packages: - *pylibraft_unsuffixed diff --git a/gemini-extension.json b/gemini-extension.json index b4c6b764a4..c5ef9883f8 100644 --- a/gemini-extension.json +++ b/gemini-extension.json @@ -1,6 +1,6 @@ { "name": "nvidia-cuopt-skills", "description": "Agent skills for NVIDIA cuOpt optimization engine: routing, LP/MILP/QP, installation, and server.", - "version": "26.04.00", + "version": "26.06.00", "contextFileName": "AGENTS.md" } diff --git a/helmchart/cuopt-server/Chart.yaml b/helmchart/cuopt-server/Chart.yaml index 074d94bec9..811ac067cb 100644 --- a/helmchart/cuopt-server/Chart.yaml +++ b/helmchart/cuopt-server/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -appVersion: 26.4.0 +appVersion: 26.6.0 description: A Helm chart for NVIDIA cuOpt Server with GPU support home: https://docs.nvidia.com/cuopt/user-guide/latest/resources.html keywords: @@ -14,4 +14,4 @@ name: cuopt-server sources: - https://docs.nvidia.com/cuopt/user-guide/latest/resources.html type: application -version: 26.4.0 +version: 26.6.0 diff --git a/helmchart/cuopt-server/values.yaml b/helmchart/cuopt-server/values.yaml index 5218596552..6adafea79e 100644 --- a/helmchart/cuopt-server/values.yaml +++ b/helmchart/cuopt-server/values.yaml @@ -7,7 +7,7 @@ replicaCount: 1 image: repository: nvidia/cuopt pullPolicy: IfNotPresent - tag: "26.4.0-cuda12.9-py3.12" + tag: "26.6.0-cuda12.9-py3.12" imagePullSecrets: [] nameOverride: "" diff --git a/python/cuopt/pyproject.toml b/python/cuopt/pyproject.toml index e86b5bdd73..eff7e01769 100644 --- a/python/cuopt/pyproject.toml +++ b/python/cuopt/pyproject.toml @@ -20,18 +20,18 @@ license = "Apache-2.0" requires-python = ">=3.11" dependencies = [ "cuda-python>=13.0.1,<14.0", - "cudf==26.4.*,>=0.0.0a0", - "cuopt-mps-parser==26.4.*,>=0.0.0a0", + "cudf==26.6.*,>=0.0.0a0", + "cuopt-mps-parser==26.6.*,>=0.0.0a0", "cupy-cuda13x>=13.6.0", - "libcuopt==26.4.*,>=0.0.0a0", + "libcuopt==26.6.*,>=0.0.0a0", "numba-cuda>=0.22.1", "numba>=0.60.0,<0.65.0", "numpy>=1.23.5,<3.0", "pandas>=2.0", - "pylibraft==26.4.*,>=0.0.0a0", + "pylibraft==26.6.*,>=0.0.0a0", "pyyaml>=6.0.0", "rapids-logger==0.2.*,>=0.0.0a0", - "rmm==26.4.*,>=0.0.0a0", + "rmm==26.6.*,>=0.0.0a0", "scipy>=1.14.1", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -101,12 +101,12 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" requires = [ "cmake>=3.30.4", - "cuopt-mps-parser==26.4.*,>=0.0.0a0", + "cuopt-mps-parser==26.6.*,>=0.0.0a0", "cupy-cuda13x>=13.6.0", "cython>=3.0.3", - "libcuopt==26.4.*,>=0.0.0a0", + "libcuopt==26.6.*,>=0.0.0a0", "ninja", - "pylibraft==26.4.*,>=0.0.0a0", + "pylibraft==26.6.*,>=0.0.0a0", "rapids-logger==0.2.*,>=0.0.0a0", - "rmm==26.4.*,>=0.0.0a0", + "rmm==26.6.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cuopt_self_hosted/pyproject.toml b/python/cuopt_self_hosted/pyproject.toml index 7645c99ed0..43aa80a5b3 100644 --- a/python/cuopt_self_hosted/pyproject.toml +++ b/python/cuopt_self_hosted/pyproject.toml @@ -20,7 +20,7 @@ license = "Apache-2.0" license-files = ["LICENSE"] requires-python = ">=3.11" dependencies = [ - "cuopt-mps-parser==26.4.*,>=0.0.0a0", + "cuopt-mps-parser==26.6.*,>=0.0.0a0", "msgpack-numpy==0.4.8", "msgpack==1.1.2", "requests", diff --git a/python/cuopt_server/pyproject.toml b/python/cuopt_server/pyproject.toml index d24cfcbd77..ce96c884be 100644 --- a/python/cuopt_server/pyproject.toml +++ b/python/cuopt_server/pyproject.toml @@ -21,7 +21,7 @@ license = "Apache-2.0" license-files = ["LICENSE"] requires-python = ">=3.11" dependencies = [ - "cuopt==26.4.*,>=0.0.0a0", + "cuopt==26.6.*,>=0.0.0a0", "cupy-cuda13x>=13.6.0", "fastapi", "jsonref==1.1.0", diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index 2507971a0f..e5c0c58fab 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -30,8 +30,8 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "cuopt-mps-parser==26.4.*,>=0.0.0a0", - "librmm==26.4.*,>=0.0.0a0", + "cuopt-mps-parser==26.6.*,>=0.0.0a0", + "librmm==26.6.*,>=0.0.0a0", "nvidia-cublas", "nvidia-cudart", "nvidia-cudss", @@ -81,8 +81,8 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" requires = [ "cmake>=3.30.4", - "cuopt-mps-parser==26.4.*,>=0.0.0a0", - "librmm==26.4.*,>=0.0.0a0", + "cuopt-mps-parser==26.6.*,>=0.0.0a0", + "librmm==26.6.*,>=0.0.0a0", "ninja", "rapids-logger==0.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/skills/cuopt-developer/SKILL.md b/skills/cuopt-developer/SKILL.md index 12419153ac..99743f9171 100644 --- a/skills/cuopt-developer/SKILL.md +++ b/skills/cuopt-developer/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-developer -version: "26.04.00" +version: "26.06.00" description: Contribute to NVIDIA cuOpt codebase including C++/CUDA, Python, server, docs, and CI. Use when the user wants to modify solver internals, add features, submit PRs, or understand the codebase architecture. --- diff --git a/skills/cuopt-installation-api-c/SKILL.md b/skills/cuopt-installation-api-c/SKILL.md index 747382e3c7..bd4d60becc 100644 --- a/skills/cuopt-installation-api-c/SKILL.md +++ b/skills/cuopt-installation-api-c/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-installation-api-c -version: "26.04.00" +version: "26.06.00" description: Install cuOpt for C — conda, locate lib/headers, verification. Use when the user is installing or verifying the C API. Standalone; no common skill. --- diff --git a/skills/cuopt-installation-api-python/SKILL.md b/skills/cuopt-installation-api-python/SKILL.md index a3d7a5e5d2..771f5ec8b0 100644 --- a/skills/cuopt-installation-api-python/SKILL.md +++ b/skills/cuopt-installation-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-installation-api-python -version: "26.04.00" +version: "26.06.00" description: Install cuOpt for Python — pip, conda, Docker, verification. Use when the user is installing or verifying the Python API. Standalone; no common skill. --- diff --git a/skills/cuopt-installation-common/SKILL.md b/skills/cuopt-installation-common/SKILL.md index 6ceb9f9000..88534fb810 100644 --- a/skills/cuopt-installation-common/SKILL.md +++ b/skills/cuopt-installation-common/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-installation-common -version: "26.04.00" +version: "26.06.00" description: Install cuOpt — system and environment requirements only. Domain concepts; no install commands or interface guidance. --- diff --git a/skills/cuopt-installation-developer/SKILL.md b/skills/cuopt-installation-developer/SKILL.md index a002498853..1f3dff0d3f 100644 --- a/skills/cuopt-installation-developer/SKILL.md +++ b/skills/cuopt-installation-developer/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-installation-developer -version: "26.04.00" +version: "26.06.00" description: Developer installation — build cuOpt from source, run tests. Use when the user wants to set up a dev environment to contribute or modify cuOpt. --- diff --git a/skills/cuopt-lp-milp-api-c/SKILL.md b/skills/cuopt-lp-milp-api-c/SKILL.md index 53df3de63e..74b0d5dc92 100644 --- a/skills/cuopt-lp-milp-api-c/SKILL.md +++ b/skills/cuopt-lp-milp-api-c/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-lp-milp-api-c -version: "26.04.00" +version: "26.06.00" description: LP and MILP with cuOpt — C API only. Use when the user is embedding LP/MILP in C/C++. --- diff --git a/skills/cuopt-lp-milp-api-cli/SKILL.md b/skills/cuopt-lp-milp-api-cli/SKILL.md index cbdc1e7778..1f8e8a157c 100644 --- a/skills/cuopt-lp-milp-api-cli/SKILL.md +++ b/skills/cuopt-lp-milp-api-cli/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-lp-milp-api-cli -version: "26.04.00" +version: "26.06.00" description: LP and MILP with cuOpt — CLI only (MPS files, cuopt_cli). Use when the user is solving from MPS via command line. --- diff --git a/skills/cuopt-lp-milp-api-python/SKILL.md b/skills/cuopt-lp-milp-api-python/SKILL.md index a7cd9a59f2..e8435867db 100644 --- a/skills/cuopt-lp-milp-api-python/SKILL.md +++ b/skills/cuopt-lp-milp-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-lp-milp-api-python -version: "26.04.00" +version: "26.06.00" description: Solve Linear Programming (LP) and Mixed-Integer Linear Programming (MILP) with the Python API. Use when the user asks about optimization with linear constraints, integer variables, scheduling, resource allocation, facility location, or production planning. --- diff --git a/skills/cuopt-qp-api-c/SKILL.md b/skills/cuopt-qp-api-c/SKILL.md index bc1efb63d3..85014b81fd 100644 --- a/skills/cuopt-qp-api-c/SKILL.md +++ b/skills/cuopt-qp-api-c/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-qp-api-c -version: "26.04.00" +version: "26.06.00" description: Quadratic Programming (QP) with cuOpt — C API. Use when the user is embedding QP in C/C++. --- diff --git a/skills/cuopt-qp-api-cli/SKILL.md b/skills/cuopt-qp-api-cli/SKILL.md index 5f8a8e848a..7aec559126 100644 --- a/skills/cuopt-qp-api-cli/SKILL.md +++ b/skills/cuopt-qp-api-cli/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-qp-api-cli -version: "26.04.00" +version: "26.06.00" description: QP with cuOpt — CLI (e.g. cuopt_cli with QP-capable input). Use when the user is solving QP from the command line. --- diff --git a/skills/cuopt-qp-api-python/SKILL.md b/skills/cuopt-qp-api-python/SKILL.md index b85b9e3db2..39533aaeca 100644 --- a/skills/cuopt-qp-api-python/SKILL.md +++ b/skills/cuopt-qp-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-qp-api-python -version: "26.04.00" +version: "26.06.00" description: Quadratic Programming (QP) with cuOpt — Python API only (beta). Use when the user is building or solving QP in Python. --- diff --git a/skills/cuopt-routing-api-python/SKILL.md b/skills/cuopt-routing-api-python/SKILL.md index d8bf736f8f..c386107241 100644 --- a/skills/cuopt-routing-api-python/SKILL.md +++ b/skills/cuopt-routing-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-routing-api-python -version: "26.04.00" +version: "26.06.00" description: Vehicle routing (VRP, TSP, PDP) with cuOpt — Python API only. Use when the user is building or solving routing in Python. --- diff --git a/skills/cuopt-server-api-python/SKILL.md b/skills/cuopt-server-api-python/SKILL.md index b340e9883f..7d6ed175dd 100644 --- a/skills/cuopt-server-api-python/SKILL.md +++ b/skills/cuopt-server-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-server-api-python -version: "26.04.00" +version: "26.06.00" description: cuOpt REST server — start server, endpoints, Python/curl client examples. Use when the user is deploying or calling the REST API. --- diff --git a/skills/cuopt-server-common/SKILL.md b/skills/cuopt-server-common/SKILL.md index f23c9c4a5f..cc2a3728d5 100644 --- a/skills/cuopt-server-common/SKILL.md +++ b/skills/cuopt-server-common/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-server-common -version: "26.04.00" +version: "26.06.00" description: cuOpt REST server — what it does and how requests flow. Domain concepts; no deploy or client code. --- diff --git a/skills/cuopt-user-rules/SKILL.md b/skills/cuopt-user-rules/SKILL.md index 0777b9af15..87734f72a2 100644 --- a/skills/cuopt-user-rules/SKILL.md +++ b/skills/cuopt-user-rules/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-user-rules -version: "26.04.00" +version: "26.06.00" description: Base behavior rules for using NVIDIA cuOpt. Read this FIRST before any cuOpt user task (routing, LP/MILP, QP, installation, server). Covers handling incomplete questions, clarifying data requirements, verifying understanding, and running commands safely. --- diff --git a/skills/lp-milp-formulation/SKILL.md b/skills/lp-milp-formulation/SKILL.md index 64431a04c4..e429282033 100644 --- a/skills/lp-milp-formulation/SKILL.md +++ b/skills/lp-milp-formulation/SKILL.md @@ -1,6 +1,6 @@ --- name: lp-milp-formulation -version: "26.04.00" +version: "26.06.00" description: LP/MILP concepts and going from problem text to formulation. What LP/MILP are, required formulation questions, typical modeling elements, and how to parse problem statements (parameters, constraints, decisions, objective). --- diff --git a/skills/qp-formulation/SKILL.md b/skills/qp-formulation/SKILL.md index c87b887fbc..60aed00ede 100644 --- a/skills/qp-formulation/SKILL.md +++ b/skills/qp-formulation/SKILL.md @@ -1,6 +1,6 @@ --- name: qp-formulation -version: "26.04.00" +version: "26.06.00" description: Quadratic Programming (QP) — problem form and constraints. Domain concepts; no API or interface. QP is beta. --- diff --git a/skills/routing-formulation/SKILL.md b/skills/routing-formulation/SKILL.md index 4ab8d6419d..9cf8060cdf 100644 --- a/skills/routing-formulation/SKILL.md +++ b/skills/routing-formulation/SKILL.md @@ -1,6 +1,6 @@ --- name: routing-formulation -version: "26.04.00" +version: "26.06.00" description: Vehicle routing (VRP, TSP, PDP) — problem types and data requirements. Domain concepts; no API or interface. --- diff --git a/skills/skill-evolution/SKILL.md b/skills/skill-evolution/SKILL.md index d77fba1a3f..f3605795b7 100644 --- a/skills/skill-evolution/SKILL.md +++ b/skills/skill-evolution/SKILL.md @@ -1,6 +1,6 @@ --- name: skill-evolution -version: "26.04.00" +version: "26.06.00" description: After solving a non-trivial problem, detect generalizable learnings and propose skill updates so future interactions benefit automatically. Always active — applies to every interaction. --- @@ -182,7 +182,7 @@ When skill evolution creates an entirely new skill directory, add `origin: skill ```yaml --- name: new-skill-name -version: "26.04.00" +version: "26.06.00" description: ... origin: skill-evolution --- From d44661ded5958438c360dcf41a4a08e05a44d9d8 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Thu, 19 Mar 2026 11:57:04 -0700 Subject: [PATCH 11/30] Unify two constructors --- cpp/src/branch_and_bound/CMakeLists.txt | 1 - cpp/src/branch_and_bound/branch_and_bound.cpp | 14 ++- cpp/src/branch_and_bound/branch_and_bound.hpp | 19 ++-- .../branch_and_bound_from_mip.cu | 105 ------------------ cpp/src/dual_simplex/solve.cpp | 4 +- cpp/src/mip_heuristics/diversity/lns/rins.cu | 6 +- .../diversity/recombiners/sub_mip.cuh | 6 +- cpp/src/mip_heuristics/solver.cu | 8 +- 8 files changed, 38 insertions(+), 125 deletions(-) delete mode 100644 cpp/src/branch_and_bound/branch_and_bound_from_mip.cu diff --git a/cpp/src/branch_and_bound/CMakeLists.txt b/cpp/src/branch_and_bound/CMakeLists.txt index 9b04014fb7..5bb1017120 100644 --- a/cpp/src/branch_and_bound/CMakeLists.txt +++ b/cpp/src/branch_and_bound/CMakeLists.txt @@ -5,7 +5,6 @@ set(BRANCH_AND_BOUND_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound_from_mip.cu ${CMAKE_CURRENT_SOURCE_DIR}/mip_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pseudo_costs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/diving_heuristics.cpp diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 3cf273545e..06ea002cac 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -248,7 +248,9 @@ template branch_and_bound_t::branch_and_bound_t( const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings, - f_t start_time) + f_t start_time, + cuopt::linear_programming::detail::problem_t* mip_problem_ptr, + i_t pdlp_root_num_gpus) : original_problem_(user_problem), settings_(solver_settings), original_lp_(user_problem.handle_ptr, 1, 1, 1), @@ -257,8 +259,8 @@ branch_and_bound_t::branch_and_bound_t( root_relax_soln_(1, 1), pc_(1), solver_status_(mip_status_t::UNSET), - mip_problem_ptr_(nullptr), - pdlp_root_num_gpus_(1) + mip_problem_ptr_(mip_problem_ptr), + pdlp_root_num_gpus_(pdlp_root_num_gpus) { exploration_stats_.start_time = start_time; #ifdef PRINT_CONSTRAINT_MATRIX @@ -3870,4 +3872,10 @@ template class branch_and_bound_t; #endif +#ifdef MIP_INSTANTIATION_FLOAT + +template class branch_and_bound_t; + +#endif + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index eeccb75af3..2cbe55a48c 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -75,16 +75,15 @@ struct deterministic_diving_policy_t; template class branch_and_bound_t { public: - /** Build from MIP problem_t (used by mip_heuristics). Implemented in - * branch_and_bound_from_mip.cu. */ - branch_and_bound_t(cuopt::linear_programming::detail::problem_t* mip_problem_ptr, - const simplex_solver_settings_t& solver_settings, - f_t start_time, - i_t num_gpus = 1); - /** Build from user_problem_t (used by dual_simplex/solve.cpp, RINS, sub_mip). */ - branch_and_bound_t(const user_problem_t& user_problem, - const simplex_solver_settings_t& solver_settings, - f_t start_time); + /** Host \p user_problem must be fully populated by the caller. When \p mip_problem_ptr is + * non-null (GPU MIP / concurrent root), the caller must sync from device first, e.g. + * recompute_objective_integrality(), set objective_is_integral, get_host_user_problem(). */ + branch_and_bound_t( + const user_problem_t& user_problem, + const simplex_solver_settings_t& solver_settings, + f_t start_time, + cuopt::linear_programming::detail::problem_t* mip_problem_ptr = nullptr, + i_t pdlp_root_num_gpus = 1); // Set an initial guess based on the user_problem. This should be called before solve. void set_initial_guess(const std::vector& user_guess) { guess_ = user_guess; } diff --git a/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu b/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu deleted file mode 100644 index 4e90956f68..0000000000 --- a/cpp/src/branch_and_bound/branch_and_bound_from_mip.cu +++ /dev/null @@ -1,105 +0,0 @@ -/* clang-format off */ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ -/* clang-format on */ - -#include -#include -#include - -#include - -#include -#include -#include -#include - -namespace cuopt::linear_programming::dual_simplex { - -namespace { -template -void full_variable_types(const user_problem_t& original_problem, - const lp_problem_t& original_lp, - std::vector& var_types) -{ - var_types = original_problem.var_types; - if (original_lp.num_cols > original_problem.num_cols) { - var_types.resize(original_lp.num_cols); - for (i_t k = original_problem.num_cols; k < original_lp.num_cols; k++) { - var_types[k] = variable_type_t::CONTINUOUS; - } - } -} -} // anonymous namespace - -template -branch_and_bound_t::branch_and_bound_t( - cuopt::linear_programming::detail::problem_t* mip_problem_ptr, - const simplex_solver_settings_t& solver_settings, - f_t start_time, - i_t num_gpus) - : original_problem_(mip_problem_ptr->handle_ptr), - settings_(solver_settings), - original_lp_(mip_problem_ptr->handle_ptr, 1, 1, 1), - Arow_(1, 1, 0), - incumbent_(1), - root_relax_soln_(1, 1), - pc_(1), - solver_status_(mip_status_t::UNSET), - mip_problem_ptr_(mip_problem_ptr), - pdlp_root_num_gpus_(num_gpus) -{ - exploration_stats_.start_time = start_time; - mip_problem_ptr->recompute_objective_integrality(); - original_problem_.objective_is_integral = mip_problem_ptr->is_objective_integral(); - mip_problem_ptr->get_host_user_problem(original_problem_); - -#ifdef PRINT_CONSTRAINT_MATRIX - settings_.log.printf("A"); - original_problem_.A.print_matrix(); -#endif - - dualize_info_t dualize_info; - convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); - full_variable_types(original_problem_, original_lp_, var_types_); - -#ifdef CHECK_SLACKS - assert(new_slacks_.size() == original_lp_.num_rows); - for (i_t slack : new_slacks_) { - const i_t col_start = original_lp_.A.col_start[slack]; - const i_t col_end = original_lp_.A.col_start[slack + 1]; - const i_t col_len = col_end - col_start; - if (col_len != 1) { - settings_.log.printf("Slack %d has %d nzs\n", slack, col_len); - assert(col_len == 1); - } - const i_t i = original_lp_.A.i[col_start]; - const f_t x = original_lp_.A.x[col_start]; - if (std::abs(x) != 1.0) { - settings_.log.printf("Slack %d row %d has non-unit coefficient %e\n", slack, i, x); - assert(std::abs(x) == 1.0); - } - } -#endif - - upper_bound_ = inf; - root_objective_ = std::numeric_limits::quiet_NaN(); -} - -template branch_and_bound_t::branch_and_bound_t( - cuopt::linear_programming::detail::problem_t*, - const simplex_solver_settings_t&, - double, - int); - -#ifdef MIP_INSTANTIATION_FLOAT -template branch_and_bound_t::branch_and_bound_t( - cuopt::linear_programming::detail::problem_t*, - const simplex_solver_settings_t&, - float, - int); -#endif - -} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index d300d6011c..d5525891b6 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -706,7 +706,7 @@ i_t solve(const user_problem_t& problem, { i_t status; if (is_mip(problem) && !settings.relaxation) { - branch_and_bound_t branch_and_bound(problem, settings, tic()); + branch_and_bound_t branch_and_bound(problem, settings, tic()); mip_solution_t mip_solution(problem.num_cols); mip_status_t mip_status = branch_and_bound.solve(mip_solution); if (mip_status == mip_status_t::OPTIMAL) { @@ -745,7 +745,7 @@ i_t solve_mip_with_guess(const user_problem_t& problem, { i_t status; if (is_mip(problem)) { - branch_and_bound_t branch_and_bound(problem, settings, tic()); + branch_and_bound_t branch_and_bound(problem, settings, tic()); branch_and_bound.set_initial_guess(guess); mip_status_t mip_status = branch_and_bound.solve(solution); if (mip_status == mip_status_t::OPTIMAL) { diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index 1d009b8fb7..31819c9722 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -270,8 +270,12 @@ void rins_t::run_rins() f_t objective) { rins_solution_queue.push_back(solution); }; + dual_simplex::user_problem_t bb_user_problem(fixed_problem.handle_ptr); + fixed_problem.recompute_objective_integrality(); + bb_user_problem.objective_is_integral = fixed_problem.is_objective_integral(); + fixed_problem.get_host_user_problem(bb_user_problem); dual_simplex::branch_and_bound_t branch_and_bound( - &fixed_problem, branch_and_bound_settings, dual_simplex::tic(), 1); + bb_user_problem, branch_and_bound_settings, dual_simplex::tic(), &fixed_problem, 1); branch_and_bound_solution.resize(branch_and_bound.get_num_cols()); branch_and_bound.set_initial_guess(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution); diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index 5b9821cc3f..4494f5d3c5 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -113,8 +113,12 @@ class sub_mip_recombiner_t : public recombiner_t { // disable B&B logs, so that it is not interfering with the main B&B thread branch_and_bound_settings.log.log = false; + dual_simplex::user_problem_t bb_user_problem(fixed_problem.handle_ptr); + fixed_problem.recompute_objective_integrality(); + bb_user_problem.objective_is_integral = fixed_problem.is_objective_integral(); + fixed_problem.get_host_user_problem(bb_user_problem); dual_simplex::branch_and_bound_t branch_and_bound( - &fixed_problem, branch_and_bound_settings, dual_simplex::tic(), 1); + bb_user_problem, branch_and_bound_settings, dual_simplex::tic(), &fixed_problem, 1); branch_and_bound_solution.resize(branch_and_bound.get_num_cols()); branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution); if (solution_vector.size() > 0) { diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 1257aa6409..dd0a084477 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -257,11 +257,15 @@ solution_t mip_solver_t::run_solver() }; } - // Create the branch and bound object (builds user_problem from context.problem_ptr) + dual_simplex::user_problem_t bb_user_problem(context.problem_ptr->handle_ptr); + context.problem_ptr->recompute_objective_integrality(); + bb_user_problem.objective_is_integral = context.problem_ptr->is_objective_integral(); + context.problem_ptr->get_host_user_problem(bb_user_problem); branch_and_bound = - std::make_unique>(context.problem_ptr, + std::make_unique>(bb_user_problem, branch_and_bound_settings, timer_.get_tic_start(), + context.problem_ptr, context.settings.num_gpus); branch_and_bound_solution.resize(branch_and_bound->get_num_cols()); context.branch_and_bound_ptr = branch_and_bound.get(); From c9e39d39e9b7d9cf8a821f9d1073f512b5ea26bd Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Thu, 19 Mar 2026 12:38:50 -0700 Subject: [PATCH 12/30] Fix compilation error --- cpp/src/branch_and_bound/branch_and_bound.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index feb4a561b1..b4c46ac8e9 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -48,7 +48,7 @@ class problem_t; template struct clique_table_t; -} +} // namespace cuopt::linear_programming::detail namespace cuopt::linear_programming::dual_simplex { @@ -88,9 +88,9 @@ class branch_and_bound_t { const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings, f_t start_time, - cuopt::linear_programming::detail::problem_t* mip_problem_ptr, - i_t pdlp_root_num_gpus, - std::shared_ptr> clique_table = nullptr); + cuopt::linear_programming::detail::problem_t* mip_problem_ptr = nullptr, + i_t pdlp_root_num_gpus = 1, + std::shared_ptr> clique_table = nullptr); // Set an initial guess based on the user_problem. This should be called before solve. void set_initial_guess(const std::vector& user_guess) { guess_ = user_guess; } From c99e1c3f1111701350333f5eb145a638d859c0f6 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Tue, 10 Mar 2026 11:42:26 -0700 Subject: [PATCH 13/30] Optimize right-looking LU factorization with O(1) degree-bucket ops Replace linear degree-bucket search with O(1) swap-with-last removal using col_pos/row_pos position arrays, and eliminate O(row_degree) pre-traversal in schur_complement via a persistent last_in_row[] array --- cpp/src/dual_simplex/right_looking_lu.cpp | 210 +++++++++++++--------- 1 file changed, 126 insertions(+), 84 deletions(-) diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 657ebc4762..53bfcf8ac5 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -30,7 +30,7 @@ struct element_t { f_t x; // coefficient value i_t next_in_column; // index of the next element in the column: kNone if there is no next element i_t next_in_row; // index of the next element in the row: kNone if there is no next element -}; +}; // 24 bytes constexpr int kNone = -1; template @@ -86,11 +86,11 @@ i_t load_elements(const csc_matrix_t& A, std::vector>& elements, std::vector& first_in_row, std::vector& first_in_col, + std::vector& last_in_row, f_t& work_estimate) { const i_t m = A.m; const i_t n = column_list.size(); - std::vector last_element_in_row(m, kNone); work_estimate += m; i_t nz = 0; @@ -105,15 +105,9 @@ i_t load_elements(const csc_matrix_t& A, elements[nz].x = A.x[p]; elements[nz].next_in_column = kNone; if (p > col_start) { elements[nz - 1].next_in_column = nz; } - elements[nz].next_in_row = kNone; // set the current next in row to None (since we don't know - // if there will be more entries in this row) - if (last_element_in_row[i] != kNone) { - // If we have seen an entry in this row before, set the last entry we've seen in this row to - // point to the current entry - elements[last_element_in_row[i]].next_in_row = nz; - } - // The current entry becomes the last element seen in the row - last_element_in_row[i] = nz; + elements[nz].next_in_row = kNone; + if (last_in_row[i] != kNone) { elements[last_in_row[i]].next_in_row = nz; } + last_in_row[i] = nz; if (p == col_start) { first_in_col[k] = nz; } if (first_in_row[i] == kNone) { first_in_row[i] = nz; } nz++; @@ -316,10 +310,11 @@ void update_Cdegree_and_col_count(i_t pivot_i, const std::vector& first_in_row, std::vector& Cdegree, std::vector>& col_count, + std::vector& col_pos, std::vector>& elements, f_t& work_estimate) { - // Update Cdegree and col_count + // Update Cdegree and col_count (O(1) removal using position array) i_t loop_count = 0; for (i_t p = first_in_row[pivot_i]; p != kNone; p = elements[p].next_in_row) { element_t* entry = &elements[p]; @@ -327,20 +322,20 @@ void update_Cdegree_and_col_count(i_t pivot_i, assert(entry->i == pivot_i); i_t cdeg = Cdegree[j]; assert(cdeg >= 0); - for (typename std::vector::iterator it = col_count[cdeg].begin(); - it != col_count[cdeg].end(); - it++) { - if (*it == j) { - // Remove col j from col_count[cdeg] - std::swap(*it, col_count[cdeg].back()); - col_count[cdeg].pop_back(); - work_estimate += (it - col_count[cdeg].begin()); - break; - } + // O(1) swap-with-last removal + { + i_t pos = col_pos[j]; + i_t other = col_count[cdeg].back(); + col_count[cdeg][pos] = other; + col_pos[other] = pos; + col_count[cdeg].pop_back(); } cdeg = --Cdegree[j]; assert(cdeg >= 0); - if (j != pivot_j && cdeg >= 0) { col_count[cdeg].push_back(j); } + if (j != pivot_j && cdeg >= 0) { + col_pos[j] = col_count[cdeg].size(); + col_count[cdeg].push_back(j); + } loop_count++; } work_estimate += 7 * loop_count; @@ -353,30 +348,31 @@ void update_Rdegree_and_row_count(i_t pivot_i, const std::vector& first_in_col, std::vector& Rdegree, std::vector>& row_count, + std::vector& row_pos, std::vector>& elements, f_t& work_estimate) { - // Update Rdegree and row_count + // Update Rdegree and row_count (O(1) removal using position array) i_t loop_count = 0; for (i_t p = first_in_col[pivot_j]; p != kNone; p = elements[p].next_in_column) { element_t* entry = &elements[p]; const i_t i = entry->i; i_t rdeg = Rdegree[i]; assert(rdeg >= 0); - for (typename std::vector::iterator it = row_count[rdeg].begin(); - it != row_count[rdeg].end(); - it++) { - if (*it == i) { - // Remove row i from row_count[rdeg] - std::swap(*it, row_count[rdeg].back()); - row_count[rdeg].pop_back(); - work_estimate += (it - row_count[rdeg].begin()); - break; - } + // O(1) swap-with-last removal + { + i_t pos = row_pos[i]; + i_t other = row_count[rdeg].back(); + row_count[rdeg][pos] = other; + row_pos[other] = pos; + row_count[rdeg].pop_back(); } rdeg = --Rdegree[i]; assert(rdeg >= 0); - if (i != pivot_i && rdeg >= 0) { row_count[rdeg].push_back(i); } + if (i != pivot_i && rdeg >= 0) { + row_pos[i] = row_count[rdeg].size(); + row_count[rdeg].push_back(i); + } loop_count++; } work_estimate += 7 * loop_count; @@ -400,18 +396,15 @@ void schur_complement(i_t pivot_i, std::vector& Cdegree, std::vector>& row_count, std::vector>& col_count, + std::vector& last_in_row, + std::vector& col_pos, + std::vector& row_pos, std::vector>& elements, f_t& work_estimate) { + // Initialize row_last_workspace from last_in_row (O(1) per row, no full row traversal) for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { - element_t* e = &elements[p1]; - const i_t i = e->i; - i_t row_last = kNone; - for (i_t p3 = first_in_row[i]; p3 != kNone; p3 = elements[p3].next_in_row) { - row_last = p3; - } - work_estimate += 2 * Rdegree[i]; - row_last_workspace[i] = row_last; + row_last_workspace[elements[p1].i] = last_in_row[elements[p1].i]; } work_estimate += 4 * Cdegree[pivot_j]; @@ -478,35 +471,29 @@ void schur_complement(i_t pivot_i, first_in_row[i] = fill_p; } row_last_workspace[i] = fill_p; - i_t rdeg = Rdegree[i]; // Rdgree must increase - for (typename std::vector::iterator it = row_count[rdeg].begin(); - it != row_count[rdeg].end(); - it++) { - if (*it == i) { - // Remove row i from row_count[rdeg] - std::swap(*it, row_count[rdeg].back()); - row_count[rdeg].pop_back(); - work_estimate += 2 * (it - row_count[rdeg].begin()); - break; - } + last_in_row[i] = fill_p; // maintain last_in_row persistent state + // Row degree update: O(1) removal using row_pos + { + i_t rdeg = Rdegree[i]; + i_t pos = row_pos[i]; + i_t other = row_count[rdeg].back(); + row_count[rdeg][pos] = other; + row_pos[other] = pos; + row_count[rdeg].pop_back(); + row_pos[i] = row_count[rdeg + 1].size(); + row_count[++Rdegree[i]].push_back(i); } - rdeg = ++Rdegree[i]; // Increase rdeg - row_count[rdeg].push_back(i); // Add row i to row_count[rdeg] - - i_t cdeg = Cdegree[j]; // Cdegree must increase - for (typename std::vector::iterator it = col_count[cdeg].begin(); - it != col_count[cdeg].end(); - it++) { - if (*it == j) { - // Remove col j from col_count[cdeg] - std::swap(*it, col_count[cdeg].back()); - col_count[cdeg].pop_back(); - work_estimate += 2 * (it - col_count[cdeg].begin()); - break; - } + // Col degree update: O(1) removal using col_pos + { + i_t cdeg = Cdegree[j]; + i_t pos = col_pos[j]; + i_t other = col_count[cdeg].back(); + col_count[cdeg][pos] = other; + col_pos[other] = pos; + col_count[cdeg].pop_back(); + col_pos[j] = col_count[cdeg + 1].size(); + col_count[++Cdegree[j]].push_back(j); } - cdeg = ++Cdegree[j]; // Increase Cdegree - col_count[cdeg].push_back(j); // Add column j to col_count[cdeg] } } work_estimate += 10 * Cdegree[pivot_j]; @@ -532,7 +519,6 @@ void remove_pivot_row(i_t pivot_i, f_t& work_estimate) { // Remove the pivot row - i_t row_loop_count = 0; for (i_t p0 = first_in_row[pivot_i]; p0 != kNone; p0 = elements[p0].next_in_row) { element_t* e = &elements[p0]; @@ -574,6 +560,7 @@ void remove_pivot_col(i_t pivot_i, std::vector& first_in_col, std::vector& first_in_row, std::vector& max_in_row, + std::vector& last_in_row, std::vector>& elements, f_t& work_estimate) { @@ -583,6 +570,7 @@ void remove_pivot_col(i_t pivot_i, element_t* e = &elements[p1]; const i_t i = e->i; i_t last = kNone; + i_t last_surviving = kNone; #ifdef THRESHOLD_ROOK_PIVOTING f_t max_in_row_i = 0.0; #endif @@ -598,16 +586,17 @@ void remove_pivot_col(i_t pivot_i, entry->i = -1; entry->j = -1; entry->x = std::numeric_limits::quiet_NaN(); - } + } else { + last_surviving = p; #ifdef THRESHOLD_ROOK_PIVOTING - else { const f_t abs_entryx = std::abs(entry->x); if (abs_entryx > max_in_row_i) { max_in_row_i = abs_entryx; } - } #endif + } last = p; row_loop_count++; } + last_in_row[i] = last_surviving; work_estimate += 3 * row_loop_count; #ifdef THRESHOLD_ROOK_PIVOTING max_in_row[i] = max_in_row_i; @@ -656,11 +645,28 @@ i_t right_looking_lu(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + + // Position arrays for O(1) degree-bucket removal + std::vector col_pos(n); + for (i_t d = 0; d <= n; ++d) { + for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { + col_pos[col_count[d][pos]] = pos; + } + } + std::vector row_pos(n); + for (i_t d = 0; d <= n; ++d) { + for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { + row_pos[row_count[d][pos]] = pos; + } + } + std::vector> elements(Bnz); std::vector first_in_row(n, kNone); std::vector first_in_col(n, kNone); + std::vector last_in_row(n, kNone); work_estimate += 2 * n + Bnz; - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); + load_elements( + A, column_list, Bnz, elements, first_in_row, first_in_col, last_in_row, work_estimate); std::vector column_j_workspace(n, kNone); std::vector row_last_workspace(n); @@ -777,9 +783,9 @@ i_t right_looking_lu(const csc_matrix_t& A, // Update Cdegree and col_count update_Cdegree_and_col_count( - pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + pivot_i, pivot_j, first_in_row, Cdegree, col_count, col_pos, elements, work_estimate); update_Rdegree_and_row_count( - pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); + pivot_i, pivot_j, first_in_col, Rdegree, row_count, row_pos, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -798,14 +804,23 @@ i_t right_looking_lu(const csc_matrix_t& A, Cdegree, row_count, col_count, + last_in_row, + col_pos, + row_pos, elements, work_estimate); // Remove the pivot row remove_pivot_row( pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col( - pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_col(pivot_i, + pivot_j, + first_in_col, + first_in_row, + max_in_row, + last_in_row, + elements, + work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; @@ -1030,10 +1045,28 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + + // Position arrays for O(1) degree-bucket removal + // col_count has m+1 buckets, row_count has n+1 buckets + std::vector col_pos(n); + for (i_t d = 0; d <= m; ++d) { + for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { + col_pos[col_count[d][pos]] = pos; + } + } + std::vector row_pos(m); + for (i_t d = 0; d <= n; ++d) { + for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { + row_pos[row_count[d][pos]] = pos; + } + } + std::vector> elements(Bnz); std::vector first_in_row(m, kNone); std::vector first_in_col(n, kNone); - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); + std::vector last_in_row(m, kNone); + load_elements( + A, column_list, Bnz, elements, first_in_row, first_in_col, last_in_row, work_estimate); std::vector column_j_workspace(m, kNone); std::vector row_last_workspace(m); @@ -1100,9 +1133,9 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // Update Cdegree and col_count update_Cdegree_and_col_count( - pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + pivot_i, pivot_j, first_in_row, Cdegree, col_count, col_pos, elements, work_estimate); update_Rdegree_and_row_count( - pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); + pivot_i, pivot_j, first_in_col, Rdegree, row_count, row_pos, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -1121,14 +1154,23 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, Cdegree, row_count, col_count, + last_in_row, + col_pos, + row_pos, elements, work_estimate); // Remove the pivot row remove_pivot_row( pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col( - pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_col(pivot_i, + pivot_j, + first_in_col, + first_in_row, + max_in_row, + last_in_row, + elements, + work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; From 07cb595a7ddf30638755010a734a460525e99cd8 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 16 Mar 2026 07:40:15 -0700 Subject: [PATCH 14/30] crossover: hoist delta_zN and delta_expanded out of dual push loop Allocate buffers once before the superbasic loop and reset with std::fill each iteration to avoid repeated O(n) allocations (PR #948 review). Made-with: Cursor --- cpp/src/dual_simplex/crossover.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 16f503e893..832f6891f6 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -388,6 +388,8 @@ i_t dual_push(const lp_problem_t& lp, std::vector& y = solution.y; const std::vector& x = solution.x; i_t num_pushes = 0; + std::vector delta_zN(n - m); + std::vector delta_expanded(n); while (superbasic_list.size() > 0) { const i_t s = superbasic_list.back(); const i_t basic_leaving_index = superbasic_list_index.back(); @@ -415,9 +417,9 @@ i_t dual_push(const lp_problem_t& lp, } // delta_zN = -N^T delta_y - std::vector delta_zN(n - m); - std::vector delta_expanded(n, 0.); - + std::fill(delta_expanded.begin(), delta_expanded.end(), 0.); + std::fill(delta_zN.begin(), delta_zN.end(), 0.); + // Iterate directly over sparse delta_y instead of checking zeros for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { const i_t row = delta_y_sparse.i[nnz_idx]; From 953b83eb52237791df6973678da5cfea9ac06af6 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 16 Mar 2026 09:24:59 -0700 Subject: [PATCH 15/30] Added review comments --- cpp/src/dual_simplex/right_looking_lu.cpp | 33 ++++++++++++++--------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 53bfcf8ac5..4800d644c1 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -105,8 +105,14 @@ i_t load_elements(const csc_matrix_t& A, elements[nz].x = A.x[p]; elements[nz].next_in_column = kNone; if (p > col_start) { elements[nz - 1].next_in_column = nz; } - elements[nz].next_in_row = kNone; - if (last_in_row[i] != kNone) { elements[last_in_row[i]].next_in_row = nz; } + elements[nz].next_in_row = kNone; // set the current next in row to None (since we don't know + // if there will be more entries in this row yet)) + if (last_in_row[i] != kNone) { + // If we have seen an entry in this row before, set the last entry we've seen in this row to + // point to the current entry + elements[last_in_row[i]].next_in_row = nz; + } + // The current entry becomes the last element seen in the row last_in_row[i] = nz; if (p == col_start) { first_in_col[k] = nz; } if (first_in_row[i] == kNone) { first_in_row[i] = nz; } @@ -402,9 +408,11 @@ void schur_complement(i_t pivot_i, std::vector>& elements, f_t& work_estimate) { - // Initialize row_last_workspace from last_in_row (O(1) per row, no full row traversal) + // row_last_workspace: temp copy of last_in_row for this pivot step, updated when adding fill + // last_in_row: persistent tail pointer per row for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { - row_last_workspace[elements[p1].i] = last_in_row[elements[p1].i]; + const i_t i = elements[p1].i; + row_last_workspace[i] = last_in_row[i]; } work_estimate += 4 * Cdegree[pivot_j]; @@ -569,8 +577,10 @@ void remove_pivot_col(i_t pivot_i, for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { element_t* e = &elements[p1]; const i_t i = e->i; - i_t last = kNone; - i_t last_surviving = kNone; + // Need both: last = previous-in-row (for link update when removing); last_surviving = new row + // tail (for last_in_row[i]). They differ when the pivot is the last element in the row. + i_t last = kNone; + i_t last_surviving = kNone; #ifdef THRESHOLD_ROOK_PIVOTING f_t max_in_row_i = 0.0; #endif @@ -647,13 +657,13 @@ i_t right_looking_lu(const csc_matrix_t& A, initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); // Position arrays for O(1) degree-bucket removal - std::vector col_pos(n); + std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] for (i_t d = 0; d <= n; ++d) { for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { col_pos[col_count[d][pos]] = pos; } } - std::vector row_pos(n); + std::vector row_pos(n); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] for (i_t d = 0; d <= n; ++d) { for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { row_pos[row_count[d][pos]] = pos; @@ -1046,15 +1056,14 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); - // Position arrays for O(1) degree-bucket removal - // col_count has m+1 buckets, row_count has n+1 buckets - std::vector col_pos(n); + // Position arrays for O(1) degree-bucket removal (col_count has m+1 buckets, row_count n+1) + std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] for (i_t d = 0; d <= m; ++d) { for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { col_pos[col_count[d][pos]] = pos; } } - std::vector row_pos(m); + std::vector row_pos(m); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] for (i_t d = 0; d <= n; ++d) { for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { row_pos[row_count[d][pos]] = pos; From 787fadf31cdc174700a201918d876e78b410e1da Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 16 Mar 2026 09:32:17 -0700 Subject: [PATCH 16/30] Remove code duplication --- cpp/src/dual_simplex/right_looking_lu.cpp | 47 +++++++++++++---------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 4800d644c1..37202000f8 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -79,6 +79,29 @@ i_t initialize_degree_data(const csc_matrix_t& A, return Bnz; } +// Fill col_pos and row_pos so that column j has col_pos[j] = its index in col_count[Cdegree[j]], +// and row i has row_pos[i] = its index in row_count[Rdegree[i]]. Enables O(1) degree-bucket +// removal. +template +void initialize_bucket_positions(const std::vector>& col_count, + const std::vector>& row_count, + i_t col_max_degree, + i_t row_max_degree, + std::vector& col_pos, + std::vector& row_pos) +{ + for (i_t d = 0; d <= col_max_degree; ++d) { + for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { + col_pos[col_count[d][pos]] = pos; + } + } + for (i_t d = 0; d <= row_max_degree; ++d) { + for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { + row_pos[row_count[d][pos]] = pos; + } + } +} + template i_t load_elements(const csc_matrix_t& A, const std::vector& column_list, @@ -656,19 +679,10 @@ i_t right_looking_lu(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); - // Position arrays for O(1) degree-bucket removal + // Position arrays for O(1) degree-bucket removal (col_count and row_count each have n+1 buckets) std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] - for (i_t d = 0; d <= n; ++d) { - for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { - col_pos[col_count[d][pos]] = pos; - } - } std::vector row_pos(n); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] - for (i_t d = 0; d <= n; ++d) { - for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { - row_pos[row_count[d][pos]] = pos; - } - } + initialize_bucket_positions(col_count, row_count, n, n, col_pos, row_pos); std::vector> elements(Bnz); std::vector first_in_row(n, kNone); @@ -1058,17 +1072,8 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // Position arrays for O(1) degree-bucket removal (col_count has m+1 buckets, row_count n+1) std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] - for (i_t d = 0; d <= m; ++d) { - for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { - col_pos[col_count[d][pos]] = pos; - } - } std::vector row_pos(m); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] - for (i_t d = 0; d <= n; ++d) { - for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { - row_pos[row_count[d][pos]] = pos; - } - } + initialize_bucket_positions(col_count, row_count, m, n, col_pos, row_pos); std::vector> elements(Bnz); std::vector first_in_row(m, kNone); From 197bf8d54d6cd1de62ce403876e7a2c2e0b7aa15 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 16 Mar 2026 10:37:23 -0700 Subject: [PATCH 17/30] keep the dense vector path alive --- cpp/src/dual_simplex/crossover.cpp | 61 ++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 832f6891f6..14624a4f4c 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -389,7 +389,8 @@ i_t dual_push(const lp_problem_t& lp, const std::vector& x = solution.x; i_t num_pushes = 0; std::vector delta_zN(n - m); - std::vector delta_expanded(n); + std::vector delta_expanded; // workspace for sparse path (delta_y is sparse enough) + std::vector delta_y_dense; // workspace for dense path (delta_y is not sparse enough) while (superbasic_list.size() > 0) { const i_t s = superbasic_list.back(); const i_t basic_leaving_index = superbasic_list_index.back(); @@ -417,24 +418,38 @@ i_t dual_push(const lp_problem_t& lp, } // delta_zN = -N^T delta_y - std::fill(delta_expanded.begin(), delta_expanded.end(), 0.); + // Choose sparse vs dense method by delta_y sparsity (match dual simplex: sparse if <= 30% nnz) std::fill(delta_zN.begin(), delta_zN.end(), 0.); - - // Iterate directly over sparse delta_y instead of checking zeros - for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { - const i_t row = delta_y_sparse.i[nnz_idx]; - const f_t val = delta_y_sparse.x[nnz_idx]; - - // Accumulate contributions from this row to all columns - const i_t row_start = Arow.row_start[row]; - const i_t row_end = Arow.row_start[row + 1]; - for (i_t p = row_start; p < row_end; ++p) { - const i_t col = Arow.j[p]; - delta_expanded[col] += Arow.x[p] * val; + const bool use_sparse = (delta_y_sparse.i.size() * 1.0 / m) <= 0.3; + + if (use_sparse) { + delta_expanded.resize(n); + std::fill(delta_expanded.begin(), delta_expanded.end(), 0.); + for (i_t nnz_idx = 0; nnz_idx < static_cast(delta_y_sparse.i.size()); ++nnz_idx) { + const i_t row = delta_y_sparse.i[nnz_idx]; + const f_t val = delta_y_sparse.x[nnz_idx]; + const i_t row_start = Arow.row_start[row]; + const i_t row_end = Arow.row_start[row + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t col = Arow.j[p]; + delta_expanded[col] += Arow.x[p] * val; + } + } + for (i_t k = 0; k < n - m; ++k) { + delta_zN[k] = -delta_expanded[nonbasic_list[k]]; + } + } else { + delta_y_sparse.to_dense(delta_y_dense); + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list[k]; + f_t dot = 0.0; + const i_t c_start = lp.A.col_start[j]; + const i_t c_end = lp.A.col_start[j + 1]; + for (i_t p = c_start; p < c_end; ++p) { + dot += lp.A.x[p] * delta_y_dense[lp.A.i[p]]; + } + delta_zN[k] = -dot; } - } - for (i_t k = 0; k < n - m; ++k) { - delta_zN[k] = -delta_expanded[nonbasic_list[k]]; } i_t entering_index = -1; @@ -1345,8 +1360,16 @@ crossover_status_t crossover(const lp_problem_t& lp, basis_update_mpf_t ft(L, U, p, settings.refactor_frequency); verify_basis(m, n, vstatus); compare_vstatus_with_lists(m, n, basic_list, nonbasic_list, vstatus); - i_t dual_push_status = dual_push( - lp, Arow, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus); + i_t dual_push_status = dual_push(lp, + Arow, + settings, + start_time, + solution, + ft, + basic_list, + nonbasic_list, + superbasic_list, + vstatus); if (dual_push_status < 0) { return return_to_status(dual_push_status); } settings.log.debug("basic list size %ld m %d\n", basic_list.size(), m); settings.log.debug("nonbasic list size %ld n - m %d\n", nonbasic_list.size(), n - m); From 990bcd0c05b0562ae184d7a1794bd4907f960fb8 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Fri, 20 Mar 2026 08:21:54 -0700 Subject: [PATCH 18/30] Revert "Update to 26.06 (#975)" This reverts commit e2ea6872842475d01f95e3ed8f8abaee0534fdc9. --- .claude-plugin/marketplace.json | 2 +- .cursor-plugin/plugin.json | 2 +- .github/workflows/build.yaml | 30 +++++------ .../workflows/build_test_publish_images.yaml | 2 +- .github/workflows/pr.yaml | 32 ++++++------ .github/workflows/test.yaml | 2 +- README.md | 12 ++--- VERSION | 2 +- .../all_cuda-129_arch-aarch64.yaml | 10 ++-- .../all_cuda-129_arch-x86_64.yaml | 10 ++-- .../all_cuda-131_arch-aarch64.yaml | 10 ++-- .../all_cuda-131_arch-x86_64.yaml | 10 ++-- dependencies.yaml | 50 +++++++++---------- gemini-extension.json | 2 +- helmchart/cuopt-server/Chart.yaml | 4 +- helmchart/cuopt-server/values.yaml | 2 +- python/cuopt/pyproject.toml | 18 +++---- python/cuopt_self_hosted/pyproject.toml | 2 +- python/cuopt_server/pyproject.toml | 2 +- python/libcuopt/pyproject.toml | 8 +-- skills/cuopt-developer/SKILL.md | 2 +- skills/cuopt-installation-api-c/SKILL.md | 2 +- skills/cuopt-installation-api-python/SKILL.md | 2 +- skills/cuopt-installation-common/SKILL.md | 2 +- skills/cuopt-installation-developer/SKILL.md | 2 +- skills/cuopt-lp-milp-api-c/SKILL.md | 2 +- skills/cuopt-lp-milp-api-cli/SKILL.md | 2 +- skills/cuopt-lp-milp-api-python/SKILL.md | 2 +- skills/cuopt-qp-api-c/SKILL.md | 2 +- skills/cuopt-qp-api-cli/SKILL.md | 2 +- skills/cuopt-qp-api-python/SKILL.md | 2 +- skills/cuopt-routing-api-python/SKILL.md | 2 +- skills/cuopt-server-api-python/SKILL.md | 2 +- skills/cuopt-server-common/SKILL.md | 2 +- skills/cuopt-user-rules/SKILL.md | 2 +- skills/lp-milp-formulation/SKILL.md | 2 +- skills/qp-formulation/SKILL.md | 2 +- skills/routing-formulation/SKILL.md | 2 +- skills/skill-evolution/SKILL.md | 4 +- 39 files changed, 126 insertions(+), 126 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 6ddf2583c4..4c5df380f6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -5,7 +5,7 @@ }, "metadata": { "description": "Agent skills for NVIDIA cuOpt: routing (VRP, TSP, PDP), LP/MILP/QP, installation (Python/C/developer), and REST server.", - "version": "26.06.00" + "version": "26.04.00" }, "plugins": [ { diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json index e740506140..5f34873671 100644 --- a/.cursor-plugin/plugin.json +++ b/.cursor-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "nvidia-cuopt-skills", "description": "Agent skills for NVIDIA cuOpt: routing (VRP, TSP, PDP), LP/MILP/QP, installation (Python/C/developer), and REST server. Use when building or solving optimization with cuOpt.", - "version": "26.06.00", + "version": "26.04.00", "author": { "name": "NVIDIA" }, diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index a945cde8ec..593d48bd74 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -45,7 +45,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -65,7 +65,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: wheel-publish-cuopt-mps-parser: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -99,7 +99,7 @@ jobs: wheel-build-libcuopt: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -112,7 +112,7 @@ jobs: wheel-publish-libcuopt: needs: wheel-build-libcuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -123,7 +123,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -135,7 +135,7 @@ jobs: wheel-publish-cuopt: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -145,7 +145,7 @@ jobs: package-type: python wheel-build-cuopt-server: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -160,7 +160,7 @@ jobs: wheel-publish-cuopt-server: needs: wheel-build-cuopt-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -171,7 +171,7 @@ jobs: docs-build: needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} node_type: "gpu-l4-latest-1" @@ -181,11 +181,11 @@ jobs: arch: "amd64" file_to_upload: "docs/cuopt/build/html/" artifact-name: "cuopt_docs" - container_image: "rapidsai/ci-conda:26.06-latest" + container_image: "rapidsai/ci-conda:26.04-latest" script: "ci/build_docs.sh" wheel-build-cuopt-sh-client: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -201,7 +201,7 @@ jobs: wheel-publish-cuopt-sh-client: needs: wheel-build-cuopt-sh-client secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/build_test_publish_images.yaml b/.github/workflows/build_test_publish_images.yaml index 17d4e9ab57..f8f7366e13 100644 --- a/.github/workflows/build_test_publish_images.yaml +++ b/.github/workflows/build_test_publish_images.yaml @@ -55,7 +55,7 @@ jobs: compute-matrix: runs-on: ubuntu-latest container: - image: rapidsai/ci-conda:26.06-latest + image: rapidsai/ci-conda:26.04-latest outputs: MATRIX: ${{ steps.compute-matrix.outputs.MATRIX }} CUOPT_VER: ${{ steps.compute-cuopt-ver.outputs.CUOPT_VER }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a652c23b9a..95741c1fb5 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -34,7 +34,7 @@ jobs: - wheel-build-cuopt-sh-client - test-self-hosted-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@python-3.14 if: always() with: needs: ${{ toJSON(needs) }} @@ -111,7 +111,7 @@ jobs: changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@python-3.14 with: files_yaml: | build_docs: @@ -279,20 +279,20 @@ jobs: - '!gemini-extension.json' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.14 with: enable_check_generated_files: false conda-cpp-build: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_cpp.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }} conda-cpp-tests: needs: [conda-cpp-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request @@ -308,14 +308,14 @@ jobs: conda-python-build: needs: [conda-cpp-build, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_python.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-tests: needs: [conda-python-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_conda with: run_codecov: false @@ -332,7 +332,7 @@ jobs: docs-build: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).build_docs with: build_type: pull-request @@ -340,12 +340,12 @@ jobs: arch: "amd64" file_to_upload: "docs/cuopt/build/html/" artifact-name: "cuopt_docs" - container_image: "rapidsai/ci-conda:26.06-latest" + container_image: "rapidsai/ci-conda:26.04-latest" script: "ci/build_docs.sh" wheel-build-cuopt-mps-parser: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt_mps_parser.sh @@ -357,7 +357,7 @@ jobs: wheel-build-libcuopt: needs: [wheel-build-cuopt-mps-parser, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: ${{ needs.compute-matrix-filters.outputs.libcuopt_filter }} @@ -368,7 +368,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt.sh @@ -377,7 +377,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-tests-cuopt: needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request @@ -393,7 +393,7 @@ jobs: wheel-build-cuopt-server: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt_server.sh @@ -405,7 +405,7 @@ jobs: wheel-build-cuopt-sh-client: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt_sh_client.sh @@ -417,7 +417,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_sh_client_filter }} wheel-tests-cuopt-server: needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a8cc5f2943..e88b7829f5 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -97,5 +97,5 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-l4-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:26.06-latest" + container_image: "rapidsai/ci-conda:26.04-latest" script: ci/test_notebooks.sh diff --git a/README.md b/README.md index 95c8598d77..379a48c350 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # cuOpt - GPU-accelerated Optimization [![Build Status](https://github.com/NVIDIA/cuopt/actions/workflows/build.yaml/badge.svg)](https://github.com/NVIDIA/cuopt/actions/workflows/build.yaml) -[![Version](https://img.shields.io/badge/version-26.06.00-blue)](https://github.com/NVIDIA/cuopt/releases) +[![Version](https://img.shields.io/badge/version-26.04.00-blue)](https://github.com/NVIDIA/cuopt/releases) [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen)](https://docs.nvidia.com/cuopt/user-guide/latest/introduction.html) [![Docker Hub](https://img.shields.io/badge/docker-nvidia%2Fcuopt-blue?logo=docker)](https://hub.docker.com/r/nvidia/cuopt) [![Examples](https://img.shields.io/badge/examples-cuopt--examples-orange)](https://github.com/NVIDIA/cuopt-examples) @@ -83,7 +83,7 @@ For CUDA 12.x: pip install \ --extra-index-url=https://pypi.nvidia.com \ nvidia-cuda-runtime-cu12==12.9.* \ - cuopt-server-cu12==26.06.* cuopt-sh-client==26.06.* + cuopt-server-cu12==26.04.* cuopt-sh-client==26.04.* ``` Development wheels are available as nightlies, please update `--extra-index-url` to `https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/` to install latest nightly packages. @@ -91,7 +91,7 @@ Development wheels are available as nightlies, please update `--extra-index-url` pip install --pre \ --extra-index-url=https://pypi.nvidia.com \ --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - cuopt-server-cu12==26.06.* cuopt-sh-client==26.06.* + cuopt-server-cu12==26.04.* cuopt-sh-client==26.04.* ``` For CUDA 13.x: @@ -99,7 +99,7 @@ For CUDA 13.x: ```bash pip install \ --extra-index-url=https://pypi.nvidia.com \ - cuopt-server-cu13==26.06.* cuopt-sh-client==26.06.* + cuopt-server-cu13==26.04.* cuopt-sh-client==26.04.* ``` Development wheels are available as nightlies, please update `--extra-index-url` to `https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/` to install latest nightly packages. @@ -107,7 +107,7 @@ Development wheels are available as nightlies, please update `--extra-index-url` pip install --pre \ --extra-index-url=https://pypi.nvidia.com \ --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - cuopt-server-cu13==26.06.* cuopt-sh-client==26.06.* + cuopt-server-cu13==26.04.* cuopt-sh-client==26.04.* ``` @@ -118,7 +118,7 @@ cuOpt can be installed with conda (via [miniforge](https://github.com/conda-forg All other dependencies are installed automatically when `cuopt-server` and `cuopt-sh-client` are installed. ```bash -conda install -c rapidsai -c conda-forge -c nvidia cuopt-server=26.06.* cuopt-sh-client=26.06.* +conda install -c rapidsai -c conda-forge -c nvidia cuopt-server=26.04.* cuopt-sh-client=26.04.* ``` We also provide [nightly conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD diff --git a/VERSION b/VERSION index cdb610a24d..0bd0e8a95b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -26.06.00 +26.04.00 diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 104e7e70d1..ecef112dd5 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=12.9.2,<13.0 - cuda-sanitizer-api - cuda-version=12.9 -- cudf==26.6.*,>=0.0.0a0 +- cudf==26.4.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -32,8 +32,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==26.6.*,>=0.0.0a0 -- librmm==26.6.*,>=0.0.0a0 +- libraft-headers==26.4.*,>=0.0.0a0 +- librmm==26.4.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.2 @@ -50,7 +50,7 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==26.6.*,>=0.0.0a0 +- pylibraft==26.4.*,>=0.0.0a0 - pyrsistent - pytest-cov - pytest<9.0 @@ -59,7 +59,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests -- rmm==26.6.*,>=0.0.0a0 +- rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy>=1.14.1 - sphinx diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 06aa6121f9..35c825280c 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=12.9.2,<13.0 - cuda-sanitizer-api - cuda-version=12.9 -- cudf==26.6.*,>=0.0.0a0 +- cudf==26.4.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -32,8 +32,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==26.6.*,>=0.0.0a0 -- librmm==26.6.*,>=0.0.0a0 +- libraft-headers==26.4.*,>=0.0.0a0 +- librmm==26.4.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.2 @@ -50,7 +50,7 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==26.6.*,>=0.0.0a0 +- pylibraft==26.4.*,>=0.0.0a0 - pyrsistent - pytest-cov - pytest<9.0 @@ -59,7 +59,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests -- rmm==26.6.*,>=0.0.0a0 +- rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy>=1.14.1 - sphinx diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index a68ebf1285..2b717d4e98 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=13.0.1,<14.0 - cuda-sanitizer-api - cuda-version=13.1 -- cudf==26.6.*,>=0.0.0a0 +- cudf==26.4.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -32,8 +32,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==26.6.*,>=0.0.0a0 -- librmm==26.6.*,>=0.0.0a0 +- libraft-headers==26.4.*,>=0.0.0a0 +- librmm==26.4.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.2 @@ -50,7 +50,7 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==26.6.*,>=0.0.0a0 +- pylibraft==26.4.*,>=0.0.0a0 - pyrsistent - pytest-cov - pytest<9.0 @@ -59,7 +59,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests -- rmm==26.6.*,>=0.0.0a0 +- rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy>=1.14.1 - sphinx diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index 043d55e148..f605a83f3b 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-python>=13.0.1,<14.0 - cuda-sanitizer-api - cuda-version=13.1 -- cudf==26.6.*,>=0.0.0a0 +- cudf==26.4.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -32,8 +32,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==26.6.*,>=0.0.0a0 -- librmm==26.6.*,>=0.0.0a0 +- libraft-headers==26.4.*,>=0.0.0a0 +- librmm==26.4.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.2 @@ -50,7 +50,7 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==26.6.*,>=0.0.0a0 +- pylibraft==26.4.*,>=0.0.0a0 - pyrsistent - pytest-cov - pytest<9.0 @@ -59,7 +59,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests -- rmm==26.6.*,>=0.0.0a0 +- rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy>=1.14.1 - sphinx diff --git a/dependencies.yaml b/dependencies.yaml index db60f63569..014889c7d5 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -311,7 +311,7 @@ dependencies: common: - output_types: [conda] packages: - - libcuopt-tests==26.6.*,>=0.0.0a0 + - libcuopt-tests==26.4.*,>=0.0.0a0 build_wheels: common: - output_types: [requirements, pyproject] @@ -413,7 +413,7 @@ dependencies: common: - output_types: conda packages: - - &libcuopt_unsuffixed libcuopt==26.6.*,>=0.0.0a0 + - &libcuopt_unsuffixed libcuopt==26.4.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -426,18 +426,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libcuopt-cu12==26.6.*,>=0.0.0a0 + - libcuopt-cu12==26.4.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - libcuopt-cu13==26.6.*,>=0.0.0a0 + - libcuopt-cu13==26.4.*,>=0.0.0a0 - {matrix: null, packages: [*libcuopt_unsuffixed]} depends_on_cuopt: common: - output_types: conda packages: - - &cuopt_unsuffixed cuopt==26.6.*,>=0.0.0a0 + - &cuopt_unsuffixed cuopt==26.4.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -450,18 +450,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cuopt-cu12==26.6.*,>=0.0.0a0 + - cuopt-cu12==26.4.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - cuopt-cu13==26.6.*,>=0.0.0a0 + - cuopt-cu13==26.4.*,>=0.0.0a0 - {matrix: null, packages: [*cuopt_unsuffixed]} depends_on_cuopt_server: common: - output_types: conda packages: - - &cuopt_server_unsuffixed cuopt-server==26.6.*,>=0.0.0a0 + - &cuopt_server_unsuffixed cuopt-server==26.4.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -474,18 +474,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cuopt-server-cu12==26.6.*,>=0.0.0a0 + - cuopt-server-cu12==26.4.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - cuopt-server-cu13==26.6.*,>=0.0.0a0 + - cuopt-server-cu13==26.4.*,>=0.0.0a0 - {matrix: null, packages: [*cuopt_server_unsuffixed]} depends_on_cuopt_sh_client: common: - output_types: [conda, requirements, pyproject] packages: - - &cuopt_sh_client_unsuffixed cuopt-sh-client==26.6.*,>=0.0.0a0 + - &cuopt_sh_client_unsuffixed cuopt-sh-client==26.4.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -495,7 +495,7 @@ dependencies: common: - output_types: [requirements, pyproject, conda] packages: - - cuopt-mps-parser==26.6.*,>=0.0.0a0 + - cuopt-mps-parser==26.4.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -505,12 +505,12 @@ dependencies: common: - output_types: conda packages: - - libraft-headers==26.6.*,>=0.0.0a0 + - libraft-headers==26.4.*,>=0.0.0a0 depends_on_librmm: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==26.6.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==26.4.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -522,12 +522,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==26.6.*,>=0.0.0a0 + - librmm-cu12==26.4.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - librmm-cu13==26.6.*,>=0.0.0a0 + - librmm-cu13==26.4.*,>=0.0.0a0 - {matrix: null, packages: [*librmm_unsuffixed]} depends_on_cupy: common: @@ -562,7 +562,7 @@ dependencies: common: - output_types: conda packages: - - &rmm_unsuffixed rmm==26.6.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==26.4.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -574,12 +574,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - rmm-cu12==26.6.*,>=0.0.0a0 + - rmm-cu12==26.4.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - rmm-cu13==26.6.*,>=0.0.0a0 + - rmm-cu13==26.4.*,>=0.0.0a0 - matrix: packages: - *rmm_unsuffixed @@ -588,7 +588,7 @@ dependencies: common: - output_types: conda packages: - - &cudf_unsuffixed cudf==26.6.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==26.4.*,>=0.0.0a0 - output_types: requirements packages: - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple @@ -599,12 +599,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==26.6.*,>=0.0.0a0 + - cudf-cu12==26.4.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - cudf-cu13==26.6.*,>=0.0.0a0 + - cudf-cu13==26.4.*,>=0.0.0a0 - matrix: packages: - *cudf_unsuffixed @@ -613,7 +613,7 @@ dependencies: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==26.6.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==26.4.*,>=0.0.0a0 - output_types: requirements packages: - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple @@ -624,12 +624,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==26.6.*,>=0.0.0a0 + - pylibraft-cu12==26.4.*,>=0.0.0a0 - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - pylibraft-cu13==26.6.*,>=0.0.0a0 + - pylibraft-cu13==26.4.*,>=0.0.0a0 - matrix: packages: - *pylibraft_unsuffixed diff --git a/gemini-extension.json b/gemini-extension.json index c5ef9883f8..b4c6b764a4 100644 --- a/gemini-extension.json +++ b/gemini-extension.json @@ -1,6 +1,6 @@ { "name": "nvidia-cuopt-skills", "description": "Agent skills for NVIDIA cuOpt optimization engine: routing, LP/MILP/QP, installation, and server.", - "version": "26.06.00", + "version": "26.04.00", "contextFileName": "AGENTS.md" } diff --git a/helmchart/cuopt-server/Chart.yaml b/helmchart/cuopt-server/Chart.yaml index 811ac067cb..074d94bec9 100644 --- a/helmchart/cuopt-server/Chart.yaml +++ b/helmchart/cuopt-server/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -appVersion: 26.6.0 +appVersion: 26.4.0 description: A Helm chart for NVIDIA cuOpt Server with GPU support home: https://docs.nvidia.com/cuopt/user-guide/latest/resources.html keywords: @@ -14,4 +14,4 @@ name: cuopt-server sources: - https://docs.nvidia.com/cuopt/user-guide/latest/resources.html type: application -version: 26.6.0 +version: 26.4.0 diff --git a/helmchart/cuopt-server/values.yaml b/helmchart/cuopt-server/values.yaml index 6adafea79e..5218596552 100644 --- a/helmchart/cuopt-server/values.yaml +++ b/helmchart/cuopt-server/values.yaml @@ -7,7 +7,7 @@ replicaCount: 1 image: repository: nvidia/cuopt pullPolicy: IfNotPresent - tag: "26.6.0-cuda12.9-py3.12" + tag: "26.4.0-cuda12.9-py3.12" imagePullSecrets: [] nameOverride: "" diff --git a/python/cuopt/pyproject.toml b/python/cuopt/pyproject.toml index eff7e01769..e86b5bdd73 100644 --- a/python/cuopt/pyproject.toml +++ b/python/cuopt/pyproject.toml @@ -20,18 +20,18 @@ license = "Apache-2.0" requires-python = ">=3.11" dependencies = [ "cuda-python>=13.0.1,<14.0", - "cudf==26.6.*,>=0.0.0a0", - "cuopt-mps-parser==26.6.*,>=0.0.0a0", + "cudf==26.4.*,>=0.0.0a0", + "cuopt-mps-parser==26.4.*,>=0.0.0a0", "cupy-cuda13x>=13.6.0", - "libcuopt==26.6.*,>=0.0.0a0", + "libcuopt==26.4.*,>=0.0.0a0", "numba-cuda>=0.22.1", "numba>=0.60.0,<0.65.0", "numpy>=1.23.5,<3.0", "pandas>=2.0", - "pylibraft==26.6.*,>=0.0.0a0", + "pylibraft==26.4.*,>=0.0.0a0", "pyyaml>=6.0.0", "rapids-logger==0.2.*,>=0.0.0a0", - "rmm==26.6.*,>=0.0.0a0", + "rmm==26.4.*,>=0.0.0a0", "scipy>=1.14.1", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -101,12 +101,12 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" requires = [ "cmake>=3.30.4", - "cuopt-mps-parser==26.6.*,>=0.0.0a0", + "cuopt-mps-parser==26.4.*,>=0.0.0a0", "cupy-cuda13x>=13.6.0", "cython>=3.0.3", - "libcuopt==26.6.*,>=0.0.0a0", + "libcuopt==26.4.*,>=0.0.0a0", "ninja", - "pylibraft==26.6.*,>=0.0.0a0", + "pylibraft==26.4.*,>=0.0.0a0", "rapids-logger==0.2.*,>=0.0.0a0", - "rmm==26.6.*,>=0.0.0a0", + "rmm==26.4.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cuopt_self_hosted/pyproject.toml b/python/cuopt_self_hosted/pyproject.toml index 43aa80a5b3..7645c99ed0 100644 --- a/python/cuopt_self_hosted/pyproject.toml +++ b/python/cuopt_self_hosted/pyproject.toml @@ -20,7 +20,7 @@ license = "Apache-2.0" license-files = ["LICENSE"] requires-python = ">=3.11" dependencies = [ - "cuopt-mps-parser==26.6.*,>=0.0.0a0", + "cuopt-mps-parser==26.4.*,>=0.0.0a0", "msgpack-numpy==0.4.8", "msgpack==1.1.2", "requests", diff --git a/python/cuopt_server/pyproject.toml b/python/cuopt_server/pyproject.toml index ce96c884be..d24cfcbd77 100644 --- a/python/cuopt_server/pyproject.toml +++ b/python/cuopt_server/pyproject.toml @@ -21,7 +21,7 @@ license = "Apache-2.0" license-files = ["LICENSE"] requires-python = ">=3.11" dependencies = [ - "cuopt==26.6.*,>=0.0.0a0", + "cuopt==26.4.*,>=0.0.0a0", "cupy-cuda13x>=13.6.0", "fastapi", "jsonref==1.1.0", diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index e5c0c58fab..2507971a0f 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -30,8 +30,8 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "cuopt-mps-parser==26.6.*,>=0.0.0a0", - "librmm==26.6.*,>=0.0.0a0", + "cuopt-mps-parser==26.4.*,>=0.0.0a0", + "librmm==26.4.*,>=0.0.0a0", "nvidia-cublas", "nvidia-cudart", "nvidia-cudss", @@ -81,8 +81,8 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" requires = [ "cmake>=3.30.4", - "cuopt-mps-parser==26.6.*,>=0.0.0a0", - "librmm==26.6.*,>=0.0.0a0", + "cuopt-mps-parser==26.4.*,>=0.0.0a0", + "librmm==26.4.*,>=0.0.0a0", "ninja", "rapids-logger==0.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/skills/cuopt-developer/SKILL.md b/skills/cuopt-developer/SKILL.md index 99743f9171..12419153ac 100644 --- a/skills/cuopt-developer/SKILL.md +++ b/skills/cuopt-developer/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-developer -version: "26.06.00" +version: "26.04.00" description: Contribute to NVIDIA cuOpt codebase including C++/CUDA, Python, server, docs, and CI. Use when the user wants to modify solver internals, add features, submit PRs, or understand the codebase architecture. --- diff --git a/skills/cuopt-installation-api-c/SKILL.md b/skills/cuopt-installation-api-c/SKILL.md index bd4d60becc..747382e3c7 100644 --- a/skills/cuopt-installation-api-c/SKILL.md +++ b/skills/cuopt-installation-api-c/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-installation-api-c -version: "26.06.00" +version: "26.04.00" description: Install cuOpt for C — conda, locate lib/headers, verification. Use when the user is installing or verifying the C API. Standalone; no common skill. --- diff --git a/skills/cuopt-installation-api-python/SKILL.md b/skills/cuopt-installation-api-python/SKILL.md index 771f5ec8b0..a3d7a5e5d2 100644 --- a/skills/cuopt-installation-api-python/SKILL.md +++ b/skills/cuopt-installation-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-installation-api-python -version: "26.06.00" +version: "26.04.00" description: Install cuOpt for Python — pip, conda, Docker, verification. Use when the user is installing or verifying the Python API. Standalone; no common skill. --- diff --git a/skills/cuopt-installation-common/SKILL.md b/skills/cuopt-installation-common/SKILL.md index 88534fb810..6ceb9f9000 100644 --- a/skills/cuopt-installation-common/SKILL.md +++ b/skills/cuopt-installation-common/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-installation-common -version: "26.06.00" +version: "26.04.00" description: Install cuOpt — system and environment requirements only. Domain concepts; no install commands or interface guidance. --- diff --git a/skills/cuopt-installation-developer/SKILL.md b/skills/cuopt-installation-developer/SKILL.md index 1f3dff0d3f..a002498853 100644 --- a/skills/cuopt-installation-developer/SKILL.md +++ b/skills/cuopt-installation-developer/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-installation-developer -version: "26.06.00" +version: "26.04.00" description: Developer installation — build cuOpt from source, run tests. Use when the user wants to set up a dev environment to contribute or modify cuOpt. --- diff --git a/skills/cuopt-lp-milp-api-c/SKILL.md b/skills/cuopt-lp-milp-api-c/SKILL.md index 74b0d5dc92..53df3de63e 100644 --- a/skills/cuopt-lp-milp-api-c/SKILL.md +++ b/skills/cuopt-lp-milp-api-c/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-lp-milp-api-c -version: "26.06.00" +version: "26.04.00" description: LP and MILP with cuOpt — C API only. Use when the user is embedding LP/MILP in C/C++. --- diff --git a/skills/cuopt-lp-milp-api-cli/SKILL.md b/skills/cuopt-lp-milp-api-cli/SKILL.md index 1f8e8a157c..cbdc1e7778 100644 --- a/skills/cuopt-lp-milp-api-cli/SKILL.md +++ b/skills/cuopt-lp-milp-api-cli/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-lp-milp-api-cli -version: "26.06.00" +version: "26.04.00" description: LP and MILP with cuOpt — CLI only (MPS files, cuopt_cli). Use when the user is solving from MPS via command line. --- diff --git a/skills/cuopt-lp-milp-api-python/SKILL.md b/skills/cuopt-lp-milp-api-python/SKILL.md index e8435867db..a7cd9a59f2 100644 --- a/skills/cuopt-lp-milp-api-python/SKILL.md +++ b/skills/cuopt-lp-milp-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-lp-milp-api-python -version: "26.06.00" +version: "26.04.00" description: Solve Linear Programming (LP) and Mixed-Integer Linear Programming (MILP) with the Python API. Use when the user asks about optimization with linear constraints, integer variables, scheduling, resource allocation, facility location, or production planning. --- diff --git a/skills/cuopt-qp-api-c/SKILL.md b/skills/cuopt-qp-api-c/SKILL.md index 85014b81fd..bc1efb63d3 100644 --- a/skills/cuopt-qp-api-c/SKILL.md +++ b/skills/cuopt-qp-api-c/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-qp-api-c -version: "26.06.00" +version: "26.04.00" description: Quadratic Programming (QP) with cuOpt — C API. Use when the user is embedding QP in C/C++. --- diff --git a/skills/cuopt-qp-api-cli/SKILL.md b/skills/cuopt-qp-api-cli/SKILL.md index 7aec559126..5f8a8e848a 100644 --- a/skills/cuopt-qp-api-cli/SKILL.md +++ b/skills/cuopt-qp-api-cli/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-qp-api-cli -version: "26.06.00" +version: "26.04.00" description: QP with cuOpt — CLI (e.g. cuopt_cli with QP-capable input). Use when the user is solving QP from the command line. --- diff --git a/skills/cuopt-qp-api-python/SKILL.md b/skills/cuopt-qp-api-python/SKILL.md index 39533aaeca..b85b9e3db2 100644 --- a/skills/cuopt-qp-api-python/SKILL.md +++ b/skills/cuopt-qp-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-qp-api-python -version: "26.06.00" +version: "26.04.00" description: Quadratic Programming (QP) with cuOpt — Python API only (beta). Use when the user is building or solving QP in Python. --- diff --git a/skills/cuopt-routing-api-python/SKILL.md b/skills/cuopt-routing-api-python/SKILL.md index c386107241..d8bf736f8f 100644 --- a/skills/cuopt-routing-api-python/SKILL.md +++ b/skills/cuopt-routing-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-routing-api-python -version: "26.06.00" +version: "26.04.00" description: Vehicle routing (VRP, TSP, PDP) with cuOpt — Python API only. Use when the user is building or solving routing in Python. --- diff --git a/skills/cuopt-server-api-python/SKILL.md b/skills/cuopt-server-api-python/SKILL.md index 7d6ed175dd..b340e9883f 100644 --- a/skills/cuopt-server-api-python/SKILL.md +++ b/skills/cuopt-server-api-python/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-server-api-python -version: "26.06.00" +version: "26.04.00" description: cuOpt REST server — start server, endpoints, Python/curl client examples. Use when the user is deploying or calling the REST API. --- diff --git a/skills/cuopt-server-common/SKILL.md b/skills/cuopt-server-common/SKILL.md index cc2a3728d5..f23c9c4a5f 100644 --- a/skills/cuopt-server-common/SKILL.md +++ b/skills/cuopt-server-common/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-server-common -version: "26.06.00" +version: "26.04.00" description: cuOpt REST server — what it does and how requests flow. Domain concepts; no deploy or client code. --- diff --git a/skills/cuopt-user-rules/SKILL.md b/skills/cuopt-user-rules/SKILL.md index 87734f72a2..0777b9af15 100644 --- a/skills/cuopt-user-rules/SKILL.md +++ b/skills/cuopt-user-rules/SKILL.md @@ -1,6 +1,6 @@ --- name: cuopt-user-rules -version: "26.06.00" +version: "26.04.00" description: Base behavior rules for using NVIDIA cuOpt. Read this FIRST before any cuOpt user task (routing, LP/MILP, QP, installation, server). Covers handling incomplete questions, clarifying data requirements, verifying understanding, and running commands safely. --- diff --git a/skills/lp-milp-formulation/SKILL.md b/skills/lp-milp-formulation/SKILL.md index e429282033..64431a04c4 100644 --- a/skills/lp-milp-formulation/SKILL.md +++ b/skills/lp-milp-formulation/SKILL.md @@ -1,6 +1,6 @@ --- name: lp-milp-formulation -version: "26.06.00" +version: "26.04.00" description: LP/MILP concepts and going from problem text to formulation. What LP/MILP are, required formulation questions, typical modeling elements, and how to parse problem statements (parameters, constraints, decisions, objective). --- diff --git a/skills/qp-formulation/SKILL.md b/skills/qp-formulation/SKILL.md index 60aed00ede..c87b887fbc 100644 --- a/skills/qp-formulation/SKILL.md +++ b/skills/qp-formulation/SKILL.md @@ -1,6 +1,6 @@ --- name: qp-formulation -version: "26.06.00" +version: "26.04.00" description: Quadratic Programming (QP) — problem form and constraints. Domain concepts; no API or interface. QP is beta. --- diff --git a/skills/routing-formulation/SKILL.md b/skills/routing-formulation/SKILL.md index 9cf8060cdf..4ab8d6419d 100644 --- a/skills/routing-formulation/SKILL.md +++ b/skills/routing-formulation/SKILL.md @@ -1,6 +1,6 @@ --- name: routing-formulation -version: "26.06.00" +version: "26.04.00" description: Vehicle routing (VRP, TSP, PDP) — problem types and data requirements. Domain concepts; no API or interface. --- diff --git a/skills/skill-evolution/SKILL.md b/skills/skill-evolution/SKILL.md index f3605795b7..d77fba1a3f 100644 --- a/skills/skill-evolution/SKILL.md +++ b/skills/skill-evolution/SKILL.md @@ -1,6 +1,6 @@ --- name: skill-evolution -version: "26.06.00" +version: "26.04.00" description: After solving a non-trivial problem, detect generalizable learnings and propose skill updates so future interactions benefit automatically. Always active — applies to every interaction. --- @@ -182,7 +182,7 @@ When skill evolution creates an entirely new skill directory, add `origin: skill ```yaml --- name: new-skill-name -version: "26.06.00" +version: "26.04.00" description: ... origin: skill-evolution --- From 40b9e49fb32bc31d59e965ddb1fa479e7a5f126c Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Fri, 20 Mar 2026 08:28:55 -0700 Subject: [PATCH 19/30] Cleanup unnecessary changes --- .github/workflows/build.yaml | 28 ++++++++--------- .github/workflows/pr.yaml | 30 +++++++++---------- .github/workflows/test.yaml | 10 +++---- .../trigger-breaking-change-alert.yaml | 2 +- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 593d48bd74..3eb1f1f066 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -45,7 +45,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -65,7 +65,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: wheel-publish-cuopt-mps-parser: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -99,7 +99,7 @@ jobs: wheel-build-libcuopt: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -112,7 +112,7 @@ jobs: wheel-publish-libcuopt: needs: wheel-build-libcuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -123,7 +123,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -135,7 +135,7 @@ jobs: wheel-publish-cuopt: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -145,7 +145,7 @@ jobs: package-type: python wheel-build-cuopt-server: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -160,7 +160,7 @@ jobs: wheel-publish-cuopt-server: needs: wheel-build-cuopt-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -171,7 +171,7 @@ jobs: docs-build: needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} node_type: "gpu-l4-latest-1" @@ -185,7 +185,7 @@ jobs: script: "ci/build_docs.sh" wheel-build-cuopt-sh-client: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -201,7 +201,7 @@ jobs: wheel-publish-cuopt-sh-client: needs: wheel-build-cuopt-sh-client secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@release/26.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 95741c1fb5..47a3bd9fca 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -34,7 +34,7 @@ jobs: - wheel-build-cuopt-sh-client - test-self-hosted-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@release/26.04 if: always() with: needs: ${{ toJSON(needs) }} @@ -111,7 +111,7 @@ jobs: changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@release/26.04 with: files_yaml: | build_docs: @@ -279,20 +279,20 @@ jobs: - '!gemini-extension.json' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@release/26.04 with: enable_check_generated_files: false conda-cpp-build: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@release/26.04 with: build_type: pull-request script: ci/build_cpp.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }} conda-cpp-tests: needs: [conda-cpp-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@release/26.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request @@ -308,14 +308,14 @@ jobs: conda-python-build: needs: [conda-cpp-build, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@release/26.04 with: build_type: pull-request script: ci/build_python.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-tests: needs: [conda-python-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@release/26.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_conda with: run_codecov: false @@ -332,7 +332,7 @@ jobs: docs-build: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@release/26.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).build_docs with: build_type: pull-request @@ -345,7 +345,7 @@ jobs: wheel-build-cuopt-mps-parser: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: build_type: pull-request script: ci/build_wheel_cuopt_mps_parser.sh @@ -357,7 +357,7 @@ jobs: wheel-build-libcuopt: needs: [wheel-build-cuopt-mps-parser, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: ${{ needs.compute-matrix-filters.outputs.libcuopt_filter }} @@ -368,7 +368,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: build_type: pull-request script: ci/build_wheel_cuopt.sh @@ -377,7 +377,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-tests-cuopt: needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/26.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request @@ -393,7 +393,7 @@ jobs: wheel-build-cuopt-server: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: build_type: pull-request script: ci/build_wheel_cuopt_server.sh @@ -405,7 +405,7 @@ jobs: wheel-build-cuopt-sh-client: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@release/26.04 with: build_type: pull-request script: ci/build_wheel_cuopt_sh_client.sh @@ -417,7 +417,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_sh_client_filter }} wheel-tests-cuopt-server: needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/26.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e88b7829f5..9ad7609e8a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -27,7 +27,7 @@ on: jobs: conda-cpp-tests: - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@release/26.04 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-python-tests: - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@release/26.04 with: run_codecov: false build_type: ${{ inputs.build_type }} @@ -58,7 +58,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt: - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/26.04 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt-server: - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@release/26.04 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@release/26.04 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 57b178740c..d394b97db4 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -15,7 +15,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@release/26.04 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} From c0d1514db232731c4720e7fe4b7ac37afa226bb8 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 23 Mar 2026 13:05:44 -0700 Subject: [PATCH 20/30] Remove unused variable --- cpp/src/mip_heuristics/diversity/diversity_manager.cu | 5 ++--- cpp/src/mip_heuristics/diversity/diversity_manager.cuh | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index c659e9788b..18346209f9 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -40,7 +40,6 @@ std::vector recombiner_t::enabled_recombiners; template diversity_manager_t::diversity_manager_t(mip_solver_context_t& context_) : context(context_), - branch_and_bound_ptr(nullptr), problem_ptr(context.problem_ptr), diversity_config(), population("population", @@ -417,8 +416,8 @@ solution_t diversity_manager_t::run_solver() bool bb_thread_solution_exists = simplex_solution_exists.load(); if (bb_thread_solution_exists) { ls.lp_optimal_exists = true; - } else if (branch_and_bound_ptr != nullptr && - branch_and_bound_ptr->enable_concurrent_lp_root_solve()) { + } else if (context.branch_and_bound_ptr != nullptr && + context.branch_and_bound_ptr->enable_concurrent_lp_root_solve()) { // B&B drives root relaxation; wait for first solution (PDLP/Barrier or dual simplex) first_solution_ready_.store(false, std::memory_order_release); std::unique_lock lock(first_solution_mutex_); diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh index fed937a88b..a9517484c9 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh @@ -82,7 +82,6 @@ class diversity_manager_t { result); mip_solver_context_t& context; - dual_simplex::branch_and_bound_t* branch_and_bound_ptr; problem_t* problem_ptr; diversity_config_t diversity_config; population_t population; From 31641083824c318aaf28ddbf459a5dedb49c9ae4 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 30 Mar 2026 07:22:06 -0700 Subject: [PATCH 21/30] Disable green context --- cpp/src/barrier/sparse_cholesky.cuh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cpp/src/barrier/sparse_cholesky.cuh b/cpp/src/barrier/sparse_cholesky.cuh index f7938fb989..3bdba68401 100644 --- a/cpp/src/barrier/sparse_cholesky.cuh +++ b/cpp/src/barrier/sparse_cholesky.cuh @@ -131,6 +131,8 @@ std::size_t compute_hash(const f_t* arr, size_t size) return seed; } +//#define USE_BARRIER_GREEN_CONTEXT + template class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { public: @@ -155,6 +157,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { cuda_error = cudaSuccess; status = CUDSS_STATUS_SUCCESS; +#ifdef USE_BARRIER_GREEN_CONTEXT if (CUDART_VERSION >= 13000 && settings_.concurrent_halt != nullptr && settings_.num_gpus == 1) { cuGetErrorString_func = cuopt::detail::get_driver_entry_point("cuGetErrorString"); @@ -238,6 +241,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { &stream, barrier_green_ctx, CU_STREAM_NON_BLOCKING, stream_priority), reinterpret_cast(cuGetErrorString_func)); } +#endif auto cudss_device_idx = handle_ptr_->get_device(); auto cudss_device_count = 1; @@ -363,6 +367,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { CUDSS_CALL_AND_CHECK_EXIT(cudssConfigDestroy(solverConfig), status, "cudssConfigDestroy"); CUDSS_CALL_AND_CHECK_EXIT(cudssDestroy(handle), status, "cudssDestroy"); CUDA_CALL_AND_CHECK_EXIT(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); + +#ifdef USE_BARRIER_GREEN_CONTEXT #if CUDART_VERSION >= 13000 if (settings_.concurrent_halt != nullptr && settings_.num_gpus == 1) { auto cuStreamDestroy_func = cuopt::detail::get_driver_entry_point("cuStreamDestroy"); @@ -374,6 +380,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { reinterpret_cast(cuGetErrorString_func)); handle_ptr_->get_stream().synchronize(); } +#endif #endif } From b920f9a1fac9d48bed8a509cf5cf72a9333bf4bb Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 30 Mar 2026 07:23:29 -0700 Subject: [PATCH 22/30] Move to appropriate file --- cpp/src/mip_heuristics/problem/problem.cu | 27 +++++++++++++++++++ cpp/src/mip_heuristics/problem/problem.cuh | 3 +++ .../problem/problem_helpers.cuh | 23 ---------------- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index 32aeef695a..c4f82843f4 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -2381,12 +2381,39 @@ void problem_t::update_variable_bounds(const std::vector& var_ind RAFT_CHECK_CUDA(handle_ptr->get_stream()); } +template +void convert_greater_to_less(detail::problem_t& problem) +{ + raft::common::nvtx::range scope("convert_greater_to_less"); + + auto* handle_ptr = problem.handle_ptr; + + constexpr i_t TPB = 256; + kernel_convert_greater_to_less + <<get_stream()>>>( + raft::device_span(problem.coefficients.data(), problem.coefficients.size()), + raft::device_span(problem.offsets.data(), problem.offsets.size()), + raft::device_span(problem.constraint_lower_bounds.data(), + problem.constraint_lower_bounds.size()), + raft::device_span(problem.constraint_upper_bounds.data(), + problem.constraint_upper_bounds.size())); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); + + problem.compute_transpose_of_problem(); + + handle_ptr->sync_stream(); +} + #if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class problem_t; + +template void convert_greater_to_less(detail::problem_t&); #endif #if MIP_INSTANTIATE_DOUBLE template class problem_t; + +template void convert_greater_to_less(detail::problem_t&); #endif } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh index 130c97526e..7a837d4ceb 100644 --- a/cpp/src/mip_heuristics/problem/problem.cuh +++ b/cpp/src/mip_heuristics/problem/problem.cuh @@ -323,5 +323,8 @@ class problem_t { std::vector Q_values; }; +template +void convert_greater_to_less(detail::problem_t& problem); + } // namespace linear_programming::detail } // namespace cuopt diff --git a/cpp/src/mip_heuristics/problem/problem_helpers.cuh b/cpp/src/mip_heuristics/problem/problem_helpers.cuh index ebc8a488ea..deca71bf3d 100644 --- a/cpp/src/mip_heuristics/problem/problem_helpers.cuh +++ b/cpp/src/mip_heuristics/problem/problem_helpers.cuh @@ -398,27 +398,4 @@ static void csrsort_cusparse(rmm::device_uvector& values, check_csr_representation(values, offsets, indices, handle_ptr, cols, rows); } -template -static void convert_greater_to_less(detail::problem_t& problem) -{ - raft::common::nvtx::range scope("convert_greater_to_less"); - - auto* handle_ptr = problem.handle_ptr; - - constexpr i_t TPB = 256; - kernel_convert_greater_to_less - <<get_stream()>>>( - raft::device_span(problem.coefficients.data(), problem.coefficients.size()), - raft::device_span(problem.offsets.data(), problem.offsets.size()), - raft::device_span(problem.constraint_lower_bounds.data(), - problem.constraint_lower_bounds.size()), - raft::device_span(problem.constraint_upper_bounds.data(), - problem.constraint_upper_bounds.size())); - RAFT_CHECK_CUDA(handle_ptr->get_stream()); - - problem.compute_transpose_of_problem(); - - handle_ptr->sync_stream(); -} - } // namespace cuopt::linear_programming::detail From f4d0fa566f9e1cb51f8178a1b061c46a1793f31c Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 30 Mar 2026 07:42:58 -0700 Subject: [PATCH 23/30] Cleanup --- cpp/src/branch_and_bound/branch_and_bound.cpp | 12 +- cpp/src/branch_and_bound/branch_and_bound.hpp | 5 +- .../diversity/diversity_manager.cu | 150 +++++------------- .../diversity/diversity_manager.cuh | 9 +- .../mip_heuristics/relaxed_lp/relaxed_lp.cu | 2 +- cpp/src/mip_heuristics/root_lp.cu | 11 ++ cpp/src/mip_heuristics/root_lp.cuh | 4 + 7 files changed, 80 insertions(+), 113 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index a5c3948ec9..15db509d14 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1876,7 +1876,10 @@ void branch_and_bound_t::run_concurrent_pdlp_and_barrier_with_crossove get_root_concurrent_halt(), pdlp_root_num_gpus_, cuopt::linear_programming::method_t::PDLP); - (void)do_crush_crossover(result, "PDLP", 2); + // Only call crossover if the result status is OPTIMAL + if (result.is_optimal) { + (void)do_crush_crossover(result, "PDLP", 2); + } }); barrier_thread_out = std::thread([this, &lp_settings, do_crush_crossover]() { @@ -1886,7 +1889,11 @@ void branch_and_bound_t::run_concurrent_pdlp_and_barrier_with_crossove get_root_concurrent_halt(), pdlp_root_num_gpus_, cuopt::linear_programming::method_t::Barrier); - (void)do_crush_crossover(result, "Barrier", 3); + + // Only call crossover if the result status is OPTIMAL + if (result.is_optimal) { + (void)do_crush_crossover(result, "Barrier", 3); + } }); } @@ -1928,6 +1935,7 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( std::atomic winner{0}; // 0=none, 1=dual, 2=PDLP, 3=Barrier if (enable_concurrent_lp_root_solve_ && mip_problem_ptr_ != nullptr) { + convert_greater_to_less_2(*mip_problem_ptr_); // All three run in threads; main only starts them and joins. First to finish with OPTIMAL sets // winner and halt. std::mutex first_solver_mutex; diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index b4c46ac8e9..e60805939e 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -118,7 +118,10 @@ class branch_and_bound_t { i_t get_num_cols() const { return original_problem_.num_cols; } bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } - void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; } + void set_root_concurrent_halt(int value) + { + root_concurrent_halt_.store(value, std::memory_order_relaxed); + } lp_status_t solve_root_relaxation(simplex_solver_settings_t const& lp_settings, lp_solution_t& root_relax_soln, std::vector& root_vstatus, diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 18346209f9..2c7a5d08e2 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -15,8 +15,6 @@ #include #include -#include - #include #include @@ -192,6 +190,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit, timer_t global_ ls.constraint_prop.bounds_update.set_updated_bounds(*problem_ptr); } bool run_probing_cache = !fj_only_run; + run_probing_cache = false; // Don't run probing cache in deterministic mode yet as neither B&B nor CPUFJ need it // and it doesn't make use of work units yet if (context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC) { run_probing_cache = false; } @@ -333,6 +332,20 @@ struct ls_cpufj_raii_guard_t { local_search_t& ls; }; +template +void diversity_manager_t::wait_for_branch_and_bound_first_root_relaxation() +{ + if (simplex_solution_exists.load(std::memory_order_acquire) || + first_solution_ready_.load(std::memory_order_acquire)) { + return; + } + std::unique_lock lock(first_solution_mutex_); + first_solution_cv_.wait(lock, [this]() { + return first_solution_ready_.load(std::memory_order_acquire) || + simplex_solution_exists.load(std::memory_order_acquire); + }); +} + // returns the best feasible solution template solution_t diversity_manager_t::run_solver() @@ -381,10 +394,7 @@ solution_t diversity_manager_t::run_solver() return population.best_feasible(); } - population.timer = timer; - const f_t time_limit = timer.remaining_time(); - const f_t lp_time_limit = - std::min(diversity_config.max_time_on_lp, time_limit * diversity_config.time_ratio_on_init_lp); + population.timer = timer; // after every change to the problem, we should resize all the relevant vars // we need to encapsulate that to prevent repetitions recombine_stats.reset(); @@ -413,93 +423,13 @@ solution_t diversity_manager_t::run_solver() lp_state_t& lp_state = problem_ptr->lp_state; // resize because some constructor might be called before the presolve lp_state.resize(*problem_ptr, problem_ptr->handle_ptr->get_stream()); - bool bb_thread_solution_exists = simplex_solution_exists.load(); - if (bb_thread_solution_exists) { - ls.lp_optimal_exists = true; - } else if (context.branch_and_bound_ptr != nullptr && - context.branch_and_bound_ptr->enable_concurrent_lp_root_solve()) { - // B&B drives root relaxation; wait for first solution (PDLP/Barrier or dual simplex) - first_solution_ready_.store(false, std::memory_order_release); - std::unique_lock lock(first_solution_mutex_); - first_solution_cv_.wait(lock, [this]() { return first_solution_ready_.load(); }); - lock.unlock(); - clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); - } else if (!fj_only_run) { - // Heuristics-only or non-concurrent: diversity manager runs LP solve - convert_greater_to_less(*problem_ptr); - - f_t tolerance_divisor = - problem_ptr->tolerances.absolute_tolerance / problem_ptr->tolerances.relative_tolerance; - if (tolerance_divisor == 0) { tolerance_divisor = 1; } - f_t absolute_tolerance = context.settings.tolerances.absolute_tolerance; - - pdlp_solver_settings_t pdlp_settings{}; - pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.tolerances.relative_dual_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.time_limit = lp_time_limit; - pdlp_settings.first_primal_feasible = false; - pdlp_settings.concurrent_halt = &global_concurrent_halt; - pdlp_settings.method = method_t::Concurrent; - pdlp_settings.inside_mip = true; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; - pdlp_settings.num_gpus = context.settings.num_gpus; - pdlp_settings.presolver = presolver_t::None; - - timer_t lp_timer(lp_time_limit); - auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); - - { - std::lock_guard guard(relaxed_solution_mutex); - if (!simplex_solution_exists.load()) { - cuopt_assert(lp_result.get_primal_solution().size() == lp_optimal_solution.size(), - "LP optimal solution size mismatch"); - cuopt_assert(lp_result.get_dual_solution().size() == lp_dual_optimal_solution.size(), - "LP dual optimal solution size mismatch"); - raft::copy(lp_optimal_solution.data(), - lp_result.get_primal_solution().data(), - lp_optimal_solution.size(), - problem_ptr->handle_ptr->get_stream()); - raft::copy(lp_dual_optimal_solution.data(), - lp_result.get_dual_solution().data(), - lp_dual_optimal_solution.size(), - problem_ptr->handle_ptr->get_stream()); - } else { - // copy the lp state - raft::copy(lp_state.prev_primal.data(), - lp_optimal_solution.data(), - lp_optimal_solution.size(), - problem_ptr->handle_ptr->get_stream()); - raft::copy(lp_state.prev_dual.data(), - lp_dual_optimal_solution.data(), - lp_dual_optimal_solution.size(), - problem_ptr->handle_ptr->get_stream()); - } - problem_ptr->handle_ptr->sync_stream(); - } - cuopt_assert(thrust::all_of(problem_ptr->handle_ptr->get_thrust_policy(), - lp_optimal_solution.begin(), - lp_optimal_solution.end(), - [] __host__ __device__(f_t val) { return std::isfinite(val); }), - "LP optimal solution contains non-finite values"); - ls.lp_optimal_exists = true; - if (lp_result.get_termination_status() == pdlp_termination_status_t::Optimal) { - set_new_user_bound(lp_result.get_objective_value()); - } else if (lp_result.get_termination_status() == pdlp_termination_status_t::PrimalInfeasible) { - CUOPT_LOG_ERROR("Problem is primal infeasible, continuing anyway!"); - ls.lp_optimal_exists = false; - } else if (lp_result.get_termination_status() == pdlp_termination_status_t::DualInfeasible) { - CUOPT_LOG_ERROR("PDLP detected dual infeasibility, continuing anyway!"); - ls.lp_optimal_exists = false; - } else if (lp_result.get_termination_status() == pdlp_termination_status_t::TimeLimit) { - CUOPT_LOG_DEBUG( - "Initial LP run exceeded time limit, continuing solver with partial LP result!"); - // note to developer, in debug mode the LP run might be too slow and it might cause PDLP not - // to bring variables within the bounds - } - // in case the pdlp returned var boudns that are out of bounds + const bool bb_drives_root = context.branch_and_bound_ptr != nullptr; + if (bb_drives_root) { + wait_for_branch_and_bound_first_root_relaxation(); + clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); - } + } if (ls.lp_optimal_exists) { solution_t lp_rounded_sol(*problem_ptr); @@ -854,7 +784,7 @@ void diversity_manager_t::on_first_lp_solution( result.dual.size(), problem_ptr->handle_ptr->get_stream()); problem_ptr->handle_ptr->sync_stream(); - ls.lp_optimal_exists = true; + ls.lp_optimal_exists = result.has_optimal_basis_relaxation; set_new_user_bound(result.user_objective); } { @@ -879,21 +809,27 @@ void diversity_manager_t::set_simplex_solution(const std::vector& cuopt_func_call(new_sol.copy_new_assignment(solution)); cuopt_func_call(new_sol.compute_feasibility()); cuopt_assert(integer_equal(new_sol.get_user_objective(), objective, 1e-3), "Objective mismatch"); - std::lock_guard lock(relaxed_solution_mutex); - simplex_solution_exists.store(true, std::memory_order_release); - global_concurrent_halt = 1; - CUOPT_LOG_DEBUG("Setting concurrent halt for PDLP inside diversity manager"); - // global_concurrent_halt.store(1, std::memory_order_release); - // it is safe to use lp_optimal_solution while executing the copy operation - // the operations are ordered as long as they are on the same stream - raft::copy( - lp_optimal_solution.data(), solution.data(), solution.size(), context.handle_ptr->get_stream()); - raft::copy(lp_dual_optimal_solution.data(), - dual_solution.data(), - dual_solution.size(), - context.handle_ptr->get_stream()); - set_new_user_bound(objective); - context.handle_ptr->sync_stream(); + { + std::lock_guard lock(relaxed_solution_mutex); + simplex_solution_exists.store(true, std::memory_order_release); + global_concurrent_halt = 1; + CUOPT_LOG_DEBUG("Setting concurrent halt for PDLP inside diversity manager"); + // it is safe to use lp_optimal_solution while executing the copy operation + // the operations are ordered as long as they are on the same stream + raft::copy( + lp_optimal_solution.data(), solution.data(), solution.size(), context.handle_ptr->get_stream()); + raft::copy(lp_dual_optimal_solution.data(), + dual_solution.data(), + dual_solution.size(), + context.handle_ptr->get_stream()); + set_new_user_bound(objective); + context.handle_ptr->sync_stream(); + } + ls.lp_optimal_exists = true; + { + std::lock_guard notify_lock(first_solution_mutex_); + first_solution_cv_.notify_all(); + } } #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh index a9517484c9..4d4154d557 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh @@ -76,7 +76,8 @@ class diversity_manager_t { const std::vector& dual_solution, f_t objective); - // Called by B&B when first LP solution is available (PDLP/Barrier or dual simplex). + // Called when the first root LP vectors are available (PDLP/Barrier pre-crossover or dual-simplex + // root). has_optimal_basis_relaxation distinguishes basis-optimal roots from interior iterates. void on_first_lp_solution( cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t const& result); @@ -107,7 +108,8 @@ class diversity_manager_t { // atomic for signalling pdlp to stop std::atomic global_concurrent_halt{0}; - // First solution from B&B: wait for B&B to call on_first_lp_solution when run_bb and concurrent + // Sync with B&B root relaxation: on_first_lp_solution (PDLP/Barrier inner, or dual on main + // thread) or set_simplex_solution fills lp_*; run_solver waits on first_solution_cv_. std::mutex first_solution_mutex_; std::condition_variable first_solution_cv_; std::atomic first_solution_ready_{false}; @@ -118,6 +120,9 @@ class diversity_manager_t { bool run_only_bp_recombiner{false}; bool run_only_fp_recombiner{false}; bool run_only_sub_mip_recombiner{false}; + + private: + void wait_for_branch_and_bound_first_root_relaxation(); }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu index e2bbc8feb1..d26a4020b8 100644 --- a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu @@ -49,7 +49,7 @@ optimization_problem_solution_t get_relaxed_lp_solution( pdlp_settings.tolerances.relative_primal_tolerance = settings.tolerance / tolerance_divisor; pdlp_settings.tolerances.relative_dual_tolerance = settings.tolerance / tolerance_divisor; pdlp_settings.time_limit = settings.time_limit; - pdlp_settings.concurrent_halt = settings.concurrent_halt; + pdlp_settings.concurrent_halt = nullptr; //settings.concurrent_halt; pdlp_settings.per_constraint_residual = settings.per_constraint_residual; pdlp_settings.first_primal_feasible = settings.return_first_feasible; pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; diff --git a/cpp/src/mip_heuristics/root_lp.cu b/cpp/src/mip_heuristics/root_lp.cu index b181db43cd..d4b4ee3cda 100644 --- a/cpp/src/mip_heuristics/root_lp.cu +++ b/cpp/src/mip_heuristics/root_lp.cu @@ -42,6 +42,8 @@ copy_lp_result_to_root_solution(problem_t* problem, result.objective = problem->get_solver_obj_from_user_obj(lp_result.get_objective_value()); result.user_objective = lp_result.get_objective_value(); result.iterations = lp_result.get_additional_termination_information().number_of_steps_taken; + result.is_optimal = lp_result.get_termination_status() == pdlp_termination_status_t::Optimal; + result.has_optimal_basis_relaxation = false; // crush/crossover not done yet return result; } @@ -162,6 +164,12 @@ cuopt::linear_programming::dual_simplex::crossover_status_t run_crush_crossover_ return status; } +template +void convert_greater_to_less_2(problem_t& problem) +{ + convert_greater_to_less(problem); +} + template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t run_solver_for_root_lp( problem_t*, double, std::atomic*, int, method_t); @@ -188,6 +196,7 @@ run_crush_crossover_and_maybe_win( double*, const char*, std::string*); +template void convert_greater_to_less_2(problem_t&); #ifdef MIP_INSTANTIATION_FLOAT template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t @@ -215,5 +224,7 @@ run_crush_crossover_and_maybe_win( float*, const char*, std::string*); + +template void convert_greater_to_less_2(problem_t&); #endif } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/root_lp.cuh b/cpp/src/mip_heuristics/root_lp.cuh index 2f87884fe9..20b930be6d 100644 --- a/cpp/src/mip_heuristics/root_lp.cuh +++ b/cpp/src/mip_heuristics/root_lp.cuh @@ -63,4 +63,8 @@ cuopt::linear_programming::dual_simplex::crossover_status_t run_crush_crossover_ const char* this_solver_name, std::string* winner_solver_name_out); + +template +void convert_greater_to_less_2(detail::problem_t& problem); + } // namespace cuopt::linear_programming::detail From 95090005072b962d68b36a262212659109aaa5db Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 30 Mar 2026 08:05:24 -0700 Subject: [PATCH 24/30] Fix missing entries --- cpp/src/dual_simplex/types.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/dual_simplex/types.hpp b/cpp/src/dual_simplex/types.hpp index 6660a86f0a..776c766e9a 100644 --- a/cpp/src/dual_simplex/types.hpp +++ b/cpp/src/dual_simplex/types.hpp @@ -24,6 +24,11 @@ constexpr float64_t inf = std::numeric_limits::infinity(); // without B&B depending on PDLP types. template struct root_relaxation_first_solution_t { + /// Inner PDLP/Barrier termination reported optimal (may still be pre-crossover). + bool is_optimal{false}; + /// True only when vectors are an optimal root relaxation on a basis (dual simplex optimal + /// root, or equivalently post-crossover). False for PDLP/Barrier inner iterates before crossover. + bool has_optimal_basis_relaxation{false}; std::vector primal; std::vector dual; std::vector reduced_costs; From 2a1ff142a878885dc0b36849e8fe51399127bb17 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 30 Mar 2026 12:58:22 -0700 Subject: [PATCH 25/30] Cleanup concurrent halt handling --- cpp/src/barrier/barrier.cu | 43 +++++++++---------- cpp/src/barrier/sparse_cholesky.cuh | 39 ++++++----------- cpp/src/branch_and_bound/branch_and_bound.cpp | 18 +++----- cpp/src/branch_and_bound/branch_and_bound.hpp | 9 ++-- cpp/src/dual_simplex/basis_solves.cpp | 13 ++---- .../bound_flipping_ratio_test.cpp | 3 +- cpp/src/dual_simplex/crossover.cpp | 37 +++++++++++++--- cpp/src/dual_simplex/phase2.cpp | 10 +++-- cpp/src/dual_simplex/right_looking_lu.cpp | 7 ++- .../diversity/diversity_manager.cu | 17 +++++--- cpp/src/pdlp/pdlp.cu | 4 +- cpp/src/pdlp/solve.cu | 14 +++--- 12 files changed, 111 insertions(+), 103 deletions(-) diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index 075323744d..8e9f51d21e 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -289,7 +290,7 @@ class iteration_data_t { // Ignore Q matrix for now find_dense_columns( lp.A, settings, dense_columns_unordered, n_dense_rows, max_row_nz, estimated_nz_AAT); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return; } #ifdef PRINT_INFO for (i_t j : dense_columns_unordered) { settings.log.printf("Dense column %6d\n", j); @@ -350,7 +351,7 @@ class iteration_data_t { inv_sqrt_diag.set_scalar(1.0); if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { inv_diag.sqrt(inv_sqrt_diag); } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return; } // Copy A into AD AD = lp.A; @@ -396,22 +397,22 @@ class iteration_data_t { device_A.copy(host_A_CSR, lp.handle_ptr->get_stream()); RAFT_CHECK_CUDA(handle_ptr->get_stream()); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return; } i_t factorization_size = use_augmented ? lp.num_rows + lp.num_cols : lp.num_rows; chol = std::make_unique>(handle_ptr, settings, factorization_size); chol->set_positive_definite(false); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return; } // Perform symbolic analysis symbolic_status = 0; if (use_augmented) { // Build the sparsity pattern of the augmented system form_augmented(true); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return; } symbolic_status = chol->analyze(device_augmented); } else { form_adat(true); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return; } symbolic_status = chol->analyze(device_ADAT); } } @@ -581,7 +582,7 @@ class iteration_data_t { span_x[i] *= span_scale[span_col_ind[i]]; }); RAFT_CHECK_CUDA(stream_view_); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return; } if (first_call) { try { initialize_cusparse_data( @@ -591,7 +592,7 @@ class iteration_data_t { return; } } - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return; } multiply_kernels(handle_ptr, device_A, device_AD, device_ADAT, cusparse_info); handle_ptr->sync_stream(); @@ -682,9 +683,7 @@ class iteration_data_t { dense_vector_t M_col(AD.m); solve_status = chol->solve(U_col, M_col); if (solve_status != 0) { return solve_status; } - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } M.set_column(k, M_col); if (debug) { @@ -701,9 +700,7 @@ class iteration_data_t { for (i_t k = 0; k < n_dense_columns; k++) { AD_dense.transpose_multiply( 1.0, M.values.data() + k * M.m, 0.0, H.values.data() + k * H.m); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } } dense_vector_t e(n_dense_columns); @@ -1193,7 +1190,7 @@ class iteration_data_t { delta_nz[j] += fill; // Capture contributions from A(:, j). j will be encountered multiple times } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return; } } int64_t sparse_nz_C = 0; @@ -1233,7 +1230,7 @@ class iteration_data_t { delta_nz[j] + static_cast( fill_estimate)); // Capture the estimated fill associated with column j } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return; } } int64_t estimated_nz_C = 0; @@ -3429,7 +3426,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, if (lp.Q.n > 0) { create_Q(lp, Q); } iteration_data_t data(lp, num_upper_bounds, Q, settings); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; } @@ -3458,7 +3455,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, settings.log.printf("Barrier time limit exceeded\n"); return lp_status_t::TIME_LIMIT; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; } @@ -3557,7 +3554,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, settings.log.printf("Barrier time limit exceeded\n"); return lp_status_t::TIME_LIMIT; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; } @@ -3568,7 +3565,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, i_t status = gpu_compute_search_direction( data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff, max_affine_residual); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; } @@ -3593,7 +3590,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, settings.log.printf("Barrier time limit exceeded\n"); return lp_status_t::TIME_LIMIT; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; } @@ -3607,7 +3604,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, status = gpu_compute_search_direction( data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; } @@ -3633,7 +3630,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, settings.log.printf("Barrier time limit exceeded\n"); return lp_status_t::TIME_LIMIT; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; } diff --git a/cpp/src/barrier/sparse_cholesky.cuh b/cpp/src/barrier/sparse_cholesky.cuh index 3bdba68401..49f80f9393 100644 --- a/cpp/src/barrier/sparse_cholesky.cuh +++ b/cpp/src/barrier/sparse_cholesky.cuh @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -131,7 +132,7 @@ std::size_t compute_hash(const f_t* arr, size_t size) return seed; } -//#define USE_BARRIER_GREEN_CONTEXT +// #define USE_BARRIER_GREEN_CONTEXT template class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { @@ -367,7 +368,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { CUDSS_CALL_AND_CHECK_EXIT(cudssConfigDestroy(solverConfig), status, "cudssConfigDestroy"); CUDSS_CALL_AND_CHECK_EXIT(cudssDestroy(handle), status, "cudssDestroy"); CUDA_CALL_AND_CHECK_EXIT(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); - + #ifdef USE_BARRIER_GREEN_CONTEXT #if CUDART_VERSION >= 13000 if (settings_.concurrent_halt != nullptr && settings_.num_gpus == 1) { @@ -452,9 +453,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { raft::common::nvtx::range fun_scope("Barrier: cuDSS Analyze : CUDSS_PHASE_ANALYSIS"); status = cudssExecute(handle, CUDSS_PHASE_REORDERING, solverConfig, solverData, A, cudss_x, cudss_b); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } if (status != CUDSS_STATUS_SUCCESS) { settings_.log.printf( "FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for " @@ -468,9 +467,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { status = cudssExecute( handle, CUDSS_PHASE_SYMBOLIC_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } if (status != CUDSS_STATUS_SUCCESS) { settings_.log.printf( "FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for " @@ -526,9 +523,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { f_t start_numeric = tic(); status = cudssExecute( handle, CUDSS_PHASE_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } if (status != CUDSS_STATUS_SUCCESS) { settings_.log.printf( "FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for " @@ -542,9 +537,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { #endif f_t numeric_time = toc(start_numeric); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } int info; size_t sizeWritten = 0; @@ -642,9 +635,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { A_created = true; // Perform symbolic analysis - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } f_t start_analysis = tic(); CUDSS_CALL_AND_CHECK( cudssExecute(handle, CUDSS_PHASE_REORDERING, solverConfig, solverData, A, cudss_x, cudss_b), @@ -652,9 +643,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { "cudssExecute for reordering"); f_t reorder_time = toc(start_analysis); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } f_t start_symbolic = tic(); @@ -667,7 +656,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { f_t symbolic_time = toc(start_symbolic); f_t analysis_time = toc(start_analysis); settings_.log.printf("Symbolic factorization time : %.2fs\n", symbolic_time); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings_.concurrent_halt)) { RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); handle_ptr_->get_stream().synchronize(); return CONCURRENT_HALT_RETURN; @@ -718,9 +707,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { "cudssExecute for factorization"); f_t numeric_time = toc(start_numeric); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } int info; size_t sizeWritten = 0; @@ -783,9 +770,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { cudssMatrixSetValues(cudss_x, x.data()), status, "cudssMatrixSetValues for x"); status = cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, solverData, A, cudss_x, cudss_b); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } if (status != CUDSS_STATUS_SUCCESS) { settings_.log.printf( "FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for " diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 15db509d14..87a7b867b3 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1854,7 +1854,7 @@ void branch_and_bound_t::run_concurrent_pdlp_and_barrier_with_crossove settings_, exploration_stats_.start_time, get_root_concurrent_halt(), - [this]() { set_root_concurrent_halt(1); }, + [this]() { signal_root_concurrent_halt(); }, lp_settings.on_first_lp_solution_available, first_solver_mutex, first_solver_callback_done, @@ -1877,9 +1877,7 @@ void branch_and_bound_t::run_concurrent_pdlp_and_barrier_with_crossove pdlp_root_num_gpus_, cuopt::linear_programming::method_t::PDLP); // Only call crossover if the result status is OPTIMAL - if (result.is_optimal) { - (void)do_crush_crossover(result, "PDLP", 2); - } + if (result.is_optimal) { (void)do_crush_crossover(result, "PDLP", 2); } }); barrier_thread_out = std::thread([this, &lp_settings, do_crush_crossover]() { @@ -1891,9 +1889,7 @@ void branch_and_bound_t::run_concurrent_pdlp_and_barrier_with_crossove cuopt::linear_programming::method_t::Barrier); // Only call crossover if the result status is OPTIMAL - if (result.is_optimal) { - (void)do_crush_crossover(result, "Barrier", 3); - } + if (result.is_optimal) { (void)do_crush_crossover(result, "Barrier", 3); } }); } @@ -1979,7 +1975,7 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( int expected = 0; if (status == lp_status_t::OPTIMAL && winner.compare_exchange_strong(expected, 1, std::memory_order_acq_rel)) { - set_root_concurrent_halt(1); + signal_root_concurrent_halt(); } }); @@ -2005,9 +2001,9 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( join_guard.b = nullptr; join_guard.c = nullptr; - // Winner may have set concurrent_halt==1 to stop peer solvers. All threads are joined; reset + // Winner may have signaled concurrent halt to stop peer solvers. All threads are joined; reset // the flag for the rest of B&B (subsequent LP solves, etc.). - set_root_concurrent_halt(0); + reset_root_concurrent_halt(); const int w = winner.load(std::memory_order_acquire); use_pdlp_path = (w == 2 || w == 3); @@ -2126,7 +2122,7 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( settings_.log.printf("\n"); - set_root_concurrent_halt(0); + reset_root_concurrent_halt(); return root_status; } diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index e60805939e..98ec74f477 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -118,10 +118,11 @@ class branch_and_bound_t { i_t get_num_cols() const { return original_problem_.num_cols; } bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } - void set_root_concurrent_halt(int value) - { - root_concurrent_halt_.store(value, std::memory_order_relaxed); - } + /** Tell concurrent root solvers to stop; pairs with acquire loads on the shared halt pointer. */ + void signal_root_concurrent_halt() { concurrent_halt_signal(&root_concurrent_halt_); } + /** Clear halt after concurrent root threads have joined; no peers are reading the flag. */ + void reset_root_concurrent_halt() { concurrent_halt_reset(&root_concurrent_halt_); } + lp_status_t solve_root_relaxation(simplex_solver_settings_t const& lp_settings, lp_solution_t& root_relax_soln, std::vector& root_vstatus, diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index c5fee4e108..b425c4a886 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -6,6 +6,7 @@ /* clang-format on */ #include +#include #include #include @@ -390,9 +391,7 @@ i_t factorize_basis(const csc_matrix_t& A, SU, S_perm_inv, work_estimate); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } if (Srank < 0) { return Srank; } if (Srank != Sdim) { // Get the rank deficient columns @@ -623,9 +622,7 @@ i_t factorize_basis(const csc_matrix_t& A, rank = right_looking_lu(A, settings, medium_tol, basic_list, start_time, q, L, U, pinv, work_estimate); if (rank < 0) { - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } return rank; } inverse_permutation(pinv, p); @@ -646,9 +643,7 @@ i_t factorize_basis(const csc_matrix_t& A, } work_estimate += 3 * (m - rank); } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } if (verbose) { printf("Right Lnz+Unz %d t %.3f\n", L.col_start[m] + U.col_start[m], toc(fact_start)); } diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index e30b067398..e776676eca 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -6,6 +6,7 @@ /* clang-format on */ #include +#include #include @@ -269,7 +270,7 @@ void bound_flipping_ratio_test_t::heap_passes(const std::vector& entering_index = RATIO_TEST_TIME_LIMIT; return; } - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings_.concurrent_halt)) { entering_index = CONCURRENT_HALT_RETURN; return; } diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 14624a4f4c..6e177570d8 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -5,6 +5,7 @@ */ /* clang-format on */ +#include #include #include @@ -611,7 +612,7 @@ i_t dual_push(const lp_problem_t& lp, settings.log.printf("Crossover time exceeded\n"); return TIME_LIMIT_RETURN; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Concurrent halt\n"); return CONCURRENT_HALT_RETURN; } @@ -988,7 +989,7 @@ i_t primal_push(const lp_problem_t& lp, settings.log.printf("Crossover time limit exceeded\n"); return TIME_LIMIT_RETURN; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Concurrent halt\n"); return CONCURRENT_HALT_RETURN; } @@ -1239,6 +1240,10 @@ crossover_status_t crossover(const lp_problem_t& lp, settings.log.printf("Aborting: initial basis selection\n"); return return_to_status(rank); } + if (concurrent_halt_is_set(settings.concurrent_halt)) { + settings.log.printf("Concurrent halt (after initial basis selection)\n"); + return crossover_status_t::CONCURRENT_LIMIT; + } i_t num_basic = 0; if (rank < m) { @@ -1247,6 +1252,10 @@ crossover_status_t crossover(const lp_problem_t& lp, } for (i_t k = 0; k < candidate_columns.size(); k++) { + if ((k & 31) == 0 && concurrent_halt_is_set(settings.concurrent_halt)) { + settings.log.printf("Concurrent halt (candidate column loop)\n"); + return crossover_status_t::CONCURRENT_LIMIT; + } const i_t j = candidate_columns[k]; vstatus[j] = vstatus_for_candidates[k]; if (vstatus[j] == variable_status_t::BASIC) { num_basic++; } @@ -1312,6 +1321,10 @@ crossover_status_t crossover(const lp_problem_t& lp, slacks_needed, work_estimate); if (rank < 0) { return return_to_status(rank); } + if (concurrent_halt_is_set(settings.concurrent_halt)) { + settings.log.printf("Concurrent halt (after initial basis factorization)\n"); + return crossover_status_t::CONCURRENT_LIMIT; + } if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); basis_repair(lp.A, @@ -1352,7 +1365,7 @@ crossover_status_t crossover(const lp_problem_t& lp, settings.log.printf("Time limit exceeded\n"); return crossover_status_t::TIME_LIMIT; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Concurrent halt\n"); return crossover_status_t::CONCURRENT_LIMIT; } @@ -1408,13 +1421,17 @@ crossover_status_t crossover(const lp_problem_t& lp, } else if (dual_feasible && !primal_feasible) { i_t dual_iter = 0; std::vector edge_norms; + if (concurrent_halt_is_set(settings.concurrent_halt)) { + settings.log.printf("Concurrent halt (before crossover dual phase2 cleanup)\n"); + return crossover_status_t::CONCURRENT_LIMIT; + } dual::status_t status = dual_phase2(2, 0, start_time, lp, settings, vstatus, solution, dual_iter, edge_norms); if (toc(start_time) > settings.time_limit) { settings.log.printf("Time limit exceeded\n"); return crossover_status_t::TIME_LIMIT; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Concurrent halt\n"); return crossover_status_t::CONCURRENT_LIMIT; } @@ -1454,6 +1471,10 @@ crossover_status_t crossover(const lp_problem_t& lp, i_t iter = 0; lp_solution_t phase1_solution(phase1_problem.num_rows, phase1_problem.num_cols); std::vector junk; + if (concurrent_halt_is_set(settings.concurrent_halt)) { + settings.log.printf("Concurrent halt (before crossover dual phase1)\n"); + return crossover_status_t::CONCURRENT_LIMIT; + } dual::status_t phase1_status = dual_phase2( 1, 1, start_time, phase1_problem, settings, phase1_vstatus, phase1_solution, iter, junk); if (phase1_status == dual::status_t::NUMERICAL || @@ -1570,13 +1591,17 @@ crossover_status_t crossover(const lp_problem_t& lp, dual::status_t status = dual::status_t::NUMERICAL; if (dual_infeas <= settings.dual_tol) { std::vector edge_norms; + if (concurrent_halt_is_set(settings.concurrent_halt)) { + settings.log.printf("Concurrent halt (before crossover dual phase2 after phase1)\n"); + return crossover_status_t::CONCURRENT_LIMIT; + } status = dual_phase2( 2, iter == 0 ? 1 : 0, start_time, lp, settings, vstatus, solution, iter, edge_norms); if (toc(start_time) > settings.time_limit) { settings.log.printf("Time limit exceeded\n"); return crossover_status_t::TIME_LIMIT; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Concurrent halt\n"); return crossover_status_t::CONCURRENT_LIMIT; } @@ -1604,7 +1629,7 @@ crossover_status_t crossover(const lp_problem_t& lp, if (primal_feasible) { status = crossover_status_t::PRIMAL_FEASIBLE; } if (primal_feasible && dual_feasible) { status = crossover_status_t::OPTIMAL; - if (settings.concurrent_halt != nullptr) { *settings.concurrent_halt = 1; } + concurrent_halt_signal(settings.concurrent_halt); } return status; } diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 9434f4661a..f678b5f4b1 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -1377,9 +1378,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, settings.log.printf("Initialized %d of %d steepest edge norms in %.2fs\n", k, m, now); } if (toc(start_time) > settings.time_limit) { return -1; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } } work_estimate += 7 * m; return 0; @@ -2784,6 +2783,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, while (iter < iter_limit) { PHASE2_NVTX_RANGE("DualSimplex::phase2_main_loop"); + if (concurrent_halt_is_set(settings.concurrent_halt)) { + return dual::status_t::CONCURRENT_LIMIT; + } // Pricing i_t direction = 0; i_t basic_leaving_index = -1; @@ -3579,7 +3581,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, if (now > settings.time_limit) { return dual::status_t::TIME_LIMIT; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { return dual::status_t::CONCURRENT_LIMIT; } } diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 37202000f8..1400924a4e 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -5,6 +5,7 @@ */ /* clang-format on */ +#include #include #include #include @@ -724,9 +725,7 @@ i_t right_looking_lu(const csc_matrix_t& A, i_t pivots = 0; for (i_t k = 0; k < n; ++k) { - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - return CONCURRENT_HALT_RETURN; - } + if (concurrent_halt_is_set(settings.concurrent_halt)) { return CONCURRENT_HALT_RETURN; } if (toc(start_time) > settings.time_limit) { return TIME_LIMIT_RETURN; } // Find pivot that satisfies // abs(pivot) >= abstol, @@ -1257,7 +1256,7 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, last_print = tic(); } if (toc(start_time) > settings.time_limit) { return TIME_LIMIT_RETURN; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + if (concurrent_halt_is_set(settings.concurrent_halt)) { settings.log.printf("Concurrent halt\n"); return CONCURRENT_HALT_RETURN; } diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 2c7a5d08e2..12e6ee51f8 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -17,6 +17,7 @@ #include +#include #include constexpr bool fj_only_run = false; @@ -190,7 +191,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit, timer_t global_ ls.constraint_prop.bounds_update.set_updated_bounds(*problem_ptr); } bool run_probing_cache = !fj_only_run; - run_probing_cache = false; + run_probing_cache = false; // Don't run probing cache in deterministic mode yet as neither B&B nor CPUFJ need it // and it doesn't make use of work units yet if (context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC) { run_probing_cache = false; } @@ -424,12 +425,12 @@ solution_t diversity_manager_t::run_solver() // resize because some constructor might be called before the presolve lp_state.resize(*problem_ptr, problem_ptr->handle_ptr->get_stream()); - const bool bb_drives_root = context.branch_and_bound_ptr != nullptr; + const bool bb_drives_root = context.branch_and_bound_ptr != nullptr; if (bb_drives_root) { - wait_for_branch_and_bound_first_root_relaxation(); + wait_for_branch_and_bound_first_root_relaxation(); clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); - } + } if (ls.lp_optimal_exists) { solution_t lp_rounded_sol(*problem_ptr); @@ -812,12 +813,14 @@ void diversity_manager_t::set_simplex_solution(const std::vector& { std::lock_guard lock(relaxed_solution_mutex); simplex_solution_exists.store(true, std::memory_order_release); - global_concurrent_halt = 1; + global_concurrent_halt.store(1, std::memory_order_release); CUOPT_LOG_DEBUG("Setting concurrent halt for PDLP inside diversity manager"); // it is safe to use lp_optimal_solution while executing the copy operation // the operations are ordered as long as they are on the same stream - raft::copy( - lp_optimal_solution.data(), solution.data(), solution.size(), context.handle_ptr->get_stream()); + raft::copy(lp_optimal_solution.data(), + solution.data(), + solution.size(), + context.handle_ptr->get_stream()); raft::copy(lp_dual_optimal_solution.data(), dual_solution.data(), dual_solution.size(), diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 9424240a08..18e548b0bf 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -37,6 +37,8 @@ #include #include +#include + #include #include #include @@ -501,7 +503,7 @@ std::optional> pdlp_solver_t // Check for concurrent limit (whenever caller provides a halt flag, e.g. B&B racing PDLP vs // Barrier) - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { + if (cuopt::linear_programming::dual_simplex::concurrent_halt_is_set(settings_.concurrent_halt)) { #ifdef PDLP_VERBOSE_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); std::cout << "Concurrent Limit reached, returning current solution" << std::endl; diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 3c3ce1e0eb..fc7831bbe3 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -40,6 +40,7 @@ #include +#include #include #include #include @@ -468,7 +469,7 @@ run_barrier(dual_simplex::user_problem_t& user_problem, status == dual_simplex::lp_status_t::UNBOUNDED || status == dual_simplex::lp_status_t::INFEASIBLE)) { // We finished. Tell PDLP to stop if it is still running. - *settings.concurrent_halt = 1; + dual_simplex::concurrent_halt_signal(settings.concurrent_halt); } return {std::move(solution), status, timer.elapsed_time(), norm_user_objective, norm_rhs}; @@ -541,7 +542,7 @@ run_dual_simplex(dual_simplex::user_problem_t& user_problem, status == dual_simplex::lp_status_t::UNBOUNDED || status == dual_simplex::lp_status_t::INFEASIBLE)) { // We finished. Tell PDLP to stop if it is still running. - *settings.concurrent_halt = 1; + dual_simplex::concurrent_halt_signal(settings.concurrent_halt); } return {std::move(solution), status, timer.elapsed_time(), norm_user_objective, norm_rhs}; @@ -830,11 +831,12 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& CUOPT_LOG_CONDITIONAL_INFO( !settings.inside_mip, "Crossover status %s", sol.get_termination_status_string().c_str()); } - if (!settings.halt_set_by_caller && settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && - crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { + if (!settings.halt_set_by_caller && settings.method == method_t::Concurrent && + settings.concurrent_halt != nullptr && crossover_info == 0 && + sol.get_termination_status() == pdlp_termination_status_t::Optimal) { // We finished. Tell dual simplex to stop if it is still running. CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); - *settings.concurrent_halt = 1; + dual_simplex::concurrent_halt_signal(settings.concurrent_halt); } } return sol; @@ -1109,7 +1111,7 @@ optimization_problem_solution_t run_concurrent( pdlp_solver_settings_t settings_pdlp(settings); // Set the concurrent halt pointer - global_concurrent_halt = 0; + global_concurrent_halt.store(0, std::memory_order_relaxed); settings_pdlp.concurrent_halt = &global_concurrent_halt; // Make sure allocations are done on the original stream From bb074ac29258e55826c74ac6b277160a56306309 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 30 Mar 2026 12:58:51 -0700 Subject: [PATCH 26/30] add missing include --- cpp/src/dual_simplex/concurrent_halt.hpp | 31 ++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 cpp/src/dual_simplex/concurrent_halt.hpp diff --git a/cpp/src/dual_simplex/concurrent_halt.hpp b/cpp/src/dual_simplex/concurrent_halt.hpp new file mode 100644 index 0000000000..c7752424a4 --- /dev/null +++ b/cpp/src/dual_simplex/concurrent_halt.hpp @@ -0,0 +1,31 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ +#pragma once + +#include + +namespace cuopt::linear_programming::dual_simplex { + +/** True if caller requested stop (any non-zero value). Uses acquire for pairing with release stores. */ +inline bool concurrent_halt_is_set(std::atomic const* halt) +{ + return halt != nullptr && std::atomic_load_explicit(halt, std::memory_order_acquire) != 0; +} + +/** Signal peer solvers to stop. No-op if halt is null. Uses release for pairing with acquire loads. */ +inline void concurrent_halt_signal(std::atomic* halt) +{ + if (halt != nullptr) { std::atomic_store_explicit(halt, 1, std::memory_order_release); } +} + +/** Clear halt after concurrent threads have joined; no peers are reading the flag. */ +inline void concurrent_halt_reset(std::atomic* halt) +{ + if (halt != nullptr) { std::atomic_store_explicit(halt, 0, std::memory_order_relaxed); } +} + +} // namespace cuopt::linear_programming::dual_simplex From 47daeae0b4e0537154a29995f4cd795605926877 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 30 Mar 2026 13:24:05 -0700 Subject: [PATCH 27/30] Add missing include --- cpp/src/branch_and_bound/branch_and_bound.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 98ec74f477..f088888f0f 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include From 70ab58f424c60f3166b212e01792d3db9b027ad0 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Tue, 31 Mar 2026 08:23:00 -0700 Subject: [PATCH 28/30] Handle failures after cut generation cleanly --- cpp/src/branch_and_bound/branch_and_bound.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 87a7b867b3..729b6e4212 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -2520,13 +2520,21 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } if (cut_status != dual::status_t::OPTIMAL) { - settings_.log.printf("Numerical issue at root node. Resolving from scratch\n"); + // Root relaxation was already optimal; this is reoptimization after adding cuts / bound + // changes (warm-started dual phase2), which can fail for numerical reasons unrelated to + // the initial Barrier/PDLP root solve. + settings_.log.printf( + "Dual phase2 after cuts did not reach optimal (status=%s, cut pass %d). " + "Resolving root LP from scratch.\n", + dual::status_to_string(cut_status).c_str(), + static_cast(cut_pass)); + basis_update_mpf_t scratch_basis(original_lp_.num_rows, settings_.refactor_frequency); lp_status_t scratch_status = solve_linear_program_with_advanced_basis(original_lp_, exploration_stats_.start_time, lp_settings, root_relax_soln_, - basis_update, + scratch_basis, basic_list, nonbasic_list, root_vstatus_, @@ -2536,8 +2544,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut cut_status = convert_lp_status_to_dual_status(scratch_status); exploration_stats_.total_lp_iters += root_relax_soln_.iterations; root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + basis_update = std::move(scratch_basis); } else { - settings_.log.printf("Cut status %s\n", dual::status_to_string(cut_status).c_str()); + settings_.log.printf("Scratch resolve status %s; dual phase2 after cuts was %s\n", + lp_status_to_string(scratch_status).c_str(), + dual::status_to_string(cut_status).c_str()); #ifdef WRITE_CUT_INFEASIBLE_MPS original_lp_.write_mps("cut_infeasible.mps"); #endif From 14dff02fd0bdf06e2c52b6b2bf816b320bc9782f Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Wed, 1 Apr 2026 15:54:15 -0700 Subject: [PATCH 29/30] Fix compilation error --- cpp/src/pdlp/termination_strategy/infeasibility_information.cu | 2 ++ cpp/tests/routing/unit_tests/breaks.cu | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index dbb35b732d..0e001b802f 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -24,6 +24,8 @@ #include #include +#include + namespace cuopt::linear_programming::detail { template infeasibility_information_t::infeasibility_information_t( diff --git a/cpp/tests/routing/unit_tests/breaks.cu b/cpp/tests/routing/unit_tests/breaks.cu index a2abc0ac8c..0d8a578b6e 100644 --- a/cpp/tests/routing/unit_tests/breaks.cu +++ b/cpp/tests/routing/unit_tests/breaks.cu @@ -354,7 +354,6 @@ TEST(vehicle_breaks, non_uniform_breaks) order_service[i] = route.service_time_h[i + 1]; } int num_v_type_1 = vehicle_num / 2; - int num_v_type_2 = vehicle_num - num_v_type_1; int num_breaks = 3; // Type 1: [40,50]/5, [100,120]/20, [170,180]/10 From 9a020db31aacb3c4d58870d047f311932ea2a817 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Thu, 2 Apr 2026 11:26:23 -0700 Subject: [PATCH 30/30] Move the problem conversion to upstream --- cpp/src/branch_and_bound/branch_and_bound.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 729b6e4212..6d8ed3361b 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -271,6 +271,7 @@ branch_and_bound_t::branch_and_bound_t( original_problem_.A.print_matrix(); #endif + convert_greater_to_less_2(*mip_problem_ptr_); dualize_info_t dualize_info; convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); full_variable_types(original_problem_, original_lp_, var_types_); @@ -1931,9 +1932,9 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( std::atomic winner{0}; // 0=none, 1=dual, 2=PDLP, 3=Barrier if (enable_concurrent_lp_root_solve_ && mip_problem_ptr_ != nullptr) { - convert_greater_to_less_2(*mip_problem_ptr_); - // All three run in threads; main only starts them and joins. First to finish with OPTIMAL sets - // winner and halt. + // convert_greater_to_less_2(*mip_problem_ptr_); + // All three run in threads; main only starts them and joins. First to finish with OPTIMAL sets + // winner and halt. std::mutex first_solver_mutex; bool first_solver_callback_done = false; run_concurrent_pdlp_and_barrier_with_crossover(lp_settings, @@ -2528,7 +2529,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut "Resolving root LP from scratch.\n", dual::status_to_string(cut_status).c_str(), static_cast(cut_pass)); - basis_update_mpf_t scratch_basis(original_lp_.num_rows, settings_.refactor_frequency); + basis_update_mpf_t scratch_basis(original_lp_.num_rows, + settings_.refactor_frequency); lp_status_t scratch_status = solve_linear_program_with_advanced_basis(original_lp_, exploration_stats_.start_time,