Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d3b7b12
Take advantage of hyper sparsity in dual push
rg20 Mar 2, 2026
6aae07d
Initial stab at moving root solves to B&B
rg20 Mar 6, 2026
1b6d98a
Move branch and bound problem to inside branch and bound
rg20 Mar 6, 2026
e311548
fix link errors
rg20 Mar 6, 2026
c522274
Fix link errors
rg20 Mar 6, 2026
55b2afb
Fix sync issues
rg20 Mar 9, 2026
1d213bd
remove stale code
rg20 Mar 10, 2026
9937868
Launch 3 threads, one for dual simplex, one for PDLP+crossover, one f…
rg20 Mar 19, 2026
0f1a6f9
Revert "Prepare release/26.04"
AyodeAwe Mar 19, 2026
e2ea687
Update to 26.06 (#975)
AyodeAwe Mar 19, 2026
d44661d
Unify two constructors
rg20 Mar 19, 2026
5309ca4
Merge remote-tracking branch 'upstream/main' into root_relaxation
rg20 Mar 19, 2026
c9e39d3
Fix compilation error
rg20 Mar 19, 2026
c99e1c3
Optimize right-looking LU factorization with O(1) degree-bucket ops …
rg20 Mar 10, 2026
07cb595
crossover: hoist delta_zN and delta_expanded out of dual push loop
rg20 Mar 16, 2026
953b83e
Added review comments
rg20 Mar 16, 2026
787fadf
Remove code duplication
rg20 Mar 16, 2026
197bf8d
keep the dense vector path alive
rg20 Mar 16, 2026
990bcd0
Revert "Update to 26.06 (#975)"
rg20 Mar 20, 2026
40b9e49
Cleanup unnecessary changes
rg20 Mar 20, 2026
c0d1514
Remove unused variable
rg20 Mar 23, 2026
3164108
Disable green context
rg20 Mar 30, 2026
b920f9a
Move to appropriate file
rg20 Mar 30, 2026
f4d0fa5
Cleanup
rg20 Mar 30, 2026
9509000
Fix missing entries
rg20 Mar 30, 2026
2a1ff14
Cleanup concurrent halt handling
rg20 Mar 30, 2026
bb074ac
add missing include
rg20 Mar 30, 2026
47daeae
Add missing include
rg20 Mar 30, 2026
70ab58f
Handle failures after cut generation cleanly
rg20 Mar 31, 2026
14dff02
Fix compilation error
rg20 Apr 1, 2026
9a020db
Move the problem conversion to upstream
rg20 Apr 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion RAPIDS_BRANCH
Original file line number Diff line number Diff line change
@@ -1 +1 @@
release/26.04
main
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Avoid pointing release flow to main for this branch marker.
Using main here can pull moving-head workflow/config changes into a release line and make builds non-reproducible. Prefer pinning to the release branch/tag used for this train (e.g., release/26.04 or an explicit immutable ref).

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@RAPIDS_BRANCH` at line 1, the RAPIDS_BRANCH file currently points at the
mutable "main" branch, which risks pulling moving-head changes into the release
flow. Update the branch marker to a stable release branch or tag (for example,
"release/26.04" or an explicit immutable ref) by replacing the "main" entry in
RAPIDS_BRANCH with the chosen release branch or tag so that builds remain
reproducible.

Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ class pdlp_solver_settings_t {
bool inside_mip{false};
// For concurrent termination
std::atomic<int>* concurrent_halt{nullptr};
/** If true, solver does not set concurrent_halt; caller sets it after crossover. */
bool halt_set_by_caller{false};
static constexpr f_t minimal_absolute_tolerance = 1.0e-12;
pdlp_hyper_params::pdlp_hyper_params_t hyper_params;
// Holds the information of new variable lower and upper bounds for each climber in the format:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ class optimization_problem_solution_t : public base_solution_t {
* @return rmm::device_uvector<i_t> The device memory container for the reduced cost.
*/
rmm::device_uvector<f_t>& get_reduced_cost();
const rmm::device_uvector<f_t>& get_reduced_cost() const;

/**
* @brief Get termination reason
Expand Down
43 changes: 20 additions & 23 deletions cpp/src/barrier/barrier.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <barrier/iterative_refinement.hpp>
#include <barrier/sparse_cholesky.cuh>
#include <barrier/sparse_matrix_kernels.cuh>
#include <dual_simplex/concurrent_halt.hpp>

#include <dual_simplex/presolve.hpp>
#include <dual_simplex/solve.hpp>
Expand Down Expand Up @@ -289,7 +290,7 @@ class iteration_data_t {
// Ignore Q matrix for now
find_dense_columns(
lp.A, settings, dense_columns_unordered, n_dense_rows, max_row_nz, estimated_nz_AAT);
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings.concurrent_halt)) { return; }
#ifdef PRINT_INFO
for (i_t j : dense_columns_unordered) {
settings.log.printf("Dense column %6d\n", j);
Expand Down Expand Up @@ -350,7 +351,7 @@ class iteration_data_t {
inv_sqrt_diag.set_scalar(1.0);
if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { inv_diag.sqrt(inv_sqrt_diag); }

if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings.concurrent_halt)) { return; }

// Copy A into AD
AD = lp.A;
Expand Down Expand Up @@ -396,22 +397,22 @@ class iteration_data_t {
device_A.copy(host_A_CSR, lp.handle_ptr->get_stream());
RAFT_CHECK_CUDA(handle_ptr->get_stream());

if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings.concurrent_halt)) { return; }
i_t factorization_size = use_augmented ? lp.num_rows + lp.num_cols : lp.num_rows;
chol =
std::make_unique<sparse_cholesky_cudss_t<i_t, f_t>>(handle_ptr, settings, factorization_size);
chol->set_positive_definite(false);
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings.concurrent_halt)) { return; }
// Perform symbolic analysis
symbolic_status = 0;
if (use_augmented) {
// Build the sparsity pattern of the augmented system
form_augmented(true);
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings.concurrent_halt)) { return; }
symbolic_status = chol->analyze(device_augmented);
} else {
form_adat(true);
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings.concurrent_halt)) { return; }
symbolic_status = chol->analyze(device_ADAT);
}
}
Expand Down Expand Up @@ -581,7 +582,7 @@ class iteration_data_t {
span_x[i] *= span_scale[span_col_ind[i]];
});
RAFT_CHECK_CUDA(stream_view_);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return; }
if (first_call) {
try {
initialize_cusparse_data<i_t, f_t>(
Expand All @@ -591,7 +592,7 @@ class iteration_data_t {
return;
}
}
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return; }

multiply_kernels<i_t, f_t>(handle_ptr, device_A, device_AD, device_ADAT, cusparse_info);
handle_ptr->sync_stream();
Expand Down Expand Up @@ -682,9 +683,7 @@ class iteration_data_t {
dense_vector_t<i_t, f_t> M_col(AD.m);
solve_status = chol->solve(U_col, M_col);
if (solve_status != 0) { return solve_status; }
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }
M.set_column(k, M_col);

if (debug) {
Expand All @@ -701,9 +700,7 @@ class iteration_data_t {
for (i_t k = 0; k < n_dense_columns; k++) {
AD_dense.transpose_multiply(
1.0, M.values.data() + k * M.m, 0.0, H.values.data() + k * H.m);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }
}

dense_vector_t<i_t, f_t> e(n_dense_columns);
Expand Down Expand Up @@ -1193,7 +1190,7 @@ class iteration_data_t {
delta_nz[j] +=
fill; // Capture contributions from A(:, j). j will be encountered multiple times
}
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings.concurrent_halt)) { return; }
}

int64_t sparse_nz_C = 0;
Expand Down Expand Up @@ -1233,7 +1230,7 @@ class iteration_data_t {
delta_nz[j] + static_cast<int64_t>(
fill_estimate)); // Capture the estimated fill associated with column j
}
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
if (concurrent_halt_is_set(settings.concurrent_halt)) { return; }
}

int64_t estimated_nz_C = 0;
Expand Down Expand Up @@ -3429,7 +3426,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time,
if (lp.Q.n > 0) { create_Q(lp, Q); }

iteration_data_t<i_t, f_t> data(lp, num_upper_bounds, Q, settings);
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
if (concurrent_halt_is_set(settings.concurrent_halt)) {
settings.log.printf("Barrier solver halted\n");
return lp_status_t::CONCURRENT_LIMIT;
}
Expand Down Expand Up @@ -3458,7 +3455,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time,
settings.log.printf("Barrier time limit exceeded\n");
return lp_status_t::TIME_LIMIT;
}
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
if (concurrent_halt_is_set(settings.concurrent_halt)) {
settings.log.printf("Barrier solver halted\n");
return lp_status_t::CONCURRENT_LIMIT;
}
Expand Down Expand Up @@ -3557,7 +3554,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time,
settings.log.printf("Barrier time limit exceeded\n");
return lp_status_t::TIME_LIMIT;
}
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
if (concurrent_halt_is_set(settings.concurrent_halt)) {
settings.log.printf("Barrier solver halted\n");
return lp_status_t::CONCURRENT_LIMIT;
}
Expand All @@ -3568,7 +3565,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time,

i_t status = gpu_compute_search_direction(
data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff, max_affine_residual);
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
if (concurrent_halt_is_set(settings.concurrent_halt)) {
settings.log.printf("Barrier solver halted\n");
return lp_status_t::CONCURRENT_LIMIT;
}
Expand All @@ -3593,7 +3590,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time,
settings.log.printf("Barrier time limit exceeded\n");
return lp_status_t::TIME_LIMIT;
}
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
if (concurrent_halt_is_set(settings.concurrent_halt)) {
settings.log.printf("Barrier solver halted\n");
return lp_status_t::CONCURRENT_LIMIT;
}
Expand All @@ -3607,7 +3604,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time,

status = gpu_compute_search_direction(
data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual);
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
if (concurrent_halt_is_set(settings.concurrent_halt)) {
settings.log.printf("Barrier solver halted\n");
return lp_status_t::CONCURRENT_LIMIT;
}
Expand All @@ -3633,7 +3630,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time,
settings.log.printf("Barrier time limit exceeded\n");
return lp_status_t::TIME_LIMIT;
}
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
if (concurrent_halt_is_set(settings.concurrent_halt)) {
settings.log.printf("Barrier solver halted\n");
return lp_status_t::CONCURRENT_LIMIT;
}
Expand Down
42 changes: 17 additions & 25 deletions cpp/src/barrier/sparse_cholesky.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <barrier/dense_vector.hpp>
#include <barrier/device_sparse_matrix.cuh>

#include <dual_simplex/concurrent_halt.hpp>
#include <dual_simplex/simplex_solver_settings.hpp>
#include <dual_simplex/sparse_matrix.hpp>
#include <dual_simplex/tic_toc.hpp>
Expand Down Expand Up @@ -131,6 +132,8 @@ std::size_t compute_hash(const f_t* arr, size_t size)
return seed;
}

// #define USE_BARRIER_GREEN_CONTEXT

template <typename i_t, typename f_t>
class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
public:
Expand All @@ -155,6 +158,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
cuda_error = cudaSuccess;
status = CUDSS_STATUS_SUCCESS;

#ifdef USE_BARRIER_GREEN_CONTEXT
if (CUDART_VERSION >= 13000 && settings_.concurrent_halt != nullptr &&
settings_.num_gpus == 1) {
cuGetErrorString_func = cuopt::detail::get_driver_entry_point("cuGetErrorString");
Expand Down Expand Up @@ -238,6 +242,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
&stream, barrier_green_ctx, CU_STREAM_NON_BLOCKING, stream_priority),
reinterpret_cast<decltype(::cuGetErrorString)*>(cuGetErrorString_func));
}
#endif

auto cudss_device_idx = handle_ptr_->get_device();
auto cudss_device_count = 1;
Expand Down Expand Up @@ -363,6 +368,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
CUDSS_CALL_AND_CHECK_EXIT(cudssConfigDestroy(solverConfig), status, "cudssConfigDestroy");
CUDSS_CALL_AND_CHECK_EXIT(cudssDestroy(handle), status, "cudssDestroy");
CUDA_CALL_AND_CHECK_EXIT(cudaStreamSynchronize(stream), "cudaStreamSynchronize");

#ifdef USE_BARRIER_GREEN_CONTEXT
#if CUDART_VERSION >= 13000
if (settings_.concurrent_halt != nullptr && settings_.num_gpus == 1) {
auto cuStreamDestroy_func = cuopt::detail::get_driver_entry_point("cuStreamDestroy");
Expand All @@ -374,6 +381,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
reinterpret_cast<decltype(::cuGetErrorString)*>(cuGetErrorString_func));
handle_ptr_->get_stream().synchronize();
}
#endif
#endif
}

Expand Down Expand Up @@ -445,9 +453,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
raft::common::nvtx::range fun_scope("Barrier: cuDSS Analyze : CUDSS_PHASE_ANALYSIS");
status =
cudssExecute(handle, CUDSS_PHASE_REORDERING, solverConfig, solverData, A, cudss_x, cudss_b);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }
if (status != CUDSS_STATUS_SUCCESS) {
settings_.log.printf(
"FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for "
Expand All @@ -461,9 +467,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {

status = cudssExecute(
handle, CUDSS_PHASE_SYMBOLIC_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }
if (status != CUDSS_STATUS_SUCCESS) {
settings_.log.printf(
"FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for "
Expand Down Expand Up @@ -519,9 +523,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
f_t start_numeric = tic();
status = cudssExecute(
handle, CUDSS_PHASE_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }
if (status != CUDSS_STATUS_SUCCESS) {
settings_.log.printf(
"FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for "
Expand All @@ -535,9 +537,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
#endif

f_t numeric_time = toc(start_numeric);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }

int info;
size_t sizeWritten = 0;
Expand Down Expand Up @@ -635,19 +635,15 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
A_created = true;

// Perform symbolic analysis
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }
f_t start_analysis = tic();
CUDSS_CALL_AND_CHECK(
cudssExecute(handle, CUDSS_PHASE_REORDERING, solverConfig, solverData, A, cudss_x, cudss_b),
status,
"cudssExecute for reordering");

f_t reorder_time = toc(start_analysis);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }

f_t start_symbolic = tic();

Expand All @@ -660,7 +656,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
f_t symbolic_time = toc(start_symbolic);
f_t analysis_time = toc(start_analysis);
settings_.log.printf("Symbolic factorization time : %.2fs\n", symbolic_time);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
if (concurrent_halt_is_set(settings_.concurrent_halt)) {
RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
handle_ptr_->get_stream().synchronize();
return CONCURRENT_HALT_RETURN;
Expand Down Expand Up @@ -711,9 +707,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
"cudssExecute for factorization");

f_t numeric_time = toc(start_numeric);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }

int info;
size_t sizeWritten = 0;
Expand Down Expand Up @@ -776,9 +770,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
cudssMatrixSetValues(cudss_x, x.data()), status, "cudssMatrixSetValues for x");

status = cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, solverData, A, cudss_x, cudss_b);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
if (concurrent_halt_is_set(settings_.concurrent_halt)) { return CONCURRENT_HALT_RETURN; }
if (status != CUDSS_STATUS_SUCCESS) {
settings_.log.printf(
"FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for "
Expand Down
Loading