From a00bb7a719f474c6b326fe622a8224a716f49771 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 13:41:58 +0000 Subject: [PATCH 01/24] eccvm: widen constants WNAF_DIGITS_PER_ROW 4->8, ADDITIONS_PER_ROW 4->8 This is the first step toward halving the Precomputed and MSM table heights by doubling their width. The key changes: - WNAF_DIGITS_PER_ROW: 4 -> 8 (process 8 wNAF digits per precompute row) - ADDITIONS_PER_ROW: 4 -> 8 (process 8 point additions per MSM row) - DOUBLINGS_PER_ROW: new constant, always NUM_WNAF_DIGIT_BITS (= 4) The new DOUBLINGS_PER_ROW constant decouples the doubling chain length (which must remain 4, matching the wNAF digit width w=4) from ADDITIONS_PER_ROW (which we are doubling to 8). Previously, these were conflated because ADDITIONS_PER_ROW happened to equal NUM_WNAF_DIGIT_BITS. --- .../cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp index 82de4ed738cc..eb472fe1e7a0 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp @@ -16,11 +16,12 @@ static constexpr size_t NUM_WNAF_DIGITS_PER_SCALAR = NUM_SCALAR_BITS / NUM_WNAF_ static constexpr uint64_t WNAF_MASK = static_cast((1ULL << NUM_WNAF_DIGIT_BITS) - 1ULL); static constexpr size_t POINT_TABLE_SIZE = 1ULL << (NUM_WNAF_DIGIT_BITS); // Corresponds to the odd multiples of [P] between -(2^w - 1) and 2^w - 1. -static constexpr size_t WNAF_DIGITS_PER_ROW = 4; +static constexpr size_t WNAF_DIGITS_PER_ROW = 8; static constexpr size_t ADDITIONS_PER_ROW = - 4; // In the Straus algorithm for MSM, we proceed "digit-by-digit". (Here, digit means wNAF digit.) We chunk - // `ADDITIONS_PER_ROW` additions, all in the *same digit-slot*, in a row of the ECCVM's MSM table. 
Various parts - // of the implemention exploit the fact that `ADDITIONS_PER_ROWS == NUM_WNAF_DIGIT_BITS`. + 8; // In the Straus algorithm for MSM, we proceed "digit-by-digit". (Here, digit means wNAF digit.) We chunk + // `ADDITIONS_PER_ROW` additions, all in the *same digit-slot*, in a row of the ECCVM's MSM table. +static constexpr size_t DOUBLINGS_PER_ROW = + NUM_WNAF_DIGIT_BITS; // Number of doublings per doubling row. Always 4 (= w), independent of ADDITIONS_PER_ROW. template struct ScalarMul { uint32_t pc; From 458345dcf61bedb0c18b71ea985cd5c0829c483c Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 13:44:43 +0000 Subject: [PATCH 02/24] eccvm: update msm_builder for 8-wide additions and 4-wide doublings Key changes to MSMRow and trace computation: - AddState array: hardcoded size 4 -> ADDITIONS_PER_ROW (now 8) - Doubling loops: use DOUBLINGS_PER_ROW (= 4) instead of ADDITIONS_PER_ROW for the doubling phase, since we always do w=4 doublings regardless of how many additions we pack per row - Trace sizing: (num_msm_rows - 2) * 4 -> * ADDITIONS_PER_ROW - trace_index computation: * 4 -> * ADDITIONS_PER_ROW - After doubling loops, advance trace_index by (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW) to skip unused slots allocated in the point trace - Final row add_state: use ADDITIONS_PER_ROW-sized array fill --- .../src/barretenberg/eccvm/msm_builder.hpp | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp index 7dfe7c6ab817..e49244b2258f 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp @@ -24,6 +24,7 @@ class ECCVMMSMMBuilder { using MSM = bb::eccvm::MSM; static constexpr size_t ADDITIONS_PER_ROW = bb::eccvm::ADDITIONS_PER_ROW; + static constexpr size_t DOUBLINGS_PER_ROW = bb::eccvm::DOUBLINGS_PER_ROW; static constexpr size_t 
NUM_WNAF_DIGITS_PER_SCALAR = bb::eccvm::NUM_WNAF_DIGITS_PER_SCALAR; struct alignas(64) MSMRow { @@ -61,10 +62,11 @@ class ECCVMMSMMBuilder { // case exceptions, i.e., we want the VM proof to fail if we're doing a point // addition where (x1 == x2). to do this, we simply provide an inverse to x1 - x2. }; - std::array add_state{ AddState{ false, 0, { 0, 0 }, 0, 0 }, - AddState{ false, 0, { 0, 0 }, 0, 0 }, - AddState{ false, 0, { 0, 0 }, 0, 0 }, - AddState{ false, 0, { 0, 0 }, 0, 0 } }; + std::array add_state = []() { + std::array arr; + arr.fill(AddState{ false, 0, { 0, 0 }, 0, 0 }); + return arr; + }(); // The accumulator here is, in general, the result of four EC additions: A + Q_1 + Q_2 + Q_3 + Q_4. // We do not explicitly store the intermediate values A + Q_1, A + Q_1 + Q_2, and A + Q_1 + Q_2 + Q_3, although // these values are implicitly used in the values of `AddState.lambda` and `AddState.collision_inverse`. @@ -240,14 +242,12 @@ class ECCVMMSMMBuilder { // operations // This section sets up the data structures we need to store all intermediate ECC operations in projective form - const size_t num_point_adds_and_doubles = - (num_msm_rows - 2) * 4; // `num_msm_rows - 2` is the actual number of rows in the table required to compute - // the MSM; the msm table itself has a dummy row at the beginning and an extra row - // with the `x` and `y` coordinates of the accumulator at the end. (In general, the - // output of the accumulator from the computation at row `i` is present on row - // `i+1`. We multiply by 4 because each "row" of the VM processes 4 point-additions - // (and the fact that w = 4 means we must interleave with 4 doublings). This - // "corresponds" to the fact that `MSMROW.add_state` has 4 entries. + // Each MSM row processes up to ADDITIONS_PER_ROW point operations (additions or doublings). + // However, doubling rows only use DOUBLINGS_PER_ROW (= 4) slots. 
+ // For the point trace, we allocate conservatively: each row uses at most ADDITIONS_PER_ROW slots. + // `num_msm_rows - 2` is the actual number of active rows (excluding the leading dummy and trailing + // accumulator row). + const size_t num_point_adds_and_doubles = (num_msm_rows - 2) * ADDITIONS_PER_ROW; const size_t num_accumulators = num_msm_rows - 1; // for every row after the first row, we have an accumulator. // In what follows, either p1 + p2 = p3, or p1.dbl() = p3 // We create 1 vector to store the entire point trace. We split into multiple containers using std::span @@ -284,9 +284,9 @@ class ECCVMMSMMBuilder { 0); // the Straus algorithm proceeds by incrementing through the digit-slots and doing // computations *across* the `ScalarMul`s that make up our MSM. Each digit-slot therefore // contributes the *ceiling* of `msm_size`/`ADDITIONS_PER_ROW`. - size_t trace_index = - (msm_row_counts[msm_idx] - 1) * 4; // tracks the index in the traces of `p1`, `p2`, `p3`, and - // `accumulator_trace` that we are filling out + size_t trace_index = (msm_row_counts[msm_idx] - 1) * + ADDITIONS_PER_ROW; // tracks the index in the traces of `p1`, `p2`, `p3`, and + // `accumulator_trace` that we are filling out // for each digit-slot (`digit_idx`), and then for each row of the VM (which does `ADDITIONS_PER_ROW` point // additions), we either enter in/process (`ADDITIONS_PER_ROW`) `AddState` objects, and then if necessary @@ -342,10 +342,9 @@ class ECCVMMSMMBuilder { msm_row_index++; } // after processing each digit-slot, we now take care of doubling (as long as we are not at the last - // digit). We add an `MSMRow`, `row`, whose four `AddState` objects in `row.add_state` - // are null, but we also populate `p1_trace`, `p2_trace`, `p3_trace`, and `is_double_or_add` for four - // indices, corresponding to the w=4 doubling operations we need to perform. This embodies the numerical - // "coincidence" that `ADDITIONS_PER_ROW == NUM_WNAF_DIGIT_BITS` + // digit). 
We add an `MSMRow`, `row`, and populate `p1_trace`, `p2_trace`, `p3_trace`, and + // `is_double_or_add` for DOUBLINGS_PER_ROW (= w = 4) indices, corresponding to the w doublings we + // need to perform between digit-slots. Note: DOUBLINGS_PER_ROW != ADDITIONS_PER_ROW in general. if (digit_idx < NUM_WNAF_DIGITS_PER_SCALAR - 1) { auto& row = msm_rows[msm_row_index]; row.msm_transition = false; @@ -355,7 +354,7 @@ class ECCVMMSMMBuilder { row.q_add = false; row.q_double = true; row.q_skew = false; - for (size_t point_idx = 0; point_idx < ADDITIONS_PER_ROW; ++point_idx) { + for (size_t point_idx = 0; point_idx < DOUBLINGS_PER_ROW; ++point_idx) { auto& add_state = row.add_state[point_idx]; add_state.add = false; add_state.slice = 0; @@ -369,6 +368,8 @@ class ECCVMMSMMBuilder { is_double_or_add[trace_index] = true; trace_index++; } + // Skip unused trace slots for this row (ADDITIONS_PER_ROW allocated, only DOUBLINGS_PER_ROW used) + trace_index += (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW); accumulator_trace[msm_row_index] = accumulator; msm_row_index++; } else // process `wnaf_skew`, i.e., the skew digit. @@ -436,8 +437,8 @@ class ECCVMMSMMBuilder { }); // complete the computation of the ECCVM execution trace, by adding the affine intermediate point data - // i.e. row.accumulator_x, row.accumulator_y, row.add_state[0...3].collision_inverse, - // row.add_state[0...3].lambda + // i.e. row.accumulator_x, row.accumulator_y, row.add_state[i].collision_inverse, + // row.add_state[i].lambda for (size_t msm_idx = 0; msm_idx < msms.size(); msm_idx++) { const auto& msm = msms[msm_idx]; size_t trace_index = ((msm_row_counts[msm_idx] - 1) * ADDITIONS_PER_ROW); @@ -481,7 +482,7 @@ class ECCVMMSMMBuilder { const FF& acc_y = normalized_accumulator.is_point_at_infinity() ? 
0 : normalized_accumulator.y; row.accumulator_x = acc_x; row.accumulator_y = acc_y; - for (size_t point_idx = 0; point_idx < ADDITIONS_PER_ROW; ++point_idx) { + for (size_t point_idx = 0; point_idx < DOUBLINGS_PER_ROW; ++point_idx) { auto& add_state = row.add_state[point_idx]; add_state.collision_inverse = 0; // no notion of "different x values" for a point doubling const FF& dx = p1_trace[trace_index].x; @@ -489,6 +490,9 @@ class ECCVMMSMMBuilder { add_state.lambda = ((dx + dx + dx) * dx) * inverse; trace_index++; } + // Advance trace_index past the unused slots (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW) + // These slots were allocated in the point trace but are unused for doubling rows. + trace_index += (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW); accumulator_index++; msm_row_index++; } else // this row corresponds to performing point additions to handle WNAF skew @@ -535,10 +539,11 @@ class ECCVMMSMMBuilder { final_row.q_add = false; final_row.q_double = false; final_row.q_skew = false; - final_row.add_state = { typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 }, - typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 }, - typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 }, - typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 } }; + final_row.add_state = []() { + std::array arr; + arr.fill(typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 }); + return arr; + }(); return { msm_rows, point_table_read_counts }; } From dd7f49b4f1a2d9d388291aa2cece0267e9b1aea2 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 13:47:46 +0000 Subject: [PATCH 03/24] eccvm: widen precomputed_tables_builder for 8 wNAF digits per row With WNAF_DIGITS_PER_ROW doubled from 4 to 8: - num_rows_per_scalar drops from 8 to 4 (32 digits / 8 per row) - Each row now encodes 8 wNAF digits via 16 two-bit slices (s1..s16), up from 4 digits / 8 slices (s1..s8) - Each row stores 2 precomputed points (precompute_accumulator 
and precompute_accumulator2), since we have 8 points to store across 4 rows. Row i stores table[POINT_TABLE_SIZE-1-2i] and table[POINT_TABLE_SIZE-2-2i]. - Horner scalar accumulation shifts by 2^32 (was 2^16) since each row now contributes 8*4 = 32 bits of scalar data. - row_chunk computation extended to sum all 8 wNAF digits. - Removed static_assert(WNAF_DIGITS_PER_ROW == 4), replaced with static_assert(WNAF_DIGITS_PER_ROW == 8). - Updated POINT_TABLE_SIZE/2 == num_rows_per_scalar*2 assert to reflect the new 2-points-per-row layout. --- .../eccvm/precomputed_tables_builder.hpp | 78 +++++++++++++------ 1 file changed, 56 insertions(+), 22 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp index 1057d1506af4..72926f8cb041 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp @@ -21,15 +21,14 @@ class ECCVMPointTablePrecomputationBuilder { static constexpr size_t NUM_WNAF_DIGITS_PER_SCALAR = bb::eccvm::NUM_WNAF_DIGITS_PER_SCALAR; static constexpr size_t WNAF_DIGITS_PER_ROW = bb::eccvm::WNAF_DIGITS_PER_ROW; static constexpr size_t NUM_WNAF_DIGIT_BITS = bb::eccvm::NUM_WNAF_DIGIT_BITS; - // Note that our implementation takes advantage of a numerical coincidence: - // `NUM_WNAF_DIGITS_PER_SCALAR`/`WNAF_DIGITS_PER_ROW`, the number of rows per scalar multiplication, is the same as - // |{P, 3P, ..., (2ʷ-1)P}| = 2ʷ⁻¹ == 8, which is basically the number of multiples of P we need to precompute. (To - // be precise, we also compute 2P, but this occurs on every row.) + static constexpr size_t POINT_TABLE_SIZE = bb::eccvm::POINT_TABLE_SIZE; + + // With WNAF_DIGITS_PER_ROW = 8, we have num_rows_per_scalar = 32/8 = 4. + // We need to store 8 precomputed points (P, 3P, ..., 15P), so we store 2 per row. 
struct PointTablePrecomputationRow { - // s1, ..., s8 are each 2 bits, so they jointly encode 16 bits of information, which corresponds precisely to - // the data of 4 wNAF digits. they are ordered from "highest order" to "lowest order". this means that s1s2 - // encodes the first (highest order) wNAF digit in consideration, and so on. the explicit encoding is: the - // concatenation, s_{2i}s_{2i+1}, is naturally a number in {0, 1, ..., 15}; to obtain the corresponding wNAF + // s1, ..., s16 are each 2 bits, so they jointly encode 32 bits of information, which corresponds precisely to + // the data of 8 wNAF digits. They are ordered from "highest order" to "lowest order". The encoding is: + // the concatenation s_{2i-1}s_{2i} is naturally a number in {0, 1, ..., 15}; to obtain the corresponding wNAF // digit, multiply by 2 and subtract 15. int s1 = 0; int s2 = 0; @@ -39,6 +38,14 @@ class ECCVMPointTablePrecomputationBuilder { int s6 = 0; int s7 = 0; int s8 = 0; + int s9 = 0; + int s10 = 0; + int s11 = 0; + int s12 = 0; + int s13 = 0; + int s14 = 0; + int s15 = 0; + int s16 = 0; bool skew = false; bool point_transition = false; uint32_t pc = 0; @@ -47,23 +54,27 @@ class ECCVMPointTablePrecomputationBuilder { AffineElement precompute_accumulator{ 0, 0 }; // contains a precomputed element, i.e., something in {P, 3P, ..., 15P}. + AffineElement precompute_accumulator2{ + 0, 0 + }; // second precomputed element for this row (2 points per row now that num_rows_per_scalar = 4). 
AffineElement precompute_double{ 0, 0 }; }; static std::vector compute_rows( const std::vector>& ecc_muls) { - static constexpr size_t num_rows_per_scalar = NUM_WNAF_DIGITS_PER_SCALAR / WNAF_DIGITS_PER_ROW; - static_assert(num_rows_per_scalar == bb::eccvm::POINT_TABLE_SIZE / 2, - "precompute_accumulator fill loop assumes num_rows_per_scalar == POINT_TABLE_SIZE / 2"); + static constexpr size_t num_rows_per_scalar = NUM_WNAF_DIGITS_PER_SCALAR / WNAF_DIGITS_PER_ROW; // 32/8 = 4 + // We need to store POINT_TABLE_SIZE/2 = 8 precomputed points across num_rows_per_scalar = 4 rows, + // so 2 points per row. + static_assert(POINT_TABLE_SIZE / 2 == num_rows_per_scalar * 2, + "precompute_accumulator fill loop assumes 2 points per row"); const size_t num_precompute_rows = num_rows_per_scalar * ecc_muls.size() + 1; std::vector precompute_state(num_precompute_rows); // start with empty row (shiftable polynomials must have 0 as first coefficient) precompute_state[0] = PointTablePrecomputationRow{}; - // current impl doesn't work if not 4 - static_assert(WNAF_DIGITS_PER_ROW == 4); + static_assert(WNAF_DIGITS_PER_ROW == 8); parallel_for_range(ecc_muls.size(), [&](size_t start, size_t end) { for (size_t j = start; j < end; j++) { @@ -73,18 +84,28 @@ class ECCVMPointTablePrecomputationBuilder { for (size_t i = 0; i < num_rows_per_scalar; ++i) { PointTablePrecomputationRow row; + + // Extract 8 wNAF digits for this row const int slice0 = slices[i * WNAF_DIGITS_PER_ROW]; const int slice1 = slices[i * WNAF_DIGITS_PER_ROW + 1]; const int slice2 = slices[i * WNAF_DIGITS_PER_ROW + 2]; const int slice3 = slices[i * WNAF_DIGITS_PER_ROW + 3]; + const int slice4 = slices[i * WNAF_DIGITS_PER_ROW + 4]; + const int slice5 = slices[i * WNAF_DIGITS_PER_ROW + 5]; + const int slice6 = slices[i * WNAF_DIGITS_PER_ROW + 6]; + const int slice7 = slices[i * WNAF_DIGITS_PER_ROW + 7]; - // {-15, -13. 
..., 13, 15} --> {0, 1, ..., 15} + // {-15, -13, ..., 13, 15} --> {0, 1, ..., 15} const int slice0base2 = (slice0 + 15) / 2; const int slice1base2 = (slice1 + 15) / 2; const int slice2base2 = (slice2 + 15) / 2; const int slice3base2 = (slice3 + 15) / 2; + const int slice4base2 = (slice4 + 15) / 2; + const int slice5base2 = (slice5 + 15) / 2; + const int slice6base2 = (slice6 + 15) / 2; + const int slice7base2 = (slice7 + 15) / 2; - // convert into 2-bit chunks + // convert into 2-bit chunks (16 slices for 8 digits) row.s1 = slice0base2 >> 2; row.s2 = slice0base2 & 3; row.s3 = slice1base2 >> 2; @@ -93,6 +114,14 @@ class ECCVMPointTablePrecomputationBuilder { row.s6 = slice2base2 & 3; row.s7 = slice3base2 >> 2; row.s8 = slice3base2 & 3; + row.s9 = slice4base2 >> 2; + row.s10 = slice4base2 & 3; + row.s11 = slice5base2 >> 2; + row.s12 = slice5base2 & 3; + row.s13 = slice6base2 >> 2; + row.s14 = slice6base2 & 3; + row.s15 = slice7base2 >> 2; + row.s16 = slice7base2 & 3; bool last_row = (i == num_rows_per_scalar - 1); row.skew = last_row ? entry.wnaf_skew : false; @@ -101,10 +130,13 @@ class ECCVMPointTablePrecomputationBuilder { // N.B. we apply a constraint that requires slice1 to be positive for the 1st row of each scalar // sum. 
This ensures we do not have WNAF representations of negative values - const int row_chunk = slice3 + slice2 * (1 << 4) + slice1 * (1 << 8) + slice0 * (1 << 12); + const int64_t row_chunk = slice7 + (slice6 * (1LL << 4)) + (slice5 * (1LL << 8)) + (slice4 * (1LL << 12)) + + (slice3 * (1LL << 16)) + (slice2 * (1LL << 20)) + (slice1 * (1LL << 24)) + + (slice0 * (1LL << 28)); // 64-bit: |slice0| * 2^28 can exceed INT_MAX bool chunk_negative = row_chunk < 0; + // Shift by 32 bits (8 digits * 4 bits each) scalar_sum = scalar_sum << (NUM_WNAF_DIGIT_BITS * WNAF_DIGITS_PER_ROW); if (chunk_negative) { scalar_sum -= static_cast(-row_chunk); @@ -119,12 +151,14 @@ class ECCVMPointTablePrecomputationBuilder { BB_ASSERT(scalar_sum - entry.wnaf_skew, entry.scalar); } // the last element of the `precomputed_table` field of a `ScalarMul` is the double of the point. - row.precompute_double = entry.precomputed_table[bb::eccvm::POINT_TABLE_SIZE]; - // fill accumulator in reverse order i.e. first row = 15[P], then 13[P], ..., 1[P] - // note that this reflects a coincidence: the number of rows (per scalar multiplication) is - // the number of multiples that we need to precompute. Indeed, the latter is 2ʷ⁻¹, while the former - // depends both on w and on `NUM_SCALAR_BITS`. - row.precompute_accumulator = entry.precomputed_table[bb::eccvm::POINT_TABLE_SIZE - 1 - i]; + row.precompute_double = entry.precomputed_table[POINT_TABLE_SIZE]; + // fill accumulators: 2 precomputed points per row, in reverse order. + // Row 0: table[POINT_TABLE_SIZE-1] = 15P, table[POINT_TABLE_SIZE-2] = 13P + // Row 1: table[POINT_TABLE_SIZE-3] = 11P, table[POINT_TABLE_SIZE-4] = 9P + // ...
+ // Row 3: table[POINT_TABLE_SIZE-7] = 3P, table[POINT_TABLE_SIZE-8] = P + row.precompute_accumulator = entry.precomputed_table[POINT_TABLE_SIZE - 1 - (2 * i)]; + row.precompute_accumulator2 = entry.precomputed_table[POINT_TABLE_SIZE - 2 - (2 * i)]; precompute_state[j * num_rows_per_scalar + i + 1] = (row); } } From 3f5fcdd04d3d5c744ccfdbea483b1e00e5cb2db3 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 13:51:09 +0000 Subject: [PATCH 04/24] eccvm: add 36 new flavor columns for 8-wide ECCVM Updates ECCVMFlavor entity counts and column definitions: NUM_WIRES: 85 -> 121 NUM_ALL_ENTITIES: 118 -> 156 NUM_WITNESS_ENTITIES: 87 -> 123 NUM_SHIFTED_ENTITIES: 26 -> 28 New WireNonShiftedEntities (+34 columns): - precompute_s5hi..s8lo: 8 new 2-bit slice columns for digits 5-8 - msm_add5..add8: 4 new addition selector columns - msm_x5..x8, msm_y5..y8: 8 new point coordinate columns - msm_collision_x5..x8: 4 new collision inverse columns - msm_lambda5..lambda8: 4 new slope columns - msm_slice5..slice8: 4 new wNAF slice columns - lookup_read_counts_2, _3: 2 new lookup read count columns New WireToBeShiftedWithoutAccumulatorsEntities (+2 columns): - precompute_tx2, precompute_ty2: 2nd precomputed point per row, needs shifting for inter-row point table constraints Corresponding ShiftedEntities updated with precompute_tx2_shift, precompute_ty2_shift. CommitmentLabels updated for all new columns. --- .../src/barretenberg/eccvm/eccvm_flavor.hpp | 167 +++++++++++++----- 1 file changed, 127 insertions(+), 40 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp index 8da09ea570fe..960137aeca6e 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp @@ -65,7 +65,7 @@ class ECCVMFlavor { // they become too small. 
static constexpr size_t ECCVM_FIXED_SIZE = 1UL << CONST_ECCVM_LOG_N; - static constexpr size_t NUM_WIRES = 85; + static constexpr size_t NUM_WIRES = 121; // was 85, +36 for 8-wide // The number of entities added for ZK (gemini_masking_poly) static constexpr size_t NUM_MASKING_POLYNOMIALS = 1; @@ -73,16 +73,16 @@ class ECCVMFlavor { // The number of multivariate polynomials on which a sumcheck prover sumcheck operates (including shifts). We often // need containers of this size to hold related data, so we choose a name more agnostic than `NUM_POLYNOMIALS`. // Note: this number does not include the individual sorted list polynomials. - // Includes gemini_masking_poly for ZK (NUM_ALL_ENTITIES = 117 + NUM_MASKING_POLYNOMIALS) - static constexpr size_t NUM_ALL_ENTITIES = 118; + // NUM_ALL_ENTITIES = masking(1) + precomputed(4) + witness(123) + shifted(28) = 156 + static constexpr size_t NUM_ALL_ENTITIES = 156; // The number of polynomials precomputed to describe a circuit and to aid a prover in constructing a satisfying // assignment of witnesses. We again choose a neutral name. static constexpr size_t NUM_PRECOMPUTED_ENTITIES = 4; // The total number of witness entities not including shifts. - // Includes gemini_masking_poly for ZK (NUM_WITNESS_ENTITIES = 86 + NUM_MASKING_POLYNOMIALS) - static constexpr size_t NUM_WITNESS_ENTITIES = 87; + // witness = non-shifted(94) + to-be-shifted-no-acc(24) + accumulators(3) + derived(2) = 123 + static constexpr size_t NUM_WITNESS_ENTITIES = 123; // The number of entities in ShiftedEntities. - static constexpr size_t NUM_SHIFTED_ENTITIES = 26; + static constexpr size_t NUM_SHIFTED_ENTITIES = 28; // was 26, +2 for precompute_tx2/ty2 shifts // The number of entities in DerivedWitnessEntities that are not going to be shifted. 
static constexpr size_t NUM_DERIVED_WITNESS_ENTITIES_NON_SHIFTED = 1; // A container to be fed to ShpleminiVerifier to avoid redundant scalar muls, the first number is the index of the @@ -251,7 +251,48 @@ class ECCVMFlavor { transcript_msm_infinity, // column 56 transcript_msm_x_inverse, // column 57 transcript_msm_count_zero_at_transition, // column 58 - transcript_msm_count_at_transition_inverse) // column 59 + transcript_msm_count_at_transition_inverse, // column 59 + // --- 8-wide additions: new precompute slice columns --- + precompute_s5hi, // column 60 + precompute_s5lo, // column 61 + precompute_s6hi, // column 62 + precompute_s6lo, // column 63 + precompute_s7hi, // column 64 + precompute_s7lo, // column 65 + precompute_s8hi, // column 66 + precompute_s8lo, // column 67 + // --- 8-wide additions: new MSM add selectors --- + msm_add5, // column 68 + msm_add6, // column 69 + msm_add7, // column 70 + msm_add8, // column 71 + // --- 8-wide additions: new MSM point coordinates --- + msm_x5, // column 72 + msm_y5, // column 73 + msm_x6, // column 74 + msm_y6, // column 75 + msm_x7, // column 76 + msm_y7, // column 77 + msm_x8, // column 78 + msm_y8, // column 79 + // --- 8-wide additions: new MSM collision inverses --- + msm_collision_x5, // column 80 + msm_collision_x6, // column 81 + msm_collision_x7, // column 82 + msm_collision_x8, // column 83 + // --- 8-wide additions: new MSM lambdas --- + msm_lambda5, // column 84 + msm_lambda6, // column 85 + msm_lambda7, // column 86 + msm_lambda8, // column 87 + // --- 8-wide additions: new MSM slices --- + msm_slice5, // column 88 + msm_slice6, // column 89 + msm_slice7, // column 90 + msm_slice8, // column 91 + // --- 8-wide additions: additional lookup read counts --- + lookup_read_counts_2, // column 92 + lookup_read_counts_3) // column 93 }; /** @@ -271,28 +312,31 @@ class ECCVMFlavor { template class WireToBeShiftedWithoutAccumulatorsEntities { public: DEFINE_FLAVOR_MEMBERS(DataType, - transcript_mul, // column 
60 - transcript_msm_count, // column 61 - precompute_scalar_sum, // column 62 - precompute_s1hi, // column 63 - precompute_dx, // column 64 - precompute_dy, // column 65 - precompute_tx, // column 66 - precompute_ty, // column 67 - msm_transition, // column 68 - msm_add, // column 69 - msm_double, // column 70 - msm_skew, // column 71 - msm_accumulator_x, // column 72 - msm_accumulator_y, // column 73 - msm_count, // column 74 - msm_round, // column 75 - msm_add1, // column 76 - msm_pc, // column 77 - precompute_pc, // column 78 - transcript_pc, // column 79 - precompute_round, // column 80 - precompute_select) // column 81 + transcript_mul, // column 94 + transcript_msm_count, // column 95 + precompute_scalar_sum, // column 96 + precompute_s1hi, // column 97 + precompute_dx, // column 98 + precompute_dy, // column 99 + precompute_tx, // column 100 + precompute_ty, // column 101 + msm_transition, // column 102 + msm_add, // column 103 + msm_double, // column 104 + msm_skew, // column 105 + msm_accumulator_x, // column 106 + msm_accumulator_y, // column 107 + msm_count, // column 108 + msm_round, // column 109 + msm_add1, // column 110 + msm_pc, // column 111 + precompute_pc, // column 112 + transcript_pc, // column 113 + precompute_round, // column 114 + precompute_select, // column 115 + // --- 8-wide additions: 2nd precomputed point (needs shift for inter-row constraints) --- + precompute_tx2, // column 116 + precompute_ty2) // column 117 }; /** @@ -302,9 +346,9 @@ class ECCVMFlavor { template class WireToBeShiftedAccumulatorEntities { public: DEFINE_FLAVOR_MEMBERS(DataType, - transcript_accumulator_not_empty, // column 82 - transcript_accumulator_x, // column 83 - transcript_accumulator_y) // column 84 + transcript_accumulator_not_empty, // column 118 + transcript_accumulator_x, // column 119 + transcript_accumulator_y) // column 120 }; /** @@ -365,10 +409,12 @@ class ECCVMFlavor { transcript_pc_shift, // column 19 precompute_round_shift, // column 20 
precompute_select_shift, // column 21 - transcript_accumulator_not_empty_shift, // column 22 - transcript_accumulator_x_shift, // column 23 - transcript_accumulator_y_shift, // column 24 - z_perm_shift); // column 25 + precompute_tx2_shift, // column 22 + precompute_ty2_shift, // column 23 + transcript_accumulator_not_empty_shift, // column 24 + transcript_accumulator_x_shift, // column 25 + transcript_accumulator_y_shift, // column 26 + z_perm_shift); // column 27 }; template @@ -397,10 +443,12 @@ class ECCVMFlavor { entities.transcript_pc, // column 19 entities.precompute_round, // column 20 entities.precompute_select, // column 21 - entities.transcript_accumulator_not_empty, // column 22 - entities.transcript_accumulator_x, // column 23 - entities.transcript_accumulator_y, // column 24 - entities.z_perm }; // column 25 + entities.precompute_tx2, // column 22 + entities.precompute_ty2, // column 23 + entities.transcript_accumulator_not_empty, // column 24 + entities.transcript_accumulator_x, // column 25 + entities.transcript_accumulator_y, // column 26 + entities.z_perm }; // column 27 } /** @@ -881,11 +929,50 @@ class ECCVMFlavor { Base::msm_slice2 = "MSM_SLICE2"; Base::msm_slice3 = "MSM_SLICE3"; Base::msm_slice4 = "MSM_SLICE4"; + // 8-wide additions: new precompute slice labels + Base::precompute_s5hi = "PRECOMPUTE_S5HI"; + Base::precompute_s5lo = "PRECOMPUTE_S5LO"; + Base::precompute_s6hi = "PRECOMPUTE_S6HI"; + Base::precompute_s6lo = "PRECOMPUTE_S6LO"; + Base::precompute_s7hi = "PRECOMPUTE_S7HI"; + Base::precompute_s7lo = "PRECOMPUTE_S7LO"; + Base::precompute_s8hi = "PRECOMPUTE_S8HI"; + Base::precompute_s8lo = "PRECOMPUTE_S8LO"; + // 8-wide additions: new MSM labels + Base::msm_add5 = "MSM_ADD5"; + Base::msm_add6 = "MSM_ADD6"; + Base::msm_add7 = "MSM_ADD7"; + Base::msm_add8 = "MSM_ADD8"; + Base::msm_x5 = "MSM_X5"; + Base::msm_y5 = "MSM_Y5"; + Base::msm_x6 = "MSM_X6"; + Base::msm_y6 = "MSM_Y6"; + Base::msm_x7 = "MSM_X7"; + Base::msm_y7 = "MSM_Y7"; + 
Base::msm_x8 = "MSM_X8"; + Base::msm_y8 = "MSM_Y8"; + Base::msm_collision_x5 = "MSM_COLLISION_X5"; + Base::msm_collision_x6 = "MSM_COLLISION_X6"; + Base::msm_collision_x7 = "MSM_COLLISION_X7"; + Base::msm_collision_x8 = "MSM_COLLISION_X8"; + Base::msm_lambda5 = "MSM_LAMBDA5"; + Base::msm_lambda6 = "MSM_LAMBDA6"; + Base::msm_lambda7 = "MSM_LAMBDA7"; + Base::msm_lambda8 = "MSM_LAMBDA8"; + Base::msm_slice5 = "MSM_SLICE5"; + Base::msm_slice6 = "MSM_SLICE6"; + Base::msm_slice7 = "MSM_SLICE7"; + Base::msm_slice8 = "MSM_SLICE8"; + // 8-wide additions: new precomputed point labels + Base::precompute_tx2 = "PRECOMPUTE_TX2"; + Base::precompute_ty2 = "PRECOMPUTE_TY2"; Base::transcript_accumulator_not_empty = "TRANSCRIPT_ACCUMULATOR_NOT_EMPTY"; Base::transcript_reset_accumulator = "TRANSCRIPT_RESET_ACCUMULATOR"; Base::precompute_select = "PRECOMPUTE_SELECT"; Base::lookup_read_counts_0 = "LOOKUP_READ_COUNTS_0"; Base::lookup_read_counts_1 = "LOOKUP_READ_COUNTS_1"; + Base::lookup_read_counts_2 = "LOOKUP_READ_COUNTS_2"; + Base::lookup_read_counts_3 = "LOOKUP_READ_COUNTS_3"; Base::transcript_base_infinity = "TRANSCRIPT_BASE_INFINITY"; Base::transcript_base_x_inverse = "TRANSCRIPT_BASE_X_INVERSE"; Base::transcript_base_y_inverse = "TRANSCRIPT_BASE_Y_INVERSE"; From 84a8d232ced81d72800f53516ca57e7c53b951b8 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 13:53:57 +0000 Subject: [PATCH 05/24] eccvm: wire up new 8-wide columns in ProverPolynomials constructor Extends the ProverPolynomials constructor to populate 34 of the 36 new flavor columns from the builder row data: Precompute section: - Wire precompute_s5hi..s8lo from point_table_rows[i].s9..s16 - Wire precompute_tx2/ty2 from point_table_rows[i].precompute_accumulator2 MSM section (all from add_state[4..7]): - Wire msm_add5..add8 from add_state[4..7].add - Wire msm_x5..x8, msm_y5..y8 from add_state[4..7].point - Wire msm_collision_x5..x8 from add_state[4..7].collision_inverse - Wire msm_lambda5..lambda8 from
add_state[4..7].lambda - Wire msm_slice5..slice8 from add_state[4..7].slice lookup_read_counts_2/_3 columns are declared but not yet populated; they will be wired when the lookup relation is updated to support 4 table terms per precompute row. --- .../src/barretenberg/eccvm/eccvm_flavor.hpp | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp index 960137aeca6e..a85ddeb9e705 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp @@ -759,6 +759,14 @@ class ECCVMFlavor { precompute_s3lo.set_if_valid_index(i, point_table_rows[i].s6); precompute_s4hi.set_if_valid_index(i, point_table_rows[i].s7); precompute_s4lo.set_if_valid_index(i, point_table_rows[i].s8); + precompute_s5hi.set_if_valid_index(i, point_table_rows[i].s9); + precompute_s5lo.set_if_valid_index(i, point_table_rows[i].s10); + precompute_s6hi.set_if_valid_index(i, point_table_rows[i].s11); + precompute_s6lo.set_if_valid_index(i, point_table_rows[i].s12); + precompute_s7hi.set_if_valid_index(i, point_table_rows[i].s13); + precompute_s7lo.set_if_valid_index(i, point_table_rows[i].s14); + precompute_s8hi.set_if_valid_index(i, point_table_rows[i].s15); + precompute_s8lo.set_if_valid_index(i, point_table_rows[i].s16); // If skew is active (i.e. we need to subtract a base point from the msm result), // write `7` into rows.precompute_skew. 
`7`, in binary representation, equals `-1` when converted // into WNAF form @@ -767,6 +775,8 @@ class ECCVMFlavor { precompute_dy.set_if_valid_index(i, point_table_rows[i].precompute_double.y); precompute_tx.set_if_valid_index(i, point_table_rows[i].precompute_accumulator.x); precompute_ty.set_if_valid_index(i, point_table_rows[i].precompute_accumulator.y); + precompute_tx2.set_if_valid_index(i, point_table_rows[i].precompute_accumulator2.x); + precompute_ty2.set_if_valid_index(i, point_table_rows[i].precompute_accumulator2.y); } }); @@ -787,6 +797,10 @@ class ECCVMFlavor { msm_add2.set_if_valid_index(i, static_cast(msm_rows[i].add_state[1].add)); msm_add3.set_if_valid_index(i, static_cast(msm_rows[i].add_state[2].add)); msm_add4.set_if_valid_index(i, static_cast(msm_rows[i].add_state[3].add)); + msm_add5.set_if_valid_index(i, static_cast(msm_rows[i].add_state[4].add)); + msm_add6.set_if_valid_index(i, static_cast(msm_rows[i].add_state[5].add)); + msm_add7.set_if_valid_index(i, static_cast(msm_rows[i].add_state[6].add)); + msm_add8.set_if_valid_index(i, static_cast(msm_rows[i].add_state[7].add)); msm_x1.set_if_valid_index(i, msm_rows[i].add_state[0].point.x); msm_y1.set_if_valid_index(i, msm_rows[i].add_state[0].point.y); msm_x2.set_if_valid_index(i, msm_rows[i].add_state[1].point.x); @@ -795,18 +809,38 @@ class ECCVMFlavor { msm_y3.set_if_valid_index(i, msm_rows[i].add_state[2].point.y); msm_x4.set_if_valid_index(i, msm_rows[i].add_state[3].point.x); msm_y4.set_if_valid_index(i, msm_rows[i].add_state[3].point.y); + msm_x5.set_if_valid_index(i, msm_rows[i].add_state[4].point.x); + msm_y5.set_if_valid_index(i, msm_rows[i].add_state[4].point.y); + msm_x6.set_if_valid_index(i, msm_rows[i].add_state[5].point.x); + msm_y6.set_if_valid_index(i, msm_rows[i].add_state[5].point.y); + msm_x7.set_if_valid_index(i, msm_rows[i].add_state[6].point.x); + msm_y7.set_if_valid_index(i, msm_rows[i].add_state[6].point.y); + msm_x8.set_if_valid_index(i, 
msm_rows[i].add_state[7].point.x); + msm_y8.set_if_valid_index(i, msm_rows[i].add_state[7].point.y); msm_collision_x1.set_if_valid_index(i, msm_rows[i].add_state[0].collision_inverse); msm_collision_x2.set_if_valid_index(i, msm_rows[i].add_state[1].collision_inverse); msm_collision_x3.set_if_valid_index(i, msm_rows[i].add_state[2].collision_inverse); msm_collision_x4.set_if_valid_index(i, msm_rows[i].add_state[3].collision_inverse); + msm_collision_x5.set_if_valid_index(i, msm_rows[i].add_state[4].collision_inverse); + msm_collision_x6.set_if_valid_index(i, msm_rows[i].add_state[5].collision_inverse); + msm_collision_x7.set_if_valid_index(i, msm_rows[i].add_state[6].collision_inverse); + msm_collision_x8.set_if_valid_index(i, msm_rows[i].add_state[7].collision_inverse); msm_lambda1.set_if_valid_index(i, msm_rows[i].add_state[0].lambda); msm_lambda2.set_if_valid_index(i, msm_rows[i].add_state[1].lambda); msm_lambda3.set_if_valid_index(i, msm_rows[i].add_state[2].lambda); msm_lambda4.set_if_valid_index(i, msm_rows[i].add_state[3].lambda); + msm_lambda5.set_if_valid_index(i, msm_rows[i].add_state[4].lambda); + msm_lambda6.set_if_valid_index(i, msm_rows[i].add_state[5].lambda); + msm_lambda7.set_if_valid_index(i, msm_rows[i].add_state[6].lambda); + msm_lambda8.set_if_valid_index(i, msm_rows[i].add_state[7].lambda); msm_slice1.set_if_valid_index(i, msm_rows[i].add_state[0].slice); msm_slice2.set_if_valid_index(i, msm_rows[i].add_state[1].slice); msm_slice3.set_if_valid_index(i, msm_rows[i].add_state[2].slice); msm_slice4.set_if_valid_index(i, msm_rows[i].add_state[3].slice); + msm_slice5.set_if_valid_index(i, msm_rows[i].add_state[4].slice); + msm_slice6.set_if_valid_index(i, msm_rows[i].add_state[5].slice); + msm_slice7.set_if_valid_index(i, msm_rows[i].add_state[6].slice); + msm_slice8.set_if_valid_index(i, msm_rows[i].add_state[7].slice); } }); this->set_shifted(); From e7e0283b6e2651c28b7fd34a558865fad78fde32 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 
Mar 2026 13:56:39 +0000 Subject: [PATCH 06/24] eccvm: extend MSM relation for 8 additions per row The MSM relation now supports 8 point additions per row (was 4). The doubling chain remains 4-wide (= wNAF digit width w = 4). Key changes: - Addition chain: first_add + 7 conditional adds (was first_add + 3) - Skew chain: 8 conditional skew additions (was 4) - Collision checks: 8 inverse checks (was 4) - Slice-zero enforcement: 8 checks (was 4) - Count update: sum of add1..add8 (was add1..add4) - Addition continuity: add{i+1} * (-add{i} + 1) for i=1..7 (was 1..3) - Cross-row continuity: (-add8 + 1) * add1_shift (was -add4 + 1) Subrelation count: 47 -> 67 (20 new subrelations) New subrelations: ADD slopes 5-8, SKEW slopes 5-8, collision 5-8, slice-zero 5-8, continuity add5-8. MAX_PARTIAL_RELATION_LENGTH for this relation: 8 -> 12 (due to the longer addition chain increasing the degree of the accumulator output). --- .../relations/ecc_vm/ecc_msm_relation.hpp | 21 +- .../ecc_vm/ecc_msm_relation_impl.hpp | 545 +++++------------- 2 files changed, 168 insertions(+), 398 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp index b72fc3138c96..4626847f4dc9 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp @@ -43,9 +43,24 @@ namespace bb { template class ECCVMMSMRelationImpl { public: using FF = FF_; - static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }; + // 67 subrelations. Max partial length = 12 (for the acc output after 8 chained additions). + // The degree of y_t8 is 10 (degree grows through chaining: first_add gives deg 3 y, then each subsequent + // add increments it by 1). 
With q_add gating, the relation degree is 11, so the partial length is 12. + // Most subrelations remain degree <= 8. The new addition-chain subrelations (47-50, 51-54) have higher degree. + static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ + 12, 12, 8, 8, 12, 8, // 0-5: ADD acc(x,y), slope1; SKEW acc(x,y), slope1 + 8, 8, 8, 8, 8, 8, // 6-11: collision1-4; DOUBLE acc(x,y) + 8, 8, 8, 8, 8, 8, // 12-17: DOUBLE slope1; slice-zero 1-4; mutual excl + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 12, // 18-31: round tx, selectors, count, continuity + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, // 32-46: add1=q_add+q_skew, skew ctrls, ADD/DOUBLE/SKEW slopes 2-4, no-op + 12, 12, 12, 12, // 47-50: ADD slopes 5-8 + 12, 12, 12, 12, // 51-54: SKEW slopes 5-8 + 8, 8, 8, 8, // 55-58: collision 5-8 + 8, 8, 8, 8, // 59-62: slice-zero 5-8 + 8, 8, 8, 8 // 63-66: continuity add5-8 + }; template static void accumulate(ContainerOverSubrelations& accumulator, diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation_impl.hpp index 04f64d755729..88dc32cd3935 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation_impl.hpp @@ -30,18 +30,15 @@ namespace bb { * * ADDITION round (round = j): * [Acc] = [Acc] + T_i[a_{i, j}] for all i in [0, ... k-1] + * (up to 8 additions per ECCVM row) * * DOUBLE round: - * [Acc] = 16 * [Acc] (four point doublings) + * [Acc] = 16 * [Acc] (four point doublings, using lambda1..lambda4) * * SKEW round: * If skew_i == 1, [Acc] = [Acc] - [P_i] for all i in [0, ..., k - 1] * * The relations in ECCVMMSMRelationImpl constrain the ADDITION, DOUBLE and SKEW rounds - * @param accumulator transformed to `accumulator + C(in(X)...)*scaling_factor` - * @param in an std::array containing the fully extended Accumulator edges. 
- * @param parameters contains beta, gamma, and public_input_delta, .... - * @param scaling_factor optional term to scale the evaluation before adding to evals. */ template template @@ -53,6 +50,7 @@ void ECCVMMSMRelationImpl::accumulate(ContainerOverSubrelations& accumulator using Accumulator = typename std::tuple_element_t<0, ContainerOverSubrelations>; using View = typename Accumulator::View; + // Point coordinates for 8 additions per row const auto& x1 = View(in.msm_x1); const auto& y1 = View(in.msm_y1); const auto& x2 = View(in.msm_x2); @@ -61,319 +59,144 @@ void ECCVMMSMRelationImpl::accumulate(ContainerOverSubrelations& accumulator const auto& y3 = View(in.msm_y3); const auto& x4 = View(in.msm_x4); const auto& y4 = View(in.msm_y4); + const auto& x5 = View(in.msm_x5); + const auto& y5 = View(in.msm_y5); + const auto& x6 = View(in.msm_x6); + const auto& y6 = View(in.msm_y6); + const auto& x7 = View(in.msm_x7); + const auto& y7 = View(in.msm_y7); + const auto& x8 = View(in.msm_x8); + const auto& y8 = View(in.msm_y8); + // Collision inverses const auto& collision_inverse1 = View(in.msm_collision_x1); const auto& collision_inverse2 = View(in.msm_collision_x2); const auto& collision_inverse3 = View(in.msm_collision_x3); const auto& collision_inverse4 = View(in.msm_collision_x4); + const auto& collision_inverse5 = View(in.msm_collision_x5); + const auto& collision_inverse6 = View(in.msm_collision_x6); + const auto& collision_inverse7 = View(in.msm_collision_x7); + const auto& collision_inverse8 = View(in.msm_collision_x8); + // Lambdas (slopes). lambda1..4 used for both additions and doublings. + // lambda5..8 used only for additions 5-8. 
const auto& lambda1 = View(in.msm_lambda1); const auto& lambda2 = View(in.msm_lambda2); const auto& lambda3 = View(in.msm_lambda3); const auto& lambda4 = View(in.msm_lambda4); + const auto& lambda5 = View(in.msm_lambda5); + const auto& lambda6 = View(in.msm_lambda6); + const auto& lambda7 = View(in.msm_lambda7); + const auto& lambda8 = View(in.msm_lambda8); const auto& lagrange_first = View(in.lagrange_first); + // Add selectors const auto& add1 = View(in.msm_add1); const auto& add1_shift = View(in.msm_add1_shift); const auto& add2 = View(in.msm_add2); const auto& add3 = View(in.msm_add3); const auto& add4 = View(in.msm_add4); + const auto& add5 = View(in.msm_add5); + const auto& add6 = View(in.msm_add6); + const auto& add7 = View(in.msm_add7); + const auto& add8 = View(in.msm_add8); + // Accumulator const auto& acc_x = View(in.msm_accumulator_x); const auto& acc_y = View(in.msm_accumulator_y); const auto& acc_x_shift = View(in.msm_accumulator_x_shift); const auto& acc_y_shift = View(in.msm_accumulator_y_shift); + // Slices const auto& slice1 = View(in.msm_slice1); const auto& slice2 = View(in.msm_slice2); const auto& slice3 = View(in.msm_slice3); const auto& slice4 = View(in.msm_slice4); + const auto& slice5 = View(in.msm_slice5); + const auto& slice6 = View(in.msm_slice6); + const auto& slice7 = View(in.msm_slice7); + const auto& slice8 = View(in.msm_slice8); + // Control signals const auto& msm_transition = View(in.msm_transition); const auto& msm_transition_shift = View(in.msm_transition_shift); const auto& round = View(in.msm_round); const auto& round_shift = View(in.msm_round_shift); - const auto& q_add = View(in.msm_add); // is 1 iff we are at an ADD row in Straus algorithm + const auto& q_add = View(in.msm_add); const auto& q_add_shift = View(in.msm_add_shift); const auto& q_skew = View(in.msm_skew); const auto& q_skew_shift = View(in.msm_skew_shift); - const auto& q_double = View(in.msm_double); // is 1 iff we are at an DOUBLE row in Straus algorithm + 
const auto& q_double = View(in.msm_double); const auto& q_double_shift = View(in.msm_double_shift); const auto& msm_size = View(in.msm_size_of_msm); - const auto& pc = View(in.msm_pc); // pc stands for `point-counter`. + const auto& pc = View(in.msm_pc); const auto& pc_shift = View(in.msm_pc_shift); const auto& count = View(in.msm_count); const auto& count_shift = View(in.msm_count_shift); auto is_not_first_row = (-lagrange_first + 1); - /** - * @brief Evaluating ADDITION rounds - * - * This comment describes the algorithm we want the Prover to perform. - * The relations we constrain are supposed to make an honest Prover compute witnesses consistent with the following: - * - * For an MSM of size-k... - * - * Algorithm to determine if round at shifted row is an ADDITION round: - * 1. count_shift < msm_size - * 2. round != 32 - * - * Algorithm to process MSM ADDITION round: - * 1. If `round == 0` set `count = 0` - * 2. For j = pc + count, perform the following: - * 2a. If j + 3 < k: [P_{j + 3}] = T_{j+ 3}[slice_{j + 3}] - * 2b. If j + 2 < k: [P_{j + 2}] = T_{j+ 2}[slice_{j + 2}] - * 2c. If j + 1 < k: [P_{j + 1}] = T_{j+ 1}[slice_{j + 1}] - * 2d. [P_{j}] = T_{j}[slice_{j}] - * 2e. If j + 3 < k: [Acc_shift] = [Acc] + [P_j] + [P_{j+1}] + [P_{j+2}] + [P_{j+3}] - * 2f. Else If j + 2 < k: [Acc_shift] = [Acc] + [P_j] + [P_{j+1}] + [P_{j+2}] - * 2g. Else IF j + 1 < k: [Acc_shift] = [Acc] + [P_j] + [P_{j+1}] - * 2h. Else : [Acc_shift] = [Acc] + [P_j] - * 3. `count_shift = count + 1 + (j + 1 < k) + (j + 2 < k) + (j + 3 < k)` - */ - - /** - * @brief Constraining addition rounds via a multiset-equality check - * - * @details - * The boolean column q_add describes whether a round is an ADDITION round. - * The values of q_add are Prover-defined. We need to ensure they set q_add correctly. We will do this via a - * multiset-equality check (formerly called a "strict lookup"), which allows the various tables to "communicate". 
- * On a high level, this table "reads" (pc, round, wnaf_slice), another table (Precomputed) "writes" - * a potentially different set of (pc, round, wnaf_slice), and we demand that the reads match the writes. - * Alternatively said, the MSM columns spawn a multiset of tuples of the form (pc, round, wnaf_slice), the - * Precomputed Table columns spawn a potentially different multiset of tuples of the form (pc, round, wnaf_slice), - * and we _check_ that these two multisets match. - * - * The above description does not reference how we will _prove_ that the two multisets are equal. As usual, we use a - * grand product argument. A happy byproduct of this is that we can use the grand product technique, which is - * powerful enough to allow our multiset equality testing to support _conditional adds_; this means that we only add - * a tuple if some particular condition occurs. - * - * This (pc, round, wnaf_slice) multiset equality testing is made more difficult by the fact that the values of - * `precomputed_pc` are _not the same_ as the values of `msm_pc`. The former indexes over every (non-trivial, 128 - * bit) scalar multiplication, while the latter jumps values and is constant on MSM rows corresponding to a fixed - * MSM. However, the transition values should match. - * - * Given a row of the MSM table, we have four selectors q_add1, q_add2, q_add3, q_add4, as well as a q_skew - * selector. For the MSM side of the multiset corresponding to (pc, round, wnaf_slice), we add: - * - * 1. (msm_pc - msm_count, round, wnaf_slice_{count}) when q_add1 = 1 - * 2. (msm_pc - msm_count - 1, round, wnaf_slice_{count + 1}) when q_add2 = 1 - * 3. (msm_pc - msm_count - 2, round, wnaf_slice_{count + 2}) when q_add3 = 1 - * 4. 
(msm_pc - msm_count - 3, round, wnaf_slice_{count + 3}) when q_add4 = 1 - * - * That this is "what we want" comes from the following facts: msm_pc is the number of (non-trivial, 128-bit) Point - * multiplications we have done _until the start of_ the current MSM, and `msm_count` is the number of Point * wNAF - * slice multiplications/lookups we have done _in this round_. (Recall that a round corresponds to a wNAF digit.) In - * particular, `msm_count` updates by the appropriate amount (usually 4, more accurately q_add1 + q_add2 + q_add3 + - * q_add4) per row of the MSM table. - * - * On the other side, given a row of the Precomputed columns, if `precompute_select == 1`, we add - * 1. (precompute_pc, 4 * precompute_round, w_1) - * 2. (precompute_pc, 4 * precompute_round + 1, w_2) - * 3. (precompute_pc, 4 * precompute_round + 2, w_3) - * 4. (precompute_pc, 4 * precompute_round + 3, w_4) - * Additionally, if `precompute_point_transition == 1`, we add - * 5. (precompute_pc, 4 * precompute_round + 4, precompute_skew) - * - * ELSE `precompute_select == 0` and we add: - * 1. (0, 0, 0) - * - * Here, w_K is the compressed wNAF slices corresponding to `precompute_sKhi` and `precompute_sKlo`, for K ∈ {1, 2, - * 3, 4} and precompute_skew ∈ {0, 7}. - * - * SKETCH OF PROOF: We now argue that, under the following assumptions, if the multiset equality holds, then the - * `q_addK` and also `q_add` are all correctly constrained for K ∈ {1, 2, 3, 4}. - * 1. The Precomputed table is correctly constrained; in particular, the values `precompute_pc`, - * `precompute_round`, `precompute_skew`, `precompute_select`, and `wK` are all correctly constrained. - * 2. `round` monotonically increases from 0 to 32 before reseting back to 0. `round_shift - round == 1` - * precisely when `q_double == 1`. - * 3. `pc` is monotonic and only updates when there is an `msm_transition`. Here, it updates by `msm_size`, - * which must be constrained somewhere else by a multiset argument. We detail this below. 
- * 4. `q_add`, `q_skew`, and `q_double` are pairwise mutually exclusive. - * 5. `q_add1 == 1` iff either `q_add == 1` OR `q_skew == 1`. - * 6. The lookup table is implemented correctly. - * - * First of all, note the asymmetry: we do not explicitly add tuples corresponding to skew on the MSM side of the - * table. Indeeed, this is implicit with `msm_round == 32`. Now, the point is that the pair (pc, round) uniquely - * specifies the point + wNAF digit that we are processing (and adding to the accumulator) and both `pc` and `round` - * are directly constrained to be monotonic. - * - * Suppose the Prover sets `q_addK = 0` when an honest Prover would set `q_addK == 1`. Then there would be some (pc, - * round, wnaf_slice) that the Precomputed table added to its multiset that the prover did not add. The Prover can - * _never_ "compensate" for this, as `pc` is locally constrained to be monotonic and `round` is constrained to be - * periodic; this means that the Prover has "lost their chance" to add this element to the multiset and hence the - * multiset equality check will fail. - * - * Conversely, if the Prover sets `q_addK = 1` when it should be set to 0, there are several options: either - * we are at the end of a `round` (so e.g. `q_add4 ` _should_ be 0), or we are at a double row, or we are at a row - * that should be all 0s. In the first two cases, as long as the Precomputed table is correctly constrained, again - * we would be adding a tuple to the multiset that can never be hit by the Precomputed table due to `precompute_pc` - * monotonicty and `precompute_round` periodicity (enforced in the precomputed columns.). In the final case, the - * only way we don't break the multiset check is if `wnaf_slice == 0` for the corresponding `q_addK` that is on. But - * then the lookup argument will fail, as there is no corresponding point when `pc = 0`. (Here it is helpful to - * remember that `pc` stands for _point-counter_.) 
Note that this requires that `precompute_pc` is well-formed. - * - * - * We apply consistency/continuity checks to q_add1/q_add2/q_add3/q_add4: - * 1. If q_add2 = 1, require q_add1 = 1 - * 2. If q_add3 = 1, require q_add2 = 1 - * 3. If q_add4 = 1, require q_add3 = 1 - * 4. If q_add1_shift = 1 AND round does not update between rows, require q_add4 = 1 - * - */ - - /** - * @brief Constrain msm_size and output of MSM computation via multiset equality - * - * @details - * As explained in the section on constraining the addition wire values, to make everything work we also need to - * constrain `msm_size`, something directly computed in the Transcript columns. We also need to "send" the final - * output value of an MSM from the MSM table to the transcript table so it can continue its processing. (Send here - * is a euphemism for constrain.) We do this via a multiset equality check of the form: - * (pc, P.x, P.y, msm-size) - * From the perspective of the MSM table, we add such a tuple only when `msm_transition_shift == 1` (i.e., the next - * row begins a new MSM, meaning the current row is the last row of the just-completed MSM). The terms P.x and P.y - * refer to the output values of the MSM just computed by the MSM table. `msm_size` is the size of the _just - * completed_ MSM. - * - * - */ - - /** - * @brief Looking up the slice-point products {-15[P], -13[P], ..., 13[P], 15[P]} - * - * @details - * In the Point Table, for every point [P] that occurs in the MSM table, we compute the list of points: {-15[P], - * -13[P], ..., 13[P], 15[P]}. (Note that these never vanish, as we only send a point to each table if they are - * non-zero.) We then constrain the "slice products" that occur here via a lookup argument. For completeness, we - * briefly sketch this. 
- * - * The PointTable will "write" the following row to the lookup table: (pc, slice, x, y), where if `pc` corresponds - * to an elliptic curve point [P] (`pc` is a decreasing counter of the non-zero points that occur in our - * computation), slice ∈ {0, ..., 15}, and (x, y) are the affine coordinates of (2 * slice - 15)[P]. - * - * The MSM table will then read a row of the same form. This constrains the MSM table to have correctly used the - * wNAF * point in the Straus algorithm. - * - */ - - /** - * @brief Addition relation - * - * All addition operations in ECCVMMSMRelationImpl are conditional additions, as we sometimes want to add values and - * other times simply want to propagate values (consider, e.g., when `q_add2 == 0`). - * - * This method returns three Accumulators: (x_out, y_out, slope_relation) - * - x_out, y_out: Output point coordinates. Either an addition of inputs (if `selector == 1`), or xa/ya (if - * `selector == 0`). - * - slope_relation: Constraint enforcing that lambda is computed correctly (lambda = 0 if selector = 0, else - * lambda = (yb - ya) / (xb - xa)) - * - * The `collision_relation` parameter tracks a subrelation that validates xb != xa. This collision check means that - * our system is only statistically complete, not perfectly complete. (See the `offset_generator` and the - * `first_add` method for details.) - * - * Repeated calls to this method will increase the max degree of the Accumulator output: deg(x_out) = 1 + - * max(deg(xa), deg(xb)), deg(y_out) = max(1 + deg(x_out), 1 + deg(ya)). In our application, we chain together 4 of - * these with the pattern in such a way that the final x_out will have degree 5 and the final y_out will have - * degree 6. 
- */ + // ======================================================================== + // Addition helper: conditional add of (xb, yb) into (xa, ya) + // ======================================================================== auto add = [&](auto& xb, auto& yb, auto& xa, auto& ya, auto& lambda, auto& selector, auto& collision_relation) { - // computation of lambda is valid: if q == 1, then L == (yb - ya) / (xb - xa) - // if q == 0, then L == 0. combining these into a single constraint yields: - // q * (L * (xb - xa - 1) - (yb - ya)) + L = 0 auto slope_relation = selector * (lambda * (xb - xa - 1) - (yb - ya)) + lambda; collision_relation += selector * (xb - xa); - // x_out = L.L + (-xb - xa) * q + (1 - q) xa - // deg L = 1, deg q = 1, min(deg(xa), deg(xb))≥ 1. - // hence deg(x_out) = 1 + max(deg(xa), deg(xb)) auto x_out = lambda.sqr() + (-xb - xa - xa) * selector + xa; - - // y_out = L . (xa - x_out) - ya * q + (1 - q) ya - // hence deg(y_out) = max(1 + deg(x_out), 1 + deg(ya)) auto y_out = lambda * (xa - x_out) + (-ya - ya) * selector + ya; return std::array{ x_out, y_out, slope_relation }; }; - /** - * @brief First Addition relation - handles the first addition in each row with inhomogeneous base point selection - * - * @details The first add operation per row is treated inhomogeneously based on whether we are starting a new MSM - * or continuing an existing one across multiple rows. 
- * - * This method returns three Accumulators: (x_out, y_out, slope_relation), computed as follows: - * - * **Case 1: Continuing MSM (selector == 0, i.e., msm_transition == 0)** - * - Add point (xa, ya) to the accumulator (xb, yb) from the previous row - * - Example: In an MSM of size 9 spanning 3 rows, rows 2 and 3 use selector=0 to continue with their respective - * accumulators - * - * **Case 2: Starting new MSM (selector == 1, i.e., msm_transition == 1)** - * - Add point (xa, ya) to the fixed "offset generator" point (xo, yo) - * - The offset generator serves two purposes for honest Provers: - * (a) Ensures no intermediate MSM computations produce points at infinity - * (b) Eliminates branch logic for the EC ops - * - Example: In an MSM of size 9, only row 1 has msm_transition=1 and uses the offset generator - * - * **Soundness via collision check:** - * We enforce soundness by constraining that the x-coordinates of the two input points must differ (xa != xb or - * xa != xo). This prevents the Prover from exploiting incomplete addition formula edge cases (point doubling or - * adding inverses). The collision_relation accumulator tracks this constraint, which is later verified via an - * inverse check. - * - * **Statistical completeness:** - * Note that this technique is only *statistically* complete. There exist valid MSM computations where an honest - * Prover would encounter x-coordinate collisions (xa == xb or xa == xo), causing the constraints to become - * unsatisfiable. However, the probability of such a collision is negligible—equivalent to solving the discrete - * logarithm problem. - */ + // ======================================================================== + // First Addition: handles MSM start (offset generator) vs continuation + // ======================================================================== auto first_add = [&](auto& xb, auto& yb, auto& xa, auto& ya, auto& lambda, auto& selector, auto& collision_relation) { - // N.B. 
this is brittle - should be curve agnostic but we don't propagate the curve parameter into - // relations! constexpr auto offset_generator = get_precomputed_generators()[0]; constexpr uint256_t oxu = offset_generator.x; constexpr uint256_t oyu = offset_generator.y; const Accumulator xo(oxu); const Accumulator yo(oyu); - // set (x, y) to be either accumulator if `selector == 0` or OFFSET if `selector == 1`. auto x = xo * selector + xb * (-selector + 1); auto y = yo * selector + yb * (-selector + 1); - auto slope_relation = lambda * (x - xa) - (y - ya); // degree 3 + auto slope_relation = lambda * (x - xa) - (y - ya); collision_relation += (xa - x); auto x_out = lambda * lambda + (-x - xa); auto y_out = lambda * (xa - x_out) - ya; return std::array{ x_out, y_out, slope_relation }; }; - // ADD operations (if row represents ADD round, not SKEW or DOUBLE) + // ======================================================================== + // ADDITION chain (8 additions per row) + // ======================================================================== Accumulator x1_collision_relation(0); Accumulator x2_collision_relation(0); Accumulator x3_collision_relation(0); Accumulator x4_collision_relation(0); - // If `msm_transition == 1`, we have started a new MSM. We need to treat the current value of [Acc] as the point at - // infinity! 
+ Accumulator x5_collision_relation(0); + Accumulator x6_collision_relation(0); + Accumulator x7_collision_relation(0); + Accumulator x8_collision_relation(0); + auto [x_t1, y_t1, add_slope_relation1] = - first_add(acc_x, acc_y, x1, y1, lambda1, msm_transition, x1_collision_relation); // [deg 2, deg 3] - auto [x_t2, y_t2, add_slope_relation2] = - add(x2, y2, x_t1, y_t1, lambda2, add2, x2_collision_relation); // [deg 3, deg 4] - auto [x_t3, y_t3, add_slope_relation3] = - add(x3, y3, x_t2, y_t2, lambda3, add3, x3_collision_relation); // [deg 4, deg 5] - auto [x_t4, y_t4, add_slope_relation4] = - add(x4, y4, x_t3, y_t3, lambda4, add4, x4_collision_relation); // [deg 5, deg 6] + first_add(acc_x, acc_y, x1, y1, lambda1, msm_transition, x1_collision_relation); + auto [x_t2, y_t2, add_slope_relation2] = add(x2, y2, x_t1, y_t1, lambda2, add2, x2_collision_relation); + auto [x_t3, y_t3, add_slope_relation3] = add(x3, y3, x_t2, y_t2, lambda3, add3, x3_collision_relation); + auto [x_t4, y_t4, add_slope_relation4] = add(x4, y4, x_t3, y_t3, lambda4, add4, x4_collision_relation); + auto [x_t5, y_t5, add_slope_relation5] = add(x5, y5, x_t4, y_t4, lambda5, add5, x5_collision_relation); + auto [x_t6, y_t6, add_slope_relation6] = add(x6, y6, x_t5, y_t5, lambda6, add6, x6_collision_relation); + auto [x_t7, y_t7, add_slope_relation7] = add(x7, y7, x_t6, y_t6, lambda7, add7, x7_collision_relation); + auto [x_t8, y_t8, add_slope_relation8] = add(x8, y8, x_t7, y_t7, lambda8, add8, x8_collision_relation); // Validate accumulator output matches ADD output if q_add = 1 - std::get<0>(accumulator) += q_add * (acc_x_shift - x_t4) * scaling_factor; - std::get<1>(accumulator) += q_add * (acc_y_shift - y_t4) * scaling_factor; + std::get<0>(accumulator) += q_add * (acc_x_shift - x_t8) * scaling_factor; + std::get<1>(accumulator) += q_add * (acc_y_shift - y_t8) * scaling_factor; // Validate slope relations for each addition separately to prevent cancellation attacks std::get<2>(accumulator) += 
q_add * add_slope_relation1 * scaling_factor; std::get<36>(accumulator) += q_add * add_slope_relation2 * scaling_factor; std::get<37>(accumulator) += q_add * add_slope_relation3 * scaling_factor; std::get<38>(accumulator) += q_add * add_slope_relation4 * scaling_factor; - - /** - * @brief doubles a point. - * - * Degree of x_out = 2 - * Degree of y_out = 3 - * Degree of relation = 4 - */ + std::get<47>(accumulator) += q_add * add_slope_relation5 * scaling_factor; + std::get<48>(accumulator) += q_add * add_slope_relation6 * scaling_factor; + std::get<49>(accumulator) += q_add * add_slope_relation7 * scaling_factor; + std::get<50>(accumulator) += q_add * add_slope_relation8 * scaling_factor; + + // ======================================================================== + // DOUBLING chain (4 doublings per row, using lambda1..4) + // ======================================================================== auto dbl = [&](auto& x, auto& y, auto& lambda) { auto two_x = x + x; auto slope_relation = lambda * (y + y) - (two_x + x) * x; @@ -382,60 +205,38 @@ void ECCVMMSMRelationImpl::accumulate(ContainerOverSubrelations& accumulator return std::array{ x_out, y_out, slope_relation }; }; - /** - * @brief - * - * Algorithm to determine if round is a DOUBLE round: - * 1. count_shift >= msm_size - * 2. round != 32 - * - * Algorithm to process MSM DOUBLE round: - * [Acc_shift] = ((([Acc].double()).double()).double()).double() - * - * As with additions, the column q_double describes whether row is a double round. It is Prover-defined. - * The value of `msm_round` can only update when `q_double = 1` and we use this to ensure Prover correctly sets - * `q_double`. The reason for this is that `msm_round` witnesses the wNAF digit we are processing, and we only - * perform the four doublings when we are done processing a wNAF digit. See round transition relations further down. 
- */ auto [x_d1, y_d1, double_slope_relation1] = dbl(acc_x, acc_y, lambda1); auto [x_d2, y_d2, double_slope_relation2] = dbl(x_d1, y_d1, lambda2); auto [x_d3, y_d3, double_slope_relation3] = dbl(x_d2, y_d2, lambda3); auto [x_d4, y_d4, double_slope_relation4] = dbl(x_d3, y_d3, lambda4); std::get<10>(accumulator) += q_double * (acc_x_shift - x_d4) * scaling_factor; std::get<11>(accumulator) += q_double * (acc_y_shift - y_d4) * scaling_factor; - // Validate slope relations for each doubling separately to prevent cancellation attacks std::get<12>(accumulator) += q_double * double_slope_relation1 * scaling_factor; std::get<39>(accumulator) += q_double * double_slope_relation2 * scaling_factor; std::get<40>(accumulator) += q_double * double_slope_relation3 * scaling_factor; std::get<41>(accumulator) += q_double * double_slope_relation4 * scaling_factor; - /** - * @brief SKEW operations - * When computing x * [P], if x is even we must subtract [P] from accumulator - * (this is because our windowed non-adjacent-form can only represent odd numbers) - * Round 32 represents "skew" round. - * If scalar slice == 7, we add into accumulator (point_table[7] maps to -[P]) - * If scalar slice == 0, we do not add into accumulator - * i.e. for the skew round we can use the slice values as our "selector" when doing conditional point adds - * - * As with addition and doubling, the column q_skew is prover-defined. It is precisely turned on when the round - * is 32. We implement this constraint slightly differently. For more details, see the round transition relations - * below. 
- */ + // ======================================================================== + // SKEW chain (8 conditional additions per row) + // ======================================================================== static FF inverse_seven = FF(7).invert(); auto skew1_select = slice1 * inverse_seven; auto skew2_select = slice2 * inverse_seven; auto skew3_select = slice3 * inverse_seven; auto skew4_select = slice4 * inverse_seven; + auto skew5_select = slice5 * inverse_seven; + auto skew6_select = slice6 * inverse_seven; + auto skew7_select = slice7 * inverse_seven; + auto skew8_select = slice8 * inverse_seven; Accumulator x1_skew_collision_relation(0); Accumulator x2_skew_collision_relation(0); Accumulator x3_skew_collision_relation(0); Accumulator x4_skew_collision_relation(0); - // add skew points iff row is a SKEW row AND slice = 7 (point_table[7] maps to -[P]) - // N.B. while it would be nice to have one `add` relation for both ADD and SKEW rounds, - // this would increase degree of sumcheck identity vs evaluating them separately. 
- // This is because, for add rounds, the result of adding [P1], [Acc] is [P1 + Acc] or [P1] - // but for skew rounds, the result of adding [P1], [Acc] is [P1 + Acc] or [Acc] + Accumulator x5_skew_collision_relation(0); + Accumulator x6_skew_collision_relation(0); + Accumulator x7_skew_collision_relation(0); + Accumulator x8_skew_collision_relation(0); + auto [x_s1, y_s1, skew_slope_relation1] = add(x1, y1, acc_x, acc_y, lambda1, skew1_select, x1_skew_collision_relation); auto [x_s2, y_s2, skew_slope_relation2] = @@ -444,140 +245,114 @@ void ECCVMMSMRelationImpl::accumulate(ContainerOverSubrelations& accumulator add(x3, y3, x_s2, y_s2, lambda3, skew3_select, x3_skew_collision_relation); auto [x_s4, y_s4, skew_slope_relation4] = add(x4, y4, x_s3, y_s3, lambda4, skew4_select, x4_skew_collision_relation); + auto [x_s5, y_s5, skew_slope_relation5] = + add(x5, y5, x_s4, y_s4, lambda5, skew5_select, x5_skew_collision_relation); + auto [x_s6, y_s6, skew_slope_relation6] = + add(x6, y6, x_s5, y_s5, lambda6, skew6_select, x6_skew_collision_relation); + auto [x_s7, y_s7, skew_slope_relation7] = + add(x7, y7, x_s6, y_s6, lambda7, skew7_select, x7_skew_collision_relation); + auto [x_s8, y_s8, skew_slope_relation8] = + add(x8, y8, x_s7, y_s7, lambda8, skew8_select, x8_skew_collision_relation); // Validate accumulator output matches SKEW output if q_skew = 1 - std::get<3>(accumulator) += q_skew * (acc_x_shift - x_s4) * scaling_factor; - std::get<4>(accumulator) += q_skew * (acc_y_shift - y_s4) * scaling_factor; - // Validate slope relations for each skew addition separately to prevent cancellation attacks + std::get<3>(accumulator) += q_skew * (acc_x_shift - x_s8) * scaling_factor; + std::get<4>(accumulator) += q_skew * (acc_y_shift - y_s8) * scaling_factor; std::get<5>(accumulator) += q_skew * skew_slope_relation1 * scaling_factor; std::get<42>(accumulator) += q_skew * skew_slope_relation2 * scaling_factor; std::get<43>(accumulator) += q_skew * skew_slope_relation3 * 
scaling_factor; std::get<44>(accumulator) += q_skew * skew_slope_relation4 * scaling_factor; - - // Check x-coordinates do not collide if row is an ADD row or a SKEW row - // if either q_add or q_skew = 1, an inverse should exist for each computed relation - // Step 1: construct boolean selectors that describe whether we added a point at the current row + std::get<51>(accumulator) += q_skew * skew_slope_relation5 * scaling_factor; + std::get<52>(accumulator) += q_skew * skew_slope_relation6 * scaling_factor; + std::get<53>(accumulator) += q_skew * skew_slope_relation7 * scaling_factor; + std::get<54>(accumulator) += q_skew * skew_slope_relation8 * scaling_factor; + + // ======================================================================== + // Collision checks (x-coordinates must differ for all 8 additions) + // ======================================================================== const auto add_first_point = add1 * q_add + q_skew * skew1_select; const auto add_second_point = add2 * q_add + q_skew * skew2_select; const auto add_third_point = add3 * q_add + q_skew * skew3_select; const auto add_fourth_point = add4 * q_add + q_skew * skew4_select; - // Step 2: construct the difference a.k.a. 
delta between x-coordinates for each point add (depending on if row is - // ADD or SKEW) + const auto add_fifth_point = add5 * q_add + q_skew * skew5_select; + const auto add_sixth_point = add6 * q_add + q_skew * skew6_select; + const auto add_seventh_point = add7 * q_add + q_skew * skew7_select; + const auto add_eighth_point = add8 * q_add + q_skew * skew8_select; + const auto x1_delta = x1_skew_collision_relation * q_skew + x1_collision_relation * q_add; const auto x2_delta = x2_skew_collision_relation * q_skew + x2_collision_relation * q_add; const auto x3_delta = x3_skew_collision_relation * q_skew + x3_collision_relation * q_add; const auto x4_delta = x4_skew_collision_relation * q_skew + x4_collision_relation * q_add; - // Step 3: x_delta * inverse - 1 = 0 if we performed a point addition (else x_delta * inverse = 0) + const auto x5_delta = x5_skew_collision_relation * q_skew + x5_collision_relation * q_add; + const auto x6_delta = x6_skew_collision_relation * q_skew + x6_collision_relation * q_add; + const auto x7_delta = x7_skew_collision_relation * q_skew + x7_collision_relation * q_add; + const auto x8_delta = x8_skew_collision_relation * q_skew + x8_collision_relation * q_add; + std::get<6>(accumulator) += (x1_delta * collision_inverse1 - add_first_point) * scaling_factor; std::get<7>(accumulator) += (x2_delta * collision_inverse2 - add_second_point) * scaling_factor; std::get<8>(accumulator) += (x3_delta * collision_inverse3 - add_third_point) * scaling_factor; std::get<9>(accumulator) += (x4_delta * collision_inverse4 - add_fourth_point) * scaling_factor; - - // When add_i = 0, force slice_i to ALSO be 0 + std::get<55>(accumulator) += (x5_delta * collision_inverse5 - add_fifth_point) * scaling_factor; + std::get<56>(accumulator) += (x6_delta * collision_inverse6 - add_sixth_point) * scaling_factor; + std::get<57>(accumulator) += (x7_delta * collision_inverse7 - add_seventh_point) * scaling_factor; + std::get<58>(accumulator) += (x8_delta * 
collision_inverse8 - add_eighth_point) * scaling_factor; + + // ======================================================================== + // When add_i = 0, force slice_i to also be 0 + // ======================================================================== std::get<13>(accumulator) += (-add1 + 1) * slice1 * scaling_factor; std::get<14>(accumulator) += (-add2 + 1) * slice2 * scaling_factor; std::get<15>(accumulator) += (-add3 + 1) * slice3 * scaling_factor; std::get<16>(accumulator) += (-add4 + 1) * slice4 * scaling_factor; + std::get<59>(accumulator) += (-add5 + 1) * slice5 * scaling_factor; + std::get<60>(accumulator) += (-add6 + 1) * slice6 * scaling_factor; + std::get<61>(accumulator) += (-add7 + 1) * slice7 * scaling_factor; + std::get<62>(accumulator) += (-add8 + 1) * slice8 * scaling_factor; + // ======================================================================== // SELECTORS ARE MUTUALLY EXCLUSIVE - // at most one of q_skew, q_double, q_add can be nonzero. - // note that as we can expect our table to be zero padded, we _do not_ insist that q_add + q_double + q_skew == 1. + // ======================================================================== std::get<17>(accumulator) += (q_add * q_double + q_add * q_skew + q_double * q_skew) * scaling_factor; + // ======================================================================== // ACCUMULATOR PRESERVATION ON NO-OP ROWS - // If no phase selector is active (q_add = q_double = q_skew = 0), the accumulator must not change. - // Without this constraint, a malicious prover could insert no-op rows between active rows and - // set arbitrary accumulator values on the next row, because the accumulator-update constraints - // are all gated by their respective phase selectors. 
- // We exclude two boundary cases: - // - msm_transition = 1 on the current row: msm_transition marks the first row of a new MSM - // (where q_add is also 1), but the final row of the entire MSM trace ALSO has msm_transition = 1 - // with ALL phase selectors off. On that row, acc holds the MSM output and acc_shift need not - // be preserved. This is safe because the set relation constrains (pc, acc_x, acc_y, msm_size) - // at transitions. (NOTE: this is a design choice specified by the builder; we could equivalently propagate the - // accumulator one past the last MSM row and then not turn off the constraint when `msm_transition == 1`.) - // - lagrange_first = 1 (row 0): the first row of the trace is zero-padded and the next row - // starts a fresh MSM whose accumulator is initialized via first_add, not by continuity. + // ======================================================================== auto no_op_selector = - (-q_add + 1) * (-q_double + 1) * (-q_skew + 1) * (-msm_transition + 1) * (-lagrange_first + 1); // degree 5 - std::get<45>(accumulator) += no_op_selector * (acc_x_shift - acc_x) * scaling_factor; // degree 6 - std::get<46>(accumulator) += no_op_selector * (acc_y_shift - acc_y) * scaling_factor; // degree 6 + (-q_add + 1) * (-q_double + 1) * (-q_skew + 1) * (-msm_transition + 1) * (-lagrange_first + 1); + std::get<45>(accumulator) += no_op_selector * (acc_x_shift - acc_x) * scaling_factor; + std::get<46>(accumulator) += no_op_selector * (acc_y_shift - acc_y) * scaling_factor; - // Validate that if q_add = 1 or q_skew = 1, add1 also is 1 - // NOTE(#2222): could just get rid of add1 as a column, as it is a linear combination. 
+ // ======================================================================== + // add1 = q_add + q_skew + // ======================================================================== std::get<32>(accumulator) += (add1 - q_add - q_skew) * scaling_factor; + // ======================================================================== // ROUND TRANSITION LOGIC - // `round_transition` describes whether we are transitioning between "rounds" of the MSM according to the Straus - // algorithm. In particular, the `round` corresponds to the wNAF digit we are currently processing. - + // ======================================================================== const auto round_delta = round_shift - round; - // If `msm_transition == 0` (next row) then `round_delta` is boolean; the round is internal to a given MSM and - // represents the wNAF digit currently being processed. `round_delta == 0` means that the current and next steps of - // the Straus algorithm are processing the same wNAF digit place. - - // `round_transition == 0` if `round_delta == 0` or the next row is an MSM transition. - // if `round_transition != 0`, then `round_transition == round_delta == 1` by the following constraint. - // in particular, `round_transition` is boolean. (`round_delta` is not boolean precisely one step before an MSM - // transition, but that does not concern us here.) const auto round_transition = round_delta * (-msm_transition_shift + 1); std::get<18>(accumulator) += round_transition * (round_delta - 1) * scaling_factor; - // If `round_transition == 1`, then `round_delta == 1` and `msm_transition_shift == 0`. Therefore, we wish to - // constrain next row in the VM to either be a double (if `round != 31`) or skew (if `round == 31`). In either case, - // the point is that we have finished processing a wNAF digit place and need to either perform the doublings to move - // on to the next place _or_ we are at the last place and need to perform the skew computation to finish. 
These are - // equationally represented as: - // round_transition * skew_shift * (round - 31) = 0 (if round tx and skew, then round == 31); - // round_transition * (skew_shift + double_shift - 1) = 0 (if round tx, then skew XOR double = 1). - // (-round_delta + 1) * q_double_shift = 1 (if q_double_shift == 1, then round_transition = 1) - // together, these have the following implications: if round tx and round != 31, then double_shift = 1. - // conversely, if round tx and double_shift == 0, then `q_skew_shift == 1` (which then forces `round == 31`). - // similarly, if q_double_shift == 1, then round_transition == 0, - // the fact that a round_transition occurs at the first time skew_shift == 1 follows from the fact that skew == 1 - // implies round == 32 and the above three relations, together with the _definition_ of round_transition. std::get<19>(accumulator) += round_transition * q_skew_shift * (round - 31) * scaling_factor; std::get<20>(accumulator) += round_transition * (q_skew_shift + q_double_shift - 1) * scaling_factor; std::get<35>(accumulator) += (-round_delta + 1) * q_double_shift * scaling_factor; - // if the next is neither double nor skew, and we are not at an msm_transition, then round_delta = 0 and the next - // "row" of our VM is processing the same wNAF digit place. std::get<21>(accumulator) += round_transition * (-q_double_shift + 1) * (-q_skew_shift + 1) * scaling_factor; + // ======================================================================== // CONSTRAINING Q_DOUBLE AND Q_SKEW - // NOTE: we have already constrained q_add, q_skew, and q_double to be mutually exclusive. - - // if double, next add = 1. As q_double, q_add, and q_skew are mutually exclusive, this suffices to force - // q_double_shift == q_skew_shift == 0. 
+ // ======================================================================== std::get<22>(accumulator) += q_double * (-q_add_shift + 1) * scaling_factor; - // if the current row has q_skew == 1 and the next row is _not_ an MSM transition, then q_skew_shift = 1. - // this forces q_skew to precisely correspond to the rows where `round == 32`. Indeed, note that the first q_skew - // bit is set correctly: - // round == 31, round_transition == 1 ==> q_skew_shift == 1. (if, to the contrary, q_double_shift == 1, then - // the q_add_shift_shift == 1, but we assume that we have correctly constrained the q_adds via the multiset - // argument. this means that q_double_shift == 0, which forces q_skew_shift == 1 because round_transition - // == 1.) - // this means that the first row with `round == 32` has q_skew == 1. then all subsequent q_skew entries must be 1, - // _until_ we start our new MSM. std::get<33>(accumulator) += (-msm_transition_shift + 1) * q_skew * (-q_skew_shift + 1) * scaling_factor; - // if q_skew == 1, then round == 32. This is almost certainly redundant but psychologically useful to "constrain - // both ends". std::get<34>(accumulator) += q_skew * (-round + 32) * scaling_factor; - // UPDATING THE COUNT - - // if we are changing the `round` (i.e., starting to process a new wNAF digit or at an msm transition), the - // count_shift must be 0. + // ======================================================================== + // UPDATING THE COUNT (now sums 8 add selectors) + // ======================================================================== std::get<23>(accumulator) += round_delta * count_shift * scaling_factor; - // if msm_transition_shift = 0 and round_delta = 0, then the next "row" of the VM is processing the same wNAF digit. 
- // this means that the count must increase: count_shift = count + add1 + add2 + add3 + add4 std::get<24>(accumulator) += (-msm_transition_shift + 1) * (-round_delta + 1) * - (count_shift - count - add1 - add2 - add3 - add4) * scaling_factor; + (count_shift - count - add1 - add2 - add3 - add4 - add5 - add6 - add7 - add8) * + scaling_factor; - // at least one of the following must be true: - // the next step is an MSM transition; - // the next count is zero (meaning we are starting the processing of a new wNAF digit) - // the next step is processing the same wNAF digit (i.e., round_delta == 0) - // (note that at the start of a new MSM, the count is also zero, so the above are not mutually exclusive.) std::get<25>(accumulator) += is_not_first_row * (-msm_transition_shift + 1) * round_delta * count_shift * scaling_factor; @@ -585,42 +360,22 @@ void ECCVMMSMRelationImpl::accumulate(ContainerOverSubrelations& accumulator std::get<26>(accumulator) += msm_transition * round * scaling_factor; // if msm_transition_shift = 1, pc = pc_shift + msm_size - // NB: `ecc_set_relation` ensures `msm_size` maps to `transcript.msm_count` for the current value of `pc` std::get<27>(accumulator) += is_not_first_row * msm_transition_shift * (msm_size + pc_shift - pc) * scaling_factor; - // Addition continuity checks - // We want to RULE OUT the following scenarios: - // Case 1: add2 = 1, add1 = 0 - // Case 2: add3 = 1, add2 = 0 - // Case 3: add4 = 1, add3 = 0 - // These checks ensure that the current row does not skip points (for both ADD and SKEW ops) - // This is part of a wider set of checks we use to ensure that all point data is used in the assigned - // multiscalar multiplication operation (and not in a different MSM operation). 
+ // ======================================================================== + // Addition continuity checks (extended from 4 to 8) + // ======================================================================== std::get<28>(accumulator) += add2 * (-add1 + 1) * scaling_factor; std::get<29>(accumulator) += add3 * (-add2 + 1) * scaling_factor; std::get<30>(accumulator) += add4 * (-add3 + 1) * scaling_factor; + std::get<63>(accumulator) += add5 * (-add4 + 1) * scaling_factor; + std::get<64>(accumulator) += add6 * (-add5 + 1) * scaling_factor; + std::get<65>(accumulator) += add7 * (-add6 + 1) * scaling_factor; + std::get<66>(accumulator) += add8 * (-add7 + 1) * scaling_factor; - // Final continuity check. - // If an addition spans two rows, we need to make sure that the following scenario is RULED OUT: - // add4 = 0 on the CURRENT row, add1 = 1 on the NEXT row - // We must apply the above for the two cases: - // Case 1: q_add = 1 on the CURRENT row, q_add = 1 on the NEXT row - // Case 2: q_skew = 1 on the CURRENT row, q_skew = 1 on the NEXT row - // (i.e. 
if q_skew = 1, q_add_shift = 1 this implies an MSM transition so we skip this continuity check) + // Cross-row continuity: if add8 = 0 on current row, add1 = 0 on next row (within same phase) std::get<31>(accumulator) += - (q_add * q_add_shift + q_skew * q_skew_shift) * (-add4 + 1) * add1_shift * scaling_factor; - - // remaining checks (done in ecc_set_relation.hpp, ecc_lookup_relation.hpp) - // when transition occurs, perform set membership lookup on (accumulator / pc / msm_size) - // perform set membership lookups on add_i * (pc / round / slice_i) - // perform lookups on (pc / slice_i / x / y) - - // We look up wnaf slices by mapping round + pc -> slice - // We use an exact set membership check to validate that - // wnafs written in wnaf_relation == wnafs read in msm relation - // We use `add1/add2/add3/add4` to flag whether we are performing a wnaf read op - // We can set these to be Prover-defined as the set membership check implicitly ensures that the correct reads - // have occurred. + (q_add * q_add_shift + q_skew * q_skew_shift) * (-add8 + 1) * add1_shift * scaling_factor; } } // namespace bb From 3b393a88bfce1987b469c67abc409e3e3708ed82 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 13:57:26 +0000 Subject: [PATCH 07/24] eccvm: add boolean checks for msm_add5..8 Extend the bools relation with 4 new boolean constraints for the msm_add5 through msm_add8 columns (indices 23-26). Subrelation count: 23 -> 27. 
--- .../barretenberg/relations/ecc_vm/ecc_bools_relation.hpp | 4 ++-- .../relations/ecc_vm/ecc_bools_relation_impl.hpp | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation.hpp index d5417e3d4b4b..23959425cf09 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation.hpp @@ -23,8 +23,8 @@ template class ECCVMBoolsRelationImpl { public: using FF = FF_; - static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, }; template diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation_impl.hpp index 3182751f253c..130851af035e 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation_impl.hpp @@ -55,6 +55,10 @@ void ECCVMBoolsRelationImpl::accumulate(ContainerOverSubrelations& accumulat auto msm_add2 = View(in.msm_add2); auto msm_add3 = View(in.msm_add3); auto msm_add4 = View(in.msm_add4); + auto msm_add5 = View(in.msm_add5); + auto msm_add6 = View(in.msm_add6); + auto msm_add7 = View(in.msm_add7); + auto msm_add8 = View(in.msm_add8); std::get<0>(accumulator) += q_eq * (q_eq - 1) * scaling_factor; std::get<1>(accumulator) += q_add * (q_add - 1) * scaling_factor; @@ -79,5 +83,9 @@ void ECCVMBoolsRelationImpl::accumulate(ContainerOverSubrelations& accumulat std::get<20>(accumulator) += msm_add2 * (msm_add2 - 1) * scaling_factor; std::get<21>(accumulator) += msm_add3 * (msm_add3 - 1) * 
scaling_factor; std::get<22>(accumulator) += msm_add4 * (msm_add4 - 1) * scaling_factor; + std::get<23>(accumulator) += msm_add5 * (msm_add5 - 1) * scaling_factor; + std::get<24>(accumulator) += msm_add6 * (msm_add6 - 1) * scaling_factor; + std::get<25>(accumulator) += msm_add7 * (msm_add7 - 1) * scaling_factor; + std::get<26>(accumulator) += msm_add8 * (msm_add8 - 1) * scaling_factor; } } // namespace bb From 4e12c20c4f548b8699a433b14e3175bde1c75894 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 14:03:29 +0000 Subject: [PATCH 08/24] feat(eccvm): widen WNAF relation to 8 digits per row Update ecc_wnaf_relation to process 8 wNAF digits per precompute row (was 4), halving the number of rows per scalar from 8 to 4. Key changes: - SUBRELATION_PARTIAL_LENGTHS expanded from 23 to 35 entries - 16 two-bit range checks (was 8) for slices s1hi..s8lo - 8 wNAF conversions w0..w7 (was 4 w0..w3) - Horner accumulation uses 2^32 shift (was 2^16) for 8 digits - Round max changed from 7 to 3 (NUM_WNAF_DIGITS_PER_SCALAR/8 - 1) - Added slice-zero checks for w4..w7 (subrelations 31-34) - Updated header docstring to reflect 4-row layout --- .../relations/ecc_vm/ecc_wnaf_relation.hpp | 39 +-- .../ecc_vm/ecc_wnaf_relation_impl.hpp | 233 ++++++------------ 2 files changed, 101 insertions(+), 171 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.hpp index e7113776bea0..c339adce5b67 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.hpp @@ -11,24 +11,20 @@ namespace bb { /** * @brief ECCVMWnafRelationImpl evaluates relations that convert scalar multipliers into 4-bit WNAF slices * @details Each WNAF slice is a 4-bit slice representing one of 16 integers { -15, -13, ..., 15 } - * Each WNAF slice is represented via two 2-bit columns (precompute_s1hi, 
..., precompute_s4lo) - * One 128-bit scalar multiplier is processed across 8 rows, indexed by a round variable. + * Each WNAF slice is represented via two 2-bit columns (precompute_s1hi, ..., precompute_s8lo) + * One 128-bit scalar multiplier is processed across 4 rows (8 digits/row), indexed by a round variable. * The following table describes the structure for one scalar. * - * | point_transition | round | slices | skew | scalar_sum | - * | ---------------- | ----- | --------------- | ------ | ------------------------------- | - * | 0 | 0 | s0,s1,s2,s3 | 0 | 0 | - * | 0 | 1 | s4,s5,s6,s7 | 0 | \sum_{i=0}^4 16^i * s_{3 - i} | - * | 0 | 2 | s8,s9,s10,s11 | 0 | \sum_{i=0}^8 16^i * s_{7 - i} | - * | 0 | 3 | s12,s13,s14,s14 | 0 | \sum_{i=0}^12 16^i * s_{11 - i} | - * | 0 | 4 | s16,s17,s18,s19 | 0 | \sum_{i=0}^16 16^i * s_{15 - i} | - * | 0 | 5 | s20,s21,s22,s23 | 0 | \sum_{i=0}^20 16^i * s_{19 - i} | - * | 0 | 6 | s24,s25,s26,s27 | 0 | \sum_{i=0}^24 16^i * s_{23 - i} | - * | 1 | 7 | s28,s29,s30,s31 | s_skew | \sum_{i=0}^28 16^i * s_{27 - i} | + * | point_transition | round | slices | skew | scalar_sum | + * | ---------------- | ----- | ------------------------------- | ------ | --------------------------------- | + * | 0 | 0 | s0,s1,s2,s3,s4,s5,s6,s7 | 0 | 0 | + * | 0 | 1 | s8,s9,s10,s11,s12,s13,s14,s15 | 0 | \sum_{i=0}^7 16^i * s_{7 - i} | + * | 0 | 2 | s16,s17,s18,s19,s20,s21,s22,s23 | 0 | \sum_{i=0}^15 16^i * s_{15 - i} | + * | 1 | 3 | s24,s25,s26,s27,s28,s29,s30,s31 | s_skew | \sum_{i=0}^23 16^i * s_{23 - i} | * * The value of the input scalar is equal to the following: * - * scalar = 2^16 * scalar_sum + 2^12 * s28 + 2^8 * s29 + 2^4 * s30 + s31 - s_skew + * scalar = 2^32 * scalar_sum + 2^28*s24 + ... + s31 - s_skew * * We use a multiset equality check in `ecc_set_relation.hpp` to validate the above value maps to the correct input * scalar for a given value of `pc` (i.e., for a given non-trivial EC point). 
In other words, this constrains that the @@ -44,8 +40,21 @@ template class ECCVMWnafRelationImpl { public: using FF = FF_; - static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }; + // 35 subrelations: + // 0-7: range checks for slices 0-7 (degree 5) + // 8: scalar sum consistency (degree 5) + // 9-12: round/PC transition logic (degree 5) + // 13: skew validation (degree 5) + // 14-17: slice-zero checks for w0-w3 (degree 5) + // 18-19: round/pc zero when inactive (degree 5) + // 20: s1hi MSB positive at transitions (degree 5) + // 21: q_transition zero when inactive (degree 5) + // 22: precompute_select monotonicity (degree 5) + // 23-30: range checks for slices 8-15 (degree 5) + // 31-34: slice-zero checks for w4-w7 (degree 5) + static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + }; template static void accumulate(ContainerOverSubrelations& accumulator, diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation_impl.hpp index 4aba841aa36a..e9d325955dc5 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation_impl.hpp @@ -11,35 +11,17 @@ namespace bb { /** * @brief ECCVMWnafRelationImpl evaluates relations that convert scalar multipliers into 4-bit WNAF slices * @details Each WNAF slice is a 4-bit slice representing one of 16 integers { -15, -13, ..., 15 } - * Each WNAF slice is represented via two 2-bit columns (precompute_s1hi, ..., precompute_s4lo) - * One 128-bit scalar multiplier is processed across 8 rows, indexed by a round variable. - * The following table describes the structure for one scalar. 
+ * Each WNAF slice is represented via two 2-bit columns (precompute_s1hi, ..., precompute_s8lo) + * One 128-bit scalar multiplier is processed across 4 rows (8 digits/row), indexed by a round variable. * - * | point_transition | round | slices | skew | scalar_sum | - * | ---------------- | ----- | --------------- | ------ | ------------------------------- | - * | 0 | 0 | s0,s1,s2,s3 | 0 | 0 | - * | 0 | 1 | s4,s5,s6,s7 | 0 | \sum_{i=0}^3 16^i * s_{3 - i} | - * | 0 | 2 | s8,s9,s10,s11 | 0 | \sum_{i=0}^7 16^i * s_{7 - i} | - * | 0 | 3 | s12,s13,s14,s15 | 0 | \sum_{i=0}^11 16^i * s_{11 - i} | - * | 0 | 4 | s16,s17,s18,s19 | 0 | \sum_{i=0}^15 16^i * s_{15 - i} | - * | 0 | 5 | s20,s21,s22,s23 | 0 | \sum_{i=0}^19 16^i * s_{19 - i} | - * | 0 | 6 | s24,s25,s26,s27 | 0 | \sum_{i=0}^23 16^i * s_{23 - i} | - * | 1 | 7 | s28,s29,s30,s31 | s_skew | \sum_{i=0}^27 16^i * s_{27 - i} | + * | point_transition | round | slices | skew | scalar_sum | + * | ---------------- | ----- | ------------------------------- | ------ | --------------------------------- | + * | 0 | 0 | s0,s1,s2,s3,s4,s5,s6,s7 | 0 | 0 | + * | 0 | 1 | s8,s9,s10,s11,s12,s13,s14,s15 | 0 | \sum_{i=0}^7 16^i * s_{7 - i} | + * | 0 | 2 | s16,s17,s18,s19,s20,s21,s22,s23 | 0 | \sum_{i=0}^15 16^i * s_{15 - i} | + * | 1 | 3 | s24,s25,s26,s27,s28,s29,s30,s31 | s_skew | \sum_{i=0}^23 16^i * s_{23 - i} | * - * The value of the input scalar is equal to the following: - * - * scalar = 2^16 * scalar_sum + 2^12 * s28 + 2^8 * s29 + 2^4 * s30 + s31 - s_skew - * - * We use a multiset equality check in `ecc_set_relation.hpp` to validate the above value maps to the correct input - * scalar for a given value of `pc` (i.e., for a given non-trivial EC point). In other words, this constrains that the - * wNAF expansion is correct. Note that, from the perpsective of the Precomputed table, we only add the tuple (pc, - * round, slice) to the multiset when point_transition == 1. 
- * - * Furthermore, as the column `point_transition` is committed to by the Prover, we must constrain it is correctly - * computed (see also `ECCVMPointTableRelationImpl` for a description of what the table looks like.) - * - * @tparam FF - * @tparam AccumulatorTypes + * scalar = 2^32 * scalar_sum + 2^28*s24 + ... + s31 - s_skew */ template template @@ -56,30 +38,24 @@ void ECCVMWnafRelationImpl::accumulate(ContainerOverSubrelations& accumulato auto q_transition = View(in.precompute_point_transition); auto round = View(in.precompute_round); auto round_shift = View(in.precompute_round_shift); - auto pc = View(in.precompute_pc); // note that this is a _point-counter_. + auto pc = View(in.precompute_pc); auto pc_shift = View(in.precompute_pc_shift); - // precompute_select is a boolean column that is 0 at the initial row and 1 at all subsequent active rows in the - // precompute table. We only evaluate the ecc_wnaf_relation if `precompute_select=1`. As a reminder, this latter is - // 0 at the initial row and then 1 at the rest of the (active) rows of the Precomputed table. The fact that - // `precompute_select` is correctly computed is mediated by the set relation. 
auto precompute_select = View(in.precompute_select); - auto precompute_select_shift = View(in.precompute_select_shift); - const auto& precompute_skew = View(in.precompute_skew); - const std::array slices{ + // 16 two-bit slices encoding 8 wNAF digits + const std::array slices{ View(in.precompute_s1hi), View(in.precompute_s1lo), View(in.precompute_s2hi), View(in.precompute_s2lo), View(in.precompute_s3hi), View(in.precompute_s3lo), View(in.precompute_s4hi), View(in.precompute_s4lo), + View(in.precompute_s5hi), View(in.precompute_s5lo), View(in.precompute_s6hi), View(in.precompute_s6lo), + View(in.precompute_s7hi), View(in.precompute_s7lo), View(in.precompute_s8hi), View(in.precompute_s8lo), }; const auto range_constraint_slice_to_2_bits = [&scaling_factor](const View& s, auto& acc) { acc += ((s - 1).sqr() - 1) * ((s - 2).sqr() - 1) * scaling_factor; }; - // given two 2-bit numbers `hi`, `lo`, convert to a wNAF digit (in {-15, -13, ..., 13, 15}) via the formula: - // `2(4*hi + lo) - 15`. (Here, `4*hi + lo` represents the 4-bit number corresponding to the concatenation of `hi` - // and `lo`.) const auto convert_to_wnaf = [](const View& hi, const View& lo) { auto t = hi + hi; t += t; @@ -89,16 +65,10 @@ void ECCVMWnafRelationImpl::accumulate(ContainerOverSubrelations& accumulato }; const auto scaled_transition = q_transition * scaling_factor; - const auto scaled_transition_is_zero = - -scaled_transition + scaling_factor; // `scaling_factor * (1 - q_transition)`, i.e., is the scaling_factor if we - // are _not_ at a transition, else 0. + const auto scaled_transition_is_zero = -scaled_transition + scaling_factor; + const auto scaled_lagrange_first = scaling_factor * lagrange_first; - const auto scaled_lagrange_first = scaling_factor * lagrange_first; // for edge-case handling - /** - * @brief Constrain each of our scalar slice chunks (s1, ..., s8) to be 2 bits. 
- * Doing range checks this way vs permutation-based range check removes need to create sorted list + grand product - * polynomial. Probably cheaper even if we have to split each 4-bit WNAF slice into 2-bit chunks. - */ + // Range-check all 16 two-bit slices range_constraint_slice_to_2_bits(slices[0], std::get<0>(accumulator)); range_constraint_slice_to_2_bits(slices[1], std::get<1>(accumulator)); range_constraint_slice_to_2_bits(slices[2], std::get<2>(accumulator)); @@ -107,45 +77,35 @@ void ECCVMWnafRelationImpl::accumulate(ContainerOverSubrelations& accumulato range_constraint_slice_to_2_bits(slices[5], std::get<5>(accumulator)); range_constraint_slice_to_2_bits(slices[6], std::get<6>(accumulator)); range_constraint_slice_to_2_bits(slices[7], std::get<7>(accumulator)); - - /** - * @brief If we are processing a new scalar (q_transition = 1), validate that the first slice is positive. - * This requires us to validate slice1 is in the range [8, ... 15]. - * (when converted into wnaf form this maps to the range [1, 3, ..., 15]). - * We do this to ensure the final scalar sum is positive. 
- * We already know slice1 is in the range [0, ..., 15] - * To check the range [8, ..., 15] we validate the most significant 2 bits (s1) are >=2 - */ + range_constraint_slice_to_2_bits(slices[8], std::get<23>(accumulator)); + range_constraint_slice_to_2_bits(slices[9], std::get<24>(accumulator)); + range_constraint_slice_to_2_bits(slices[10], std::get<25>(accumulator)); + range_constraint_slice_to_2_bits(slices[11], std::get<26>(accumulator)); + range_constraint_slice_to_2_bits(slices[12], std::get<27>(accumulator)); + range_constraint_slice_to_2_bits(slices[13], std::get<28>(accumulator)); + range_constraint_slice_to_2_bits(slices[14], std::get<29>(accumulator)); + range_constraint_slice_to_2_bits(slices[15], std::get<30>(accumulator)); + + // Validate first slice is positive at transitions const auto s1hi_shift = View(in.precompute_s1hi_shift); const auto s1hi_shift_msb_set = (s1hi_shift - 2) * (s1hi_shift - 3); const auto scaled_transition_plus_lagrange_first = scaled_transition + scaled_lagrange_first; - // away from row zero, add `scaled_transition * precompute_select_shift * s1hi_shift_msb_set`. however, - // `q_transition[0] == 0`, so this constraint will not turn on at the 0th row unless we add - // `scaled_lagrange_first`. std::get<20>(accumulator) += scaled_transition_plus_lagrange_first * precompute_select_shift * s1hi_shift_msb_set; - /** - * @brief Convert each pair of 2-bit scalar slices into a 4-bit windowed-non-adjacent-form slice. - * Conversion from binary -> wnaf = 2 * binary - 15. - * Converts a value in [0, ..., 15] into [-15, -13, -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11 , 13, 15]. - * We use WNAF representation to avoid case where we are conditionally adding a point in our MSM algo. 
- */ + + // Convert 16 two-bit slices into 8 wNAF digits const auto w0 = convert_to_wnaf(slices[0], slices[1]); const auto w1 = convert_to_wnaf(slices[2], slices[3]); const auto w2 = convert_to_wnaf(slices[4], slices[5]); const auto w3 = convert_to_wnaf(slices[6], slices[7]); - - /** - * @brief Slice consistency check. - * We require that `scalar_sum` on the next row correctly accumulates the 4 WNAF slices present on the current row - * (i.e. 16 WNAF bits). - * i.e. next_scalar_sum - 2^{16} * current_scalar_sum - 2^12 * w_0 - 2^8 * w_1 - 2^4 * w_2 - w_3 = 0 - * @note We only perform slice_consistency check when next row is processing the same scalar as the current row! - * i.e. when q_transition = 0 - * Note(@zac-williamson): improve WNAF use (#2224) - */ - auto row_slice = w0; // row_slice will eventually contain the truncated scalar corresponding to the current row, - // which is 2^12 * w_0 + 2^8 * w_1 + 2^4 * w_2 + w_3. (If one just looks at the wNAF digits in - // this row, this is the resulting odd number. Note that it is not necessarily positive.) + const auto w4 = convert_to_wnaf(slices[8], slices[9]); + const auto w5 = convert_to_wnaf(slices[10], slices[11]); + const auto w6 = convert_to_wnaf(slices[12], slices[13]); + const auto w7 = convert_to_wnaf(slices[14], slices[15]); + + // Scalar sum consistency: accumulate 8 wNAF digits via Horner's method + // row_slice = 2^28*w0 + 2^24*w1 + ... + 2^4*w6 + w7 + auto row_slice = w0; + // Multiply by 16 (shift left 4 bits), then add next digit. Repeat 7 times. 
row_slice += row_slice; row_slice += row_slice; row_slice += row_slice; @@ -161,107 +121,68 @@ void ECCVMWnafRelationImpl::accumulate(ContainerOverSubrelations& accumulato row_slice += row_slice; row_slice += row_slice; row_slice += w3; - auto sum_delta = scalar_sum * FF(1ULL << 16) + row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += w4; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += w5; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += w6; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += w7; + // Shift by 2^32 (8 digits * 4 bits each) + auto sum_delta = scalar_sum * FF(1ULL << 32) + row_slice; const auto check_sum = scalar_sum_shift - sum_delta; std::get<8>(accumulator) += precompute_select * check_sum * scaled_transition_is_zero; - // We must constrain `precompute_select` to be of the correct shape: 0 1 1 ... 1 0 ...0. In other words, after the - // first row, it is monotonically non-decreasing. In other words, a malicious prover cannot inject the value '0' in - // the middle. - const auto scaled_lagrange_first_minus_one = - scaled_lagrange_first - scaling_factor; // (if not at the first row, is -1, else 0) * scaling_factor + + // precompute_select monotonicity + const auto scaled_lagrange_first_minus_one = scaled_lagrange_first - scaling_factor; const auto precompute_select_check = precompute_select_shift * (precompute_select - 1); std::get<22>(accumulator) += scaled_lagrange_first_minus_one * precompute_select_check; - /** - * @brief Transition logic with `round` and `q_transition`. - * Goal: `round` is an integer in [0, ... 7] that tracks how many slices we have processed for a given scalar. - * i.e., the number of 4-bit WNAF slices processed = round * 4. 
- * We must ensure that `q_transition` is well-formed and that `round` is correctly constrained. Recall that `pc` - * stands for point-counter. - * - * For the former, we force the following: - * 1. When `q_transition == 1`, then `scalar_sum_shift == 0`, `round_shift == 0`, `round == 7`, and `pc_shift - * == pc - 1`. - * 2. When `q_transition == 0`, then `round_shift - round == 1` and `pc_shift == pc` - * - * For the latter: note that we don't actually range-constrain `round` (expensive if we don't need to!). We - * nonetheless can correctly constrain `round`, because of the multiset checks. There are two multiset equality - * checks that we perform that implicate the wNAF relation: - * 1. (pc, msm_round, wnaf_slice) - * 2. (pc, P.x, P.y, scalar-multiplier) - * The first is used to communicate with the MSM table, to validate that the slice * point values the MSM tables use - * are indeed what we have precomputed. The second facilitates communication with the Transcript table, to ensure - * that the wNAF expansion of the scalar is indeed correct. Moreover, the second is only "sent" to the multiset when - * `q_transition == 1`. (It is helpful to recall that `pc` is monotonic: one per each point involved in a - * non-trivial scalar multiplication.) - * - * Here is the logic. We must ensure that `round` can never be set to a value > 7. If this were possible at row `i`, - * then `q_transition == 0` for all subsequent rows by the incrementing logic. There are (at least) two problems. - * - * 1. The implicit MSM round (accounted for in (1)) is between `4 * round` and `4 * round + 3` (in fact `4 * - * round + 4` iff we are at a skew). As the `round` must increment, this means that the `msm_round` will be - * larger than 32, which can't happen due to the internal constraints in the MSM table. In particular, the multiset - * equality check will fail, as the MSM tables can never send an entry with a round larger than 32. - * - * 2. 
This forces `precompute_pc` to be constant from here on out. This will violate the multiset equalities both - * of terms (1) _and_ (2). For the former, we will write too many entries with the given `pc`. (However, we've - * already shown how this multset equality fails due to `round`.) More importantly, for the latter, we will _never_ - * "send" the tuple (pc, P.x, P.x, scalar-multiplier) to the multiset, for this value of `pc` and all potentially - * subsequent values. We explicate this latter failure. The transcript table will certainly fill _some_ values in - * for (pc, P.x, P.y, scalar-multipler) (at least with correct pc and scalar-multiplier values), which will cause - * the multiset equality check to fail. - * - * As always, we are relying on the monotonicity of the `pc` in these arguments. - * - */ - // We combine two checks into a single relation - // q_transition * (round - 7) + (-q_transition + 1) * (round_shift - round - 1) - // => q_transition * (round - 7 - round_shift + round + 1) + (round_shift - round - 1) - // => q_transition * (2 * round - round_shift - 6) + (round_shift - round - 1) + // Round transition logic: round now goes 0-3 (was 0-7) + // Combined check: q_transition * (round - 3) + (-q_transition + 1) * (round_shift - round - 1) const auto round_check = round_shift - round - 1; - // This selector is 1 at row 0 (via lagrange_first) and at transition rows where precompute_select == 1. - // It's used to constrain shifted values (like round_shift, scalar_sum_shift) that need to be checked - // both at the first active row AND at subsequent transitions between scalars. 
const auto precompute_select_transition_plus_lagrange_first = precompute_select * scaled_transition + scaled_lagrange_first; + // The old round max was NUM_WNAF_DIGITS_PER_SCALAR / 4 - 1 = 7 (4 digits per row); now it's + // (NUM_WNAF_DIGITS_PER_SCALAR / WNAF_DIGITS_PER_ROW) - 1 = 3 + constexpr size_t MAX_ROUND = 3; // 32/8 - 1 std::get<9>(accumulator) += - precompute_select * (scaled_transition * (round - round_check - 7) + scaling_factor * round_check); - // At a transition (or at row 0 via lagrange_first), the next round must be 0. + precompute_select * + (scaled_transition * (round - round_check - static_cast(MAX_ROUND)) + scaling_factor * round_check); std::get<10>(accumulator) += precompute_select_transition_plus_lagrange_first * round_shift; - /** - * @brief Scalar transition/PC checks. - * 1: if q_transition = 1 or if lagrange_first = 1, scalar_sum_new = 0. (note that q_transition[0] == 0.) - * 2: if q_transition = 0, pc at next row = pc at current row - * 3: if q_transition = 1, pc at next row = pc at current row - 1 (decrements by 1) - * (we combine 2 and 3 into a single relation) - */ + // Scalar transition / PC checks std::get<11>(accumulator) += precompute_select_transition_plus_lagrange_first * scalar_sum_shift; - // (2, 3 combined): q_transition * (pc - pc_shift - 1) + (-q_transition + 1) * (pc_shift - pc) - // => q_transition * (-2 * (pc_shift - pc) - 1) + (pc_shift - pc) const auto pc_delta = pc_shift - pc; std::get<12>(accumulator) += precompute_select * (scaled_transition * ((-pc_delta - pc_delta - 1)) + pc_delta * scaling_factor); - /** - * @brief Validate skew is 0 or 7 - * 7 is the wnaf representation of -1. - * We have one skew variable per scalar multiplier. We can only represent odd integers in WNAF form. - * If input scalar is even, we must subtract 1 from WNAF scalar sum to get actual value (i.e. where skew = 7) - * We use skew in two places.
- * 1: when validating sum of wnaf slices matches input scalar (we add skew to scalar_sum in ecc_set_relation) - * 2: in ecc_msm_relation. Final MSM round uses skew to conditionally subtract a point from the accumulator - */ + // Validate skew is 0 or 7 std::get<13>(accumulator) += precompute_select * (precompute_skew * (precompute_skew - 7)) * scaling_factor; - // Set slices (a.k.a. compressed digits), pc, and round all to zero when `precompute_select == 0`. - // (this is for one of the multiset equality checks.) Defensively, we also set precompute_point_transition to 0 when - // precompute_select == 0. + // Set slices, pc, round, q_transition to zero when precompute_select == 0 const auto precompute_select_zero = (-precompute_select + 1) * scaling_factor; std::get<14>(accumulator) += precompute_select_zero * (w0 + 15); std::get<15>(accumulator) += precompute_select_zero * (w1 + 15); std::get<16>(accumulator) += precompute_select_zero * (w2 + 15); std::get<17>(accumulator) += precompute_select_zero * (w3 + 15); + std::get<31>(accumulator) += precompute_select_zero * (w4 + 15); + std::get<32>(accumulator) += precompute_select_zero * (w5 + 15); + std::get<33>(accumulator) += precompute_select_zero * (w6 + 15); + std::get<34>(accumulator) += precompute_select_zero * (w7 + 15); std::get<18>(accumulator) += precompute_select_zero * round; std::get<19>(accumulator) += precompute_select_zero * pc; From f797494dde18c5d735eceb085fb66b771d5c5ab0 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 14:08:17 +0000 Subject: [PATCH 09/24] feat(eccvm): widen set relation to 8 slice fingerprints and 8 add-gated tuples Update ECCVMSetRelation for 8-wide precompute and MSM tables: Numerator changes: - 8 slice fingerprints instead of 4, with round encoding 8*round+j - Scalar reconstruction uses 8 wNAF digits with 2^32 shift (was 4, 2^16) - Skew tuple uses round offset 8 (was 4) - eccvm_set_permutation_delta comment updated for 8-term product Denominator changes: - 8 
add-gated (pc, round, slice) tuples instead of 4 - PC offsets 0..7 (was 0..3) for msm_add1..msm_add8 SUBRELATION_PARTIAL_LENGTHS updated to {29, 3} (was {22, 3}) to accommodate the higher degree from the 8-wide grand product. --- .../relations/ecc_vm/ecc_set_relation.hpp | 2 +- .../ecc_vm/ecc_set_relation_impl.hpp | 197 ++++++++++++++---- 2 files changed, 161 insertions(+), 38 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation.hpp index c0f18ac081b8..7eb63c879be4 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation.hpp @@ -27,7 +27,7 @@ template class ECCVMSetRelationImpl { static constexpr uint64_t THIRD_TERM_TAG = 3; // (pc, P.x, P.y, msm_size) static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ - 22, // grand product construction sub-relation + 29, // grand product construction sub-relation (8-wide: denom degree 28 + 1) 3 // left-shiftable polynomial sub-relation }; // prover optimization to allow for skipping the computation of sub-relations at certain points in sumcheck. diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp index 369b963f2e17..a9c3ccdfea0e 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp @@ -29,7 +29,7 @@ namespace bb { * @note This ensures the following: * * every WNAF slice computed during scalar decomposition must be used exactly once during the MSM computation. * @warning There is a subtlety in this table, which slightly complicates the abstraction of multiset-equality testing. 
- * On the denominator side, when `addX == 0` for all `X ∈ {1, 2, 3, 4}` (automatically forced by `add1 == 0`), we + * On the denominator side, when `addX == 0` for all `X ∈ {1, ..., 8}` (automatically forced by `add1 == 0`), we * multiply by 1. On the numerator side, to balance this out, this means that when `precompute_select == 0`, we must * multiply by an additional `eccvm_set_permutation_delta`, which is the _inverse_ of the fingerprint of the tuple `(0, * 0, 0)`. (This corresponds to "removing" the tuple `(0, 0, 0)` from the left multiset when `precompute_select == 0`). @@ -94,14 +94,15 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE * part of ECCVMWnafRelation. * * @details - * There are 4 tuple entries per row of the Precompute table. Moreover, the element that "increments" is - * 4 * `precompute_round`, due to the fact that the Precompute columns contain four "digits"/slices per row. + * There are 8 tuple entries per row of the Precompute table. Moreover, the element that "increments" is + * 8 * `precompute_round`, due to the fact that the Precompute columns contain eight "digits"/slices per row. * * @note * We only add this tuple if `precompute_select == 1`. Otherwise, we add a the tuple (0, 0, 0). 
*/ - // OPTIMIZE(@zac-williamson #2226) optimize degrees + // precompute_round8 = 8 * precompute_round (each row holds 8 digits) + const auto precompute_round8 = precompute_round4 + precompute_round4; Accumulator numerator(1); // degree-0 { @@ -113,7 +114,7 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE wnaf_slice += s1; const auto wnaf_slice_input0 = - wnaf_slice + gamma + precompute_pc * beta + precompute_round4 * beta_sqr + first_term_tag; + wnaf_slice + gamma + precompute_pc * beta + precompute_round8 * beta_sqr + first_term_tag; numerator *= wnaf_slice_input0; // degree-1 } { @@ -125,7 +126,7 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE wnaf_slice += s1; const auto wnaf_slice_input1 = - wnaf_slice + gamma + precompute_pc * beta + (precompute_round4 + 1) * beta_sqr + first_term_tag; + wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 1) * beta_sqr + first_term_tag; numerator *= wnaf_slice_input1; // degree-2 } { @@ -137,7 +138,7 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE wnaf_slice += s1; const auto wnaf_slice_input2 = - wnaf_slice + gamma + precompute_pc * beta + (precompute_round4 + 2) * beta_sqr + first_term_tag; + wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 2) * beta_sqr + first_term_tag; numerator *= wnaf_slice_input2; // degree-3 } { @@ -147,27 +148,76 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE auto wnaf_slice = s0 + s0; wnaf_slice += wnaf_slice; wnaf_slice += s1; + const auto wnaf_slice_input3 = - wnaf_slice + gamma + precompute_pc * beta + (precompute_round4 + 3) * beta_sqr + first_term_tag; + wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 3) * beta_sqr + first_term_tag; numerator *= wnaf_slice_input3; // degree-4 } + { + const auto& s0 = View(in.precompute_s5hi); + const auto& s1 = View(in.precompute_s5lo); + + auto wnaf_slice = s0 + s0; + wnaf_slice += 
wnaf_slice; + wnaf_slice += s1; + + const auto wnaf_slice_input4 = + wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 4) * beta_sqr + first_term_tag; + numerator *= wnaf_slice_input4; // degree-5 + } + { + const auto& s0 = View(in.precompute_s6hi); + const auto& s1 = View(in.precompute_s6lo); + + auto wnaf_slice = s0 + s0; + wnaf_slice += wnaf_slice; + wnaf_slice += s1; + + const auto wnaf_slice_input5 = + wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 5) * beta_sqr + first_term_tag; + numerator *= wnaf_slice_input5; // degree-6 + } + { + const auto& s0 = View(in.precompute_s7hi); + const auto& s1 = View(in.precompute_s7lo); + + auto wnaf_slice = s0 + s0; + wnaf_slice += wnaf_slice; + wnaf_slice += s1; + + const auto wnaf_slice_input6 = + wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 6) * beta_sqr + first_term_tag; + numerator *= wnaf_slice_input6; // degree-7 + } + { + const auto& s0 = View(in.precompute_s8hi); + const auto& s1 = View(in.precompute_s8lo); + + auto wnaf_slice = s0 + s0; + wnaf_slice += wnaf_slice; + wnaf_slice += s1; + + const auto wnaf_slice_input7 = + wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 7) * beta_sqr + first_term_tag; + numerator *= wnaf_slice_input7; // degree-8 + } { // skew product if relevant const auto& skew = View(in.precompute_skew); const auto& precompute_point_transition = View(in.precompute_point_transition); const auto skew_input = precompute_point_transition * (skew + gamma + precompute_pc * beta + - (precompute_round4 + 4) * beta_sqr + first_term_tag) + + (precompute_round8 + 8) * beta_sqr + first_term_tag) + (-precompute_point_transition + 1); - numerator *= skew_input; // degree-6 + numerator *= skew_input; // degree-10 } { // in `EccvmProver` and `ECCVMVerifier`, we see that `eccvm_set_permutation_delta` is initially computed as - // (γ+t·β⁴)·(γ+β²+t·β⁴)·(γ+2β²+t·β⁴)·(γ+3β²+t·β⁴) (where t = FIRST_TERM_TAG) and _then_ inverted. 
+ // the product of 8 terms (γ+j·β²+t·β⁴) for j=0..7 (where t = FIRST_TERM_TAG) and _then_ inverted. const auto& eccvm_set_permutation_delta = params.eccvm_set_permutation_delta; // if `precompute_select == 1`, don't change the numerator. if it is 0, then to get the grand product argument // to work (as we have zero-padded the rows of the MSM table), we must multiply by the inverse of the // fingerprint of (0, 0, 0). - numerator *= precompute_select * (-eccvm_set_permutation_delta + 1) + eccvm_set_permutation_delta; // degree-7 + numerator *= precompute_select * (-eccvm_set_permutation_delta + 1) + eccvm_set_permutation_delta; // degree-11 } /** @@ -205,7 +255,12 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE const auto w1 = convert_to_wnaf(View(in.precompute_s2hi), View(in.precompute_s2lo)); const auto w2 = convert_to_wnaf(View(in.precompute_s3hi), View(in.precompute_s3lo)); const auto w3 = convert_to_wnaf(View(in.precompute_s4hi), View(in.precompute_s4lo)); + const auto w4 = convert_to_wnaf(View(in.precompute_s5hi), View(in.precompute_s5lo)); + const auto w5 = convert_to_wnaf(View(in.precompute_s6hi), View(in.precompute_s6lo)); + const auto w6 = convert_to_wnaf(View(in.precompute_s7hi), View(in.precompute_s7lo)); + const auto w7 = convert_to_wnaf(View(in.precompute_s8hi), View(in.precompute_s8lo)); + // row_slice = 2^28*w0 + 2^24*w1 + ... 
+ 2^4*w6 + w7 (Horner for 8 digits) auto row_slice = w0; row_slice += row_slice; row_slice += row_slice; @@ -221,26 +276,62 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE row_slice += row_slice; row_slice += row_slice; row_slice += row_slice; - row_slice += w3; // row_slice = 2^12 w_0 + 2^8 w_1 + 2^4 w_2 + 2^0 w_3 + row_slice += w3; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += w4; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += w5; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += w6; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += row_slice; + row_slice += w7; + // scalar_sum_full = 2^32 * wnaf_scalar_sum + row_slice + adjusted_skew auto scalar_sum_full = wnaf_scalar_sum + wnaf_scalar_sum; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += scalar_sum_full; - scalar_sum_full += - row_slice + adjusted_skew; // scalar_sum_full = 2^16 * wnaf_scalar_sum + row_slice + adjusted_skew + scalar_sum_full += scalar_sum_full; // 4x + scalar_sum_full += scalar_sum_full; // 8x + scalar_sum_full += scalar_sum_full; // 16x + scalar_sum_full += scalar_sum_full; // 32x + scalar_sum_full += scalar_sum_full; // 64x + scalar_sum_full += scalar_sum_full; // 128x + scalar_sum_full += scalar_sum_full; // 256x 
+ scalar_sum_full += scalar_sum_full; // 512x + scalar_sum_full += scalar_sum_full; // 1024x + scalar_sum_full += scalar_sum_full; // 2048x + scalar_sum_full += scalar_sum_full; // 4096x + scalar_sum_full += scalar_sum_full; // 8192x + scalar_sum_full += scalar_sum_full; // 16384x + scalar_sum_full += scalar_sum_full; // 32768x + scalar_sum_full += scalar_sum_full; // 65536x + scalar_sum_full += scalar_sum_full; // 2^17 + scalar_sum_full += scalar_sum_full; // 2^18 + scalar_sum_full += scalar_sum_full; // 2^19 + scalar_sum_full += scalar_sum_full; // 2^20 + scalar_sum_full += scalar_sum_full; // 2^21 + scalar_sum_full += scalar_sum_full; // 2^22 + scalar_sum_full += scalar_sum_full; // 2^23 + scalar_sum_full += scalar_sum_full; // 2^24 + scalar_sum_full += scalar_sum_full; // 2^25 + scalar_sum_full += scalar_sum_full; // 2^26 + scalar_sum_full += scalar_sum_full; // 2^27 + scalar_sum_full += scalar_sum_full; // 2^28 + scalar_sum_full += scalar_sum_full; // 2^29 + scalar_sum_full += scalar_sum_full; // 2^30 + scalar_sum_full += scalar_sum_full; // 2^31 + scalar_sum_full += scalar_sum_full; // 2^32 + scalar_sum_full += row_slice + adjusted_skew; auto precompute_point_transition = View(in.precompute_point_transition); @@ -306,7 +397,7 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al { using View = typename Accumulator::View; - // OPTIMIZE(@zac-williamson). The degree of this contribution is 17! makes overall relation degree 19. + // OPTIMIZE(@zac-williamson). The degree of the denominator is 28, making overall relation degree ~29. // Can potentially optimize by refining the algebra. const auto& gamma = params.gamma; const auto& beta = params.beta; @@ -324,7 +415,7 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al /** * @brief First term: tuple of (pc, round, wnaf_slice), used to determine which points we extract from lookup tables - * when evaluaing MSMs in ECCVMMsmRelation. 
+ * when evaluating MSMs in ECCVMMsmRelation. * These values must be equivalent to the values computed in the 1st term of `compute_grand_product_numerator` */ Accumulator denominator(1); // degree-0 @@ -363,6 +454,38 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al (-add4 + 1); denominator *= wnaf_slice_output4; // degree-8 } + { + const auto& add5 = View(in.msm_add5); + const auto& msm_slice5 = View(in.msm_slice5); + auto wnaf_slice_output5 = + add5 * (msm_slice5 + gamma + (msm_pc - msm_count - 4) * beta + msm_round * beta_sqr + first_term_tag) + + (-add5 + 1); + denominator *= wnaf_slice_output5; // degree-10 + } + { + const auto& add6 = View(in.msm_add6); + const auto& msm_slice6 = View(in.msm_slice6); + auto wnaf_slice_output6 = + add6 * (msm_slice6 + gamma + (msm_pc - msm_count - 5) * beta + msm_round * beta_sqr + first_term_tag) + + (-add6 + 1); + denominator *= wnaf_slice_output6; // degree-12 + } + { + const auto& add7 = View(in.msm_add7); + const auto& msm_slice7 = View(in.msm_slice7); + auto wnaf_slice_output7 = + add7 * (msm_slice7 + gamma + (msm_pc - msm_count - 6) * beta + msm_round * beta_sqr + first_term_tag) + + (-add7 + 1); + denominator *= wnaf_slice_output7; // degree-14 + } + { + const auto& add8 = View(in.msm_add8); + const auto& msm_slice8 = View(in.msm_slice8); + auto wnaf_slice_output8 = + add8 * (msm_slice8 + gamma + (msm_pc - msm_count - 7) * beta + msm_round * beta_sqr + first_term_tag) + + (-add8 + 1); + denominator *= wnaf_slice_output8; // degree-16 + } /** * @brief Second term: tuple of the form `(transcript_pc, transcript_Px, transcript_Py, z1)` OR `(transcript_pc, @@ -419,7 +542,7 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al // point_table_init_write = degree 7 auto point_table_init_write = transcript_mul * transcript_product + (-transcript_mul + 1); - denominator *= point_table_init_write; // degree 17 + denominator *= point_table_init_write; // degree-25 } /** * 
@brief Third term: tuple of (pc, P.x, P.y, msm-size) from ECCVMTranscriptRelation. @@ -454,7 +577,7 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al auto msm_result_read = transcript_pc_shift + transcript_msm_x * beta + transcript_msm_y * beta_sqr + full_msm_count * beta_cube + third_term_tag; msm_result_read = transcript_msm_transition * (msm_result_read + gamma) + (-transcript_msm_transition + 1); - denominator *= msm_result_read; // degree-20 + denominator *= msm_result_read; // degree-28 } return denominator; } @@ -480,10 +603,10 @@ void ECCVMSetRelationImpl::accumulate(ContainerOverSubrelations& accumulator using View = typename Accumulator::View; using ShortView = typename std::tuple_element_t<1, ContainerOverSubrelations>::View; - // degree-11 + // degree-15 (8 slices + skew + delta + second term + third term) Accumulator numerator_evaluation = compute_grand_product_numerator(in, params); - // degree-20 + // degree-27 (8 add-gated tuples + second term + third term) Accumulator denominator_evaluation = compute_grand_product_denominator(in, params); const auto& lagrange_first = View(in.lagrange_first); @@ -494,7 +617,7 @@ void ECCVMSetRelationImpl::accumulate(ContainerOverSubrelations& accumulator const auto& z_perm_shift = View(in.z_perm_shift); const auto& z_perm_shift_short = ShortView(in.z_perm_shift); - // degree-21 + // degree-28 std::get<0>(accumulator) += ((z_perm + lagrange_first) * numerator_evaluation - (z_perm_shift + lagrange_last) * denominator_evaluation) * scaling_factor; From fed78c736860c2f40a9dfa177ba4a8565b854c03 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 14:18:42 +0000 Subject: [PATCH 10/24] feat(eccvm): widen lookup relation to 8 reads and 4 table terms Update ECCVMLookupRelation for 8-wide MSM and 2 precomputed points per precompute row: - NUM_LOOKUP_TERMS: 4 -> 8 (msm_add1..msm_add8 gated reads) - NUM_TABLE_TERMS: 2 -> 4 (positive/negative for each of 2 points) - LENGTH: 9 -> 15 Table 
term structure (4 terms covering all 16 slice values): - table_index 0: point 1 positive, slice = 15 - 2*round -> {15,13,11,9} - table_index 1: point 1 negative, slice = 2*round -> {0,2,4,6} - table_index 2: point 2 positive, slice = 14 - 2*round -> {14,12,10,8} - table_index 3: point 2 negative, slice = 2*round + 1 -> {1,3,5,7} Lookup read counts expanded from 2 to 4 columns (lookup_read_counts_0..3) to match the 4 table terms. --- .../relations/ecc_vm/ecc_lookup_relation.hpp | 164 ++++++++++-------- 1 file changed, 91 insertions(+), 73 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation.hpp index aa1438379f6b..b0d299cd1d9d 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation.hpp @@ -17,10 +17,10 @@ namespace bb { template class ECCVMLookupRelationImpl { public: using FF = FF_; - static constexpr size_t NUM_LOOKUP_TERMS = 4; - static constexpr size_t NUM_TABLE_TERMS = 2; + static constexpr size_t NUM_LOOKUP_TERMS = 8; + static constexpr size_t NUM_TABLE_TERMS = 4; // 1 + polynomial degree of this relation - static constexpr size_t LENGTH = NUM_LOOKUP_TERMS + NUM_TABLE_TERMS + 3; // 9 + static constexpr size_t LENGTH = NUM_LOOKUP_TERMS + NUM_TABLE_TERMS + 3; // 15 static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ LENGTH, // grand product construction sub-relation @@ -65,6 +65,12 @@ template class ECCVMLookupRelationImpl { if constexpr (index == 1) { return Accumulator(View(in.lookup_read_counts_1)); } + if constexpr (index == 2) { + return Accumulator(View(in.lookup_read_counts_2)); + } + if constexpr (index == 3) { + return Accumulator(View(in.lookup_read_counts_3)); + } return Accumulator(1); } @@ -86,6 +92,18 @@ template class ECCVMLookupRelationImpl { if constexpr (lookup_index == 3) { return Accumulator(View(in.msm_add4)); 
} + if constexpr (lookup_index == 4) { + return Accumulator(View(in.msm_add5)); + } + if constexpr (lookup_index == 5) { + return Accumulator(View(in.msm_add6)); + } + if constexpr (lookup_index == 6) { + return Accumulator(View(in.msm_add7)); + } + if constexpr (lookup_index == 7) { + return Accumulator(View(in.msm_add8)); + } return Accumulator(1); } @@ -95,9 +113,12 @@ template class ECCVMLookupRelationImpl { using View = typename Accumulator::View; // anytime `precompute_select` is on, we "turn on" the table predicate. This concretely means that the sP, where // s is a WNAF slice and P is the point being processed, are "written" to the lookup table, i.e., may be - // read/looked up later. `table_index == 0` corresponds to positive WNAF entries, `table_index == 1` corresponds - // to negative WNAF entries. - if constexpr (table_index == 0 || table_index == 1) { + // read/looked up later. + // table_index 0: point 1 positive WNAF entries + // table_index 1: point 1 negative WNAF entries + // table_index 2: point 2 positive WNAF entries + // table_index 3: point 2 negative WNAF entries + if constexpr (table_index < NUM_TABLE_TERMS) { return Accumulator(View(in.precompute_select)); } return Accumulator(1); @@ -105,6 +126,19 @@ template class ECCVMLookupRelationImpl { /** * @brief Returns the fingerprint of `(precompute_pc, compressed_slice, (2 * compressed_slice - 15)[P])`, where [P] * is the point corresponding to `precompute_pc` and `compressed_slice`∈{0, ..., 15}. + * + * @details With 2 points per precompute row (tx/ty and tx2/ty2), we have 4 table terms: + * table_index 0: point 1 positive — slice = 15 - 2*round, covers {15,13,11,9} + * table_index 1: point 1 negative — slice = 2*round, covers {0,2,4,6} + * table_index 2: point 2 positive — slice = 14 - 2*round, covers {14,12,10,8} + * table_index 3: point 2 negative — slice = 2*round + 1, covers {1,3,5,7} + * + * Together these cover all 16 slice values {0, ..., 15}. 
+ * + * Point 1 (tx, ty) at row round = table[15 - 2*round]: + * round 0: 15P, round 1: 11P, round 2: 7P, round 3: 3P + * Point 2 (tx2, ty2) at row round = table[14 - 2*round]: + * round 0: 13P, round 1: 9P, round 2: 5P, round 3: P */ template static Accumulator compute_table_term(const AllEntities& in, const Parameters& params) @@ -112,58 +146,41 @@ template class ECCVMLookupRelationImpl { using View = typename Accumulator::View; static_assert(table_index < NUM_TABLE_TERMS); - // table_index == 0 means our wNAF digit is positive (i.e., ∈{1, 3..., 15}). - // table_index == 1 means our wNAF digit is negative (i.e., ∈{-15, -13..., -1}) - - // round starts at 0 and increments to 7 - // point starts at 15[P] and decrements to [P] - // a slice value of 0 maps to -15[P] - - // we have computed `(15 - 2 * round)[P] =: (precompute_tx, precompute_ty)`. - // `round`∈{0, 1..., 7} - // if table_index == 0, we want to write (pc, 15 - 2 * round, precompute_tx, precompute_ty) - // if table_index == 1, we want to write (pc, round, precompute_tx, -precompute_ty) - // to sum up, both: - // (pc, round, precompute_tx, -precompute_ty) _and_ - // (pc, 15 - 2 * round, precompute_tx, precompute_ty) - // will be written to the lookup table. 
- // - // therefore, if `pc` corresponds to the elliptic curve point [P], we will write: - // | pc | 0 | -15[P].x | -15[P].y | - // | pc | 1 | -13[P].x | -13[P].y | - // | pc | 2 | -11[P].x | -11[P].y | - // | pc | 3 | -9[P].x | -9[P].y | - // | pc | 4 | -7[P].x | -7[P].y | - // | pc | 5 | -5[P].x | -5[P].y | - // | pc | 6 | -3[P].x | -3[P].y | - // | pc | 7 | -1[P].x | -1[P].y | - // | pc | 8 | [P].x | [P].y | - // | pc | 9 | 3[P].x | 3[P].y | - // | pc | 10 | 5[P].x | 5[P].y | - // | pc | 11 | 7[P].x | 7[P].y | - // | pc | 12 | 9[P].x | 9[P].y | - // | pc | 13 | 11[P].x | 11[P].y | - // | pc | 14 | 13[P].x | 13[P].y | - // | pc | 15 | 15[P].x | 15[P].y | const auto& precompute_pc = View(in.precompute_pc); - const auto& tx = View(in.precompute_tx); - const auto& ty = View(in.precompute_ty); const auto& precompute_round = View(in.precompute_round); const auto& gamma = params.gamma; const auto& beta = params.beta; const auto& beta_sqr = params.beta_sqr; const auto& beta_cube = params.beta_cube; + const auto precompute_round2 = precompute_round + precompute_round; if constexpr (table_index == 0) { - const auto positive_slice_value = -(precompute_round) + 15; - const auto positive_term = - precompute_pc + gamma + positive_slice_value * beta + tx * beta_sqr + ty * beta_cube; - return positive_term; // degree 1 + // Point 1 positive: slice = 15 - 2*round + const auto& tx = View(in.precompute_tx); + const auto& ty = View(in.precompute_ty); + const auto positive_slice_value = -(precompute_round2) + 15; + return precompute_pc + gamma + positive_slice_value * beta + tx * beta_sqr + ty * beta_cube; // degree 1 } if constexpr (table_index == 1) { - const auto negative_term = precompute_pc + gamma + precompute_round * beta + tx * beta_sqr - ty * beta_cube; - return negative_term; // degree 1 + // Point 1 negative: slice = 2*round + const auto& tx = View(in.precompute_tx); + const auto& ty = View(in.precompute_ty); + return precompute_pc + gamma + precompute_round2 * beta + tx 
* beta_sqr - ty * beta_cube; // degree 1 + } + if constexpr (table_index == 2) { + // Point 2 positive: slice = 14 - 2*round + const auto& tx2 = View(in.precompute_tx2); + const auto& ty2 = View(in.precompute_ty2); + const auto positive_slice_value2 = -(precompute_round2) + 14; + return precompute_pc + gamma + positive_slice_value2 * beta + tx2 * beta_sqr + ty2 * beta_cube; // degree 1 + } + if constexpr (table_index == 3) { + // Point 2 negative: slice = 2*round + 1 + const auto& tx2 = View(in.precompute_tx2); + const auto& ty2 = View(in.precompute_ty2); + const auto negative_slice_value2 = precompute_round2 + 1; + return precompute_pc + gamma + negative_slice_value2 * beta + tx2 * beta_sqr - ty2 * beta_cube; // degree 1 } return Accumulator(1); } @@ -182,45 +199,46 @@ template class ECCVMLookupRelationImpl { const auto& beta_cube = params.beta_cube; const auto& msm_pc = View(in.msm_pc); const auto& msm_count = View(in.msm_count); - const auto& msm_slice1 = View(in.msm_slice1); - const auto& msm_slice2 = View(in.msm_slice2); - const auto& msm_slice3 = View(in.msm_slice3); - const auto& msm_slice4 = View(in.msm_slice4); - const auto& msm_x1 = View(in.msm_x1); - const auto& msm_x2 = View(in.msm_x2); - const auto& msm_x3 = View(in.msm_x3); - const auto& msm_x4 = View(in.msm_x4); - const auto& msm_y1 = View(in.msm_y1); - const auto& msm_y2 = View(in.msm_y2); - const auto& msm_y3 = View(in.msm_y3); - const auto& msm_y4 = View(in.msm_y4); // Recall that `pc` stands for point-counter. We recall how to compute the current pc. // // row pc = value of pc after msm // msm_count = number of (128-bit) multiplications processed so far in current MSM round (NOT INCLUDING current - // row) current_pc = msm_pc - msm_count next_pc = current_pc - {0, 1, 2, 3}, depending on how many adds are + // row) current_pc = msm_pc - msm_count next_pc = current_pc - {0, 1, ..., 7}, depending on how many adds are // performed in the current row. 
const auto current_pc = msm_pc - msm_count; if constexpr (lookup_index == 0) { - const auto lookup_term1 = (current_pc) + gamma + msm_slice1 * beta + msm_x1 * beta_sqr + msm_y1 * beta_cube; - return lookup_term1; // degree 1 + return (current_pc) + gamma + View(in.msm_slice1) * beta + View(in.msm_x1) * beta_sqr + + View(in.msm_y1) * beta_cube; // degree 1 } if constexpr (lookup_index == 1) { - const auto lookup_term2 = - (current_pc - 1) + gamma + msm_slice2 * beta + msm_x2 * beta_sqr + msm_y2 * beta_cube; - return lookup_term2; // degree 1 + return (current_pc - 1) + gamma + View(in.msm_slice2) * beta + View(in.msm_x2) * beta_sqr + + View(in.msm_y2) * beta_cube; // degree 1 } if constexpr (lookup_index == 2) { - const auto lookup_term3 = - (current_pc - 2) + gamma + msm_slice3 * beta + msm_x3 * beta_sqr + msm_y3 * beta_cube; - return lookup_term3; // degree 1 + return (current_pc - 2) + gamma + View(in.msm_slice3) * beta + View(in.msm_x3) * beta_sqr + + View(in.msm_y3) * beta_cube; // degree 1 } if constexpr (lookup_index == 3) { - const auto lookup_term4 = - (current_pc - 3) + gamma + msm_slice4 * beta + msm_x4 * beta_sqr + msm_y4 * beta_cube; - return lookup_term4; // degree 1 + return (current_pc - 3) + gamma + View(in.msm_slice4) * beta + View(in.msm_x4) * beta_sqr + + View(in.msm_y4) * beta_cube; // degree 1 + } + if constexpr (lookup_index == 4) { + return (current_pc - 4) + gamma + View(in.msm_slice5) * beta + View(in.msm_x5) * beta_sqr + + View(in.msm_y5) * beta_cube; // degree 1 + } + if constexpr (lookup_index == 5) { + return (current_pc - 5) + gamma + View(in.msm_slice6) * beta + View(in.msm_x6) * beta_sqr + + View(in.msm_y6) * beta_cube; // degree 1 + } + if constexpr (lookup_index == 6) { + return (current_pc - 6) + gamma + View(in.msm_slice7) * beta + View(in.msm_x7) * beta_sqr + + View(in.msm_y7) * beta_cube; // degree 1 + } + if constexpr (lookup_index == 7) { + return (current_pc - 7) + gamma + View(in.msm_slice8) * beta + View(in.msm_x8) * 
beta_sqr + + View(in.msm_y8) * beta_cube; // degree 1 } return Accumulator(1); } @@ -231,8 +249,8 @@ template class ECCVMLookupRelationImpl { * Table writes: ECCVMPointTable columns: we define Straus point table: * { {0, -15[P]}, {1, -13[P]}, ..., {15, 15[P]} } * write source: { precompute_round, precompute_tx, precompute_ty } - * Table reads: ECCVMMSM columns. Each row adds up to 4 points into MSM accumulator - * read source: { msm_slice1, msm_x1, msm_y1 }, ..., { msm_slice4, msm_x4, msm_y4 } + * Table reads: ECCVMMSM columns. Each row adds up to 8 points into MSM accumulator + * read source: { msm_slice1, msm_x1, msm_y1 }, ..., { msm_slice8, msm_x8, msm_y8 } * @param accumulator transformed to `evals + C(in(X)...)*scaling_factor` * @param in an std::array containing the fully extended Accumulator edges. * @param relation_params contains beta, gamma, and public_input_delta, .... From 762be817b24fbef90c9abad1542b8b2f74a36be5 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 14:23:14 +0000 Subject: [PATCH 11/24] feat(eccvm): add 2nd precomputed point constraint to point table relation Update ECCVMPointTableRelation for 2 precomputed points per row (Tx/Ty and Tx2/Ty2): SUBRELATION_PARTIAL_LENGTHS expanded from 6 to 8 entries: - Subrelations 0-1: Doubling constraint, now uses Tx2/Ty2 as the base point (at transition row, Tx2=P so Dx=2P) - Subrelations 2-3: Dx/Dy continuity (unchanged) - Subrelations 4-5: NEW intra-row addition (Tx = Tx2 + Dx), gated by precompute_select. Validates first point = second point + 2P. - Subrelations 6-7: NEW inter-row addition (Tx2 = Tx_shift + Dx), gated by not-transition and not-first-row. Validates second point of row i equals first point of row i+1 plus 2P. 
Row layout example for point P: round 0: Tx=15P, Tx2=13P | round 1: Tx=11P, Tx2=9P round 2: Tx=7P, Tx2=5P | round 3: Tx=3P, Tx2=P --- .../ecc_vm/ecc_point_table_relation.hpp | 3 +- .../ecc_vm/ecc_point_table_relation_impl.hpp | 173 +++++++----------- 2 files changed, 72 insertions(+), 104 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.hpp index b5e1ed1e8dd4..eaa4b529f588 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.hpp @@ -25,7 +25,8 @@ template class ECCVMPointTableRelationImpl { public: using FF = FF_; - static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ 6, 6, 6, 6, 6, 6 }; + // 8 subrelations: 0-1 doubling, 2-3 Dx/Dy continuity, 4-5 intra-row add (Tx=Tx2+D), 6-7 inter-row add (Tx2=Tx'+D) + static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ 6, 6, 6, 6, 6, 6, 6, 6 }; template static void accumulate(ContainerOverSubrelations& accumulator, diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp index 071860f39a92..942c801db6df 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp @@ -47,83 +47,43 @@ void ECCVMPointTableRelationImpl::accumulate(ContainerOverSubrelations& accu const auto& lagrange_first = View(in.lagrange_first); /** - * @brief Row structure + * @brief Row structure (8-wide: 2 precomputed points per row, 4 rows per scalar) * * Consider the set of (128-bit scalar multiplier, point, pc) tuples in the transcript columns. - * The point table columns process one tuple every 8 rows. 
The tuple with the largest pc value is first. + * The point table columns process one tuple every 4 rows. The tuple with the largest pc value is first. * When transitioning between tuple elements, pc decrements by 1. * - * The following table gives an example for two points. - * In the table, the point associated with `pc = 1` is labelled P. - * the point associated with `pc = 0` is labelled Q. - * - * | precompute_pc | precompute_point_transition | precompute_round | Tx | Ty | Dx | Dy | - * | ------------- | ---------------------------- | ------------------- | ----- | ----- | ---- | ---- | - * | 1 | 0 | 0 | 15P.x | 15P.y | 2P.x | 2P.y | - * | 1 | 0 | 1 | 13P.x | 13P.y | 2P.x | 2P.y | - * | 1 | 0 | 2 | 11P.x | 11P.y | 2P.x | 2P.y | - * | 1 | 0 | 3 | 9P.x | 9P.y | 2P.x | 2P.y | - * | 1 | 0 | 4 | 7P.x | 7P.y | 2P.x | 2P.y | - * | 1 | 0 | 5 | 5P.x | 5P.y | 2P.x | 2P.y | - * | 1 | 0 | 6 | 3P.x | 3P.y | 2P.x | 2P.y | - * | 1 | 1 | 7 | P.x | P.y | 2P.x | 2P.y | - * | 0 | 0 | 0 | 15Q.x | 15Q.y | 2Q.x | 2Q.y | - * | 0 | 0 | 1 | 13Q.x | 13Q.y | 2Q.x | 2Q.y | - * | 0 | 0 | 2 | 11Q.x | 11Q.y | 2Q.x | 2Q.y | - * | 0 | 0 | 3 | 9Q.x | 9Q.y | 2Q.x | 2Q.y | - * | 0 | 0 | 4 | 7Q.x | 7Q.y | 2Q.x | 2Q.y | - * | 0 | 0 | 5 | 5Q.x | 5Q.y | 2Q.x | 2Q.y | - * | 0 | 0 | 6 | 3Q.x | 3Q.y | 2Q.x | 2Q.y | - * | 0 | 1 | 7 | Q.x | Q.y | 2Q.x | 2Q.y | - * - * We apply the following relations to constrain the above table: - * - * 1. If precompute_point_transition = 0, (Dx, Dy) = (Dx_shift, Dy_shift) - * 2. If precompute_point_transition = 1, (Dx, Dy) = 2 (Px, Py) - * 3. If precompute_point_transition = 0, (Tx, Ty) = (Tx_shift, Ty_shift) + (Dx, Dy) - * - * The relations that constrain `precompute_point_transition` and `precompute_pc` are in `ecc_wnaf_relation.hpp` - * - * When precompute_point_transition = 1, the next row corresponds to the beginning of the processing of a new point. 
- * We use a multiset-equality check, `ecc_set_relation.hpp` to validate (pc, Tx, Ty, scalar-multiplier) is the same - * as something derived from the transcript columns. In other words, the multiset equality check allows the tables - * to communicate, and in particular validates that we are populating our PointTable with precomputed values that - * indeed arise from the Transcript columns. (Formerly, we referred to this as a "strict" lookup protocol = every - * item in the table must be read from once, and only once) - * - * For every row, we use a lookup protocol in `ecc_lookup_relation.hpp` to write the following tuples into a lookup - * table: - * 1. (pc, 15 - precompute_round, Tx, Ty) - * 2. (pc, precompute_round, Tx, -Ty) - * - * The value `15 - precompute_round` describes the multiplier applied to P at the current row. - * (this can be expanded into a wnaf value by taking `2x - 15` where `x = 15 - precompute_round`) . - * The value `precompute_round` describes the *negative multiplier* applied to P at the current row. - * This is also expanded into a wnaf value by taking `2x - 15` where `x = precompute_round`. 
- * - * The following table describes how taking (15 - precompute_round) for positive values and (precompute_round) for - * negative values produces the WNAF slice values that correspond to the multipliers for (Tx, Ty) and (Tx, -Ty): - * - * | Tx | Ty | x = 15 - precompute_round | 2x - 15 | y = precompute_round | 2y - 15 | - * | ----- | ----- | -------------------- | ------- | --------------- | ------- | - * | 15P.x | 15P.y | 15 | 15 | 0 | -15 | - * | 13P.x | 13P.y | 14 | 13 | 1 | -13 | - * | 11P.x | 11P.y | 13 | 11 | 2 | -11 | - * | 9P.x | 9P.y | 12 | 9 | 3 | -9 | - * | 7P.x | 7P.y | 11 | 7 | 4 | -7 | - * | 5P.x | 5P.y | 10 | 5 | 5 | -5 | - * | 3P.x | 3P.y | 9 | 3 | 6 | -3 | - * | P.x | P.y | 8 | 1 | 7 | -1 | + * Each row stores two precomputed points: + * (Tx, Ty) = table[15 - 2*round] (first point, odd table index) + * (Tx2, Ty2) = table[14 - 2*round] (second point, even table index) + * + * | pc | transition | round | Tx | Ty | Tx2 | Ty2 | Dx | Dy | + * | -- | ---------- | ----- | ----- | ----- | ----- | ----- | ---- | ---- | + * | 1 | 0 | 0 | 15P.x | 15P.y | 13P.x | 13P.y | 2P.x | 2P.y | + * | 1 | 0 | 1 | 11P.x | 11P.y | 9P.x | 9P.y | 2P.x | 2P.y | + * | 1 | 0 | 2 | 7P.x | 7P.y | 5P.x | 5P.y | 2P.x | 2P.y | + * | 1 | 1 | 3 | 3P.x | 3P.y | P.x | P.y | 2P.x | 2P.y | + * + * We apply the following relations: + * 1. If precompute_point_transition = 1, (Dx, Dy) = 2(Tx2, Ty2) [doubling at transition, Tx2=P at last row] + * 2. If precompute_point_transition = 0, (Dx, Dy) = (Dx_shift, Dy_shift) [continuity] + * 3. (Tx, Ty) = (Tx2, Ty2) + (Dx, Dy) [intra-row: first point = second point + 2P] + * 4. 
If precompute_point_transition = 0, (Tx2, Ty2) = (Tx_shift, Ty_shift) + (Dx, Dy) + * [inter-row: second point = next row's first point + 2P] */ + const auto& Tx2 = View(in.precompute_tx2); + const auto& Tx2_shift = View(in.precompute_tx2_shift); + const auto& Ty2 = View(in.precompute_ty2); + const auto& Ty2_shift = View(in.precompute_ty2_shift); + /** - * @brief Validate Dx, Dy correctness relation + * @brief Validate Dx, Dy correctness (doubling relation) * - * When computing a point table for point [P] = (Px, Py), we require [D] (Dx, Dy) = 2.[P] - * If all other relations are satisfied, we know that (Tx, Ty) = (Px, Py) - * i.e. (Dx, Dy) = 2(Px, Py) when precompute_round_transition = 1. + * When precompute_point_transition = 1, the current row is the last row for this point. + * At the last row (round=3), Tx2 = P (the base point). So (Dx, Dy) = 2(Tx2, Ty2) = 2P. * - * Double formula: + * Double formula (for curve a=0, using 3x^2 shortcut since a=0 => slope = 3x^2/(2y)): * x_3 = 9x^4 / 4y^2 - 2x * y_3 = (3x^2 / 2y) * (x - x_3) - y * @@ -131,20 +91,19 @@ void ECCVMPointTableRelationImpl::accumulate(ContainerOverSubrelations& accu * (x_3 + 2x) * 4y^2 - 9x^4 = 0 * (y3 + y) * 2y - 3x^2 * (x - x_3) = 0 */ - auto two_x = Tx + Tx; - auto three_x = two_x + Tx; - auto three_xx = Tx * three_x; - auto nine_xxxx = three_xx.sqr(); - auto two_y = Ty + Ty; - auto four_yy = two_y.sqr(); - auto x_double_check = (Dx + two_x) * four_yy - nine_xxxx; - auto y_double_check = (Ty + Dy) * two_y + three_xx * (Dx - Tx); + auto two_x2 = Tx2 + Tx2; + auto three_x2 = two_x2 + Tx2; + auto three_x2x2 = Tx2 * three_x2; + auto nine_x2x2x2x2 = three_x2x2.sqr(); + auto two_y2 = Ty2 + Ty2; + auto four_y2y2 = two_y2.sqr(); + auto x_double_check = (Dx + two_x2) * four_y2y2 - nine_x2x2x2x2; + auto y_double_check = (Ty2 + Dy) * two_y2 + three_x2x2 * (Dx - Tx2); std::get<0>(accumulator) += precompute_point_transition * x_double_check * scaling_factor; std::get<1>(accumulator) += 
precompute_point_transition * y_double_check * scaling_factor; /** - * @brief If precompute_round_transition = 0, (Dx_shift, Dy_shift) = (Dx, Dy) - * + * @brief If precompute_point_transition = 0, (Dx_shift, Dy_shift) = (Dx, Dy) [continuity] * 1st row is empty => don't apply if lagrange_first == 1 */ std::get<2>(accumulator) += @@ -153,37 +112,45 @@ void ECCVMPointTableRelationImpl::accumulate(ContainerOverSubrelations& accu (-lagrange_first + 1) * (-precompute_point_transition + 1) * (Dy - Dy_shift) * scaling_factor; /** - * @brief Valdiate (Tx, Ty) is correctly computed from (Tx_shift, Ty_shift), (Dx, Dy). - * If precompute_round_transition = 0, [T] = [T_shift] + [D]. + * @brief Intra-row addition: (Tx, Ty) = (Tx2, Ty2) + (Dx, Dy) * - * Add formula: - * x_3 = (y_2 - y_1)^2 / (x_2 - x_1)^2 - x_2 - x_1 - * y_3 = ((y_2 - y_1) / (x_2 - x_1)) * (x_1 - x_3) - y_1 + * The first precomputed point = second precomputed point + 2P. + * E.g., at round 0: 15P = 13P + 2P. * - * Expanding into relations: + * This is gated by precompute_select (active when processing a scalar). + * + * Add formula (denominator form): * (x_3 + x_2 + x_1) * (x_2 - x_1)^2 - (y_2 - y_1)^2 = 0 * (y_3 + y_1) * (x_2 - x_1) + (x_3 - x_1) * (y_2 - y_1) = 0 + */ + { + const auto& precompute_select = View(in.precompute_select); + const auto lambda_num_intra = Dy - Ty2; + const auto lambda_den_intra = Dx - Tx2; + auto x_add_check_intra = (Tx + Dx + Tx2) * lambda_den_intra.sqr() - lambda_num_intra.sqr(); + auto y_add_check_intra = (Ty + Ty2) * lambda_den_intra + (Tx - Tx2) * lambda_num_intra; + std::get<4>(accumulator) += precompute_select * x_add_check_intra * scaling_factor; + std::get<5>(accumulator) += precompute_select * y_add_check_intra * scaling_factor; + } + + /** + * @brief Inter-row addition: (Tx2, Ty2) = (Tx_shift, Ty_shift) + (Dx, Dy) + * + * The second precomputed point of row i = first precomputed point of row i+1 + 2P. + * E.g., row 0 Tx2 = 13P = row 1 Tx (11P) + 2P. 
* - * We don't need to check for incomplete point addition edge case (x_1 == x_2); the only cases this would correspond - * to are y2 == y1 or y2 == -y1. Both of these cases may be ruled out as follows. - * 1. y2 == y1. Then 2P == kP, where k∈{1, ..., 13}, which of course cannot happen because the order r of E(Fₚ) - * is a large prime and P is already assumed to not be the neutral element. - * 2. y2 == -y1. Again, then -2P == kP, k∈{1, ..., 13}, and we get the same contradiction. + * Gated by: not first row, not transition (same as old inter-row constraint). */ - const auto& x1 = Tx_shift; - const auto& y1 = Ty_shift; - const auto& x2 = Dx; - const auto& y2 = Dy; - const auto& x3 = Tx; - const auto& y3 = Ty; - const auto lambda_numerator = y2 - y1; - const auto lambda_denominator = x2 - x1; - auto x_add_check = (x3 + x2 + x1) * lambda_denominator.sqr() - lambda_numerator.sqr(); - auto y_add_check = (y3 + y1) * lambda_denominator + (x3 - x1) * lambda_numerator; - std::get<4>(accumulator) += - (-lagrange_first + 1) * (-precompute_point_transition + 1) * x_add_check * scaling_factor; - std::get<5>(accumulator) += - (-lagrange_first + 1) * (-precompute_point_transition + 1) * y_add_check * scaling_factor; + { + const auto lambda_num_inter = Dy - Ty_shift; + const auto lambda_den_inter = Dx - Tx_shift; + auto x_add_check_inter = (Tx2 + Dx + Tx_shift) * lambda_den_inter.sqr() - lambda_num_inter.sqr(); + auto y_add_check_inter = (Ty2 + Ty_shift) * lambda_den_inter + (Tx2 - Tx_shift) * lambda_num_inter; + std::get<6>(accumulator) += + (-lagrange_first + 1) * (-precompute_point_transition + 1) * x_add_check_inter * scaling_factor; + std::get<7>(accumulator) += + (-lagrange_first + 1) * (-precompute_point_transition + 1) * y_add_check_inter * scaling_factor; + } } } // namespace bb From b89332527c4d86d0735561320f2897fd45ae889e Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 14:27:57 +0000 Subject: [PATCH 12/24] feat(eccvm): update eccvm_set_permutation_delta 
to product of 8 terms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With 8 wNAF digits per precompute row, the zero-tuple fingerprint used for padding inactive rows must be the product of 8 terms (γ + j·β² + t·β⁴) for j = 0..7, rather than 4 terms for j = 0..3. Updated in all three locations: - eccvm_prover.cpp - eccvm_verifier.cpp - eccvm_trace_checker.cpp --- .../cpp/src/barretenberg/eccvm/eccvm_prover.cpp | 11 ++++++----- .../src/barretenberg/eccvm/eccvm_trace_checker.cpp | 7 ++++--- .../cpp/src/barretenberg/eccvm/eccvm_verifier.cpp | 8 +++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp index 065cb9c4b3e6..1d0e0dbed0f3 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp @@ -94,12 +94,13 @@ void ECCVMProver::execute_log_derivative_commitments_round() // constrain (pc, round, wnaf_slice) to match between the MSM table and the Precomputed table. The number of rows we // add per short scalar `mul` is slightly less in the Precomputed table as in the MSM table, so to get the // permutation argument to work out, when `precompute_select == 0`, we must implicitly _remove_ (0, 0, 0) as a tuple - // on the wNAF side. This corresponds to dividing by - // (γ+t·β⁴)·(γ+β²+t·β⁴)·(γ+2β²+t·β⁴)·(γ+3β²+t·β⁴), where t = FIRST_TERM_TAG. + // on the wNAF side. With 8 digits per row, we have 8 zero-tuple fingerprints to remove: + // product of (γ + j·β² + t·β⁴) for j = 0..7, where t = FIRST_TERM_TAG. 
auto first_term_tag = beta_quartic; // FIRST_TERM_TAG (= 1) * beta_quartic - relation_parameters.eccvm_set_permutation_delta = (gamma + first_term_tag) * (gamma + beta_sqr + first_term_tag) * - (gamma + beta_sqr + beta_sqr + first_term_tag) * - (gamma + beta_sqr + beta_sqr + beta_sqr + first_term_tag); + relation_parameters.eccvm_set_permutation_delta = FF(1); + for (size_t j = 0; j < 8; ++j) { + relation_parameters.eccvm_set_permutation_delta *= (gamma + FF(j) * beta_sqr + first_term_tag); + } relation_parameters.eccvm_set_permutation_delta = relation_parameters.eccvm_set_permutation_delta.invert(); // Compute inverse polynomial for our logarithmic-derivative lookup method compute_logderivative_inverse params{ .eta = 0, diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_verifier.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_verifier.cpp index 9bd2765ab2f5..ff64027efba7 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_verifier.cpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_verifier.cpp @@ -56,10 +56,12 @@ typename ECCVMVerifier_::ReductionResult ECCVMVerifier_::reduce_ relation_parameters.beta_sqr = beta_sqr; relation_parameters.beta_cube = beta_sqr * beta; relation_parameters.beta_quartic = beta_quartic; + // Product of 8 zero-tuple fingerprints (γ + j·β² + t·β⁴) for j = 0..7, then inverted. 
auto first_term_tag = beta_quartic; // FIRST_TERM_TAG (= 1) * beta_quartic - relation_parameters.eccvm_set_permutation_delta = (gamma + first_term_tag) * (gamma + beta_sqr + first_term_tag) * - (gamma + beta_sqr + beta_sqr + first_term_tag) * - (gamma + beta_sqr + beta_sqr + beta_sqr + first_term_tag); + relation_parameters.eccvm_set_permutation_delta = FF(1); + for (size_t j = 0; j < 8; ++j) { + relation_parameters.eccvm_set_permutation_delta *= (gamma + FF(j) * beta_sqr + first_term_tag); + } relation_parameters.eccvm_set_permutation_delta = relation_parameters.eccvm_set_permutation_delta.invert(); // Get commitment to permutation and lookup grand products From d4c16a45aa5f2b63bd20ba0625e200717f381ced Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 14:53:11 +0000 Subject: [PATCH 13/24] chore(eccvm): add ECCVM univariate benchmarks for sumcheck prover Add benchmarks for ECCVM relation evaluation using Sumcheck univariates (prover-side work), in addition to the existing values-based benchmarks (verifier-side work). 
--- .../benchmark/relations_bench/relations.bench.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp index 1a2acbf93f2e..e58f9bb6665e 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp @@ -86,7 +86,7 @@ BENCHMARK(execute_relation_for_values>); BENCHMARK(execute_relation_for_values>); -// ECCVM +// ECCVM (verifier work) BENCHMARK(execute_relation_for_values>); BENCHMARK(execute_relation_for_values>); BENCHMARK(execute_relation_for_values>); @@ -95,6 +95,15 @@ BENCHMARK(execute_relation_for_values>) BENCHMARK(execute_relation_for_values>); BENCHMARK(execute_relation_for_values>); +// ECCVM (Sumcheck prover work — univariate accumulation) +BENCHMARK(execute_relation_for_univariates>); +BENCHMARK(execute_relation_for_univariates>); +BENCHMARK(execute_relation_for_univariates>); +BENCHMARK(execute_relation_for_univariates>); +BENCHMARK(execute_relation_for_univariates>); +BENCHMARK(execute_relation_for_univariates>); +BENCHMARK(execute_relation_for_univariates>); + } // namespace bb::benchmark::relations BENCHMARK_MAIN(); From da2fbef80a7b74ecc251e12677fccceb4b8d28b5 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 15:02:16 +0000 Subject: [PATCH 14/24] fix(eccvm): fix MSM relation partial lengths and remove unused vars - Set all MSM relation SUBRELATION_PARTIAL_LENGTHS to 12 (was mixed 8/12). Required because the View type is derived from the max partial length subrelation (index 0 = 12), so all intermediate Univariates are 12-wide and can only be accumulated into 12-wide accumulators. - Fixed element count: was 68, now 67 (matching the array declaration). 
- Removed unused Tx2_shift/Ty2_shift variables from point table relation (the inter-row constraint uses Tx_shift/Ty_shift, not the shifted versions of the second point). --- .../relations/ecc_vm/ecc_msm_relation.hpp | 27 ++++++++++--------- .../ecc_vm/ecc_point_table_relation_impl.hpp | 2 -- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp index 4626847f4dc9..5324c44e5f95 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp @@ -47,19 +47,22 @@ template class ECCVMMSMRelationImpl { // The degree of y_t8 is 10 (degree doubles through chaining: first_add gives deg 3 y, then each subsequent // add increments by ~1). With q_add gating, the final degree is 12 (rounded up for safety). // Most subrelations remain degree <= 8. The new addition-chain subrelations (47-50, 51-54) have higher degree. + // All subrelations use partial length 12 because the View type (used for all wire reads) + // is Univariate (derived from the max-degree subrelation in this relation). + // The 8-chained addition/skew outputs reach degree ~10 at the accumulator output (indices 0,1,4). 
static constexpr std::array SUBRELATION_PARTIAL_LENGTHS{ - 12, 12, 8, 8, 12, 8, // 0-5: ADD acc(x,y), slope1; SKEW acc(x,y), slope1 - 8, 8, 8, 8, 8, 8, // 6-11: collision1-4; DOUBLE acc(x,y) - 8, 8, 8, 8, 8, 8, // 12-17: DOUBLE slope1; slice-zero 1-4; mutual excl - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 12, // 18-31: round tx, selectors, count, continuity - 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, // 32-46: add1=q_add+q_skew, skew ctrls, ADD/DOUBLE/SKEW slopes 2-4, no-op - 12, 12, 12, 12, // 47-50: ADD slopes 5-8 - 12, 12, 12, 12, // 51-54: SKEW slopes 5-8 - 8, 8, 8, 8, // 55-58: collision 5-8 - 8, 8, 8, 8, // 59-62: slice-zero 5-8 - 8, 8, 8, 8 // 63-66: continuity add5-8 + 12, 12, 12, 12, 12, 12, // 0-5 + 12, 12, 12, 12, 12, 12, // 6-11 + 12, 12, 12, 12, 12, 12, // 12-17 + 12, 12, 12, 12, 12, 12, 12, // 18-24 + 12, 12, 12, 12, 12, 12, 12, // 25-31 + 12, 12, 12, 12, 12, 12, 12, 12, // 32-39 + 12, 12, 12, 12, 12, 12, 12, // 40-46 + 12, 12, 12, 12, // 47-50: ADD slopes 5-8 + 12, 12, 12, 12, // 51-54: SKEW slopes 5-8 + 12, 12, 12, 12, // 55-58: collision 5-8 + 12, 12, 12, 12, // 59-62: slice-zero 5-8 + 12, 12, 12, 12 // 63-66: continuity add5-8 }; template diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp index 942c801db6df..238eabc87f8c 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp @@ -73,9 +73,7 @@ void ECCVMPointTableRelationImpl::accumulate(ContainerOverSubrelations& accu */ const auto& Tx2 = View(in.precompute_tx2); - const auto& Tx2_shift = View(in.precompute_tx2_shift); const auto& Ty2 = View(in.precompute_ty2); - const auto& Ty2_shift = View(in.precompute_ty2_shift); /** * @brief Validate Dx, Dy correctness (doubling relation) From 80589aedf259ffe286cde29cb2f7b89f63339341 Mon 
Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 15:39:31 +0000 Subject: [PATCH 15/24] fix(eccvm): resolve crashes in MSM builder for 8-wide layout Two bugs fixed: 1. batch_normalize crash on zero z-coordinates: With ADDITIONS_PER_ROW=8 and DOUBLINGS_PER_ROW=4, doubling rows only use 4 of 8 trace slots. The unused slots had default Element{} with z=0 (point at infinity), causing batch_normalize to fail when inverting z-coordinates. Fix: fill unused slots with valid (non-infinity) dummy points and track which slots are used via is_used vector to skip them during collision_inverse computation. 2. Signed integer overflow in precomputed_tables_builder: With 8 wNAF digits per row, row_chunk = slice0 * (1<<28) can reach ~4 billion, exceeding INT_MAX. This was undefined behavior causing incorrect scalar_sum values. Fix: use int64_t for row_chunk computation. --- .../src/barretenberg/eccvm/msm_builder.hpp | 22 ++++++++++++++++--- .../eccvm/precomputed_tables_builder.hpp | 9 +++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp index e49244b2258f..a5c097939812 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp @@ -263,6 +263,9 @@ class ECCVMMSMMBuilder { // doubling. if it is `true`, then we are doubling (i.e., the condition is that `p3 = p1.dbl()`), else we are // adding (i.e., the condition is that `p3 = p1 + p2`). std::vector is_double_or_add(num_point_adds_and_doubles); + // `is_used` records whether a trace entry is actually used (as opposed to padding for ADDITIONS_PER_ROW > + // DOUBLINGS_PER_ROW in doubling rows). Unused entries must be skipped during batch inversion. 
+ std::vector<bool> is_used(num_point_adds_and_doubles, false); // accumulator_trace tracks the value of the ECCVM accumulator for each row std::span accumulator_trace(&points_to_normalize[num_point_adds_and_doubles * 3], num_accumulators); @@ -328,6 +331,7 @@ class ECCVMMSMMBuilder { p2_trace[trace_index] = p2; p3_trace[trace_index] = accumulator; is_double_or_add[trace_index] = false; + is_used[trace_index] = true; trace_index++; } // Now, `row.add_state` has been fully processed and we fill in the rest of the members of `row`. @@ -366,10 +370,17 @@ class ECCVMMSMMBuilder { accumulator = accumulator.dbl(); p3_trace[trace_index] = accumulator; is_double_or_add[trace_index] = true; + is_used[trace_index] = true; + trace_index++; + } + // Fill unused trace slots with dummy non-infinity points so batch_normalize doesn't + // fail on z=0. These entries are not used for any relation computation. + for (size_t pad = 0; pad < (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW); ++pad) { + p1_trace[trace_index] = accumulator; + p2_trace[trace_index] = accumulator; + p3_trace[trace_index] = accumulator; trace_index++; } - // Skip unused trace slots for this row (ADDITIONS_PER_ROW allocated, only DOUBLINGS_PER_ROW used) - trace_index += (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW); accumulator_trace[msm_row_index] = accumulator; msm_row_index++; } else // process `wnaf_skew`, i.e., the skew digit.
@@ -402,6 +413,7 @@ class ECCVMMSMMBuilder { p2_trace[trace_index] = add_state.point; p3_trace[trace_index] = accumulator; is_double_or_add[trace_index] = false; + is_used[trace_index] = true; trace_index++; } row.q_add = false; @@ -427,7 +439,11 @@ class ECCVMMSMMBuilder { std::vector inverse_trace(num_point_adds_and_doubles); parallel_for_range(num_point_adds_and_doubles, [&](size_t start, size_t end) { for (size_t operation_idx = start; operation_idx < end; ++operation_idx) { - if (is_double_or_add[operation_idx]) { + if (!is_used[operation_idx]) { + // Unused trace slots (padding for ADDITIONS_PER_ROW > DOUBLINGS_PER_ROW in doubling rows). + // Set to 1 so batch_invert doesn't fail on zero. + inverse_trace[operation_idx] = 1; + } else if (is_double_or_add[operation_idx]) { inverse_trace[operation_idx] = (p1_trace[operation_idx].y + p1_trace[operation_idx].y); } else { inverse_trace[operation_idx] = (p2_trace[operation_idx].x - p1_trace[operation_idx].x); diff --git a/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp index 72926f8cb041..d678664a4251 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp @@ -130,9 +130,12 @@ class ECCVMPointTablePrecomputationBuilder { // N.B. we apply a constraint that requires slice1 to be positive for the 1st row of each scalar // sum. 
This ensures we do not have WNAF representations of negative values - const int row_chunk = slice7 + (slice6 * (1 << 4)) + (slice5 * (1 << 8)) + (slice4 * (1 << 12)) + - (slice3 * (1 << 16)) + (slice2 * (1 << 20)) + (slice1 * (1 << 24)) + - (slice0 * (1 << 28)); + // Use int64_t to avoid signed overflow: with 8 digits, slice0*(1<<28) can exceed INT_MAX + const int64_t row_chunk = + static_cast<int64_t>(slice7) + (static_cast<int64_t>(slice6) << 4) + + (static_cast<int64_t>(slice5) << 8) + (static_cast<int64_t>(slice4) << 12) + + (static_cast<int64_t>(slice3) << 16) + (static_cast<int64_t>(slice2) << 20) + + (static_cast<int64_t>(slice1) << 24) + (static_cast<int64_t>(slice0) << 28); bool chunk_negative = row_chunk < 0; From 17d669a6e3c07d88bf31c1380a6524548009f0d6 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 15:52:23 +0000 Subject: [PATCH 16/24] fix(eccvm): update lookup read counts and set relation for 2-point-per-row layout Three fixes: 1. Lookup read counts: Reworked MSM builder to return 4 read count columns (was 2). With 2 precomputed points per row and 4 table terms in the lookup relation, each compressed slice value maps to one of 4 tables based on parity and magnitude: - Table 0: odd slices >= 8 (point 1 positive) - Table 1: even slices < 8 (point 1 negative) - Table 2: even slices >= 8 (point 2 positive) - Table 3: odd slices < 8 (point 2 negative) ProverPolynomials now wires all 4 read count columns. 2. Set relation second term: Changed base point reference from precompute_tx/ty to precompute_tx2/ty2. In the 2-point-per-row layout, the base point P is stored in tx2/ty2 at the transition row (round=3), not in tx/ty (which holds 3P). 3. Removed debug trace code from trace checker.
--- .../src/barretenberg/eccvm/eccvm_flavor.hpp | 10 +- .../src/barretenberg/eccvm/msm_builder.hpp | 101 ++++++++---------- .../ecc_vm/ecc_set_relation_impl.hpp | 6 +- 3 files changed, 51 insertions(+), 66 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp index a85ddeb9e705..17fda703f0a5 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp @@ -689,14 +689,12 @@ class ECCVMFlavor { lagrange_third.at(2) = 1; lagrange_last.at(unmasked_witness_size - 1) = 1; for (size_t i = 0; i < point_table_read_counts[0].size(); ++i) { - // Explanation of off-by-one offset: - // When computing the WNAF slice for a point at point counter value `pc` and a round index `round`, the - // row number that computes the slice can be derived. This row number is then mapped to the index of - // `lookup_read_counts`. We do this mapping in `ecc_msm_relation`. We are off-by-one because we add an - // empty row at the start of the WNAF columns that is not accounted for (index of lookup_read_counts - // maps to the row in our WNAF columns that computes a slice for a given value of pc and round) + // Off-by-one: the first precompute row is empty (for shifted polynomials), so read counts + // are stored at index i+1. Each read count column has ROWS_PER_POINT (= 4) entries per point. 
lookup_read_counts_0.at(i + 1) = point_table_read_counts[0][i]; lookup_read_counts_1.at(i + 1) = point_table_read_counts[1][i]; + lookup_read_counts_2.at(i + 1) = point_table_read_counts[2][i]; + lookup_read_counts_3.at(i + 1) = point_table_read_counts[3][i]; } // compute polynomials for transcript columns diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp index a5c097939812..f2bf17edeceb 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp @@ -89,72 +89,57 @@ class ECCVMMSMMBuilder { * @param num_msm_rows * @return std::vector */ - static std::tuple, std::array, 2>> compute_rows( + static constexpr size_t NUM_READ_COUNT_COLUMNS = 4; + static constexpr size_t ROWS_PER_POINT = eccvm::NUM_WNAF_DIGITS_PER_SCALAR / eccvm::WNAF_DIGITS_PER_ROW; // 4 + + static std::tuple, std::array, NUM_READ_COUNT_COLUMNS>> compute_rows( const std::vector& msms, const uint32_t total_number_of_muls, const size_t num_msm_rows) { - // To perform a scalar multiplication of a point P by a scalar x, we precompute a table of points - // -15P, -13P, ..., -3P, -P, P, 3P, ..., 15P - // When we perform a scalar multiplication, we decompose x into base-16 wNAF digits then look these precomputed - // values up with digit-by-digit. As we are performing lookups with the log-derivative argument, we have to - // record read counts. 
We record read counts in a table with the following structure: - // 1st write column = positive wNAF digits - // 2nd write column = negative wNAF digits - // the row number is a function of pc and wnaf digit: - // point_idx = total_number_of_muls - pc - // row = point_idx * rows_per_point_table + (some function of the slice value) - // - // Illustration: - // Block Structure: - // | 0 | 1 | - // | - | - | - // 1 | # | # | -1 - // 3 | # | # | -3 - // 5 | # | # | -5 - // 7 | # | # | -7 - // 9 | # | # | -9 - // 11 | # | # | -11 - // 13 | # | # | -13 - // 15 | # | # | -15 + // With 2 precomputed points per row and 4 rows per point, the point table has 4 table terms: + // table 0 (read_counts_0): point 1 positive — compressed slices {15,13,11,9} at rounds 0,1,2,3 + // table 1 (read_counts_1): point 1 negative — compressed slices {0,2,4,6} at rounds 0,1,2,3 + // table 2 (read_counts_2): point 2 positive — compressed slices {14,12,10,8} at rounds 0,1,2,3 + // table 3 (read_counts_3): point 2 negative — compressed slices {1,3,5,7} at rounds 0,1,2,3 // - // Table structure: - // | Block_{0} | <-- pc = total_number_of_muls - // | Block_{1} | <-- pc = total_number_of_muls-(num muls in msm 0) - // | ... | ... - // | Block_{total_number_of_muls-1} | <-- pc = num muls in last msm - - const size_t num_rows_in_read_counts_table = - static_cast(total_number_of_muls) * - (eccvm::POINT_TABLE_SIZE >> 1); // `POINT_TABLE_SIZE` is 2ʷ, where in our case w = 4. As noted above, with - // respect to *read counts*, we are record looking up the positive and - // negative odd multiples of [P] in two separate columns, each of size 2ʷ⁻¹. 
- std::array, 2> point_table_read_counts; - point_table_read_counts[0].reserve(num_rows_in_read_counts_table); - point_table_read_counts[1].reserve(num_rows_in_read_counts_table); - for (size_t i = 0; i < num_rows_in_read_counts_table; ++i) { - point_table_read_counts[0].emplace_back(0); - point_table_read_counts[1].emplace_back(0); + // Each read count column has ROWS_PER_POINT (= 4) entries per point. + // Row index = point_idx * ROWS_PER_POINT + round. + + const size_t num_rows_in_read_counts_table = static_cast(total_number_of_muls) * ROWS_PER_POINT; + std::array, NUM_READ_COUNT_COLUMNS> point_table_read_counts; + for (auto& col : point_table_read_counts) { + col.resize(num_rows_in_read_counts_table, 0); } const auto update_read_count = [&point_table_read_counts](const size_t point_idx, const int slice) { - /** - * AUDITTODO: verify and correct the point table ordering described below. - * The wNAF digits for base 16 lie in the range -15, -13, ..., 13, 15. - * The *point table* format is the following: - * (for positive point table) T[0] = P, T[1] = 3P, ..., T[7] = 15P - * (for negative point table) T[0] = -P, T[1] = -3P, ..., T[15] = -15P - * i.e. if the slice value is negative, we can use the compressed WNAF directly as the table index - * if the slice value is positive, we must take 15 - (compressed wNAF) to get the table index - */ - const size_t row_index_offset = point_idx * 8; - if (slice < 0) { - // negative table: T[0] = -15P, T[1] = -13P, ..., T[7] = -P - const auto table_index = static_cast((slice + 15) / 2); - point_table_read_counts[1][row_index_offset + table_index]++; + // `slice` is the wNAF digit in {-15, -13, ..., 13, 15}. + // `compressed` is the compressed form in {0, 1, ..., 15}. 
+ const size_t compressed = static_cast<size_t>((slice + 15) / 2); + + // Determine which table term and round this compressed slice maps to: + // table 0: compressed ∈ {15,13,11,9} (odd, ≥8) → round = (15-compressed)/2 + // table 1: compressed ∈ {0,2,4,6} (even, <8) → round = compressed/2 + // table 2: compressed ∈ {14,12,10,8} (even, ≥8) → round = (14-compressed)/2 + // table 3: compressed ∈ {1,3,5,7} (odd, <8) → round = (compressed-1)/2 + size_t table_idx; + size_t round; + const bool is_positive = (compressed >= 8); + const bool is_odd = (compressed & 1) != 0; + if (is_positive && is_odd) { + table_idx = 0; + round = (15 - compressed) / 2; + } else if (!is_positive && !is_odd) { + table_idx = 1; + round = compressed / 2; + } else if (is_positive && !is_odd) { + table_idx = 2; + round = (14 - compressed) / 2; } else { - // positive table: T[0] = 15P, T[1] = 13P, ..., T[7] = P - const auto table_index = static_cast<size_t>((15 - slice) / 2); - point_table_read_counts[0][row_index_offset + table_index]++; + table_idx = 3; + round = (compressed - 1) / 2; } + + const size_t row_index = point_idx * ROWS_PER_POINT + round; + point_table_read_counts[table_idx][row_index]++; }; // compute which row index each multiscalar multiplication will start at. diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp index a9c3ccdfea0e..630758c9fbfc 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp @@ -234,8 +234,10 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE * We only add the tuple to the multiset if `precompute_point_transition == 1`. */ { - const auto& table_x = View(in.precompute_tx); - const auto& table_y = View(in.precompute_ty); + // At the transition row (round=3), Tx2/Ty2 = P (the base point).
+ // The old layout had Tx = P at transition, but the 2-point-per-row layout puts P in Tx2. + const auto& table_x = View(in.precompute_tx2); + const auto& table_y = View(in.precompute_ty2); const auto& precompute_skew = View(in.precompute_skew); const auto negative_inverse_seven = []() { From 7dd5ce968019fe8fed26469c3e8678525bfc52e5 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 16:03:39 +0000 Subject: [PATCH 17/24] fix(eccvm): update test infrastructure for 8-wide layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - eccvm.test.cpp: Fix eccvm_set_permutation_delta in complete_proving_key_for_test() to compute the product of 8 zero-tuple fingerprints (γ + j·β² + t·β⁴) for j=0..7, matching the prover/verifier/trace_checker. Previously only computed 4 terms, causing CommittedSumcheck test to fail. - eccvm_transcript.test.cpp: Update hardcoded prover manifest in construct_eccvm_honk_manifest() with all new wire columns added for the 8-wide layout: - PRECOMPUTE_S5HI through PRECOMPUTE_S8LO (8 columns) - MSM_ADD5 through MSM_ADD8 (4 columns) - MSM_X5/Y5 through MSM_X8/Y8 (8 columns) - MSM_COLLISION_X5 through MSM_COLLISION_X8 (4 columns) - MSM_LAMBDA5 through MSM_LAMBDA8 (4 columns) - MSM_SLICE5 through MSM_SLICE8 (4 columns) - LOOKUP_READ_COUNTS_2, LOOKUP_READ_COUNTS_3 (2 columns) - PRECOMPUTE_TX2, PRECOMPUTE_TY2 (2 columns) All 41 eccvm_tests now pass. 
--- .../cpp/src/barretenberg/eccvm/eccvm.test.cpp | 8 ++-- .../eccvm/eccvm_transcript.test.cpp | 44 +++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm.test.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm.test.cpp index 1a955be8bba8..fa309c8ec90d 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm.test.cpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm.test.cpp @@ -141,9 +141,11 @@ void complete_proving_key_for_test(bb::RelationParameters& relation_paramete relation_parameters.beta_cube = beta_sqr * beta; relation_parameters.beta_quartic = beta_quartic; auto first_term_tag = beta_quartic; // FIRST_TERM_TAG (= 1) * beta_quartic - relation_parameters.eccvm_set_permutation_delta = (gamma + first_term_tag) * (gamma + beta_sqr + first_term_tag) * - (gamma + beta_sqr + beta_sqr + first_term_tag) * - (gamma + beta_sqr + beta_sqr + beta_sqr + first_term_tag); + // Product of 8 zero-tuple fingerprints (γ + j·β² + t·β⁴) for j = 0..7, then inverted. 
+ relation_parameters.eccvm_set_permutation_delta = FF(1); + for (size_t j = 0; j < 8; ++j) { + relation_parameters.eccvm_set_permutation_delta *= (gamma + FF(j) * beta_sqr + first_term_tag); + } relation_parameters.eccvm_set_permutation_delta = relation_parameters.eccvm_set_permutation_delta.invert(); const size_t unmasked_witness_size = pk->circuit_size - NUM_DISABLED_ROWS_IN_SUMCHECK; diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_transcript.test.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_transcript.test.cpp index 9db51a2bf07e..bce2a8835c1e 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_transcript.test.cpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_transcript.test.cpp @@ -104,6 +104,47 @@ class ECCVMTranscriptTests : public ::testing::Test { manifest_expected.add_entry(round, "TRANSCRIPT_MSM_X_INVERSE", frs_per_G); manifest_expected.add_entry(round, "TRANSCRIPT_MSM_COUNT_ZERO_AT_TRANSITION", frs_per_G); manifest_expected.add_entry(round, "TRANSCRIPT_MSM_COUNT_AT_TRANSITION_INVERSE", frs_per_G); + // --- 8-wide additions: new precompute slice columns --- + manifest_expected.add_entry(round, "PRECOMPUTE_S5HI", frs_per_G); + manifest_expected.add_entry(round, "PRECOMPUTE_S5LO", frs_per_G); + manifest_expected.add_entry(round, "PRECOMPUTE_S6HI", frs_per_G); + manifest_expected.add_entry(round, "PRECOMPUTE_S6LO", frs_per_G); + manifest_expected.add_entry(round, "PRECOMPUTE_S7HI", frs_per_G); + manifest_expected.add_entry(round, "PRECOMPUTE_S7LO", frs_per_G); + manifest_expected.add_entry(round, "PRECOMPUTE_S8HI", frs_per_G); + manifest_expected.add_entry(round, "PRECOMPUTE_S8LO", frs_per_G); + // --- 8-wide additions: new MSM add selectors --- + manifest_expected.add_entry(round, "MSM_ADD5", frs_per_G); + manifest_expected.add_entry(round, "MSM_ADD6", frs_per_G); + manifest_expected.add_entry(round, "MSM_ADD7", frs_per_G); + manifest_expected.add_entry(round, "MSM_ADD8", frs_per_G); + // --- 8-wide additions: new MSM point 
coordinates --- + manifest_expected.add_entry(round, "MSM_X5", frs_per_G); + manifest_expected.add_entry(round, "MSM_Y5", frs_per_G); + manifest_expected.add_entry(round, "MSM_X6", frs_per_G); + manifest_expected.add_entry(round, "MSM_Y6", frs_per_G); + manifest_expected.add_entry(round, "MSM_X7", frs_per_G); + manifest_expected.add_entry(round, "MSM_Y7", frs_per_G); + manifest_expected.add_entry(round, "MSM_X8", frs_per_G); + manifest_expected.add_entry(round, "MSM_Y8", frs_per_G); + // --- 8-wide additions: new MSM collision inverses --- + manifest_expected.add_entry(round, "MSM_COLLISION_X5", frs_per_G); + manifest_expected.add_entry(round, "MSM_COLLISION_X6", frs_per_G); + manifest_expected.add_entry(round, "MSM_COLLISION_X7", frs_per_G); + manifest_expected.add_entry(round, "MSM_COLLISION_X8", frs_per_G); + // --- 8-wide additions: new MSM lambdas --- + manifest_expected.add_entry(round, "MSM_LAMBDA5", frs_per_G); + manifest_expected.add_entry(round, "MSM_LAMBDA6", frs_per_G); + manifest_expected.add_entry(round, "MSM_LAMBDA7", frs_per_G); + manifest_expected.add_entry(round, "MSM_LAMBDA8", frs_per_G); + // --- 8-wide additions: new MSM slices --- + manifest_expected.add_entry(round, "MSM_SLICE5", frs_per_G); + manifest_expected.add_entry(round, "MSM_SLICE6", frs_per_G); + manifest_expected.add_entry(round, "MSM_SLICE7", frs_per_G); + manifest_expected.add_entry(round, "MSM_SLICE8", frs_per_G); + // --- 8-wide additions: additional lookup read counts --- + manifest_expected.add_entry(round, "LOOKUP_READ_COUNTS_2", frs_per_G); + manifest_expected.add_entry(round, "LOOKUP_READ_COUNTS_3", frs_per_G); manifest_expected.add_entry(round, "TRANSCRIPT_MUL", frs_per_G); manifest_expected.add_entry(round, "TRANSCRIPT_MSM_COUNT", frs_per_G); manifest_expected.add_entry(round, "PRECOMPUTE_SCALAR_SUM", frs_per_G); @@ -126,6 +167,9 @@ class ECCVMTranscriptTests : public ::testing::Test { manifest_expected.add_entry(round, "TRANSCRIPT_PC", frs_per_G); 
manifest_expected.add_entry(round, "PRECOMPUTE_ROUND", frs_per_G); manifest_expected.add_entry(round, "PRECOMPUTE_SELECT", frs_per_G); + // --- 8-wide additions: 2nd precomputed point --- + manifest_expected.add_entry(round, "PRECOMPUTE_TX2", frs_per_G); + manifest_expected.add_entry(round, "PRECOMPUTE_TY2", frs_per_G); manifest_expected.add_entry(round, "TRANSCRIPT_ACCUMULATOR_NOT_EMPTY", frs_per_G); manifest_expected.add_entry(round, "TRANSCRIPT_ACCUMULATOR_X", frs_per_G); manifest_expected.add_entry(round, "TRANSCRIPT_ACCUMULATOR_Y", frs_per_G); From cedc13d091d569a884a308228faa98f08fa9a760 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 17 Mar 2026 16:19:52 +0000 Subject: [PATCH 18/24] chore(eccvm): update recursive verifier gate count for 8-wide layout The ECCVM recursive verifier gate count increased from 224,657 to 269,130 due to the wider relation columns and higher-degree subrelations in the 8-wide ECCVM layout. The recursive flavor inherits all entity/relation changes automatically from the native ECCVMFlavor via templates, so no code changes were needed in the stdlib recursive verifier itself. 
--- .../src/barretenberg/dsl/acir_format/gate_count_constants.hpp | 4 ++-- .../dsl/acir_format/mock_verifier_inputs.test.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/dsl/acir_format/gate_count_constants.hpp b/barretenberg/cpp/src/barretenberg/dsl/acir_format/gate_count_constants.hpp index 8077eca18fe5..fecc60544e8d 100644 --- a/barretenberg/cpp/src/barretenberg/dsl/acir_format/gate_count_constants.hpp +++ b/barretenberg/cpp/src/barretenberg/dsl/acir_format/gate_count_constants.hpp @@ -113,7 +113,7 @@ constexpr std::tuple HONK_RECURSION_CONSTANTS( // ======================================== // Gate count for Chonk recursive verification (Ultra with RollupIO) -inline constexpr size_t CHONK_RECURSION_GATES = 1493584; +inline constexpr size_t CHONK_RECURSION_GATES = 1538056; // ======================================== // Hypernova Recursion Constants @@ -147,7 +147,7 @@ inline constexpr size_t HIDING_KERNEL_ULTRA_OPS = 124; // ======================================== // Gate count for ECCVM recursive verifier (Ultra-arithmetized) -inline constexpr size_t ECCVM_RECURSIVE_VERIFIER_GATE_COUNT = 224657; +inline constexpr size_t ECCVM_RECURSIVE_VERIFIER_GATE_COUNT = 269130; // ======================================== // Goblin AVM Recursive Verifier Constants diff --git a/barretenberg/cpp/src/barretenberg/dsl/acir_format/mock_verifier_inputs.test.cpp b/barretenberg/cpp/src/barretenberg/dsl/acir_format/mock_verifier_inputs.test.cpp index 659c3cc0986c..bd739a485a12 100644 --- a/barretenberg/cpp/src/barretenberg/dsl/acir_format/mock_verifier_inputs.test.cpp +++ b/barretenberg/cpp/src/barretenberg/dsl/acir_format/mock_verifier_inputs.test.cpp @@ -23,7 +23,7 @@ static_assert(HIDING_KERNEL_PUBLIC_INPUTS_SIZE == 28, // Component proof lengths (used in Noir) static_assert(MERGE_PROOF_SIZE == 42, "MERGE_PROOF_SIZE changed - update constants.nr"); -static_assert(ECCVMFlavor::PROOF_LENGTH == 608, "ECCVM proof size changed 
- update constants.nr"); +static_assert(ECCVMFlavor::PROOF_LENGTH == 756, "ECCVM proof size changed - update constants.nr"); static_assert(IPA_PROOF_LENGTH == 64, "IPA_PROOF_LENGTH changed - update constants.nr"); static_assert(TranslatorFlavor::PROOF_LENGTH == 483, "Translator proof size changed - update constants.nr"); @@ -32,7 +32,7 @@ static_assert( ProofLength::Honk::expected_proof_size>( UltraFlavor::VIRTUAL_LOG_N) == 449, "RECURSIVE_PROOF_LENGTH changed - update constants.nr"); -static_assert(ChonkProof::PROOF_LENGTH == 1330, "CHONK_PROOF_LENGTH changed - update constants.nr"); +static_assert(ChonkProof::PROOF_LENGTH == 1478, "CHONK_PROOF_LENGTH changed - update constants.nr"); static_assert(ProofLength::MultilinearBatching::LENGTH == 121, "MultilinearBatching proof size changed - update constants.nr"); From 911f5bd8e2cf7b84e71844983ac4ff580f5a54a0 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Wed, 18 Mar 2026 09:48:17 +0000 Subject: [PATCH 19/24] chore(eccvm): update stale comments for 8-wide layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update comments across msm_builder.hpp, eccvm_flavor.hpp, and ecc_lookup_relation_impl.hpp to reflect the new 8-wide layout: - "4 point-additions per row" → 8 - "size-4 array" → size-8 - "result of four EC additions" → eight - Document msm_x/y/add/lambda/slice/collision_x 1..8 - Document precompute_s1..s8 (8 slices per row) - Document precompute_tx2/ty2 (2 points per row) - Document all 4 lookup_read_counts columns --- .../src/barretenberg/eccvm/eccvm_flavor.hpp | 50 ++++++------------- .../src/barretenberg/eccvm/msm_builder.hpp | 14 +++--- .../ecc_vm/ecc_lookup_relation_impl.hpp | 4 +- 3 files changed, 25 insertions(+), 43 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp index 17fda703f0a5..792f42b54bdd 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp +++ 
b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp @@ -588,13 +588,13 @@ class ECCVMFlavor { * precompute_round: round counter for Straus precomputation algorithm * precompute_scalar_sum: accumulating sum of Straus scalar slices * precompute_s1hi/lo: 2-bit hi/lo components of a Straus 4-bit scalar slice - * precompute_s2hilo/precompute_s3hi/loprecompute_s4hi/lo: same as above but for a total of 4 Straus - 4-bit scalar slices + * precompute_s2hi/lo through precompute_s8hi/lo: same as above, for a total of 8 Straus + 4-bit scalar slices per row * precompute_skew: Straus WNAF skew parameter for a single scalar multiplier - * precompute_tx: x-coordinate of point accumulator used to generate Straus lookup table for an input - point (from transcript) - * precompute_ty: y-coordinate of point accumulator used to generate Straus lookup table for an input - point (from transcript) + * precompute_tx: x-coordinate of first precomputed point in row (Straus lookup table entry) + * precompute_ty: y-coordinate of first precomputed point in row + * precompute_tx2: x-coordinate of second precomputed point in row (2 points per row) + * precompute_ty2: y-coordinate of second precomputed point in row * precompute_dx: x-coordinate of D = 2 * input point we are evaluating Straus over * precompute_dy: y-coordinate of D * msm_pc: point counter for Straus MSM columns @@ -606,34 +606,16 @@ class ECCVMFlavor { * msm_size_of_msm: size of multiscalar multiplication current row is a part of * msm_round: describes which round of the Straus MSM algorithm the current row represents * msm_count: number of points processed for the round indicated by `msm_round` - * msm_x1: x-coordinate of potential point in Straus MSM round - * msm_y1: y-coordinate of potential point in Straus MSM round - * msm_x2: x-coordinate of potential point in Straus MSM round - * msm_y2: y-coordinate of potential point in Straus MSM round - * msm_x3: x-coordinate of potential point in Straus MSM round - * msm_y3: 
y-coordinate of potential point in Straus MSM round - * msm_x4: x-coordinate of potential point in Straus MSM round - * msm_y4: y-coordinate of potential point in Straus MSM round - * msm_add1: are we adding msm_x1/msm_y1 into accumulator at current round? - * msm_add2: are we adding msm_x2/msm_y2 into accumulator at current round? - * msm_add3: are we adding msm_x3/msm_y3 into accumulator at current round? - * msm_add4: are we adding msm_x4/msm_y4 into accumulator at current round? - * msm_lambda1: temp variable used for ecc point addition algorithm if msm_add1 = 1 - * msm_lambda2: temp variable used for ecc point addition algorithm if msm_add2 = 1 - * msm_lambda3: temp variable used for ecc point addition algorithm if msm_add3 = 1 - * msm_lambda4: temp variable used for ecc point addition algorithm if msm_add4 = 1 - * msm_slice1: wNAF digit/slice for first add - * msm_slice2: wNAF digit/slice for second add - * msm_slice3: wNAF digit/slice for third add - * msm_slice4: wNAF digit/slice for fourth add - * msm_collision_x1: used to ensure incomplete ecc addition exceptions not triggered if msm_add1 = 1 - * msm_collision_x2: used to ensure incomplete ecc addition exceptions not triggered if msm_add2 = 1 - * msm_collision_x3: used to ensure incomplete ecc addition exceptions not triggered if msm_add3 = 1 - * msm_collision_x4: used to ensure incomplete ecc addition exceptions not triggered if msm_add4 = 1 - * lookup_read_counts_0: stores number of times a point has been read from a Straus precomputation - table (reads come from msm_x/y1, msm_x/y2) - * lookup_read_counts_1: stores number of times a point has been read from a Straus precomputation - table (reads come from msm_x/y3, msm_x/y4) + * msm_x1..msm_x8: x-coordinates of potential points in Straus MSM round (8 per row) + * msm_y1..msm_y8: y-coordinates of potential points in Straus MSM round (8 per row) + * msm_add1..msm_add8: are we adding msm_xi/msm_yi into accumulator at current round? 
+ * msm_lambda1..msm_lambda8: temp variables for ecc point addition algorithm + * msm_slice1..msm_slice8: wNAF digit/slice for each of the 8 additions per row + * msm_collision_x1..msm_collision_x8: ensure incomplete ecc addition exceptions not triggered + * lookup_read_counts_0: read counts for Straus lookup table term 0 (point 1 positive slices) + * lookup_read_counts_1: read counts for Straus lookup table term 1 (point 1 negative slices) + * lookup_read_counts_2: read counts for Straus lookup table term 2 (point 2 positive slices) + * lookup_read_counts_3: read counts for Straus lookup table term 3 (point 2 negative slices) * @return ProverPolynomials */ ProverPolynomials(const CircuitBuilder& builder) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp index f2bf17edeceb..186794216244 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp @@ -45,13 +45,13 @@ class ECCVMMSMMBuilder { bool q_double = false; bool q_skew = false; - // Each row in the MSM portion of the ECCVM can handle (up to) 4 point-additions. - // For each row in the VM we represent the point addition data via a size-4 array of + // Each row in the MSM portion of the ECCVM can handle (up to) 8 point-additions. + // For each row in the VM we represent the point addition data via a size-8 array of // AddState objects. struct AddState { bool add = false; // are we adding a point at this location in the VM? - // e.g if the MSM is of size-2 then the 3rd and 4th AddState objects will have this set - // to `false`. + // e.g if the MSM is of size-2 then the 3rd through 8th AddState objects will have this + // set to `false`. int slice = 0; // wNAF slice value. This has values in {0, ..., 15} and corresponds to an odd number in the // range {-15, -13, ..., 15} via the monotonic bijection. 
AffineElement point{ 0, 0 }; // point being added into the accumulator. (This is of the form nP, @@ -67,8 +67,8 @@ class ECCVMMSMMBuilder { arr.fill(AddState{ false, 0, { 0, 0 }, 0, 0 }); return arr; }(); - // The accumulator here is, in general, the result of four EC additions: A + Q_1 + Q_2 + Q_3 + Q_4. - // We do not explicitly store the intermediate values A + Q_1, A + Q_1 + Q_2, and A + Q_1 + Q_2 + Q_3, although + // The accumulator here is, in general, the result of eight EC additions: A + Q_1 + ... + Q_8. + // We do not explicitly store the intermediate values A + Q_1, ..., A + Q_1 + ... + Q_7, although // these values are implicitly used in the values of `AddState.lambda` and `AddState.collision_inverse`. FF accumulator_x = 0; // `(accumulator_x, accumulator_y)` is the accumulator to which I potentially want to add @@ -294,7 +294,7 @@ class ECCVMMSMMBuilder { auto& add_state = row.add_state[point_idx]; add_state.add = num_points_in_row > point_idx; int slice = add_state.add ? msm[offset + point_idx].wnaf_digits[digit_idx] : 0; - // In the MSM columns in the ECCVM circuit, we can add up to 4 points per row. + // In the MSM columns in the ECCVM circuit, we can add up to 8 points per row. 
// if `row.add_state[point_idx].add = 1`, this indicates that we want to add the // `point_idx`'th point in the MSM columns into the MSM accumulator `add_state.slice` = A // 4-bit WNAF slice of the scalar multiplier associated with the point we are adding (the diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation_impl.hpp index be82d88d3ef1..a41f5ecc9f59 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation_impl.hpp @@ -18,8 +18,8 @@ namespace bb { * Table writes: ECCVMPointTable columns: we define Straus point table: * { {0, -15[P]}, {1, -13[P]}, ..., {15, 15[P]} } * write source: { precompute_round, precompute_tx, precompute_ty } - * Table reads: ECCVMMSM columns. Each row adds up to 4 points into MSM accumulator - * read source: { msm_slice1, msm_x1, msm_y1 }, ..., { msm_slice4, msm_x4, msm_y4 } + * Table reads: ECCVMMSM columns. Each row adds up to 8 points into MSM accumulator + * read source: { msm_slice1, msm_x1, msm_y1 }, ..., { msm_slice8, msm_x8, msm_y8 } * @param evals transformed to `evals + C(in(X)...)*scaling_factor` * @param in an std::array containing the fully extended Accumulator edges. * @param parameters contains beta, gamma, and public_input_delta, .... From 4b9154d0938d81a43ab3a01f8b7fe7c8ca744f78 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Wed, 18 Mar 2026 09:58:21 +0000 Subject: [PATCH 20/24] chore(eccvm): revert benchmark file changes Remove ECCVM univariate benchmark additions from relations.bench.cpp to keep this PR focused on the 8-wide layout change. 
--- .../benchmark/relations_bench/relations.bench.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp index e58f9bb6665e..1a2acbf93f2e 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp @@ -86,7 +86,7 @@ BENCHMARK(execute_relation_for_values>); BENCHMARK(execute_relation_for_values>); -// ECCVM (verifier work) +// ECCVM BENCHMARK(execute_relation_for_values>); BENCHMARK(execute_relation_for_values>); BENCHMARK(execute_relation_for_values>); @@ -95,15 +95,6 @@ BENCHMARK(execute_relation_for_values>) BENCHMARK(execute_relation_for_values>); BENCHMARK(execute_relation_for_values>); -// ECCVM (Sumcheck prover work — univariate accumulation) -BENCHMARK(execute_relation_for_univariates>); -BENCHMARK(execute_relation_for_univariates>); -BENCHMARK(execute_relation_for_univariates>); -BENCHMARK(execute_relation_for_univariates>); -BENCHMARK(execute_relation_for_univariates>); -BENCHMARK(execute_relation_for_univariates>); -BENCHMARK(execute_relation_for_univariates>); - } // namespace bb::benchmark::relations BENCHMARK_MAIN(); From 393dfc054329227305a2ef444df27c29b716e14b Mon Sep 17 00:00:00 2001 From: notnotraju Date: Wed, 18 Mar 2026 11:05:34 +0000 Subject: [PATCH 21/24] chore(eccvm): fix stale degree comments and design doc proof size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix ECCVM proof size in design doc: ~716 → 756 Fr (confirmed by static_assert in proof_compression.hpp) - Correct set relation degree comments: denominator sub-products are 16 (8 add-gated tuples) + 6 (transcript z1/z2) + 4 (MSM output) = 26, not the previously claimed 28. 
Full GP subrelation degree = 27, partial length upper bound = 29. - Fix duplicate comment blocks in set relation numerator/denominator third term docstrings - Update inline cumulative degree annotations throughout compute_grand_product_numerator/denominator --- .../barretenberg/eccvm/WIDE_SHORT_DESIGN.md | 188 ++++++++++++++++++ .../ecc_vm/ecc_set_relation_impl.hpp | 37 ++-- 2 files changed, 203 insertions(+), 22 deletions(-) create mode 100644 barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md diff --git a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md new file mode 100644 index 000000000000..cc45cc9a5e46 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md @@ -0,0 +1,188 @@ +# ECCVM Wider-and-Shorter: Design Spec + +## Goal + +Halve the Precomputed and MSM table heights by doubling their width (`WNAF_DIGITS_PER_ROW` 4→8, `ADDITIONS_PER_ROW` 4→8). This halves the number of rows consumed per scalar, so with `CONST_ECCVM_LOG_N` staying at 15 we can handle twice as many scalar multiplications — i.e. **double the stack depth**. + +Alternatively, if stack depth is sufficient, `CONST_ECCVM_LOG_N` could drop from 15→14, halving the IPA MSM and speeding up native proving and root rollup verification. + +## Impact + +Here, "split" refers to the option of splitting up the multiset-equality check into three wires, which would help with the degree. However, this seems to not be especially critical. + +| Metric | Before | After (no split) | After (with split) | +|--------|:------:|:-----------------:|:------------------:| +| NUM_WIRES | 85 | 121 (+36) | 123 (+38) | +| Precompute rows/scalar | 8 | 4 | 4 | +| MSM ADD rows/digit | ⌈m/4⌉ | ⌈m/8⌉ | ⌈m/8⌉ | +| MAX_PARTIAL_RELATION_LENGTH | 22 | 29 | 17 | +| ECCVM proof size (Fr) | 608 | 756 (+148) | ~764 (+156) | + +The split adds +2 columns (z_perm_b, z_perm_c) and +8 proof elements but drops degree from 29→17. 
+ +Note: proof size = 756 was confirmed via `static_assert` in `proof_compression.hpp`. + +## Implementation plan + +### Step 1: Widen tables (ADDITIONS_PER_ROW + WNAF_DIGITS_PER_ROW, together) + +These **cannot** be separated: the lookup relation's table terms reference `Tx2/Ty2` (from WNAF widening) but also need `NUM_LOOKUP_TERMS = 8` (from additions widening). Ship as one diff. + +#### 1a. Constants + +**`eccvm_builder_types.hpp`:** +```cpp +WNAF_DIGITS_PER_ROW = 8; // was 4 +ADDITIONS_PER_ROW = 8; // was 4 +DOUBLINGS_PER_ROW = NUM_WNAF_DIGIT_BITS; // NEW, always 4 +``` + +#### 1b. Builders + +**`msm_builder.hpp`:** +- `MSMRow::add_state`: `std::array` → `` (line 64) +- DOUBLE loop (line 358): change bound from `ADDITIONS_PER_ROW` to `DOUBLINGS_PER_ROW` +- Trace sizing (lines 243, 288): replace hardcoded `* 4` with `* ADDITIONS_PER_ROW` +- All other loops already use `ADDITIONS_PER_ROW` — auto-adjust + +**`precomputed_tables_builder.hpp`:** +- Remove `static_assert(WNAF_DIGITS_PER_ROW == 4)` (line 66) and `num_rows_per_scalar == POINT_TABLE_SIZE / 2` (line 57) +- `PointTablePrecomputationRow`: add `s9..s16` (8 more 2-bit slices), add `precompute_accumulator2` +- `num_rows_per_scalar` = 32/8 = 4. Each row stores 2 precomputed points: `table[POINT_TABLE_SIZE - 1 - 2*i]` and `table[POINT_TABLE_SIZE - 2 - 2*i]` +- Digit loop: 8 digits → 16 two-bit slices (s1–s16) +- Horner: `prev_sum * 2^32 + row_chunk` (was `2^16`) + +#### 1c. 
Flavor columns + +**`eccvm_flavor.hpp` — add to `WireNonShiftedEntities` (+36 columns):** + +| Group | New columns | Count | +|-------|------------|:-----:| +| Precompute slices | `precompute_s5hi..s8lo` | +8 | +| Precompute 2nd point | `precompute_tx2, precompute_ty2` | +2 | +| MSM adds | `msm_add5..add8` | +4 | +| MSM points | `msm_x5..x8, msm_y5..y8` | +8 | +| MSM collision | `msm_collision_x5..x8` | +4 | +| MSM lambdas | `msm_lambda5..lambda8` | +4 | +| MSM slices | `msm_slice5..slice8` | +4 | +| Lookup read counts | `lookup_read_counts_2, _3` | +2 | + +Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows existing patterns at lines 706–713). + +#### 1d. Relations + +**`ecc_msm_relation_impl.hpp`:** +- Extend addition chain 4→8, skew chain 4→8. Doubling chain **unchanged** (4, lambda1–4). +- 4 new add slope subrelations + 4 skew slope subrelations (separate to prevent cancellation) +- Extend collision checks, slice-zero, addition continuity, count update to 8 +- Cross-row: `(-add8 + 1) * add1_shift`. Max partial length: 8→**12**. 
+ +**`ecc_set_relation_impl.hpp` (ECCVMSetRelation — unchanged structure):** +- Numerator: 8 slice fingerprints instead of 4 (degree 7→11), round encoding `8 * precompute_round + j` +- Denominator: 8 add-gated tuples instead of 4 (degree 8→16 in slice sub-product, total denom degree 26) +- `eccvm_set_permutation_delta`: product of 8 terms instead of 4 (update in prover + verifier) +- `SUBRELATION_PARTIAL_LENGTHS = {29, 3}` (was `{22, 3}`) + +**`ecc_lookup_relation.hpp`:** +- `NUM_LOOKUP_TERMS = 8`, `NUM_TABLE_TERMS = 4`, `LENGTH = 15` +- 4 table terms: positive/negative for each of 2 points per precompute row +- Coverage: `{0..15}` fully covered (verify in tests) + +**`ecc_wnaf_relation_impl.hpp`:** +- 8 extra range checks (s5hi–s8lo), Horner for 8 digits, `* 2^32` shift +- Round: 0–3 (was 0–7), replace hardcoded `7` with `WNAF_DIGITS_PER_ROW - 1` + +**`ecc_point_table_relation_impl.hpp`:** +- Add intra-row constraint: `(Tx, Ty) = (Tx2, Ty2) + (Dx, Dy)` +- Doubling target: `2 * (Tx2, Ty2)` at transitions (Tx2 = 1·P, not Tx = 3·P) +- Inter-row addition links via Tx2. Subrelations: 6→8. + +**`ecc_bools_relation_impl.hpp`:** +4 boolean checks for `msm_add5..8`. + +#### 1e. Post-widening MAX_PARTIAL_RELATION_LENGTH (no split) + +| Relation | Partial length | +|----------|:---:| +| Set (combined GP) | **29** | +| Lookup | 15 | +| MSM | 12 | +| WNAF | 5 | +| Point Table | 6 | +| Bools | 3 | +| **MAX** | **29** | + +### Step 2: Constants, VKs, recursive verifier + +- Update `SUBRELATION_PARTIAL_LENGTHS` in `ecc_set_relation.hpp` to `{29, 3}` +- Update static_asserts in `mock_verifier_inputs.test.cpp` +- Check `proof_compression.hpp` for hardcoded offsets +- Update `constants.nr`, run `yarn remake-constants` +- Regenerate VKs: `./test_chonk_standalone_vks_havent_changed.sh --update_inputs` +- Recursive verifier: relation changes auto-propagate via templates. 
Flavor entity changes (new columns) need manual mirroring in `stdlib/eccvm_verifier/` + +### Step 3: Test and measure + +- `eccvm_tests` after widening +- `chonk_tests`, `goblin_tests` after full integration +- Measure actual workload row counts → decide if `CONST_ECCVM_LOG_N` can drop 15→14 +- If yes: update `constants.hpp`, cascade to Noir/TS + +### Step 4 (optional): Grand product split + +Split the single `ECCVMSetRelation` grand product into 3 independent ones to drop `MAX_PARTIAL_RELATION_LENGTH` from 29→17. Only worth doing if degree 29 causes measurable performance issues. + +**Key constraint: `compute_grand_products()` supports exactly one GP per relation class** — it calls `get_grand_product_polynomial()` (singular) and `compute_grand_product_numerator/denominator()` (each returning a single scalar). There is no index-templated overload. So you must create **3 separate relation classes**, not one relation with 3 sub-products. + +**New relation classes** (replace `ECCVMSetRelation`): +- `ECCVMSetRelationSlices` — GP A: `(pc, round, wnaf_slice)` multiset. N = slice fingerprints + skew + delta. D = add-gated `(pc, round, slice)` from MSM. +- `ECCVMSetRelationPointTable` — GP B: `(pc, Px, Py, scalar)` multiset. N = point-table tuples at `point_transition`. D = transcript `z1/z2` scalar tuples. +- `ECCVMSetRelationMSMOutput` — GP C: `(pc, acc_x, acc_y, msm_size)` multiset. N = MSM accumulator at `msm_transition`. D = transcript MSM output. + +Each class provides its own `get_grand_product_polynomial()`, `compute_grand_product_numerator/denominator()`, `accumulate()`, and `skip()`. 
+ +**Post-split degrees (with widening):** + +| GP | Numerator degree | Denominator degree | Partial length | +|----|:---:|:---:|:---:| +| A (slices) | 15 | 16 | **17** | +| B (point-table) | 9 | 7 | **10** | +| C (MSM output) | 11 | 4 | **12** | + +**Flavor changes:** +- Add `z_perm_b`, `z_perm_c` to `DerivedWitnessEntities` +- Add `z_perm_b_shift`, `z_perm_c_shift` to shifted entities + `get_to_be_shifted()` +- Update `GrandProductRelations` tuple: `std::tuple, ECCVMSetRelationPointTable, ECCVMSetRelationMSMOutput>` + +**Each new relation's `skip()`:** Must check its own z_perm, not the others. + +**Constants cascade:** +2 commitments × 2 Fr + 4 evaluations × 1 Fr = ~8 Fr added to proof. Update static_asserts, Noir constants, VKs. + +## Why the grand product split is optional + +The original plan required splitting the grand product to "keep relation degrees sane." Analysis shows this isn't load-bearing: + +**Without split, after widening:** `MAX_PARTIAL_RELATION_LENGTH` goes from 22 → **29**. The combined denominator grows because the slice sub-product doubles (degree 8→16), making the cumulative denominator degree 16+6+4 = 26. The full accumulate expression `(z_perm + lagrange_first) * numerator - (z_perm_shift + lagrange_last) * denominator` has degree max(1+16, 1+26) = 27, partial length 28. We set `SUBRELATION_PARTIAL_LENGTHS[0] = 29` as a conservative upper bound. (Sub-product degrees: 16 for 8 add-gated tuples, 6 for transcript z1/z2, 4 for MSM output.) + +**Why degree 29 is acceptable:** + +1. **Sumcheck is ~40% of ECCVM proving (~500ms).** Higher degree means the prover sends a larger univariate polynomial per sumcheck round (29 evaluations instead of 22). This is a ~30% increase in sumcheck work — so maybe +150ms. That's noise compared to the IPA MSM savings from dropping `log_N` (which saves seconds in the recursive verifier). + +2. 
**Per-round communication doesn't change structurally.** The prover still sends one univariate per round for `log2(N)` rounds. Each univariate is just a few more field elements. Total sumcheck proof overhead: `(29-22) × log2(N) = ~100 extra Fr` — dwarfed by the IPA MSM savings. + +3. **The recursive verifier is only 215K gates total.** A higher-degree sumcheck means ~7 extra multiplications per round to evaluate the univariate (Horner's method, degree 28 vs 21). Over `log2(N) = 14` rounds that's ~98 extra gates — 0.05% of the 215K total. Negligible. + +4. **The GP split adds complexity for little benefit:** 3 new relation classes, 2 extra z_perm polynomials (+2 commitments, +4 evaluations in the proof), flavor/prover/verifier changes, and more surface area for bugs. All to save ~150ms of sumcheck time. + +**Bottom line:** Ship the widening without the split. If degree 29 turns out to matter later, the split can be done as a follow-up. + +## Corrections to the original spec / previous version of this plan + +1. **"Grand product split is required alongside the widening"** — It isn't. Sumcheck is ~40% of ECCVM proving (~500ms). Going from degree 22 to 29 adds ~30% to sumcheck cost (~150ms), which is negligible vs. the IPA MSM savings from dropping `log_N`. The recursive verifier is only 215K gates — a few extra gates per sumcheck round for higher-degree univariates doesn't matter. The split is a nice-to-have, not a prerequisite. + +2. **"One relation with 3 indexed grand products"** — The `compute_grand_product` library doesn't support this. `get_grand_product_polynomial()` returns a single polynomial; `compute_grand_product_numerator/denominator()` each return a single scalar. One GP per relation class. Must create **3 separate relation classes** in `GrandProductRelations`. + +3. **"Steps 2 and 3 (additions vs digits) can be done independently"** — They can't. 
The lookup table terms reference `Tx2/Ty2` (from WNAF widening) but need `NUM_LOOKUP_TERMS = 8` (from additions widening). Circular dependency. Ship together. + +4. **"Step 4: Wire up builders → flavor" as separate step** — Not a real step. You can't add flavor columns without wiring the builder. These are part of the same diff. + +5. **The degree analysis (corrected sub-product breakdown).** Current denominator is degree 18 (sub-products: 8 + 6 + 4 = 18). After widening without split, slice sub-product doubles (degree 8→16), making total denominator degree 16+6+4 = 26. The full GP subrelation degree is max(1+numerator, 1+denominator) = max(17, 27) = 27, partial length 28. We set `SUBRELATION_PARTIAL_LENGTHS[0] = 29` as a conservative upper bound. After widening with split, each GP carries only its own sub-product, so the bottleneck is GP A. diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp index 630758c9fbfc..ee84a33e599e 100644 --- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp @@ -342,15 +342,13 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE point_table_init_read = precompute_point_transition * (point_table_init_read + gamma) + (-precompute_point_transition + 1); - numerator *= point_table_init_read; // degree-9 + numerator *= point_table_init_read; // degree-13 (cumulative: 11 + 2) } /** - * @brief Third term: tuple of (pc, P.x, P.y, msm-size) from ECCVMMSMRelation. * @brief Third term: tuple of (pc, P.x, P.y, msm-size) from ECCVMMSMRelation. * (P.x, P.y) is the output of a multi-scalar-multiplication evaluated in ECCVMMSMRelation. 
* We need to validate that the same values (P.x, P.y) are present in the Transcript columns and describe a * multi-scalar multiplication of size `msm-size`, starting at `pc`. - * multi-scalar multiplication of size `msm-size`, starting at `pc`. * * If `msm_transition_shift == 1`, this indicates the current row is the last row of a multiscalar * multiplication evaluation. The output of the MSM will be present on `(msm_accumulator_x_shift, @@ -388,7 +386,7 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_numerator(const AllE // msm_result_write = degree 2 msm_result_write = msm_transition_shift * (msm_result_write + gamma) + (-msm_transition_shift + 1); - numerator *= msm_result_write; // degree-11 + numerator *= msm_result_write; // degree-16 (cumulative: 13 + 3; msm_transition_shift is degree 2) } return numerator; } @@ -399,8 +397,8 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al { using View = typename Accumulator::View; - // OPTIMIZE(@zac-williamson). The degree of the denominator is 28, making overall relation degree ~29. - // Can potentially optimize by refining the algebra. + // OPTIMIZE(@zac-williamson). The degree of the denominator is 26, making overall relation degree 27 + // (partial length upper bound = 29). Can potentially optimize by refining the algebra. const auto& gamma = params.gamma; const auto& beta = params.beta; const auto& beta_sqr = params.beta_sqr; @@ -518,7 +516,7 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al auto transcript_input1 = transcript_pc + transcript_Px * beta + transcript_Py * beta_sqr + z1 * beta_cube + second_term_tag; // degree = 1 auto transcript_input2 = (transcript_pc - lookup_first) + transcript_Px * cube_root_unity * beta - - transcript_Py * beta_sqr + z2 * beta_cube + second_term_tag; // degree = 2 + transcript_Py * beta_sqr + z2 * beta_cube + second_term_tag; // degree = 1 // The following diagram expresses a fingerprint of part of the tuple. 
It does not include `transcript_pc` and // has not weighted the X and Y with beta and beta_sqr respectively. The point is nonetheless to show exactly @@ -537,14 +535,14 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al // | 1 | 1 | 0 | 1 | 1 | // | 1 | 1 | 1 | 1 | 1 | transcript_input1 = (transcript_input1 + gamma) * lookup_first + (-lookup_first + 1); // degree 2 - transcript_input2 = (transcript_input2 + gamma) * lookup_second + (-lookup_second + 1); // degree 3 + transcript_input2 = (transcript_input2 + gamma) * lookup_second + (-lookup_second + 1); // degree 2 - // transcript_product = degree 6 + // transcript_product = degree 5 (deg2 * deg2 * deg1 + deg0) auto transcript_product = (transcript_input1 * transcript_input2) * (-base_infinity + 1) + base_infinity; - // point_table_init_write = degree 7 + // point_table_init_write = degree 6 (deg1 * deg5 + deg1) auto point_table_init_write = transcript_mul * transcript_product + (-transcript_mul + 1); - denominator *= point_table_init_write; // degree-25 + denominator *= point_table_init_write; // degree-22 (cumulative: 16 + 6) } /** * @brief Third term: tuple of (pc, P.x, P.y, msm-size) from ECCVMTranscriptRelation. @@ -554,12 +552,6 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al * `transcript_pc` and has size `transcript_msm_count`. * @note In the case of an honest prover, `(transcript_msm_output_x, transcript_msm_output_y)` is the value of the * just-completed MSM + `OFFSET` (as this is what the MSM table computes with to avoid branch logic.) - * - * in `transcript_msm_output_x, transcript_msm_output_y`, for a given multi-scalar multiplication starting at - * `transcript_pc` and has size `transcript_msm_count`. - * @note In the case of an honest prover, `(transcript_msm_output_x, transcript_msm_output_y)` is the value of the - * just-completed MSM + `OFFSET` (as this is what the MSM table computes with to avoid branch logic.) 
- * */ { const auto& transcript_pc_shift = View(in.transcript_pc_shift); @@ -575,11 +567,12 @@ Accumulator ECCVMSetRelationImpl::compute_grand_product_denominator(const Al // do not add to count if point at infinity! auto full_msm_count = transcript_msm_count + transcript_mul * ((-z1_zero + 1) + (-z2_zero + 1)) * (-base_infinity + 1); - // msm_result_read = degree 2 + // msm_result_read = degree 3 (dominated by full_msm_count which is degree 3) auto msm_result_read = transcript_pc_shift + transcript_msm_x * beta + transcript_msm_y * beta_sqr + full_msm_count * beta_cube + third_term_tag; + // after gating by transcript_msm_transition (degree 1): degree 1 * degree 3 + degree 1 = degree 4 msm_result_read = transcript_msm_transition * (msm_result_read + gamma) + (-transcript_msm_transition + 1); - denominator *= msm_result_read; // degree-28 + denominator *= msm_result_read; // degree-26 (cumulative: 22 + 4) } return denominator; } @@ -605,10 +598,10 @@ void ECCVMSetRelationImpl::accumulate(ContainerOverSubrelations& accumulator using View = typename Accumulator::View; using ShortView = typename std::tuple_element_t<1, ContainerOverSubrelations>::View; - // degree-15 (8 slices + skew + delta + second term + third term) + // numerator degree = 16 (8 slice fingerprints + skew + delta + second term + third term) Accumulator numerator_evaluation = compute_grand_product_numerator(in, params); - // degree-27 (8 add-gated tuples + second term + third term) + // denominator degree = 26 (16 from 8 add-gated tuples + 6 from second term + 4 from third term) Accumulator denominator_evaluation = compute_grand_product_denominator(in, params); const auto& lagrange_first = View(in.lagrange_first); @@ -619,7 +612,7 @@ void ECCVMSetRelationImpl::accumulate(ContainerOverSubrelations& accumulator const auto& z_perm_shift = View(in.z_perm_shift); const auto& z_perm_shift_short = ShortView(in.z_perm_shift); - // degree-28 + // full expression degree = max(1+16, 1+26) = 27; partial length 
upper bound = 29 std::get<0>(accumulator) += ((z_perm + lagrange_first) * numerator_evaluation - (z_perm_shift + lagrange_last) * denominator_evaluation) * scaling_factor; From 913c4eb113a82cbe1dca40d7f861148dbb63cc18 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Wed, 18 Mar 2026 13:32:32 +0000 Subject: [PATCH 22/24] chore(eccvm): make MaxCapacityPassing test compute max apps from LOG_N Instead of hardcoding 17 apps, compute the max number of app circuits that fit in the ECCVM based on CONST_ECCVM_LOG_N. Each app adds ~1104 ECCVM rows with ~1494 base overhead. At LOG_N=15: 28 apps; LOG_N=14: 13. --- .../cpp/src/barretenberg/chonk/chonk.test.cpp | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/chonk/chonk.test.cpp b/barretenberg/cpp/src/barretenberg/chonk/chonk.test.cpp index 3ec89c9fdf1e..839b54d4970d 100644 --- a/barretenberg/cpp/src/barretenberg/chonk/chonk.test.cpp +++ b/barretenberg/cpp/src/barretenberg/chonk/chonk.test.cpp @@ -398,14 +398,31 @@ TEST_F(ChonkTests, VKIndependenceFromCircuitSize) }; /** - * @brief Test to establish the "max" number of apps that can be accumulated due to limitations on the ECCVM size - * + * @brief Test to establish the "max" number of apps that can be accumulated due to limitations on the ECCVM size. + * @details With 8-wide layout (ADDITIONS_PER_ROW = 8, WNAF_DIGITS_PER_ROW = 8), each app adds ~1104 ECCVM rows with a + * base overhead of ~1494 rows. At CONST_ECCVM_LOG_N = 15 (32768 rows): max apps = floor((32768 - 4 - 1494) / 1104) = + * 28. At CONST_ECCVM_LOG_N = 14 (16384 rows): max apps = 13. */ HEAVY_TEST(ChonkKernelCapacity, MaxCapacityPassing) { bb::srs::init_file_crs_factory(bb::srs::bb_crs_path()); - const size_t NUM_APP_CIRCUITS = 17; + // Each app adds ~1104 ECCVM rows; base overhead ~1494 rows. 
+ // LOG_N=15: floor((32768 - 4 - 1494) / 1104) = 28 + // LOG_N=14: floor((16384 - 4 - 1494) / 1104) = 13 + constexpr size_t ECCVM_ROWS_PER_APP = 1104; + constexpr size_t ECCVM_BASE_ROWS = 1494; + constexpr size_t ECCVM_FIXED_SIZE = 1UL << CONST_ECCVM_LOG_N; + constexpr size_t MAX_USABLE_ROWS = ECCVM_FIXED_SIZE - NUM_DISABLED_ROWS_IN_SUMCHECK; + const size_t NUM_APP_CIRCUITS = (MAX_USABLE_ROWS - ECCVM_BASE_ROWS) / ECCVM_ROWS_PER_APP; + + info("MaxCapacityPassing: LOG_N=", + CONST_ECCVM_LOG_N, + ", ECCVM_FIXED_SIZE=", + ECCVM_FIXED_SIZE, + ", max apps=", + NUM_APP_CIRCUITS); + auto [proof, vk] = ChonkTests::accumulate_and_prove_ivc(NUM_APP_CIRCUITS); bool verified = ChonkTests::verify_chonk(proof, vk); From f19f2852cdd5ec932fbc23752d1f746881492365 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Wed, 18 Mar 2026 13:53:21 +0000 Subject: [PATCH 23/24] =?UTF-8?q?chore(eccvm):=20add=20Step=203=20to=20des?= =?UTF-8?q?ign=20doc=20=E2=80=94=20double=20the=20doublings=20per=20row?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plan to pack 2 doubling rounds into 1 MSM row (DOUBLINGS_PER_ROW 4→8), cutting doubling rows from 31 to 16 per MSM. Reuses lambda5..8 on doubling rows (free since q_add/q_double are mutually exclusive). No new columns needed. MSM formula: 33*ceil(m/8)+16 (was +31). 
--- .../barretenberg/eccvm/WIDE_SHORT_DESIGN.md | 99 ++++++++++++++++++- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md index cc45cc9a5e46..6f44b45a8884 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md +++ b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md @@ -15,6 +15,8 @@ Here, "split" refers to the option of splitting up the multiset-equality check i | NUM_WIRES | 85 | 121 (+36) | 123 (+38) | | Precompute rows/scalar | 8 | 4 | 4 | | MSM ADD rows/digit | ⌈m/4⌉ | ⌈m/8⌉ | ⌈m/8⌉ | +| MSM doubling rows | 31 | 16 (with Step 3) | 16 | +| MSM row formula | `33·⌈m/4⌉+31` | `33·⌈m/8⌉+16` | `33·⌈m/8⌉+16` | | MAX_PARTIAL_RELATION_LENGTH | 22 | 29 | 17 | | ECCVM proof size (Fr) | 608 | 756 (+148) | ~764 (+156) | @@ -72,7 +74,7 @@ Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows #### 1d. Relations **`ecc_msm_relation_impl.hpp`:** -- Extend addition chain 4→8, skew chain 4→8. Doubling chain **unchanged** (4, lambda1–4). +- Extend addition chain 4→8, skew chain 4→8. Doubling chain unchanged in Step 1 (4, lambda1–4); widened to 8 in Step 3. - 4 new add slope subrelations + 4 skew slope subrelations (separate to prevent cancellation) - Extend collision checks, slice-zero, addition continuity, count update to 8 - Cross-row: `(-add8 + 1) * add1_shift`. Max partial length: 8→**12**. @@ -120,14 +122,101 @@ Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows - Regenerate VKs: `./test_chonk_standalone_vks_havent_changed.sh --update_inputs` - Recursive verifier: relation changes auto-propagate via templates. 
Flavor entity changes (new columns) need manual mirroring in `stdlib/eccvm_verifier/` -### Step 3: Test and measure +### Step 3: Double the doublings per row (DOUBLINGS_PER_ROW 4→8) -- `eccvm_tests` after widening -- `chonk_tests`, `goblin_tests` after full integration +Pack 2 doubling rounds into 1 row, cutting doubling rows from 31 to 16 per MSM. This requires **no new columns**: on doubling rows, `lambda5..lambda8` are unused (they're only for addition slots 5-8 on add rows), and `q_add`/`q_double`/`q_skew` are mutually exclusive. So we reuse `lambda5..lambda8` for the second set of 4 doublings on doubling rows. + +**Why this matters:** Without this, the 8-wide change only achieves ~1.65x capacity (28 apps) instead of the expected 2x. The 31 doubling rows per MSM are a fixed cost that doesn't benefit from the additions widening. With this fix, the MSM row formula changes: + +| | Old (4-wide) | After Step 1 (8-wide, 4 dbl/row) | After Step 3 (8-wide, 8 dbl/row) | +|--|:--:|:--:|:--:| +| MSM rows | `33·⌈m/4⌉ + 31` | `33·⌈m/8⌉ + 31` | `33·⌈m/8⌉ + 16` | +| Max apps (LOG_N=15) | 17 | 28 | ~30 | + +#### 3a. Constants + +**`eccvm_builder_types.hpp`:** +```cpp +DOUBLINGS_PER_ROW = 2 * NUM_WNAF_DIGIT_BITS; // was NUM_WNAF_DIGIT_BITS (4), now 8 +``` + +#### 3b. Builder (`msm_builder.hpp`) + +The doubling loop (currently iterating `DOUBLINGS_PER_ROW = 4` times per doubling row) now iterates 8 times. Each doubling row performs 8 point doublings (= 2 rounds of 4 doublings each = multiply accumulator by 2^8 = 256). + +- The doubling row generation loop at line ~346 already uses `DOUBLINGS_PER_ROW` — it will auto-adjust to 8. +- The number of doubling rows changes: currently `NUM_WNAF_DIGITS_PER_SCALAR - 1 = 31` rows. With 2 rounds per row: `ceil(31/2) = 16` rows. But 31 is odd, so the last doubling row only does 4 doublings (1 round), not 8. 
Must handle this: either pad to 32 rounds (adding an extra no-op doubling at the start), or track a "half-doubling" flag for the last row. + +**Simplest approach:** Keep 31 rounds, emit `ceil(31/2) = 16` doubling rows. The first 15 rows each do 8 doublings (2 rounds). The last row does 4 doublings (1 round) with `lambda5..8` unused/zeroed. The relation must handle this — use the existing `add_state[i].point` slots for "is this doubling slot active" or simply check if `point_idx < actual_doublings_this_row`. + +Actually, even simpler: the relation doesn't need to know whether a doubling slot is "active" — the doubling chain is purely sequential. If we always do 8 doublings, the last doubling row would do an extra 4 doublings that shouldn't happen. So we need a selector or convention: + +**Option A:** Change from 31 to 30 doubling rounds by adjusting the Straus algorithm: use `NUM_WNAF_DIGITS_PER_SCALAR = 32` digit slots but start the MSM at digit 31 instead of digit 32. This makes the number of inter-digit doublings 30 (even), giving exactly 15 doubling rows with 8 doublings each. This requires a small tweak to the scalar decomposition — the leading digit is constrained to be in a smaller range (no change to security, just a tighter range check on the most significant digit). + +**Option B (recommended):** Keep 31 doubling rounds, emit 16 rows. The last row uses only 4 doublings (lambda1..4). Gate the second set of 4 doublings with a new boolean column `q_double_second` (or reuse a spare signal, e.g. the last doubling row has `msm_round` that distinguishes it). Alternatively, the relation just checks: if `round == 0` (first round after the leading digit), only 4 doublings; otherwise, 8. + +**Option C (simplest):** Add one extra dummy doubling round (32 total inter-digit gaps by starting from an identity-like state), making it 32 rounds = 16 rows × 8 doublings. The extra doubling at the end is a no-op since the skew round follows. 
+ +Recommend **Option B** — it's the most straightforward and doesn't change the scalar decomposition. The relation already has `q_double` and `round` available. On a "half" doubling row, constrain `acc_x_shift = x_d4` (after 4 doublings) instead of `acc_x_shift = x_d8` (after 8). The condition is: this is the last doubling row, i.e., the row where `round` transitions from digit 0 to the skew round. In practice, every other doubling row can be detected by checking if the *next* doubling row follows (via `q_double_shift`) or if an add/skew row follows. + +#### 3c. Relation (`ecc_msm_relation_impl.hpp`) + +Currently the doubling chain does: +``` +[x_d1, y_d1] = dbl(acc_x, acc_y, lambda1) +[x_d2, y_d2] = dbl(x_d1, y_d1, lambda2) +[x_d3, y_d3] = dbl(x_d2, y_d2, lambda3) +[x_d4, y_d4] = dbl(x_d3, y_d3, lambda4) +constrain: acc_x_shift = x_d4, acc_y_shift = y_d4 +``` + +Extend to: +``` +[x_d5, y_d5] = dbl(x_d4, y_d4, lambda5) +[x_d6, y_d6] = dbl(x_d5, y_d5, lambda6) +[x_d7, y_d7] = dbl(x_d6, y_d6, lambda7) +[x_d8, y_d8] = dbl(x_d7, y_d7, lambda8) +``` + +For a "full" doubling row (2 rounds): `acc_shift = (x_d8, y_d8)` +For a "half" doubling row (1 round, last one): `acc_shift = (x_d4, y_d4)` + +The output constraint becomes: +``` +q_double * q_double_shift * (acc_x_shift - x_d8) = 0 // full: next row is also double +q_double * (-q_double_shift + 1) * (acc_x_shift - x_d4) = 0 // half: next row is NOT double +``` +(Same for y.) This adds 2 subrelations and replaces the existing 2 output subrelations (indices 10, 11). Max degree: `q_double * q_double_shift * (acc_x_shift - x_d8)` = degree 1+1+1 = 3. No increase to MSM relation max partial length (still 12). + +New doubling slope subrelations for `lambda5..8`: 4 new subrelations (same structure as existing `double_slope_relation1..4`). Total MSM subrelations: 67 + 4 + 2 = ~73. (The +2 is for splitting the output constraint into full/half cases; the original 2 are replaced.) + +#### 3d. 
Row tracker (`eccvm_row_tracker.hpp`) + +Update `num_eccvm_msm_rows`: +```cpp +const size_t num_double_rounds = eccvm::NUM_WNAF_DIGITS_PER_SCALAR - 1; // 31 +const size_t num_double_rows = (num_double_rounds + 1) / 2; // ceil(31/2) = 16 +``` + +#### 3e. Capacity impact + +With `DOUBLINGS_PER_ROW = 8`: +- MSM rows per MSM: `33 * ceil(m/8) + 16` (was `33 * ceil(m/8) + 31`) +- Per-app saving: ~15 fewer doubling rows per MSM × ~2 MSMs per app ≈ ~30 rows/app +- Expected max apps at LOG_N=15: ~30 (up from 28, closer to the theoretical 2x of 34) + +#### 3f. No new columns needed + +This is key: `lambda5..lambda8` already exist in the flavor for additions 5-8. On doubling rows (`q_double = 1`), additions are inactive (`q_add = 0`), so `lambda5..8` are free to be repurposed for doublings 5-8. The relation just needs to read them in both the addition and doubling sections, gated by the respective selectors. + +### Step 4: Test and measure + +- `eccvm_tests` after doubling widening +- `chonk_tests` MaxCapacityPassing — verify increased capacity - Measure actual workload row counts → decide if `CONST_ECCVM_LOG_N` can drop 15→14 - If yes: update `constants.hpp`, cascade to Noir/TS -### Step 4 (optional): Grand product split +### Step 5 (optional): Grand product split Split the single `ECCVMSetRelation` grand product into 3 independent ones to drop `MAX_PARTIAL_RELATION_LENGTH` from 29→17. Only worth doing if degree 29 causes measurable performance issues. From fcd3c7660d5a520d82eeaac6aba07f658c491114 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Wed, 18 Mar 2026 14:08:36 +0000 Subject: [PATCH 24/24] chore(eccvm): document why doubling widening is infeasible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 31 doubling rows per MSM cannot be halved because each occurs between consecutive digit-slot ADD phases in the Straus algorithm. 
Combining two DBL rounds into one row would require 8-bit digits (point table size 256), which is impractical. The 8-wide change achieves ~1.65x capacity (17→28 apps), not 2x. --- .../barretenberg/eccvm/WIDE_SHORT_DESIGN.md | 99 +++---------------- 1 file changed, 14 insertions(+), 85 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md index 6f44b45a8884..a2fff219be0c 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md +++ b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md @@ -15,8 +15,8 @@ Here, "split" refers to the option of splitting up the multiset-equality check i | NUM_WIRES | 85 | 121 (+36) | 123 (+38) | | Precompute rows/scalar | 8 | 4 | 4 | | MSM ADD rows/digit | ⌈m/4⌉ | ⌈m/8⌉ | ⌈m/8⌉ | -| MSM doubling rows | 31 | 16 (with Step 3) | 16 | -| MSM row formula | `33·⌈m/4⌉+31` | `33·⌈m/8⌉+16` | `33·⌈m/8⌉+16` | +| MSM doubling rows | 31 | 31 (unchanged) | 31 | +| MSM row formula | `33·⌈m/4⌉+31` | `33·⌈m/8⌉+31` | `33·⌈m/8⌉+31` | | MAX_PARTIAL_RELATION_LENGTH | 22 | 29 | 17 | | ECCVM proof size (Fr) | 608 | 756 (+148) | ~764 (+156) | @@ -74,7 +74,7 @@ Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows #### 1d. Relations **`ecc_msm_relation_impl.hpp`:** -- Extend addition chain 4→8, skew chain 4→8. Doubling chain unchanged in Step 1 (4, lambda1–4); widened to 8 in Step 3. +- Extend addition chain 4→8, skew chain 4→8. Doubling chain **unchanged** (4, lambda1–4). - 4 new add slope subrelations + 4 skew slope subrelations (separate to prevent cancellation) - Extend collision checks, slice-zero, addition continuity, count update to 8 - Cross-row: `(-add8 + 1) * add1_shift`. Max partial length: 8→**12**. 
@@ -122,101 +122,30 @@ Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows - Regenerate VKs: `./test_chonk_standalone_vks_havent_changed.sh --update_inputs` - Recursive verifier: relation changes auto-propagate via templates. Flavor entity changes (new columns) need manual mirroring in `stdlib/eccvm_verifier/` -### Step 3: Double the doublings per row (DOUBLINGS_PER_ROW 4→8) +### Step 3 (infeasible): Double the doublings per row -Pack 2 doubling rounds into 1 row, cutting doubling rows from 31 to 16 per MSM. This requires **no new columns**: on doubling rows, `lambda5..lambda8` are unused (they're only for addition slots 5-8 on add rows), and `q_add`/`q_double`/`q_skew` are mutually exclusive. So we reuse `lambda5..lambda8` for the second set of 4 doublings on doubling rows. +~~Pack 2 doubling rounds into 1 row, cutting doubling rows from 31 to 16.~~ -**Why this matters:** Without this, the 8-wide change only achieves ~1.65x capacity (28 apps) instead of the expected 2x. The 31 doubling rows per MSM are a fixed cost that doesn't benefit from the additions widening. With this fix, the MSM row formula changes: +**This doesn't work.** The 31 doubling rounds are structurally tied to the Straus algorithm: each one occurs between two consecutive digit-slot ADD phases. The sequence is: -| | Old (4-wide) | After Step 1 (8-wide, 4 dbl/row) | After Step 3 (8-wide, 8 dbl/row) | -|--|:--:|:--:|:--:| -| MSM rows | `33·⌈m/4⌉ + 31` | `33·⌈m/8⌉ + 31` | `33·⌈m/8⌉ + 16` | -| Max apps (LOG_N=15) | 17 | 28 | ~30 | - -#### 3a. Constants - -**`eccvm_builder_types.hpp`:** -```cpp -DOUBLINGS_PER_ROW = 2 * NUM_WNAF_DIGIT_BITS; // was NUM_WNAF_DIGIT_BITS (4), now 8 ``` - -#### 3b. Builder (`msm_builder.hpp`) - -The doubling loop (currently iterating `DOUBLINGS_PER_ROW = 4` times per doubling row) now iterates 8 times. Each doubling row performs 8 point doublings (= 2 rounds of 4 doublings each = multiply accumulator by 2^8 = 256). 
- -- The doubling row generation loop at line ~346 already uses `DOUBLINGS_PER_ROW` — it will auto-adjust to 8. -- The number of doubling rows changes: currently `NUM_WNAF_DIGITS_PER_SCALAR - 1 = 31` rows. With 2 rounds per row: `ceil(31/2) = 16` rows. But 31 is odd, so the last doubling row only does 4 doublings (1 round), not 8. Must handle this: either pad to 32 rounds (adding an extra no-op doubling at the start), or track a "half-doubling" flag for the last row. - -**Simplest approach:** Keep 31 rounds, emit `ceil(31/2) = 16` doubling rows. The first 15 rows each do 8 doublings (2 rounds). The last row does 4 doublings (1 round) with `lambda5..8` unused/zeroed. The relation must handle this — use the existing `add_state[i].point` slots for "is this doubling slot active" or simply check if `point_idx < actual_doublings_this_row`. - -Actually, even simpler: the relation doesn't need to know whether a doubling slot is "active" — the doubling chain is purely sequential. If we always do 8 doublings, the last doubling row would do an extra 4 doublings that shouldn't happen. So we need a selector or convention: - -**Option A (recommended):** Change from 31 to 30 doubling rounds by adjusting the Straus algorithm: use `NUM_WNAF_DIGITS_PER_SCALAR = 32` digit slots but start the MSM at digit 31 instead of digit 32. This makes the number of inter-digit doublings 30 (even), giving exactly 15 doubling rows with 8 doublings each. This requires a small tweak to the scalar decomposition — the leading digit is constrained to be in a smaller range (no change to security, just a tighter range check on the most significant digit). - -**Option B:** Keep 31 doubling rounds, emit 16 rows. The last row uses only 4 doublings (lambda1..4). Gate the second set of 4 doublings with a new boolean column `q_double_second` (or reuse a spare signal, e.g. the last doubling row has `msm_round` that distinguishes it). 
Alternatively, the relation just checks: if `round == 0` (first round after the leading digit), only 4 doublings; otherwise, 8. - -**Option C (simplest):** Add one extra dummy doubling round (32 total inter-digit gaps by starting from an identity-like state), making it 32 rounds = 16 rows × 8 doublings. The extra doubling at the end is a no-op since the skew round follows. - -Recommend **Option B** — it's the most straightforward and doesn't change the scalar decomposition. The relation already has `q_double` and `round` available. On a "half" doubling row, constrain `acc_x_shift = x_d4` (after 4 doublings) instead of `acc_x_shift = x_d8` (after 8). The condition is: this is the last doubling row, i.e., the row where `round` transitions from digit 0 to the skew round. In practice, every other doubling row can be detected by checking if the *next* doubling row follows (via `q_double_shift`) or if an add/skew row follows. - -#### 3c. Relation (`ecc_msm_relation_impl.hpp`) - -Currently the doubling chain does: +ADD(d0) → DBL(×16) → ADD(d1) → DBL(×16) → ADD(d2) → ... → ADD(d31) → SKEW ``` -[x_d1, y_d1] = dbl(acc_x, acc_y, lambda1) -[x_d2, y_d2] = dbl(x_d1, y_d1, lambda2) -[x_d3, y_d3] = dbl(x_d2, y_d2, lambda3) -[x_d4, y_d4] = dbl(x_d3, y_d3, lambda4) -constrain: acc_x_shift = x_d4, acc_y_shift = y_d4 -``` - -Extend to: -``` -[x_d5, y_d5] = dbl(x_d4, y_d4, lambda5) -[x_d6, y_d6] = dbl(x_d5, y_d5, lambda6) -[x_d7, y_d7] = dbl(x_d6, y_d6, lambda7) -[x_d8, y_d8] = dbl(x_d7, y_d7, lambda8) -``` - -For a "full" doubling row (2 rounds): `acc_shift = (x_d8, y_d8)` -For a "half" doubling row (1 round, last one): `acc_shift = (x_d4, y_d4)` - -The output constraint becomes: -``` -q_double * q_double_shift * (acc_x_shift - x_d8) = 0 // full: next row is also double -q_double * (-q_double_shift + 1) * (acc_x_shift - x_d4) = 0 // half: next row is NOT double -``` -(Same for y.) This adds 2 subrelations and replaces the existing 2 output subrelations (indices 10, 11). 
Max degree: `q_double * q_double_shift * (acc_x_shift - x_d8)` = degree 1+1+1 = 3. No increase to MSM relation max partial length (still 12). - -New doubling slope subrelations for `lambda5..8`: 4 new subrelations (same structure as existing `double_slope_relation1..4`). Total MSM subrelations: 67 + 4 + 2 = ~73. (The +2 is for splitting the output constraint into full/half cases; the original 2 are replaced.) - -#### 3d. Row tracker (`eccvm_row_tracker.hpp`) - -Update `num_eccvm_msm_rows`: -```cpp -const size_t num_double_rounds = eccvm::NUM_WNAF_DIGITS_PER_SCALAR - 1; // 31 -const size_t num_double_rows = (num_double_rounds + 1) / 2; // ceil(31/2) = 16 -``` - -#### 3e. Capacity impact -With `DOUBLINGS_PER_ROW = 8`: -- MSM rows per MSM: `33 * ceil(m/8) + 16` (was `33 * ceil(m/8) + 31`) -- Per-app saving: ~15 fewer doubling rows per MSM × ~2 MSMs per app ≈ ~30 rows/app -- Expected max apps at LOG_N=15: ~30 (up from 28, closer to the theoretical 2x of 34) +Each DBL must happen before the next ADD because the additions for digit d_{j+1} operate on the already-shifted accumulator. You cannot combine two consecutive DBL rounds into one row without removing the ADD round between them. -#### 3f. No new columns needed +To remove the intervening ADD, you'd need to process digit pairs (d_j, d_{j+1}) in one cycle and do ×256 between cycles. But this changes the scalar decomposition from 4-bit to 8-bit digits, requiring a point table of size 256 (currently 16). That's impractical. -This is key: `lambda5..lambda8` already exist in the flavor for additions 5-8. On doubling rows (`q_double = 1`), additions are inactive (`q_add = 0`), so `lambda5..8` are free to be repurposed for doublings 5-8. The relation just needs to read them in both the addition and doubling sections, gated by the respective selectors. +**Bottom line:** The 31 doubling rows per MSM are an inherent cost of the 4-bit wNAF structure. 
The 8-wide change achieves ~1.65x capacity improvement (17→28 apps at LOG_N=15), not the theoretical 2x, because doubling rows don't benefit from addition widening. -### Step 4: Test and measure +### Step 3 (actual): Test and measure -- `eccvm_tests` after doubling widening -- `chonk_tests` MaxCapacityPassing — verify increased capacity +- `eccvm_tests` after widening +- `chonk_tests`, `goblin_tests` after full integration - Measure actual workload row counts → decide if `CONST_ECCVM_LOG_N` can drop 15→14 - If yes: update `constants.hpp`, cascade to Noir/TS -### Step 5 (optional): Grand product split +### Step 4 (optional): Grand product split Split the single `ECCVMSetRelation` grand product into 3 independent ones to drop `MAX_PARTIAL_RELATION_LENGTH` from 29→17. Only worth doing if degree 29 causes measurable performance issues.