From a00bb7a719f474c6b326fe622a8224a716f49771 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 13:41:58 +0000
Subject: [PATCH 01/24] eccvm: widen constants WNAF_DIGITS_PER_ROW 4->8,
 ADDITIONS_PER_ROW 4->8

This is the first step toward halving the Precomputed and MSM table heights
by doubling their width. The key changes:

- WNAF_DIGITS_PER_ROW: 4 -> 8 (process 8 wNAF digits per precompute row)
- ADDITIONS_PER_ROW: 4 -> 8 (process 8 point additions per MSM row)
- DOUBLINGS_PER_ROW: new constant, always NUM_WNAF_DIGIT_BITS (= 4)

The new DOUBLINGS_PER_ROW constant decouples the doubling chain length
(which must remain 4, matching the wNAF digit width w=4) from
ADDITIONS_PER_ROW (which we are doubling to 8). Previously, these were
conflated because ADDITIONS_PER_ROW happened to equal NUM_WNAF_DIGIT_BITS.
---
 .../cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp   | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp
index 82de4ed738cc..eb472fe1e7a0 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_builder_types.hpp
@@ -16,11 +16,12 @@ static constexpr size_t NUM_WNAF_DIGITS_PER_SCALAR = NUM_SCALAR_BITS / NUM_WNAF_
 static constexpr uint64_t WNAF_MASK = static_cast<uint64_t>((1ULL << NUM_WNAF_DIGIT_BITS) - 1ULL);
 static constexpr size_t POINT_TABLE_SIZE =
     1ULL << (NUM_WNAF_DIGIT_BITS); // Corresponds to the odd multiples of [P] between -(2^w - 1) and 2^w - 1.
-static constexpr size_t WNAF_DIGITS_PER_ROW = 4;
+static constexpr size_t WNAF_DIGITS_PER_ROW = 8;
 static constexpr size_t ADDITIONS_PER_ROW =
-    4; // In the Straus algorithm for MSM, we proceed "digit-by-digit". (Here, digit means wNAF digit.) We chunk
-       // `ADDITIONS_PER_ROW` additions, all in the *same digit-slot*, in a row of the ECCVM's MSM table. Various parts
-       // of the implemention exploit the fact that `ADDITIONS_PER_ROWS == NUM_WNAF_DIGIT_BITS`.
+    8; // In the Straus algorithm for MSM, we proceed "digit-by-digit". (Here, digit means wNAF digit.) We chunk
+       // `ADDITIONS_PER_ROW` additions, all in the *same digit-slot*, in a row of the ECCVM's MSM table.
+static constexpr size_t DOUBLINGS_PER_ROW =
+    NUM_WNAF_DIGIT_BITS; // Number of doublings per doubling row. Always 4 (= w), independent of ADDITIONS_PER_ROW.
 
 template <typename CycleGroup> struct ScalarMul {
     uint32_t pc;

From 458345dcf61bedb0c18b71ea985cd5c0829c483c Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 13:44:43 +0000
Subject: [PATCH 02/24] eccvm: update msm_builder for 8-wide additions and
 4-wide doublings

Key changes to MSMRow and trace computation:

- AddState array: hardcoded size 4 -> ADDITIONS_PER_ROW (now 8)
- Doubling loops: use DOUBLINGS_PER_ROW (= 4) instead of ADDITIONS_PER_ROW
  for the doubling phase, since we always do w=4 doublings regardless of
  how many additions we pack per row
- Trace sizing: (num_msm_rows - 2) * 4 -> * ADDITIONS_PER_ROW
- trace_index computation: * 4 -> * ADDITIONS_PER_ROW
- After doubling loops, advance trace_index by (ADDITIONS_PER_ROW -
  DOUBLINGS_PER_ROW) to skip unused slots allocated in the point trace
- Final row add_state: use ADDITIONS_PER_ROW-sized array fill
---
 .../src/barretenberg/eccvm/msm_builder.hpp    | 59 ++++++++++---------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
index 7dfe7c6ab817..e49244b2258f 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
@@ -24,6 +24,7 @@ class ECCVMMSMMBuilder {
     using MSM = bb::eccvm::MSM<CycleGroup>;
 
     static constexpr size_t ADDITIONS_PER_ROW = bb::eccvm::ADDITIONS_PER_ROW;
+    static constexpr size_t DOUBLINGS_PER_ROW = bb::eccvm::DOUBLINGS_PER_ROW;
     static constexpr size_t NUM_WNAF_DIGITS_PER_SCALAR = bb::eccvm::NUM_WNAF_DIGITS_PER_SCALAR;
 
     struct alignas(64) MSMRow {
@@ -61,10 +62,11 @@ class ECCVMMSMMBuilder {
                                       // case exceptions, i.e., we want the VM proof to fail if we're doing a point
                                       // addition where (x1 == x2). to do this, we simply provide an inverse to x1 - x2.
         };
-        std::array<AddState, 4> add_state{ AddState{ false, 0, { 0, 0 }, 0, 0 },
-                                           AddState{ false, 0, { 0, 0 }, 0, 0 },
-                                           AddState{ false, 0, { 0, 0 }, 0, 0 },
-                                           AddState{ false, 0, { 0, 0 }, 0, 0 } };
+        std::array<AddState, ADDITIONS_PER_ROW> add_state = []() {
+            std::array<AddState, ADDITIONS_PER_ROW> arr;
+            arr.fill(AddState{ false, 0, { 0, 0 }, 0, 0 });
+            return arr;
+        }();
         // The accumulator here is, in general, the result of four EC additions: A + Q_1 + Q_2 + Q_3 + Q_4.
         // We do not explicitly store the intermediate values A + Q_1, A + Q_1 + Q_2, and A + Q_1 + Q_2 + Q_3, although
         // these values are implicitly used in the values of `AddState.lambda` and `AddState.collision_inverse`.
@@ -240,14 +242,12 @@ class ECCVMMSMMBuilder {
         //   operations
         // This section sets up the data structures we need to store all intermediate ECC operations in projective form
 
-        const size_t num_point_adds_and_doubles =
-            (num_msm_rows - 2) * 4; // `num_msm_rows - 2` is the actual number of rows in the table required to compute
-                                    // the MSM; the msm table itself has a dummy row at the beginning and an extra row
-                                    // with the `x` and `y` coordinates of the accumulator at the end. (In general, the
-                                    // output of the accumulator from the computation at row `i` is present on row
-                                    // `i+1`. We multiply by 4 because each "row" of the VM processes 4 point-additions
-                                    // (and the fact that w = 4 means we must interleave with 4 doublings). This
-                                    // "corresponds" to the fact that `MSMROW.add_state` has 4 entries.
+        // Each MSM row processes up to ADDITIONS_PER_ROW point operations (additions or doublings).
+        // However, doubling rows only use DOUBLINGS_PER_ROW (= 4) slots.
+        // For the point trace, we allocate conservatively: each row uses at most ADDITIONS_PER_ROW slots.
+        // `num_msm_rows - 2` is the actual number of active rows (excluding the leading dummy and trailing
+        // accumulator row).
+        const size_t num_point_adds_and_doubles = (num_msm_rows - 2) * ADDITIONS_PER_ROW;
         const size_t num_accumulators = num_msm_rows - 1; // for every row after the first row, we have an accumulator.
         // In what follows, either p1 + p2 = p3, or p1.dbl() = p3
         // We create 1 vector to store the entire point trace. We split into multiple containers using std::span
@@ -284,9 +284,9 @@ class ECCVMMSMMBuilder {
                  0); // the Straus algorithm proceeds by incrementing through the digit-slots and doing
                      // computations *across* the `ScalarMul`s that make up our MSM. Each digit-slot therefore
                      // contributes the *ceiling* of `msm_size`/`ADDITIONS_PER_ROW`.
-            size_t trace_index =
-                (msm_row_counts[msm_idx] - 1) * 4; // tracks the index in the traces of `p1`, `p2`, `p3`, and
-                                                   // `accumulator_trace` that we are filling out
+            size_t trace_index = (msm_row_counts[msm_idx] - 1) *
+                                 ADDITIONS_PER_ROW; // tracks the index in the traces of `p1`, `p2`, `p3`, and
+                                                    // `accumulator_trace` that we are filling out
 
             // for each digit-slot (`digit_idx`), and then for each row of the VM (which does `ADDITIONS_PER_ROW` point
             // additions), we either enter in/process (`ADDITIONS_PER_ROW`) `AddState` objects, and then if necessary
@@ -342,10 +342,9 @@ class ECCVMMSMMBuilder {
                     msm_row_index++;
                 }
                 // after processing each digit-slot, we now take care of doubling (as long as we are not at the last
-                // digit). We add an `MSMRow`, `row`, whose four `AddState` objects in `row.add_state`
-                // are null, but we also populate `p1_trace`, `p2_trace`, `p3_trace`, and `is_double_or_add` for four
-                // indices, corresponding to the w=4 doubling operations we need to perform. This embodies the numerical
-                // "coincidence" that `ADDITIONS_PER_ROW == NUM_WNAF_DIGIT_BITS`
+                // digit). We add an `MSMRow`, `row`, and populate `p1_trace`, `p2_trace`, `p3_trace`, and
+                // `is_double_or_add` for DOUBLINGS_PER_ROW (= w = 4) indices, corresponding to the w doublings we
+                // need to perform between digit-slots. Note: DOUBLINGS_PER_ROW != ADDITIONS_PER_ROW in general.
                 if (digit_idx < NUM_WNAF_DIGITS_PER_SCALAR - 1) {
                     auto& row = msm_rows[msm_row_index];
                     row.msm_transition = false;
@@ -355,7 +354,7 @@ class ECCVMMSMMBuilder {
                     row.q_add = false;
                     row.q_double = true;
                     row.q_skew = false;
-                    for (size_t point_idx = 0; point_idx < ADDITIONS_PER_ROW; ++point_idx) {
+                    for (size_t point_idx = 0; point_idx < DOUBLINGS_PER_ROW; ++point_idx) {
                         auto& add_state = row.add_state[point_idx];
                         add_state.add = false;
                         add_state.slice = 0;
@@ -369,6 +368,8 @@ class ECCVMMSMMBuilder {
                         is_double_or_add[trace_index] = true;
                         trace_index++;
                     }
+                    // Skip unused trace slots for this row (ADDITIONS_PER_ROW allocated, only DOUBLINGS_PER_ROW used)
+                    trace_index += (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW);
                     accumulator_trace[msm_row_index] = accumulator;
                     msm_row_index++;
                 } else // process `wnaf_skew`, i.e., the skew digit.
@@ -436,8 +437,8 @@ class ECCVMMSMMBuilder {
         });
 
         // complete the computation of the ECCVM execution trace, by adding the affine intermediate point data
-        // i.e. row.accumulator_x, row.accumulator_y, row.add_state[0...3].collision_inverse,
-        // row.add_state[0...3].lambda
+        // i.e. row.accumulator_x, row.accumulator_y, row.add_state[i].collision_inverse,
+        // row.add_state[i].lambda
         for (size_t msm_idx = 0; msm_idx < msms.size(); msm_idx++) {
             const auto& msm = msms[msm_idx];
             size_t trace_index = ((msm_row_counts[msm_idx] - 1) * ADDITIONS_PER_ROW);
@@ -481,7 +482,7 @@ class ECCVMMSMMBuilder {
                     const FF& acc_y = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y;
                     row.accumulator_x = acc_x;
                     row.accumulator_y = acc_y;
-                    for (size_t point_idx = 0; point_idx < ADDITIONS_PER_ROW; ++point_idx) {
+                    for (size_t point_idx = 0; point_idx < DOUBLINGS_PER_ROW; ++point_idx) {
                         auto& add_state = row.add_state[point_idx];
                         add_state.collision_inverse = 0; // no notion of "different x values" for a point doubling
                         const FF& dx = p1_trace[trace_index].x;
@@ -489,6 +490,9 @@ class ECCVMMSMMBuilder {
                         add_state.lambda = ((dx + dx + dx) * dx) * inverse;
                         trace_index++;
                     }
+                    // Advance trace_index past the unused slots (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW)
+                    // These slots were allocated in the point trace but are unused for doubling rows.
+                    trace_index += (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW);
                     accumulator_index++;
                     msm_row_index++;
                 } else // this row corresponds to performing point additions to handle WNAF skew
@@ -535,10 +539,11 @@ class ECCVMMSMMBuilder {
         final_row.q_add = false;
         final_row.q_double = false;
         final_row.q_skew = false;
-        final_row.add_state = { typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 },
-                                typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 },
-                                typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 },
-                                typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 } };
+        final_row.add_state = []() {
+            std::array<typename MSMRow::AddState, ADDITIONS_PER_ROW> arr;
+            arr.fill(typename MSMRow::AddState{ false, 0, AffineElement{ 0, 0 }, 0, 0 });
+            return arr;
+        }();
 
         return { msm_rows, point_table_read_counts };
     }

From dd7f49b4f1a2d9d388291aa2cece0267e9b1aea2 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 13:47:46 +0000
Subject: [PATCH 03/24] eccvm: widen precomputed_tables_builder for 8 wNAF
 digits per row

With WNAF_DIGITS_PER_ROW doubled from 4 to 8:
- num_rows_per_scalar drops from 8 to 4 (32 digits / 8 per row)
- Each row now encodes 8 wNAF digits via 16 two-bit slices (s1..s16),
  up from 4 digits / 8 slices (s1..s8)
- Each row stores 2 precomputed points (precompute_accumulator and
  precompute_accumulator2), since we have 8 points to store across
  4 rows. Row i stores table[POINT_TABLE_SIZE-1-2i] and
  table[POINT_TABLE_SIZE-2-2i].
- Horner scalar accumulation now shifts left by 32 bits, i.e. multiplies
  by 2^32 (was 16 bits / 2^16), since each row now contributes
  8*4 = 32 bits of scalar data.
- row_chunk computation extended to sum all 8 wNAF digits.
- Removed static_assert(WNAF_DIGITS_PER_ROW == 4), replaced with
  static_assert(WNAF_DIGITS_PER_ROW == 8).
- Updated POINT_TABLE_SIZE/2 == num_rows_per_scalar*2 assert to
  reflect the new 2-points-per-row layout.
---
 .../eccvm/precomputed_tables_builder.hpp      | 78 +++++++++++++------
 1 file changed, 56 insertions(+), 22 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp
index 1057d1506af4..72926f8cb041 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp
@@ -21,15 +21,14 @@ class ECCVMPointTablePrecomputationBuilder {
     static constexpr size_t NUM_WNAF_DIGITS_PER_SCALAR = bb::eccvm::NUM_WNAF_DIGITS_PER_SCALAR;
     static constexpr size_t WNAF_DIGITS_PER_ROW = bb::eccvm::WNAF_DIGITS_PER_ROW;
     static constexpr size_t NUM_WNAF_DIGIT_BITS = bb::eccvm::NUM_WNAF_DIGIT_BITS;
-    // Note that our implementation takes advantage of a numerical coincidence:
-    // `NUM_WNAF_DIGITS_PER_SCALAR`/`WNAF_DIGITS_PER_ROW`, the number of rows per scalar multiplication, is the same as
-    // |{P, 3P, ..., (2ʷ-1)P}| = 2ʷ⁻¹ == 8, which is basically the number of multiples of P we need to precompute. (To
-    // be precise, we also compute 2P, but this occurs on every row.)
+    static constexpr size_t POINT_TABLE_SIZE = bb::eccvm::POINT_TABLE_SIZE;
+
+    // With WNAF_DIGITS_PER_ROW = 8, we have num_rows_per_scalar = 32/8 = 4.
+    // We need to store 8 precomputed points (P, 3P, ..., 15P), so we store 2 per row.
     struct PointTablePrecomputationRow {
-        // s1, ..., s8 are each 2 bits, so they jointly encode 16 bits of information, which corresponds precisely to
-        // the data of 4 wNAF digits. they are ordered from "highest order" to "lowest order". this means that s1s2
-        // encodes the first (highest order) wNAF digit in consideration, and so on. the explicit encoding is: the
-        // concatenation, s_{2i}s_{2i+1}, is naturally a number in {0, 1, ..., 15}; to obtain the corresponding wNAF
+        // s1, ..., s16 are each 2 bits, so they jointly encode 32 bits of information, which corresponds precisely to
+        // the data of 8 wNAF digits. They are ordered from "highest order" to "lowest order". The encoding is:
+        // the concatenation s_{2i-1}s_{2i} is naturally a number in {0, 1, ..., 15}; to obtain the corresponding wNAF
         // digit, multiply by 2 and subtract 15.
         int s1 = 0;
         int s2 = 0;
@@ -39,6 +38,14 @@ class ECCVMPointTablePrecomputationBuilder {
         int s6 = 0;
         int s7 = 0;
         int s8 = 0;
+        int s9 = 0;
+        int s10 = 0;
+        int s11 = 0;
+        int s12 = 0;
+        int s13 = 0;
+        int s14 = 0;
+        int s15 = 0;
+        int s16 = 0;
         bool skew = false;
         bool point_transition = false;
         uint32_t pc = 0;
@@ -47,23 +54,27 @@ class ECCVMPointTablePrecomputationBuilder {
         AffineElement precompute_accumulator{
             0, 0
         }; // contains a precomputed element, i.e., something in {P, 3P, ..., 15P}.
+        AffineElement precompute_accumulator2{
+            0, 0
+        }; // second precomputed element for this row (2 points per row now that num_rows_per_scalar = 4).
         AffineElement precompute_double{ 0, 0 };
     };
 
     static std::vector<PointTablePrecomputationRow> compute_rows(
         const std::vector<bb::eccvm::ScalarMul<CycleGroup>>& ecc_muls)
     {
-        static constexpr size_t num_rows_per_scalar = NUM_WNAF_DIGITS_PER_SCALAR / WNAF_DIGITS_PER_ROW;
-        static_assert(num_rows_per_scalar == bb::eccvm::POINT_TABLE_SIZE / 2,
-                      "precompute_accumulator fill loop assumes num_rows_per_scalar == POINT_TABLE_SIZE / 2");
+        static constexpr size_t num_rows_per_scalar = NUM_WNAF_DIGITS_PER_SCALAR / WNAF_DIGITS_PER_ROW; // 32/8 = 4
+        // We need to store POINT_TABLE_SIZE/2 = 8 precomputed points across num_rows_per_scalar = 4 rows,
+        // so 2 points per row.
+        static_assert(POINT_TABLE_SIZE / 2 == num_rows_per_scalar * 2,
+                      "precompute_accumulator fill loop assumes 2 points per row");
         const size_t num_precompute_rows = num_rows_per_scalar * ecc_muls.size() + 1;
         std::vector<PointTablePrecomputationRow> precompute_state(num_precompute_rows);
 
         // start with empty row (shiftable polynomials must have 0 as first coefficient)
         precompute_state[0] = PointTablePrecomputationRow{};
 
-        // current impl doesn't work if not 4
-        static_assert(WNAF_DIGITS_PER_ROW == 4);
+        static_assert(WNAF_DIGITS_PER_ROW == 8);
 
         parallel_for_range(ecc_muls.size(), [&](size_t start, size_t end) {
             for (size_t j = start; j < end; j++) {
@@ -73,18 +84,28 @@ class ECCVMPointTablePrecomputationBuilder {
 
                 for (size_t i = 0; i < num_rows_per_scalar; ++i) {
                     PointTablePrecomputationRow row;
+
+                    // Extract 8 wNAF digits for this row
                     const int slice0 = slices[i * WNAF_DIGITS_PER_ROW];
                     const int slice1 = slices[i * WNAF_DIGITS_PER_ROW + 1];
                     const int slice2 = slices[i * WNAF_DIGITS_PER_ROW + 2];
                     const int slice3 = slices[i * WNAF_DIGITS_PER_ROW + 3];
+                    const int slice4 = slices[i * WNAF_DIGITS_PER_ROW + 4];
+                    const int slice5 = slices[i * WNAF_DIGITS_PER_ROW + 5];
+                    const int slice6 = slices[i * WNAF_DIGITS_PER_ROW + 6];
+                    const int slice7 = slices[i * WNAF_DIGITS_PER_ROW + 7];
 
-                    // {-15, -13. ..., 13, 15} --> {0, 1, ..., 15}
+                    // {-15, -13, ..., 13, 15} --> {0, 1, ..., 15}
                     const int slice0base2 = (slice0 + 15) / 2;
                     const int slice1base2 = (slice1 + 15) / 2;
                     const int slice2base2 = (slice2 + 15) / 2;
                     const int slice3base2 = (slice3 + 15) / 2;
+                    const int slice4base2 = (slice4 + 15) / 2;
+                    const int slice5base2 = (slice5 + 15) / 2;
+                    const int slice6base2 = (slice6 + 15) / 2;
+                    const int slice7base2 = (slice7 + 15) / 2;
 
-                    // convert into 2-bit chunks
+                    // convert into 2-bit chunks (16 slices for 8 digits)
                     row.s1 = slice0base2 >> 2;
                     row.s2 = slice0base2 & 3;
                     row.s3 = slice1base2 >> 2;
@@ -93,6 +114,14 @@ class ECCVMPointTablePrecomputationBuilder {
                     row.s6 = slice2base2 & 3;
                     row.s7 = slice3base2 >> 2;
                     row.s8 = slice3base2 & 3;
+                    row.s9 = slice4base2 >> 2;
+                    row.s10 = slice4base2 & 3;
+                    row.s11 = slice5base2 >> 2;
+                    row.s12 = slice5base2 & 3;
+                    row.s13 = slice6base2 >> 2;
+                    row.s14 = slice6base2 & 3;
+                    row.s15 = slice7base2 >> 2;
+                    row.s16 = slice7base2 & 3;
                     bool last_row = (i == num_rows_per_scalar - 1);
 
                     row.skew = last_row ? entry.wnaf_skew : false;
@@ -101,10 +130,13 @@ class ECCVMPointTablePrecomputationBuilder {
 
                     // N.B. we apply a constraint that requires slice1 to be positive for the 1st row of each scalar
                     // sum. This ensures we do not have WNAF representations of negative values
-                    const int row_chunk = slice3 + slice2 * (1 << 4) + slice1 * (1 << 8) + slice0 * (1 << 12);
+                    const int64_t row_chunk = // 64-bit on purpose: |row_chunk| can reach 2^32 - 1, which overflows int
+                        slice7 + (slice6 * (1LL << 4)) + (slice5 * (1LL << 8)) + (slice4 * (1LL << 12)) +
+                        (slice3 * (1LL << 16)) + (slice2 * (1LL << 20)) + (slice1 * (1LL << 24)) + (slice0 * (1LL << 28));
 
                     bool chunk_negative = row_chunk < 0;
 
+                    // Shift by 32 bits (8 digits * 4 bits each)
                     scalar_sum = scalar_sum << (NUM_WNAF_DIGIT_BITS * WNAF_DIGITS_PER_ROW);
                     if (chunk_negative) {
                         scalar_sum -= static_cast<uint64_t>(-row_chunk);
@@ -119,12 +151,14 @@ class ECCVMPointTablePrecomputationBuilder {
                         BB_ASSERT(scalar_sum - entry.wnaf_skew, entry.scalar);
                     }
                     // the last element of the `precomputed_table` field of a `ScalarMul` is the double of the point.
-                    row.precompute_double = entry.precomputed_table[bb::eccvm::POINT_TABLE_SIZE];
-                    // fill accumulator in reverse order i.e. first row = 15[P], then 13[P], ..., 1[P]
-                    // note that this reflects a coincidence: the number of rows (per scalar multiplication) is
-                    // the number of multiples that we need to precompute. Indeed, the latter is 2ʷ⁻¹, while the former
-                    // depends both on w and on `NUM_SCALAR_BITS`.
-                    row.precompute_accumulator = entry.precomputed_table[bb::eccvm::POINT_TABLE_SIZE - 1 - i];
+                    row.precompute_double = entry.precomputed_table[POINT_TABLE_SIZE];
+                    // fill accumulators: 2 precomputed points per row, in reverse order.
+                    // Row 0: table[POINT_TABLE_SIZE-1] = 15P, table[POINT_TABLE_SIZE-2] = 13P
+                    // Row 1: table[POINT_TABLE_SIZE-3] = 11P, table[POINT_TABLE_SIZE-4] = 9P
+                    // ...
+                    // Row 3: table[POINT_TABLE_SIZE-7] = 3P,  table[POINT_TABLE_SIZE-8] = P
+                    row.precompute_accumulator = entry.precomputed_table[POINT_TABLE_SIZE - 1 - (2 * i)];
+                    row.precompute_accumulator2 = entry.precomputed_table[POINT_TABLE_SIZE - 2 - (2 * i)];
                     precompute_state[j * num_rows_per_scalar + i + 1] = (row);
                 }
             }

From 3f5fcdd04d3d5c744ccfdbea483b1e00e5cb2db3 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 13:51:09 +0000
Subject: [PATCH 04/24] eccvm: add 36 new flavor columns for 8-wide ECCVM

Updates ECCVMFlavor entity counts and column definitions:

NUM_WIRES: 85 -> 121
NUM_ALL_ENTITIES: 118 -> 156
NUM_WITNESS_ENTITIES: 87 -> 123
NUM_SHIFTED_ENTITIES: 26 -> 28

New WireNonShiftedEntities (+34 columns):
- precompute_s5hi..s8lo: 8 new 2-bit slice columns for digits 5-8
- msm_add5..add8: 4 new addition selector columns
- msm_x5..x8, msm_y5..y8: 8 new point coordinate columns
- msm_collision_x5..x8: 4 new collision inverse columns
- msm_lambda5..lambda8: 4 new slope columns
- msm_slice5..slice8: 4 new wNAF slice columns
- lookup_read_counts_2, _3: 2 new lookup read count columns

New WireToBeShiftedWithoutAccumulatorsEntities (+2 columns):
- precompute_tx2, precompute_ty2: 2nd precomputed point per row,
  needs shifting for inter-row point table constraints

Corresponding ShiftedEntities updated with precompute_tx2_shift,
precompute_ty2_shift. CommitmentLabels updated for all new columns.
---
 .../src/barretenberg/eccvm/eccvm_flavor.hpp   | 167 +++++++++++++-----
 1 file changed, 127 insertions(+), 40 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
index 8da09ea570fe..960137aeca6e 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
@@ -65,7 +65,7 @@ class ECCVMFlavor {
     // they become too small.
     static constexpr size_t ECCVM_FIXED_SIZE = 1UL << CONST_ECCVM_LOG_N;
 
-    static constexpr size_t NUM_WIRES = 85;
+    static constexpr size_t NUM_WIRES = 121; // was 85, +36 for 8-wide
 
     // The number of entities added for ZK (gemini_masking_poly)
     static constexpr size_t NUM_MASKING_POLYNOMIALS = 1;
@@ -73,16 +73,16 @@ class ECCVMFlavor {
     // The number of multivariate polynomials on which a sumcheck prover sumcheck operates (including shifts). We often
     // need containers of this size to hold related data, so we choose a name more agnostic than `NUM_POLYNOMIALS`.
     // Note: this number does not include the individual sorted list polynomials.
-    // Includes gemini_masking_poly for ZK (NUM_ALL_ENTITIES = 117 + NUM_MASKING_POLYNOMIALS)
-    static constexpr size_t NUM_ALL_ENTITIES = 118;
+    // NUM_ALL_ENTITIES = masking(1) + precomputed(4) + witness(123) + shifted(28) = 156
+    static constexpr size_t NUM_ALL_ENTITIES = 156;
     // The number of polynomials precomputed to describe a circuit and to aid a prover in constructing a satisfying
     // assignment of witnesses. We again choose a neutral name.
     static constexpr size_t NUM_PRECOMPUTED_ENTITIES = 4;
     // The total number of witness entities not including shifts.
-    // Includes gemini_masking_poly for ZK (NUM_WITNESS_ENTITIES = 86 + NUM_MASKING_POLYNOMIALS)
-    static constexpr size_t NUM_WITNESS_ENTITIES = 87;
+    // witness = non-shifted(94) + to-be-shifted-no-acc(24) + accumulators(3) + derived(2) = 123
+    static constexpr size_t NUM_WITNESS_ENTITIES = 123;
     // The number of entities in ShiftedEntities.
-    static constexpr size_t NUM_SHIFTED_ENTITIES = 26;
+    static constexpr size_t NUM_SHIFTED_ENTITIES = 28; // was 26, +2 for precompute_tx2/ty2 shifts
     // The number of entities in DerivedWitnessEntities that are not going to be shifted.
     static constexpr size_t NUM_DERIVED_WITNESS_ENTITIES_NON_SHIFTED = 1;
     // A container to be fed to ShpleminiVerifier to avoid redundant scalar muls, the first number is the index of the
@@ -251,7 +251,48 @@ class ECCVMFlavor {
                               transcript_msm_infinity,                    // column 56
                               transcript_msm_x_inverse,                   // column 57
                               transcript_msm_count_zero_at_transition,    // column 58
-                              transcript_msm_count_at_transition_inverse) // column 59
+                              transcript_msm_count_at_transition_inverse, // column 59
+                              // --- 8-wide additions: new precompute slice columns ---
+                              precompute_s5hi, // column 60
+                              precompute_s5lo, // column 61
+                              precompute_s6hi, // column 62
+                              precompute_s6lo, // column 63
+                              precompute_s7hi, // column 64
+                              precompute_s7lo, // column 65
+                              precompute_s8hi, // column 66
+                              precompute_s8lo, // column 67
+                              // --- 8-wide additions: new MSM add selectors ---
+                              msm_add5, // column 68
+                              msm_add6, // column 69
+                              msm_add7, // column 70
+                              msm_add8, // column 71
+                              // --- 8-wide additions: new MSM point coordinates ---
+                              msm_x5, // column 72
+                              msm_y5, // column 73
+                              msm_x6, // column 74
+                              msm_y6, // column 75
+                              msm_x7, // column 76
+                              msm_y7, // column 77
+                              msm_x8, // column 78
+                              msm_y8, // column 79
+                              // --- 8-wide additions: new MSM collision inverses ---
+                              msm_collision_x5, // column 80
+                              msm_collision_x6, // column 81
+                              msm_collision_x7, // column 82
+                              msm_collision_x8, // column 83
+                              // --- 8-wide additions: new MSM lambdas ---
+                              msm_lambda5, // column 84
+                              msm_lambda6, // column 85
+                              msm_lambda7, // column 86
+                              msm_lambda8, // column 87
+                              // --- 8-wide additions: new MSM slices ---
+                              msm_slice5, // column 88
+                              msm_slice6, // column 89
+                              msm_slice7, // column 90
+                              msm_slice8, // column 91
+                              // --- 8-wide additions: additional lookup read counts ---
+                              lookup_read_counts_2, // column 92
+                              lookup_read_counts_3) // column 93
     };
 
     /**
@@ -271,28 +312,31 @@ class ECCVMFlavor {
     template <typename DataType> class WireToBeShiftedWithoutAccumulatorsEntities {
       public:
         DEFINE_FLAVOR_MEMBERS(DataType,
-                              transcript_mul,        // column 60
-                              transcript_msm_count,  // column 61
-                              precompute_scalar_sum, // column 62
-                              precompute_s1hi,       // column 63
-                              precompute_dx,         // column 64
-                              precompute_dy,         // column 65
-                              precompute_tx,         // column 66
-                              precompute_ty,         // column 67
-                              msm_transition,        // column 68
-                              msm_add,               // column 69
-                              msm_double,            // column 70
-                              msm_skew,              // column 71
-                              msm_accumulator_x,     // column 72
-                              msm_accumulator_y,     // column 73
-                              msm_count,             // column 74
-                              msm_round,             // column 75
-                              msm_add1,              // column 76
-                              msm_pc,                // column 77
-                              precompute_pc,         // column 78
-                              transcript_pc,         // column 79
-                              precompute_round,      // column 80
-                              precompute_select)     // column 81
+                              transcript_mul,        // column 94
+                              transcript_msm_count,  // column 95
+                              precompute_scalar_sum, // column 96
+                              precompute_s1hi,       // column 97
+                              precompute_dx,         // column 98
+                              precompute_dy,         // column 99
+                              precompute_tx,         // column 100
+                              precompute_ty,         // column 101
+                              msm_transition,        // column 102
+                              msm_add,               // column 103
+                              msm_double,            // column 104
+                              msm_skew,              // column 105
+                              msm_accumulator_x,     // column 106
+                              msm_accumulator_y,     // column 107
+                              msm_count,             // column 108
+                              msm_round,             // column 109
+                              msm_add1,              // column 110
+                              msm_pc,                // column 111
+                              precompute_pc,         // column 112
+                              transcript_pc,         // column 113
+                              precompute_round,      // column 114
+                              precompute_select,     // column 115
+                              // --- 8-wide additions: 2nd precomputed point (needs shift for inter-row constraints) ---
+                              precompute_tx2, // column 116
+                              precompute_ty2) // column 117
     };
 
     /**
@@ -302,9 +346,9 @@ class ECCVMFlavor {
     template <typename DataType> class WireToBeShiftedAccumulatorEntities {
       public:
         DEFINE_FLAVOR_MEMBERS(DataType,
-                              transcript_accumulator_not_empty, // column 82
-                              transcript_accumulator_x,         // column 83
-                              transcript_accumulator_y)         // column 84
+                              transcript_accumulator_not_empty, // column 118
+                              transcript_accumulator_x,         // column 119
+                              transcript_accumulator_y)         // column 120
     };
 
     /**
@@ -365,10 +409,12 @@ class ECCVMFlavor {
                               transcript_pc_shift,                    // column 19
                               precompute_round_shift,                 // column 20
                               precompute_select_shift,                // column 21
-                              transcript_accumulator_not_empty_shift, // column 22
-                              transcript_accumulator_x_shift,         // column 23
-                              transcript_accumulator_y_shift,         // column 24
-                              z_perm_shift);                          // column 25
+                              precompute_tx2_shift,                   // column 22
+                              precompute_ty2_shift,                   // column 23
+                              transcript_accumulator_not_empty_shift, // column 24
+                              transcript_accumulator_x_shift,         // column 25
+                              transcript_accumulator_y_shift,         // column 26
+                              z_perm_shift);                          // column 27
     };
 
     template <typename DataType, typename PrecomputedAndWitnessEntitiesSuperset>
@@ -397,10 +443,12 @@ class ECCVMFlavor {
                          entities.transcript_pc,                    // column 19
                          entities.precompute_round,                 // column 20
                          entities.precompute_select,                // column 21
-                         entities.transcript_accumulator_not_empty, // column 22
-                         entities.transcript_accumulator_x,         // column 23
-                         entities.transcript_accumulator_y,         // column 24
-                         entities.z_perm };                         // column 25
+                         entities.precompute_tx2,                   // column 22
+                         entities.precompute_ty2,                   // column 23
+                         entities.transcript_accumulator_not_empty, // column 24
+                         entities.transcript_accumulator_x,         // column 25
+                         entities.transcript_accumulator_y,         // column 26
+                         entities.z_perm };                         // column 27
     }
 
     /**
@@ -881,11 +929,50 @@ class ECCVMFlavor {
             Base::msm_slice2 = "MSM_SLICE2";
             Base::msm_slice3 = "MSM_SLICE3";
             Base::msm_slice4 = "MSM_SLICE4";
+            // 8-wide additions: new precompute slice labels
+            Base::precompute_s5hi = "PRECOMPUTE_S5HI";
+            Base::precompute_s5lo = "PRECOMPUTE_S5LO";
+            Base::precompute_s6hi = "PRECOMPUTE_S6HI";
+            Base::precompute_s6lo = "PRECOMPUTE_S6LO";
+            Base::precompute_s7hi = "PRECOMPUTE_S7HI";
+            Base::precompute_s7lo = "PRECOMPUTE_S7LO";
+            Base::precompute_s8hi = "PRECOMPUTE_S8HI";
+            Base::precompute_s8lo = "PRECOMPUTE_S8LO";
+            // 8-wide additions: new MSM labels
+            Base::msm_add5 = "MSM_ADD5";
+            Base::msm_add6 = "MSM_ADD6";
+            Base::msm_add7 = "MSM_ADD7";
+            Base::msm_add8 = "MSM_ADD8";
+            Base::msm_x5 = "MSM_X5";
+            Base::msm_y5 = "MSM_Y5";
+            Base::msm_x6 = "MSM_X6";
+            Base::msm_y6 = "MSM_Y6";
+            Base::msm_x7 = "MSM_X7";
+            Base::msm_y7 = "MSM_Y7";
+            Base::msm_x8 = "MSM_X8";
+            Base::msm_y8 = "MSM_Y8";
+            Base::msm_collision_x5 = "MSM_COLLISION_X5";
+            Base::msm_collision_x6 = "MSM_COLLISION_X6";
+            Base::msm_collision_x7 = "MSM_COLLISION_X7";
+            Base::msm_collision_x8 = "MSM_COLLISION_X8";
+            Base::msm_lambda5 = "MSM_LAMBDA5";
+            Base::msm_lambda6 = "MSM_LAMBDA6";
+            Base::msm_lambda7 = "MSM_LAMBDA7";
+            Base::msm_lambda8 = "MSM_LAMBDA8";
+            Base::msm_slice5 = "MSM_SLICE5";
+            Base::msm_slice6 = "MSM_SLICE6";
+            Base::msm_slice7 = "MSM_SLICE7";
+            Base::msm_slice8 = "MSM_SLICE8";
+            // 8-wide additions: new precomputed point labels
+            Base::precompute_tx2 = "PRECOMPUTE_TX2";
+            Base::precompute_ty2 = "PRECOMPUTE_TY2";
             Base::transcript_accumulator_not_empty = "TRANSCRIPT_ACCUMULATOR_NOT_EMPTY";
             Base::transcript_reset_accumulator = "TRANSCRIPT_RESET_ACCUMULATOR";
             Base::precompute_select = "PRECOMPUTE_SELECT";
             Base::lookup_read_counts_0 = "LOOKUP_READ_COUNTS_0";
             Base::lookup_read_counts_1 = "LOOKUP_READ_COUNTS_1";
+            Base::lookup_read_counts_2 = "LOOKUP_READ_COUNTS_2";
+            Base::lookup_read_counts_3 = "LOOKUP_READ_COUNTS_3";
             Base::transcript_base_infinity = "TRANSCRIPT_BASE_INFINITY";
             Base::transcript_base_x_inverse = "TRANSCRIPT_BASE_X_INVERSE";
             Base::transcript_base_y_inverse = "TRANSCRIPT_BASE_Y_INVERSE";

From 84a8d232ced81d72800f53516ca57e7c53b951b8 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 13:53:57 +0000
Subject: [PATCH 05/24] eccvm: wire up new 8-wide columns in ProverPolynomials
 constructor

Extends the ProverPolynomials constructor to populate 34 of the 36 new
flavor columns from the builder row data:

Precompute section:
- Wire precompute_s5hi..s8lo from point_table_rows[i].s9..s16
- Wire precompute_tx2/ty2 from point_table_rows[i].precompute_accumulator2

MSM section (all from add_state[4..7]):
- Wire msm_add5..add8 from add_state[4..7].add
- Wire msm_x5..x8, msm_y5..y8 from add_state[4..7].point
- Wire msm_collision_x5..x8 from add_state[4..7].collision_inverse
- Wire msm_lambda5..lambda8 from add_state[4..7].lambda
- Wire msm_slice5..slice8 from add_state[4..7].slice

lookup_read_counts_2/_3 columns are declared but not yet populated;
they will be wired when the lookup relation is updated to support
4 table terms per precompute row.
---
 .../src/barretenberg/eccvm/eccvm_flavor.hpp   | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
index 960137aeca6e..a85ddeb9e705 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
@@ -759,6 +759,14 @@ class ECCVMFlavor {
                     precompute_s3lo.set_if_valid_index(i, point_table_rows[i].s6);
                     precompute_s4hi.set_if_valid_index(i, point_table_rows[i].s7);
                     precompute_s4lo.set_if_valid_index(i, point_table_rows[i].s8);
+                    precompute_s5hi.set_if_valid_index(i, point_table_rows[i].s9);
+                    precompute_s5lo.set_if_valid_index(i, point_table_rows[i].s10);
+                    precompute_s6hi.set_if_valid_index(i, point_table_rows[i].s11);
+                    precompute_s6lo.set_if_valid_index(i, point_table_rows[i].s12);
+                    precompute_s7hi.set_if_valid_index(i, point_table_rows[i].s13);
+                    precompute_s7lo.set_if_valid_index(i, point_table_rows[i].s14);
+                    precompute_s8hi.set_if_valid_index(i, point_table_rows[i].s15);
+                    precompute_s8lo.set_if_valid_index(i, point_table_rows[i].s16);
                     // If skew is active (i.e. we need to subtract a base point from the msm result),
                     // write `7` into rows.precompute_skew. `7`, in binary representation, equals `-1` when converted
                     // into WNAF form
@@ -767,6 +775,8 @@ class ECCVMFlavor {
                     precompute_dy.set_if_valid_index(i, point_table_rows[i].precompute_double.y);
                     precompute_tx.set_if_valid_index(i, point_table_rows[i].precompute_accumulator.x);
                     precompute_ty.set_if_valid_index(i, point_table_rows[i].precompute_accumulator.y);
+                    precompute_tx2.set_if_valid_index(i, point_table_rows[i].precompute_accumulator2.x);
+                    precompute_ty2.set_if_valid_index(i, point_table_rows[i].precompute_accumulator2.y);
                 }
             });
 
@@ -787,6 +797,10 @@ class ECCVMFlavor {
                     msm_add2.set_if_valid_index(i, static_cast<int>(msm_rows[i].add_state[1].add));
                     msm_add3.set_if_valid_index(i, static_cast<int>(msm_rows[i].add_state[2].add));
                     msm_add4.set_if_valid_index(i, static_cast<int>(msm_rows[i].add_state[3].add));
+                    msm_add5.set_if_valid_index(i, static_cast<int>(msm_rows[i].add_state[4].add));
+                    msm_add6.set_if_valid_index(i, static_cast<int>(msm_rows[i].add_state[5].add));
+                    msm_add7.set_if_valid_index(i, static_cast<int>(msm_rows[i].add_state[6].add));
+                    msm_add8.set_if_valid_index(i, static_cast<int>(msm_rows[i].add_state[7].add));
                     msm_x1.set_if_valid_index(i, msm_rows[i].add_state[0].point.x);
                     msm_y1.set_if_valid_index(i, msm_rows[i].add_state[0].point.y);
                     msm_x2.set_if_valid_index(i, msm_rows[i].add_state[1].point.x);
@@ -795,18 +809,38 @@ class ECCVMFlavor {
                     msm_y3.set_if_valid_index(i, msm_rows[i].add_state[2].point.y);
                     msm_x4.set_if_valid_index(i, msm_rows[i].add_state[3].point.x);
                     msm_y4.set_if_valid_index(i, msm_rows[i].add_state[3].point.y);
+                    msm_x5.set_if_valid_index(i, msm_rows[i].add_state[4].point.x);
+                    msm_y5.set_if_valid_index(i, msm_rows[i].add_state[4].point.y);
+                    msm_x6.set_if_valid_index(i, msm_rows[i].add_state[5].point.x);
+                    msm_y6.set_if_valid_index(i, msm_rows[i].add_state[5].point.y);
+                    msm_x7.set_if_valid_index(i, msm_rows[i].add_state[6].point.x);
+                    msm_y7.set_if_valid_index(i, msm_rows[i].add_state[6].point.y);
+                    msm_x8.set_if_valid_index(i, msm_rows[i].add_state[7].point.x);
+                    msm_y8.set_if_valid_index(i, msm_rows[i].add_state[7].point.y);
                     msm_collision_x1.set_if_valid_index(i, msm_rows[i].add_state[0].collision_inverse);
                     msm_collision_x2.set_if_valid_index(i, msm_rows[i].add_state[1].collision_inverse);
                     msm_collision_x3.set_if_valid_index(i, msm_rows[i].add_state[2].collision_inverse);
                     msm_collision_x4.set_if_valid_index(i, msm_rows[i].add_state[3].collision_inverse);
+                    msm_collision_x5.set_if_valid_index(i, msm_rows[i].add_state[4].collision_inverse);
+                    msm_collision_x6.set_if_valid_index(i, msm_rows[i].add_state[5].collision_inverse);
+                    msm_collision_x7.set_if_valid_index(i, msm_rows[i].add_state[6].collision_inverse);
+                    msm_collision_x8.set_if_valid_index(i, msm_rows[i].add_state[7].collision_inverse);
                     msm_lambda1.set_if_valid_index(i, msm_rows[i].add_state[0].lambda);
                     msm_lambda2.set_if_valid_index(i, msm_rows[i].add_state[1].lambda);
                     msm_lambda3.set_if_valid_index(i, msm_rows[i].add_state[2].lambda);
                     msm_lambda4.set_if_valid_index(i, msm_rows[i].add_state[3].lambda);
+                    msm_lambda5.set_if_valid_index(i, msm_rows[i].add_state[4].lambda);
+                    msm_lambda6.set_if_valid_index(i, msm_rows[i].add_state[5].lambda);
+                    msm_lambda7.set_if_valid_index(i, msm_rows[i].add_state[6].lambda);
+                    msm_lambda8.set_if_valid_index(i, msm_rows[i].add_state[7].lambda);
                     msm_slice1.set_if_valid_index(i, msm_rows[i].add_state[0].slice);
                     msm_slice2.set_if_valid_index(i, msm_rows[i].add_state[1].slice);
                     msm_slice3.set_if_valid_index(i, msm_rows[i].add_state[2].slice);
                     msm_slice4.set_if_valid_index(i, msm_rows[i].add_state[3].slice);
+                    msm_slice5.set_if_valid_index(i, msm_rows[i].add_state[4].slice);
+                    msm_slice6.set_if_valid_index(i, msm_rows[i].add_state[5].slice);
+                    msm_slice7.set_if_valid_index(i, msm_rows[i].add_state[6].slice);
+                    msm_slice8.set_if_valid_index(i, msm_rows[i].add_state[7].slice);
                 }
             });
             this->set_shifted();

From e7e0283b6e2651c28b7fd34a558865fad78fde32 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 13:56:39 +0000
Subject: [PATCH 06/24] eccvm: extend MSM relation for 8 additions per row

The MSM relation now supports 8 point additions per row (was 4).
The doubling chain remains 4-wide (= wNAF digit width w = 4).

Key changes:
- Addition chain: first_add + 7 conditional adds (was first_add + 3)
- Skew chain: 8 conditional skew additions (was 4)
- Collision checks: 8 inverse checks (was 4)
- Slice-zero enforcement: 8 checks (was 4)
- Count update: sum of add1..add8 (was add1..add4)
- Addition continuity: add{i+1} * (-add{i} + 1) for i=1..7 (was 1..3)
- Cross-row continuity: (-add8 + 1) * add1_shift (was -add4 + 1)

Subrelation count: 47 -> 67 (20 new subrelations)
New subrelations: ADD slopes 5-8, SKEW slopes 5-8, collision 5-8,
slice-zero 5-8, continuity add5-8.

MAX_PARTIAL_RELATION_LENGTH for this relation: 8 -> 12 (due to the
longer addition chain increasing the degree of the accumulator output).
---
 .../relations/ecc_vm/ecc_msm_relation.hpp     |  21 +-
 .../ecc_vm/ecc_msm_relation_impl.hpp          | 545 +++++-------------
 2 files changed, 168 insertions(+), 398 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp
index b72fc3138c96..4626847f4dc9 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp
@@ -43,9 +43,24 @@ namespace bb {
 template <typename FF_> class ECCVMMSMRelationImpl {
   public:
     using FF = FF_;
-    static constexpr std::array<size_t, 47> SUBRELATION_PARTIAL_LENGTHS{ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-                                                                         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-                                                                         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
+    // 67 subrelations; the rows below hold 6 + 6 + 6 + 14 + 15 + 20 entries. Max partial length = 12 (for the acc
+    // output after 8 chained additions). The degree of y_t8 is 10 (the degree grows through chaining: first_add gives
+    // deg 3 y, then each subsequent add increments by ~1). With q_add gating, the final degree is 12 (rounded up for
+    // safety). Most subrelations remain degree <= 8; the new addition-chain subrelations (47-50, 51-54) are higher.
+    static constexpr std::array<size_t, 67> SUBRELATION_PARTIAL_LENGTHS{
+        12, 12, 8,  8,  12, 8, // 0-5: ADD acc(x,y), slope1; SKEW acc(x,y), slope1
+        8,  8,  8,  8,  8,  8, // 6-11: collision1-4; DOUBLE acc(x,y)
+        8,  8,  8,  8,  8,  8, // 12-17: DOUBLE slope1; slice-zero 1-4; mutual excl
+        8,  8,  8,  8,  8,  8, 8,
+        8,  8,  8,  8,  8,  8, 12, // 18-31: round tx, selectors, count, continuity
+        8,  8,  8,  8,  8,  8, 8,  8,
+        8,  8,  8,  8,  8,  8, 8, // 32-46: add1=q_add+q_skew, skew ctrls, ADD/DOUBLE/SKEW slopes 2-4, no-op
+        12, 12, 12, 12,           // 47-50: ADD slopes 5-8
+        12, 12, 12, 12,           // 51-54: SKEW slopes 5-8
+        8,  8,  8,  8,            // 55-58: collision 5-8
+        8,  8,  8,  8,            // 59-62: slice-zero 5-8
+        8,  8,  8,  8             // 63-66: continuity add5-8
+    };
 
     template <typename ContainerOverSubrelations, typename AllEntities, typename Parameters>
     static void accumulate(ContainerOverSubrelations& accumulator,
diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation_impl.hpp
index 04f64d755729..88dc32cd3935 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation_impl.hpp
@@ -30,18 +30,15 @@ namespace bb {
  *
  * ADDITION round (round = j):
  * [Acc] = [Acc] + T_i[a_{i, j}] for all i in [0, ... k-1]
+ * (up to 8 additions per ECCVM row)
  *
  * DOUBLE round:
- * [Acc] = 16 * [Acc] (four point doublings)
+ * [Acc] = 16 * [Acc] (four point doublings, using lambda1..lambda4)
  *
  * SKEW round:
  * If skew_i == 1, [Acc] = [Acc] - [P_i] for all i in [0, ..., k - 1]
  *
  * The relations in ECCVMMSMRelationImpl constrain the ADDITION, DOUBLE and SKEW rounds
- * @param accumulator transformed to `accumulator + C(in(X)...)*scaling_factor`
- * @param in an std::array containing the fully extended Accumulator edges.
- * @param parameters contains beta, gamma, and public_input_delta, ....
- * @param scaling_factor optional term to scale the evaluation before adding to evals.
  */
 template <typename FF>
 template <typename ContainerOverSubrelations, typename AllEntities, typename Parameters>
@@ -53,6 +50,7 @@ void ECCVMMSMRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulator
     using Accumulator = typename std::tuple_element_t<0, ContainerOverSubrelations>;
     using View = typename Accumulator::View;
 
+    // Point coordinates for 8 additions per row
     const auto& x1 = View(in.msm_x1);
     const auto& y1 = View(in.msm_y1);
     const auto& x2 = View(in.msm_x2);
@@ -61,319 +59,144 @@ void ECCVMMSMRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulator
     const auto& y3 = View(in.msm_y3);
     const auto& x4 = View(in.msm_x4);
     const auto& y4 = View(in.msm_y4);
+    const auto& x5 = View(in.msm_x5);
+    const auto& y5 = View(in.msm_y5);
+    const auto& x6 = View(in.msm_x6);
+    const auto& y6 = View(in.msm_y6);
+    const auto& x7 = View(in.msm_x7);
+    const auto& y7 = View(in.msm_y7);
+    const auto& x8 = View(in.msm_x8);
+    const auto& y8 = View(in.msm_y8);
+    // Collision inverses
     const auto& collision_inverse1 = View(in.msm_collision_x1);
     const auto& collision_inverse2 = View(in.msm_collision_x2);
     const auto& collision_inverse3 = View(in.msm_collision_x3);
     const auto& collision_inverse4 = View(in.msm_collision_x4);
+    const auto& collision_inverse5 = View(in.msm_collision_x5);
+    const auto& collision_inverse6 = View(in.msm_collision_x6);
+    const auto& collision_inverse7 = View(in.msm_collision_x7);
+    const auto& collision_inverse8 = View(in.msm_collision_x8);
+    // Lambdas (slopes). lambda1..4 used for both additions and doublings.
+    // lambda5..8 used only for additions 5-8.
     const auto& lambda1 = View(in.msm_lambda1);
     const auto& lambda2 = View(in.msm_lambda2);
     const auto& lambda3 = View(in.msm_lambda3);
     const auto& lambda4 = View(in.msm_lambda4);
+    const auto& lambda5 = View(in.msm_lambda5);
+    const auto& lambda6 = View(in.msm_lambda6);
+    const auto& lambda7 = View(in.msm_lambda7);
+    const auto& lambda8 = View(in.msm_lambda8);
     const auto& lagrange_first = View(in.lagrange_first);
+    // Add selectors
     const auto& add1 = View(in.msm_add1);
     const auto& add1_shift = View(in.msm_add1_shift);
     const auto& add2 = View(in.msm_add2);
     const auto& add3 = View(in.msm_add3);
     const auto& add4 = View(in.msm_add4);
+    const auto& add5 = View(in.msm_add5);
+    const auto& add6 = View(in.msm_add6);
+    const auto& add7 = View(in.msm_add7);
+    const auto& add8 = View(in.msm_add8);
+    // Accumulator
     const auto& acc_x = View(in.msm_accumulator_x);
     const auto& acc_y = View(in.msm_accumulator_y);
     const auto& acc_x_shift = View(in.msm_accumulator_x_shift);
     const auto& acc_y_shift = View(in.msm_accumulator_y_shift);
+    // Slices
     const auto& slice1 = View(in.msm_slice1);
     const auto& slice2 = View(in.msm_slice2);
     const auto& slice3 = View(in.msm_slice3);
     const auto& slice4 = View(in.msm_slice4);
+    const auto& slice5 = View(in.msm_slice5);
+    const auto& slice6 = View(in.msm_slice6);
+    const auto& slice7 = View(in.msm_slice7);
+    const auto& slice8 = View(in.msm_slice8);
+    // Control signals
     const auto& msm_transition = View(in.msm_transition);
     const auto& msm_transition_shift = View(in.msm_transition_shift);
     const auto& round = View(in.msm_round);
     const auto& round_shift = View(in.msm_round_shift);
-    const auto& q_add = View(in.msm_add); // is 1 iff we are at an ADD row in Straus algorithm
+    const auto& q_add = View(in.msm_add);
     const auto& q_add_shift = View(in.msm_add_shift);
     const auto& q_skew = View(in.msm_skew);
     const auto& q_skew_shift = View(in.msm_skew_shift);
-    const auto& q_double = View(in.msm_double); // is 1 iff we are at an DOUBLE row in Straus algorithm
+    const auto& q_double = View(in.msm_double);
     const auto& q_double_shift = View(in.msm_double_shift);
     const auto& msm_size = View(in.msm_size_of_msm);
-    const auto& pc = View(in.msm_pc); // pc stands for `point-counter`.
+    const auto& pc = View(in.msm_pc);
     const auto& pc_shift = View(in.msm_pc_shift);
     const auto& count = View(in.msm_count);
     const auto& count_shift = View(in.msm_count_shift);
     auto is_not_first_row = (-lagrange_first + 1);
 
-    /**
-     * @brief Evaluating ADDITION rounds
-     *
-     * This comment describes the algorithm we want the Prover to perform.
-     * The relations we constrain are supposed to make an honest Prover compute witnesses consistent with the following:
-     *
-     * For an MSM of size-k...
-     *
-     * Algorithm to determine if round at shifted row is an ADDITION round:
-     *     1. count_shift < msm_size
-     *     2. round != 32
-     *
-     * Algorithm to process MSM ADDITION round:
-     * 1. If `round == 0` set `count = 0`
-     * 2. For j = pc + count, perform the following:
-     * 2a.      If j + 3 < k: [P_{j + 3}] = T_{j+ 3}[slice_{j + 3}]
-     * 2b.      If j + 2 < k: [P_{j + 2}] = T_{j+ 2}[slice_{j + 2}]
-     * 2c.      If j + 1 < k: [P_{j + 1}] = T_{j+ 1}[slice_{j + 1}]
-     * 2d.                    [P_{j}]     = T_{j}[slice_{j}]
-     * 2e.      If j + 3 < k: [Acc_shift] = [Acc] + [P_j] + [P_{j+1}] + [P_{j+2}] + [P_{j+3}]
-     * 2f. Else If j + 2 < k: [Acc_shift] = [Acc] + [P_j] + [P_{j+1}] + [P_{j+2}]
-     * 2g. Else IF j + 1 < k: [Acc_shift] = [Acc] + [P_j] + [P_{j+1}]
-     * 2h. Else             : [Acc_shift] = [Acc] + [P_j]
-     * 3. `count_shift = count + 1 + (j + 1 < k) + (j + 2 < k) + (j + 3 < k)`
-     */
-
-    /**
-     * @brief Constraining addition rounds via a multiset-equality check
-     *
-     * @details
-     * The boolean column q_add describes whether a round is an ADDITION round.
-     * The values of q_add are Prover-defined. We need to ensure they set q_add correctly. We will do this via a
-     * multiset-equality check (formerly called a "strict lookup"), which allows the various tables to "communicate".
-     * On a high level, this table "reads" (pc, round, wnaf_slice), another table (Precomputed) "writes"
-     * a potentially different set of (pc, round, wnaf_slice), and we demand that the reads match the writes.
-     * Alternatively said, the MSM columns spawn a multiset of tuples of the form (pc, round, wnaf_slice), the
-     * Precomputed Table columns spawn a potentially different multiset of tuples of the form (pc, round, wnaf_slice),
-     * and we _check_ that these two multisets match.
-     *
-     * The above description does not reference how we will _prove_ that the two multisets are equal. As usual, we use a
-     * grand product argument. A happy byproduct of this is that we can use the grand product technique, which is
-     * powerful enough to allow our multiset equality testing to support _conditional adds_; this means that we only add
-     * a tuple if some particular condition occurs.
-     *
-     * This (pc, round, wnaf_slice) multiset equality testing is made more difficult by the fact that the values of
-     * `precomputed_pc` are _not the same_ as the values of `msm_pc`. The former indexes over every (non-trivial, 128
-     * bit) scalar multiplication, while the latter jumps values and is constant on MSM rows corresponding to a fixed
-     * MSM. However, the transition values should match.
-     *
-     * Given a row of the MSM table, we have four selectors q_add1, q_add2, q_add3, q_add4, as well as a q_skew
-     * selector. For the MSM side of the multiset corresponding to (pc, round, wnaf_slice), we add:
-     *
-     *      1. (msm_pc - msm_count, round, wnaf_slice_{count}) when q_add1 = 1
-     *      2. (msm_pc - msm_count - 1, round, wnaf_slice_{count + 1}) when q_add2 = 1
-     *      3. (msm_pc - msm_count - 2, round, wnaf_slice_{count + 2}) when q_add3 = 1
-     *      4. (msm_pc - msm_count - 3, round, wnaf_slice_{count + 3}) when q_add4 = 1
-     *
-     * That this is "what we want" comes from the following facts: msm_pc is the number of (non-trivial, 128-bit) Point
-     * multiplications we have done _until the start of_ the current MSM, and `msm_count` is the number of Point * wNAF
-     * slice multiplications/lookups we have done _in this round_. (Recall that a round corresponds to a wNAF digit.) In
-     * particular, `msm_count` updates by the appropriate amount (usually 4, more accurately q_add1 + q_add2 + q_add3 +
-     * q_add4) per row of the MSM table.
-     *
-     * On the other side, given a row of the Precomputed columns, if `precompute_select == 1`, we add
-     *      1. (precompute_pc, 4 * precompute_round, w_1)
-     *      2. (precompute_pc, 4 * precompute_round + 1, w_2)
-     *      3. (precompute_pc, 4 * precompute_round + 2, w_3)
-     *      4. (precompute_pc, 4 * precompute_round + 3, w_4)
-     * Additionally, if `precompute_point_transition == 1`, we add
-     *      5. (precompute_pc, 4 * precompute_round + 4, precompute_skew)
-     *
-     * ELSE `precompute_select == 0` and we add:
-     *      1. (0, 0, 0)
-     *
-     * Here, w_K is the compressed wNAF slices corresponding to `precompute_sKhi` and `precompute_sKlo`, for K ∈ {1, 2,
-     * 3, 4} and precompute_skew ∈ {0, 7}.
-     *
-     * SKETCH OF PROOF: We now argue that, under the following assumptions, if the multiset equality holds, then the
-     * `q_addK` and also `q_add` are all correctly constrained for K ∈ {1, 2, 3, 4}.
-     *      1. The Precomputed table is correctly constrained; in particular, the values `precompute_pc`,
-     *      `precompute_round`, `precompute_skew`, `precompute_select`, and `wK` are all correctly constrained.
-     *      2. `round` monotonically increases from 0 to 32 before reseting back to 0. `round_shift - round == 1`
-     *      precisely when `q_double == 1`.
-     *      3. `pc` is monotonic and only updates when there is an `msm_transition`. Here, it updates by `msm_size`,
-     *      which must be constrained somewhere else by a multiset argument. We detail this below.
-     *      4. `q_add`, `q_skew`, and `q_double` are pairwise mutually exclusive.
-     *      5. `q_add1 == 1` iff either `q_add == 1` OR `q_skew == 1`.
-     *      6. The lookup table is implemented correctly.
-     *
-     * First of all, note the asymmetry: we do not explicitly add tuples corresponding to skew on the MSM side of the
-     * table. Indeeed, this is implicit with `msm_round == 32`. Now, the point is that the pair (pc, round) uniquely
-     * specifies the point + wNAF digit that we are processing (and adding to the accumulator) and both `pc` and `round`
-     * are directly constrained to be monotonic.
-     *
-     * Suppose the Prover sets `q_addK = 0` when an honest Prover would set `q_addK == 1`. Then there would be some (pc,
-     * round, wnaf_slice) that the Precomputed table added to its multiset that the prover did not add. The Prover can
-     * _never_ "compensate" for this, as `pc` is locally constrained to be monotonic and `round` is constrained to be
-     * periodic; this means that the Prover has "lost their chance" to add this element to the multiset and hence the
-     * multiset equality check will fail.
-     *
-     * Conversely, if the Prover sets `q_addK = 1` when it should be set to 0, there are several options: either
-     * we are at the end of a `round` (so e.g. `q_add4 ` _should_ be 0), or we are at a double row, or we are at a row
-     * that should be all 0s. In the first two cases, as long as the Precomputed table is correctly constrained, again
-     * we would be adding a tuple to the multiset that can never be hit by the Precomputed table due to `precompute_pc`
-     * monotonicty and `precompute_round` periodicity (enforced in the precomputed columns.). In the final case, the
-     * only way we don't break the multiset check is if `wnaf_slice == 0` for the corresponding `q_addK` that is on. But
-     * then the lookup argument will fail, as there is no corresponding point when `pc = 0`. (Here it is helpful to
-     * remember that `pc` stands for _point-counter_.) Note that this requires that `precompute_pc` is well-formed.
-     *
-     *
-     * We apply consistency/continuity checks to q_add1/q_add2/q_add3/q_add4:
-     * 1. If q_add2 = 1, require q_add1 = 1
-     * 2. If q_add3 = 1, require q_add2 = 1
-     * 3. If q_add4 = 1, require q_add3 = 1
-     * 4. If q_add1_shift = 1 AND round does not update between rows, require q_add4 = 1
-     *
-     */
-
-    /**
-     * @brief Constrain msm_size and output of MSM computation via multiset equality
-     *
-     * @details
-     * As explained in the section on constraining the addition wire values, to make everything work we also need to
-     * constrain `msm_size`, something directly computed in the Transcript columns. We also need to "send" the final
-     * output value of an MSM from the MSM table to the transcript table so it can continue its processing. (Send here
-     * is a euphemism for constrain.) We do this via a multiset equality check of the form:
-     *                      (pc, P.x, P.y, msm-size)
-     * From the perspective of the MSM table, we add such a tuple only when `msm_transition_shift == 1` (i.e., the next
-     * row begins a new MSM, meaning the current row is the last row of the just-completed MSM). The terms P.x and P.y
-     * refer to the output values of the MSM just computed by the MSM table. `msm_size` is the size of the _just
-     * completed_ MSM.
-     *
-     *
-     */
-
-    /**
-     * @brief Looking up the slice-point products {-15[P], -13[P], ..., 13[P], 15[P]}
-     *
-     * @details
-     * In the Point Table, for every point [P] that occurs in the MSM table, we compute the list of points: {-15[P],
-     * -13[P], ..., 13[P], 15[P]}. (Note that these never vanish, as we only send a point to each table if they are
-     * non-zero.) We then constrain the "slice products" that occur here via a lookup argument. For completeness, we
-     * briefly sketch this.
-     *
-     * The PointTable will "write"  the following row to the lookup table: (pc, slice, x, y), where if `pc` corresponds
-     * to an elliptic curve point [P] (`pc` is a decreasing counter of the non-zero points that occur in our
-     * computation), slice ∈ {0, ..., 15}, and (x, y) are the affine coordinates of (2 * slice - 15)[P].
-     *
-     * The MSM table will then read a row of the same form. This constrains the MSM table to have correctly used the
-     * wNAF * point in the Straus algorithm.
-     *
-     */
-
-    /**
-     * @brief Addition relation
-     *
-     * All addition operations in ECCVMMSMRelationImpl are conditional additions, as we sometimes want to add values and
-     * other times simply want to propagate values (consider, e.g., when `q_add2 == 0`).
-     *
-     * This method returns three Accumulators: (x_out, y_out, slope_relation)
-     * - x_out, y_out: Output point coordinates. Either an addition of inputs (if `selector == 1`), or xa/ya (if
-     *   `selector == 0`).
-     * - slope_relation: Constraint enforcing that lambda is computed correctly (lambda = 0 if selector = 0, else
-     *   lambda = (yb - ya) / (xb - xa))
-     *
-     * The `collision_relation` parameter tracks a subrelation that validates xb != xa. This collision check means that
-     * our system is only statistically complete, not perfectly complete. (See the `offset_generator` and the
-     * `first_add` method for details.)
-     *
-     * Repeated calls to this method will increase the max degree of the Accumulator output: deg(x_out) = 1 +
-     * max(deg(xa), deg(xb)), deg(y_out) = max(1 + deg(x_out), 1 + deg(ya)). In our application, we chain together 4 of
-     * these with the pattern in such a way that the final x_out will have degree 5 and the final y_out will have
-     * degree 6.
-     */
+    // add: conditionally adds (xb, yb) into (xa, ya); returns { x_out, y_out, slope_relation }.
+    // slope_relation == 0 forces lambda == (yb - ya) / (xb - xa) if selector == 1 and lambda == 0 if
+    // selector == 0 (then x_out == xa, y_out == ya). collision_relation accumulates selector * (xb - xa).
     auto add = [&](auto& xb, auto& yb, auto& xa, auto& ya, auto& lambda, auto& selector, auto& collision_relation) {
-        // computation of lambda is valid: if q == 1, then L == (yb - ya) / (xb - xa)
-        // if q == 0, then L == 0. combining these into a single constraint yields:
-        // q * (L * (xb - xa - 1) - (yb - ya)) + L = 0
         auto slope_relation = selector * (lambda * (xb - xa - 1) - (yb - ya)) + lambda;
         collision_relation += selector * (xb - xa);
-        // x_out = L.L + (-xb - xa) * q + (1 - q) xa
-        // deg L = 1, deg q = 1, min(deg(xa), deg(xb))≥ 1.
-        // hence deg(x_out) = 1 + max(deg(xa), deg(xb))
         auto x_out = lambda.sqr() + (-xb - xa - xa) * selector + xa;
-
-        // y_out = L . (xa - x_out) - ya * q + (1 - q) ya
-        // hence deg(y_out) = max(1 + deg(x_out), 1 + deg(ya))
         auto y_out = lambda * (xa - x_out) + (-ya - ya) * selector + ya;
         return std::array<Accumulator, 3>{ x_out, y_out, slope_relation };
     };
 
-    /**
-     * @brief First Addition relation - handles the first addition in each row with inhomogeneous base point selection
-     *
-     * @details The first add operation per row is treated inhomogeneously based on whether we are starting a new MSM
-     * or continuing an existing one across multiple rows.
-     *
-     * This method returns three Accumulators: (x_out, y_out, slope_relation), computed as follows:
-     *
-     * **Case 1: Continuing MSM (selector == 0, i.e., msm_transition == 0)**
-     * - Add point (xa, ya) to the accumulator (xb, yb) from the previous row
-     * - Example: In an MSM of size 9 spanning 3 rows, rows 2 and 3 use selector=0 to continue with their respective
-     *   accumulators
-     *
-     * **Case 2: Starting new MSM (selector == 1, i.e., msm_transition == 1)**
-     * - Add point (xa, ya) to the fixed "offset generator" point (xo, yo)
-     * - The offset generator serves two purposes for honest Provers:
-     *   (a) Ensures no intermediate MSM computations produce points at infinity
-     *   (b) Eliminates branch logic for the EC ops
-     * - Example: In an MSM of size 9, only row 1 has msm_transition=1 and uses the offset generator
-     *
-     * **Soundness via collision check:**
-     * We enforce soundness by constraining that the x-coordinates of the two input points must differ (xa != xb or
-     * xa != xo). This prevents the Prover from exploiting incomplete addition formula edge cases (point doubling or
-     * adding inverses). The collision_relation accumulator tracks this constraint, which is later verified via an
-     * inverse check.
-     *
-     * **Statistical completeness:**
-     * Note that this technique is only *statistically* complete. There exist valid MSM computations where an honest
-     * Prover would encounter x-coordinate collisions (xa == xb or xa == xo), causing the constraints to become
-     * unsatisfiable. However, the probability of such a collision is negligible—equivalent to solving the discrete
-     * logarithm problem.
-     */
+    // first_add: unconditional add of (xa, ya) to a base point (x, y), where (x, y) is the offset
+    // generator if selector == 1 and (xb, yb) if selector == 0. Returns { x_out, y_out, slope_relation };
+    // collision_relation accumulates (xa - x). NOTE: offset generator is hard-coded to g1 (not curve-agnostic).
     auto first_add =
         [&](auto& xb, auto& yb, auto& xa, auto& ya, auto& lambda, auto& selector, auto& collision_relation) {
-            // N.B. this is brittle - should be curve agnostic but we don't propagate the curve parameter into
-            // relations!
             constexpr auto offset_generator = get_precomputed_generators<g1, "ECCVM_OFFSET_GENERATOR", 1>()[0];
             constexpr uint256_t oxu = offset_generator.x;
             constexpr uint256_t oyu = offset_generator.y;
             const Accumulator xo(oxu);
             const Accumulator yo(oyu);
-            // set (x, y) to be either accumulator if `selector == 0` or OFFSET if `selector == 1`.
             auto x = xo * selector + xb * (-selector + 1);
             auto y = yo * selector + yb * (-selector + 1);
-            auto slope_relation = lambda * (x - xa) - (y - ya); // degree 3
+            auto slope_relation = lambda * (x - xa) - (y - ya);
             collision_relation += (xa - x);
             auto x_out = lambda * lambda + (-x - xa);
             auto y_out = lambda * (xa - x_out) - ya;
             return std::array<Accumulator, 3>{ x_out, y_out, slope_relation };
         };
 
-    // ADD operations (if row represents ADD round, not SKEW or DOUBLE)
+    // ========================================================================
+    // ADDITION chain (8 additions per row)
+    // ========================================================================
     Accumulator x1_collision_relation(0);
     Accumulator x2_collision_relation(0);
     Accumulator x3_collision_relation(0);
     Accumulator x4_collision_relation(0);
-    // If `msm_transition == 1`, we have started a new MSM. We need to treat the current value of [Acc] as the point at
-    // infinity!
+    Accumulator x5_collision_relation(0);
+    Accumulator x6_collision_relation(0);
+    Accumulator x7_collision_relation(0);
+    Accumulator x8_collision_relation(0);
+
     auto [x_t1, y_t1, add_slope_relation1] =
-        first_add(acc_x, acc_y, x1, y1, lambda1, msm_transition, x1_collision_relation); // [deg 2, deg 3]
-    auto [x_t2, y_t2, add_slope_relation2] =
-        add(x2, y2, x_t1, y_t1, lambda2, add2, x2_collision_relation); // [deg 3, deg 4]
-    auto [x_t3, y_t3, add_slope_relation3] =
-        add(x3, y3, x_t2, y_t2, lambda3, add3, x3_collision_relation); // [deg 4, deg 5]
-    auto [x_t4, y_t4, add_slope_relation4] =
-        add(x4, y4, x_t3, y_t3, lambda4, add4, x4_collision_relation); // [deg 5, deg 6]
+        first_add(acc_x, acc_y, x1, y1, lambda1, msm_transition, x1_collision_relation);
+    auto [x_t2, y_t2, add_slope_relation2] = add(x2, y2, x_t1, y_t1, lambda2, add2, x2_collision_relation);
+    auto [x_t3, y_t3, add_slope_relation3] = add(x3, y3, x_t2, y_t2, lambda3, add3, x3_collision_relation);
+    auto [x_t4, y_t4, add_slope_relation4] = add(x4, y4, x_t3, y_t3, lambda4, add4, x4_collision_relation);
+    auto [x_t5, y_t5, add_slope_relation5] = add(x5, y5, x_t4, y_t4, lambda5, add5, x5_collision_relation);
+    auto [x_t6, y_t6, add_slope_relation6] = add(x6, y6, x_t5, y_t5, lambda6, add6, x6_collision_relation);
+    auto [x_t7, y_t7, add_slope_relation7] = add(x7, y7, x_t6, y_t6, lambda7, add7, x7_collision_relation);
+    auto [x_t8, y_t8, add_slope_relation8] = add(x8, y8, x_t7, y_t7, lambda8, add8, x8_collision_relation);
 
     // Validate accumulator output matches ADD output if q_add = 1
-    std::get<0>(accumulator) += q_add * (acc_x_shift - x_t4) * scaling_factor;
-    std::get<1>(accumulator) += q_add * (acc_y_shift - y_t4) * scaling_factor;
+    std::get<0>(accumulator) += q_add * (acc_x_shift - x_t8) * scaling_factor;
+    std::get<1>(accumulator) += q_add * (acc_y_shift - y_t8) * scaling_factor;
     // Validate slope relations for each addition separately to prevent cancellation attacks
     std::get<2>(accumulator) += q_add * add_slope_relation1 * scaling_factor;
     std::get<36>(accumulator) += q_add * add_slope_relation2 * scaling_factor;
     std::get<37>(accumulator) += q_add * add_slope_relation3 * scaling_factor;
     std::get<38>(accumulator) += q_add * add_slope_relation4 * scaling_factor;
-
-    /**
-     * @brief doubles a point.
-     *
-     * Degree of x_out = 2
-     * Degree of y_out = 3
-     * Degree of relation = 4
-     */
+    std::get<47>(accumulator) += q_add * add_slope_relation5 * scaling_factor;
+    std::get<48>(accumulator) += q_add * add_slope_relation6 * scaling_factor;
+    std::get<49>(accumulator) += q_add * add_slope_relation7 * scaling_factor;
+    std::get<50>(accumulator) += q_add * add_slope_relation8 * scaling_factor;
+
+    // ========================================================================
+    // DOUBLING chain (4 doublings per row, using lambda1..4)
+    // ========================================================================
     auto dbl = [&](auto& x, auto& y, auto& lambda) {
         auto two_x = x + x;
         auto slope_relation = lambda * (y + y) - (two_x + x) * x;
@@ -382,60 +205,38 @@ void ECCVMMSMRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulator
         return std::array<Accumulator, 3>{ x_out, y_out, slope_relation };
     };
 
-    /**
-     * @brief
-     *
-     * Algorithm to determine if round is a DOUBLE round:
-     *    1. count_shift >= msm_size
-     *    2. round != 32
-     *
-     * Algorithm to process MSM DOUBLE round:
-     * [Acc_shift] = ((([Acc].double()).double()).double()).double()
-     *
-     * As with additions, the column q_double describes whether row is a double round. It is Prover-defined.
-     * The value of `msm_round` can only update when `q_double = 1` and we use this to ensure Prover correctly sets
-     * `q_double`. The reason for this is that `msm_round` witnesses the wNAF digit we are processing, and we only
-     * perform the four doublings when we are done processing a wNAF digit. See round transition relations further down.
-     */
     auto [x_d1, y_d1, double_slope_relation1] = dbl(acc_x, acc_y, lambda1);
     auto [x_d2, y_d2, double_slope_relation2] = dbl(x_d1, y_d1, lambda2);
     auto [x_d3, y_d3, double_slope_relation3] = dbl(x_d2, y_d2, lambda3);
     auto [x_d4, y_d4, double_slope_relation4] = dbl(x_d3, y_d3, lambda4);
     std::get<10>(accumulator) += q_double * (acc_x_shift - x_d4) * scaling_factor;
     std::get<11>(accumulator) += q_double * (acc_y_shift - y_d4) * scaling_factor;
-    // Validate slope relations for each doubling separately to prevent cancellation attacks
     std::get<12>(accumulator) += q_double * double_slope_relation1 * scaling_factor;
     std::get<39>(accumulator) += q_double * double_slope_relation2 * scaling_factor;
     std::get<40>(accumulator) += q_double * double_slope_relation3 * scaling_factor;
     std::get<41>(accumulator) += q_double * double_slope_relation4 * scaling_factor;
 
-    /**
-     * @brief SKEW operations
-     * When computing x * [P], if x is even we must subtract [P] from accumulator
-     * (this is because our windowed non-adjacent-form can only represent odd numbers)
-     * Round 32 represents "skew" round.
-     * If scalar slice == 7, we add into accumulator (point_table[7] maps to -[P])
-     * If scalar slice == 0, we do not add into accumulator
-     * i.e. for the skew round we can use the slice values as our "selector" when doing conditional point adds
-     *
-     * As with addition and doubling, the column q_skew is prover-defined. It is precisely turned on when the round
-     * is 32. We implement this constraint slightly differently. For more details, see the round transition relations
-     * below.
-     */
+    // ========================================================================
+    // SKEW chain (8 conditional additions per row; the selector for slot K is sliceK / 7)
+    // ========================================================================
     static FF inverse_seven = FF(7).invert();
     auto skew1_select = slice1 * inverse_seven;
     auto skew2_select = slice2 * inverse_seven;
     auto skew3_select = slice3 * inverse_seven;
     auto skew4_select = slice4 * inverse_seven;
+    auto skew5_select = slice5 * inverse_seven;
+    auto skew6_select = slice6 * inverse_seven;
+    auto skew7_select = slice7 * inverse_seven;
+    auto skew8_select = slice8 * inverse_seven;
     Accumulator x1_skew_collision_relation(0);
     Accumulator x2_skew_collision_relation(0);
     Accumulator x3_skew_collision_relation(0);
     Accumulator x4_skew_collision_relation(0);
-    // add skew points iff row is a SKEW row AND slice = 7 (point_table[7] maps to -[P])
-    // N.B. while it would be nice to have one `add` relation for both ADD and SKEW rounds,
-    // this would increase degree of sumcheck identity vs evaluating them separately.
-    // This is because, for add rounds, the result of adding [P1], [Acc] is [P1 + Acc] or [P1]
-    //             but for skew rounds, the result of adding [P1], [Acc] is [P1 + Acc] or [Acc]
+    Accumulator x5_skew_collision_relation(0);
+    Accumulator x6_skew_collision_relation(0);
+    Accumulator x7_skew_collision_relation(0);
+    Accumulator x8_skew_collision_relation(0);
+
     auto [x_s1, y_s1, skew_slope_relation1] =
         add(x1, y1, acc_x, acc_y, lambda1, skew1_select, x1_skew_collision_relation);
     auto [x_s2, y_s2, skew_slope_relation2] =
@@ -444,140 +245,114 @@ void ECCVMMSMRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulator
         add(x3, y3, x_s2, y_s2, lambda3, skew3_select, x3_skew_collision_relation);
     auto [x_s4, y_s4, skew_slope_relation4] =
         add(x4, y4, x_s3, y_s3, lambda4, skew4_select, x4_skew_collision_relation);
+    auto [x_s5, y_s5, skew_slope_relation5] =
+        add(x5, y5, x_s4, y_s4, lambda5, skew5_select, x5_skew_collision_relation);
+    auto [x_s6, y_s6, skew_slope_relation6] =
+        add(x6, y6, x_s5, y_s5, lambda6, skew6_select, x6_skew_collision_relation);
+    auto [x_s7, y_s7, skew_slope_relation7] =
+        add(x7, y7, x_s6, y_s6, lambda7, skew7_select, x7_skew_collision_relation);
+    auto [x_s8, y_s8, skew_slope_relation8] =
+        add(x8, y8, x_s7, y_s7, lambda8, skew8_select, x8_skew_collision_relation);
 
     // Validate accumulator output matches SKEW output if q_skew = 1
-    std::get<3>(accumulator) += q_skew * (acc_x_shift - x_s4) * scaling_factor;
-    std::get<4>(accumulator) += q_skew * (acc_y_shift - y_s4) * scaling_factor;
-    // Validate slope relations for each skew addition separately to prevent cancellation attacks
+    std::get<3>(accumulator) += q_skew * (acc_x_shift - x_s8) * scaling_factor;
+    std::get<4>(accumulator) += q_skew * (acc_y_shift - y_s8) * scaling_factor;
     std::get<5>(accumulator) += q_skew * skew_slope_relation1 * scaling_factor;
     std::get<42>(accumulator) += q_skew * skew_slope_relation2 * scaling_factor;
     std::get<43>(accumulator) += q_skew * skew_slope_relation3 * scaling_factor;
     std::get<44>(accumulator) += q_skew * skew_slope_relation4 * scaling_factor;
-
-    // Check x-coordinates do not collide if row is an ADD row or a SKEW row
-    // if either q_add or q_skew = 1, an inverse should exist for each computed relation
-    // Step 1: construct boolean selectors that describe whether we added a point at the current row
+    std::get<51>(accumulator) += q_skew * skew_slope_relation5 * scaling_factor;
+    std::get<52>(accumulator) += q_skew * skew_slope_relation6 * scaling_factor;
+    std::get<53>(accumulator) += q_skew * skew_slope_relation7 * scaling_factor;
+    std::get<54>(accumulator) += q_skew * skew_slope_relation8 * scaling_factor;
+
+    // ========================================================================
+    // Collision checks (x-coordinates must differ for every addition actually performed)
+    // ========================================================================
     const auto add_first_point = add1 * q_add + q_skew * skew1_select;
     const auto add_second_point = add2 * q_add + q_skew * skew2_select;
     const auto add_third_point = add3 * q_add + q_skew * skew3_select;
     const auto add_fourth_point = add4 * q_add + q_skew * skew4_select;
-    // Step 2: construct the difference a.k.a. delta between x-coordinates for each point add (depending on if row is
-    // ADD or SKEW)
+    const auto add_fifth_point = add5 * q_add + q_skew * skew5_select;
+    const auto add_sixth_point = add6 * q_add + q_skew * skew6_select;
+    const auto add_seventh_point = add7 * q_add + q_skew * skew7_select;
+    const auto add_eighth_point = add8 * q_add + q_skew * skew8_select;
+
     const auto x1_delta = x1_skew_collision_relation * q_skew + x1_collision_relation * q_add;
     const auto x2_delta = x2_skew_collision_relation * q_skew + x2_collision_relation * q_add;
     const auto x3_delta = x3_skew_collision_relation * q_skew + x3_collision_relation * q_add;
     const auto x4_delta = x4_skew_collision_relation * q_skew + x4_collision_relation * q_add;
-    // Step 3: x_delta * inverse - 1 = 0 if we performed a point addition (else x_delta * inverse = 0)
+    const auto x5_delta = x5_skew_collision_relation * q_skew + x5_collision_relation * q_add;
+    const auto x6_delta = x6_skew_collision_relation * q_skew + x6_collision_relation * q_add;
+    const auto x7_delta = x7_skew_collision_relation * q_skew + x7_collision_relation * q_add;
+    const auto x8_delta = x8_skew_collision_relation * q_skew + x8_collision_relation * q_add;
+
     std::get<6>(accumulator) += (x1_delta * collision_inverse1 - add_first_point) * scaling_factor;
     std::get<7>(accumulator) += (x2_delta * collision_inverse2 - add_second_point) * scaling_factor;
     std::get<8>(accumulator) += (x3_delta * collision_inverse3 - add_third_point) * scaling_factor;
     std::get<9>(accumulator) += (x4_delta * collision_inverse4 - add_fourth_point) * scaling_factor;
-
-    // When add_i = 0, force slice_i to ALSO be 0
+    std::get<55>(accumulator) += (x5_delta * collision_inverse5 - add_fifth_point) * scaling_factor;
+    std::get<56>(accumulator) += (x6_delta * collision_inverse6 - add_sixth_point) * scaling_factor;
+    std::get<57>(accumulator) += (x7_delta * collision_inverse7 - add_seventh_point) * scaling_factor;
+    std::get<58>(accumulator) += (x8_delta * collision_inverse8 - add_eighth_point) * scaling_factor;
+
+    // ========================================================================
+    // When add_i = 0, force slice_i to also be 0
+    // ========================================================================
     std::get<13>(accumulator) += (-add1 + 1) * slice1 * scaling_factor;
     std::get<14>(accumulator) += (-add2 + 1) * slice2 * scaling_factor;
     std::get<15>(accumulator) += (-add3 + 1) * slice3 * scaling_factor;
     std::get<16>(accumulator) += (-add4 + 1) * slice4 * scaling_factor;
+    std::get<59>(accumulator) += (-add5 + 1) * slice5 * scaling_factor;
+    std::get<60>(accumulator) += (-add6 + 1) * slice6 * scaling_factor;
+    std::get<61>(accumulator) += (-add7 + 1) * slice7 * scaling_factor;
+    std::get<62>(accumulator) += (-add8 + 1) * slice8 * scaling_factor;
 
+    // ========================================================================
     // SELECTORS ARE MUTUALLY EXCLUSIVE
-    // at most one of q_skew, q_double, q_add can be nonzero.
-    // note that as we can expect our table to be zero padded, we _do not_ insist that q_add + q_double + q_skew == 1.
+    // ========================================================================
     std::get<17>(accumulator) += (q_add * q_double + q_add * q_skew + q_double * q_skew) * scaling_factor;
 
+    // ========================================================================
     // ACCUMULATOR PRESERVATION ON NO-OP ROWS
-    // If no phase selector is active (q_add = q_double = q_skew = 0), the accumulator must not change.
-    // Without this constraint, a malicious prover could insert no-op rows between active rows and
-    // set arbitrary accumulator values on the next row, because the accumulator-update constraints
-    // are all gated by their respective phase selectors.
-    // We exclude two boundary cases:
-    //   - msm_transition = 1 on the current row: msm_transition marks the first row of a new MSM
-    //     (where q_add is also 1), but the final row of the entire MSM trace ALSO has msm_transition = 1
-    //     with ALL phase selectors off. On that row, acc holds the MSM output and acc_shift need not
-    //     be preserved. This is safe because the set relation constrains (pc, acc_x, acc_y, msm_size)
-    //     at transitions. (NOTE: this is a design choice specified by the builder; we could equivalently propagate the
-    //     accumulator one past the last MSM row and then not turn off the constraint when `msm_transition == 1`.)
-    //   - lagrange_first = 1 (row 0): the first row of the trace is zero-padded and the next row
-    //     starts a fresh MSM whose accumulator is initialized via first_add, not by continuity.
+    // ========================================================================
     auto no_op_selector =
-        (-q_add + 1) * (-q_double + 1) * (-q_skew + 1) * (-msm_transition + 1) * (-lagrange_first + 1); // degree 5
-    std::get<45>(accumulator) += no_op_selector * (acc_x_shift - acc_x) * scaling_factor;               // degree 6
-    std::get<46>(accumulator) += no_op_selector * (acc_y_shift - acc_y) * scaling_factor;               // degree 6
+        (-q_add + 1) * (-q_double + 1) * (-q_skew + 1) * (-msm_transition + 1) * (-lagrange_first + 1);
+    std::get<45>(accumulator) += no_op_selector * (acc_x_shift - acc_x) * scaling_factor;
+    std::get<46>(accumulator) += no_op_selector * (acc_y_shift - acc_y) * scaling_factor;
 
-    // Validate that if q_add = 1 or q_skew = 1, add1 also is 1
-    // NOTE(#2222): could just get rid of add1 as a column, as it is a linear combination.
+    // ========================================================================
+    // add1 = q_add + q_skew
+    // ========================================================================
     std::get<32>(accumulator) += (add1 - q_add - q_skew) * scaling_factor;
 
+    // ========================================================================
     // ROUND TRANSITION LOGIC
-    // `round_transition` describes whether we are transitioning between "rounds" of the MSM according to the Straus
-    // algorithm. In particular, the `round` corresponds to the wNAF digit we are currently processing.
-
+    // ========================================================================
     const auto round_delta = round_shift - round;
-    // If `msm_transition == 0` (next row) then `round_delta` is boolean; the round is internal to a given MSM and
-    // represents the wNAF digit currently being processed. `round_delta == 0` means that the current and next steps of
-    // the Straus algorithm are processing the same wNAF digit place.
-
-    // `round_transition == 0` if `round_delta == 0` or the next row is an MSM transition.
-    // if `round_transition != 0`, then `round_transition == round_delta == 1` by the following constraint.
-    // in particular, `round_transition` is boolean. (`round_delta` is not boolean precisely one step before an MSM
-    // transition, but that does not concern us here.)
     const auto round_transition = round_delta * (-msm_transition_shift + 1);
     std::get<18>(accumulator) += round_transition * (round_delta - 1) * scaling_factor;
 
-    // If `round_transition == 1`, then `round_delta == 1` and `msm_transition_shift == 0`. Therefore, we wish to
-    // constrain next row in the VM to either be a double (if `round != 31`) or skew (if `round == 31`). In either case,
-    // the point is that we have finished processing a wNAF digit place and need to either perform the doublings to move
-    // on to the next place _or_ we are at the last place and need to perform the skew computation to finish. These are
-    // equationally represented as:
-    //      round_transition * skew_shift * (round - 31) = 0 (if round tx and skew, then round == 31);
-    //      round_transition * (skew_shift + double_shift - 1) = 0 (if round tx, then skew XOR double = 1).
-    //      (-round_delta + 1) * q_double_shift = 1 (if q_double_shift == 1, then round_transition = 1)
-    // together, these have the following implications: if round tx and round != 31, then double_shift = 1.
-    // conversely, if round tx and double_shift == 0, then `q_skew_shift == 1` (which then forces `round == 31`).
-    // similarly, if q_double_shift == 1, then round_transition == 0,
-    // the fact that a round_transition occurs at the first time skew_shift == 1 follows from the fact that skew == 1
-    // implies round == 32 and the above three relations, together with the _definition_ of round_transition.
     std::get<19>(accumulator) += round_transition * q_skew_shift * (round - 31) * scaling_factor;
     std::get<20>(accumulator) += round_transition * (q_skew_shift + q_double_shift - 1) * scaling_factor;
     std::get<35>(accumulator) += (-round_delta + 1) * q_double_shift * scaling_factor;
-    // if the next is neither double nor skew, and we are not at an msm_transition, then round_delta = 0 and the next
-    // "row" of our VM is processing the same wNAF digit place.
     std::get<21>(accumulator) += round_transition * (-q_double_shift + 1) * (-q_skew_shift + 1) * scaling_factor;
 
+    // ========================================================================
     // CONSTRAINING Q_DOUBLE AND Q_SKEW
-    // NOTE: we have already constrained q_add, q_skew, and q_double to be mutually exclusive.
-
-    // if double, next add = 1. As q_double, q_add, and q_skew are mutually exclusive, this suffices to force
-    // q_double_shift == q_skew_shift == 0.
+    // ========================================================================
     std::get<22>(accumulator) += q_double * (-q_add_shift + 1) * scaling_factor;
-    // if the current row has q_skew == 1 and the next row is _not_ an MSM transition, then q_skew_shift = 1.
-    // this forces q_skew to precisely correspond to the rows where `round == 32`. Indeed, note that the first q_skew
-    // bit is set correctly:
-    //      round == 31, round_transition == 1 ==> q_skew_shift == 1. (if, to the contrary, q_double_shift == 1, then
-    //      the q_add_shift_shift == 1, but we assume that we have correctly constrained the q_adds via the multiset
-    //      argument. this means that q_double_shift == 0, which forces q_skew_shift == 1 because round_transition
-    //      == 1.)
-    // this means that the first row with `round == 32` has q_skew == 1. then all subsequent q_skew entries must be 1,
-    // _until_ we start our new MSM.
     std::get<33>(accumulator) += (-msm_transition_shift + 1) * q_skew * (-q_skew_shift + 1) * scaling_factor;
-    // if q_skew == 1, then round == 32. This is almost certainly redundant but psychologically useful to "constrain
-    // both ends".
     std::get<34>(accumulator) += q_skew * (-round + 32) * scaling_factor;
 
-    // UPDATING THE COUNT
-
-    // if we are changing the `round` (i.e., starting to process a new wNAF digit or at an msm transition), the
-    // count_shift must be 0.
+    // ========================================================================
+    // UPDATING THE COUNT (count_shift = count + add1 + ... + add8 within a round)
+    // ========================================================================
     std::get<23>(accumulator) += round_delta * count_shift * scaling_factor;
-    // if msm_transition_shift = 0 and round_delta = 0, then the next "row" of the VM is processing the same wNAF digit.
-    // this means that the count must increase: count_shift = count + add1 + add2 + add3 + add4
     std::get<24>(accumulator) += (-msm_transition_shift + 1) * (-round_delta + 1) *
-                                 (count_shift - count - add1 - add2 - add3 - add4) * scaling_factor;
+                                 (count_shift - count - add1 - add2 - add3 - add4 - add5 - add6 - add7 - add8) *
+                                 scaling_factor;
 
-    // at least one of the following must be true:
-    //      the next step is an MSM transition;
-    //      the next count is zero (meaning we are starting the processing of a new wNAF digit)
-    //      the next step is processing the same wNAF digit (i.e., round_delta == 0)
-    // (note that at the start of a new MSM, the count is also zero, so the above are not mutually exclusive.)
     std::get<25>(accumulator) +=
         is_not_first_row * (-msm_transition_shift + 1) * round_delta * count_shift * scaling_factor;
 
@@ -585,42 +360,22 @@ void ECCVMMSMRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulator
     std::get<26>(accumulator) += msm_transition * round * scaling_factor;
 
     // if msm_transition_shift = 1, pc = pc_shift + msm_size
-    // NB: `ecc_set_relation` ensures `msm_size` maps to `transcript.msm_count` for the current value of `pc`
     std::get<27>(accumulator) += is_not_first_row * msm_transition_shift * (msm_size + pc_shift - pc) * scaling_factor;
 
-    // Addition continuity checks
-    // We want to RULE OUT the following scenarios:
-    // Case 1: add2 = 1, add1 = 0
-    // Case 2: add3 = 1, add2 = 0
-    // Case 3: add4 = 1, add3 = 0
-    // These checks ensure that the current row does not skip points (for both ADD and SKEW ops)
-    // This is part of a wider set of checks we use to ensure that all point data is used in the assigned
-    // multiscalar multiplication operation (and not in a different MSM operation).
+    // ========================================================================
+    // Addition continuity checks (extended from 4 to 8)
+    // ========================================================================
     std::get<28>(accumulator) += add2 * (-add1 + 1) * scaling_factor;
     std::get<29>(accumulator) += add3 * (-add2 + 1) * scaling_factor;
     std::get<30>(accumulator) += add4 * (-add3 + 1) * scaling_factor;
+    std::get<63>(accumulator) += add5 * (-add4 + 1) * scaling_factor;
+    std::get<64>(accumulator) += add6 * (-add5 + 1) * scaling_factor;
+    std::get<65>(accumulator) += add7 * (-add6 + 1) * scaling_factor;
+    std::get<66>(accumulator) += add8 * (-add7 + 1) * scaling_factor;
 
-    // Final continuity check.
-    // If an addition spans two rows, we need to make sure that the following scenario is RULED OUT:
-    //   add4 = 0 on the CURRENT row, add1 = 1 on the NEXT row
-    // We must apply the above for the two cases:
-    // Case 1: q_add = 1 on the CURRENT row, q_add = 1 on the NEXT row
-    // Case 2: q_skew = 1 on the CURRENT row, q_skew = 1 on the NEXT row
-    // (i.e. if q_skew = 1, q_add_shift = 1 this implies an MSM transition so we skip this continuity check)
+    // Cross-row continuity: if add8 = 0 on current row, add1 = 0 on next row (within same phase)
     std::get<31>(accumulator) +=
-        (q_add * q_add_shift + q_skew * q_skew_shift) * (-add4 + 1) * add1_shift * scaling_factor;
-
-    // remaining checks (done in ecc_set_relation.hpp, ecc_lookup_relation.hpp)
-    // when transition occurs, perform set membership lookup on (accumulator / pc / msm_size)
-    // perform set membership lookups on add_i * (pc / round / slice_i)
-    // perform lookups on (pc / slice_i / x / y)
-
-    // We look up wnaf slices by mapping round + pc -> slice
-    // We use an exact set membership check to validate that
-    // wnafs written in wnaf_relation == wnafs read in msm relation
-    // We use `add1/add2/add3/add4` to flag whether we are performing a wnaf read op
-    // We can set these to be Prover-defined as the set membership check implicitly ensures that the correct reads
-    // have occurred.
+        (q_add * q_add_shift + q_skew * q_skew_shift) * (-add8 + 1) * add1_shift * scaling_factor;
 }
 
 } // namespace bb

From 3b393a88bfce1987b469c67abc409e3e3708ed82 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 13:57:26 +0000
Subject: [PATCH 07/24] eccvm: add boolean checks for msm_add5..8

Extend the bools relation with 4 new boolean constraints for the
msm_add5 through msm_add8 columns (indices 23-26).
Subrelation count: 23 -> 27.
---
 .../barretenberg/relations/ecc_vm/ecc_bools_relation.hpp  | 4 ++--
 .../relations/ecc_vm/ecc_bools_relation_impl.hpp          | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation.hpp
index d5417e3d4b4b..23959425cf09 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation.hpp
@@ -23,8 +23,8 @@ template <typename FF_> class ECCVMBoolsRelationImpl {
   public:
     using FF = FF_;
 
-    static constexpr std::array<size_t, 23> SUBRELATION_PARTIAL_LENGTHS{
-        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    static constexpr std::array<size_t, 27> SUBRELATION_PARTIAL_LENGTHS{
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     };
 
     template <typename ContainerOverSubrelations, typename AllEntities, typename Parameters>
diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation_impl.hpp
index 3182751f253c..130851af035e 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_bools_relation_impl.hpp
@@ -55,6 +55,10 @@ void ECCVMBoolsRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulat
     auto msm_add2 = View(in.msm_add2);
     auto msm_add3 = View(in.msm_add3);
     auto msm_add4 = View(in.msm_add4);
+    auto msm_add5 = View(in.msm_add5);
+    auto msm_add6 = View(in.msm_add6);
+    auto msm_add7 = View(in.msm_add7);
+    auto msm_add8 = View(in.msm_add8);
 
     std::get<0>(accumulator) += q_eq * (q_eq - 1) * scaling_factor;
     std::get<1>(accumulator) += q_add * (q_add - 1) * scaling_factor;
@@ -79,5 +83,9 @@ void ECCVMBoolsRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulat
     std::get<20>(accumulator) += msm_add2 * (msm_add2 - 1) * scaling_factor;
     std::get<21>(accumulator) += msm_add3 * (msm_add3 - 1) * scaling_factor;
     std::get<22>(accumulator) += msm_add4 * (msm_add4 - 1) * scaling_factor;
+    std::get<23>(accumulator) += msm_add5 * (msm_add5 - 1) * scaling_factor;
+    std::get<24>(accumulator) += msm_add6 * (msm_add6 - 1) * scaling_factor;
+    std::get<25>(accumulator) += msm_add7 * (msm_add7 - 1) * scaling_factor;
+    std::get<26>(accumulator) += msm_add8 * (msm_add8 - 1) * scaling_factor;
 }
 } // namespace bb

From 4e12c20c4f548b8699a433b14e3175bde1c75894 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 14:03:29 +0000
Subject: [PATCH 08/24] feat(eccvm): widen WNAF relation to 8 digits per row

Update ecc_wnaf_relation to process 8 wNAF digits per precompute row
(was 4), halving the number of rows per scalar from 8 to 4.

Key changes:
- SUBRELATION_PARTIAL_LENGTHS expanded from 23 to 35 entries
- 16 two-bit range checks (was 8) for slices s1hi..s8lo
- 8 wNAF conversions w0..w7 (was 4 w0..w3)
- Horner accumulation uses 2^32 shift (was 2^16) for 8 digits
- Round max changed from 7 to 3 (NUM_WNAF_DIGITS_PER_SCALAR/8 - 1)
- Added slice-zero checks for w4..w7 (subrelations 31-34)
- Updated header docstring to reflect 4-row layout
---
 .../relations/ecc_vm/ecc_wnaf_relation.hpp    |  39 +--
 .../ecc_vm/ecc_wnaf_relation_impl.hpp         | 233 ++++++------------
 2 files changed, 101 insertions(+), 171 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.hpp
index e7113776bea0..c339adce5b67 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation.hpp
@@ -11,24 +11,20 @@ namespace bb {
 /**
  * @brief ECCVMWnafRelationImpl evaluates relations that convert scalar multipliers into 4-bit WNAF slices
  * @details Each WNAF slice is a 4-bit slice representing one of 16 integers { -15, -13, ..., 15 }
- * Each WNAF slice is represented via two 2-bit columns (precompute_s1hi, ..., precompute_s4lo)
- * One 128-bit scalar multiplier is processed across 8 rows, indexed by a round variable.
+ * Each WNAF slice is represented via two 2-bit columns (precompute_s1hi, ..., precompute_s8lo)
+ * One 128-bit scalar multiplier is processed across 4 rows (8 digits/row), indexed by a round variable.
  * The following table describes the structure for one scalar.
  *
- * | point_transition | round | slices          | skew   | scalar_sum                      |
- * | ---------------- | ----- | --------------- | ------ | ------------------------------- |
- * | 0                | 0     | s0,s1,s2,s3     | 0      | 0                               |
- * | 0                | 1     | s4,s5,s6,s7     | 0      | \sum_{i=0}^4 16^i * s_{3 - i}   |
- * | 0                | 2     | s8,s9,s10,s11   | 0      | \sum_{i=0}^8 16^i * s_{7 - i}   |
- * | 0                | 3     | s12,s13,s14,s14 | 0      | \sum_{i=0}^12 16^i * s_{11 - i} |
- * | 0                | 4     | s16,s17,s18,s19 | 0      | \sum_{i=0}^16 16^i * s_{15 - i} |
- * | 0                | 5     | s20,s21,s22,s23 | 0      | \sum_{i=0}^20 16^i * s_{19 - i} |
- * | 0                | 6     | s24,s25,s26,s27 | 0      | \sum_{i=0}^24 16^i * s_{23 - i} |
- * | 1                | 7     | s28,s29,s30,s31 | s_skew | \sum_{i=0}^28 16^i * s_{27 - i} |
+ * | point_transition | round | slices                          | skew   | scalar_sum                        |
+ * | ---------------- | ----- | ------------------------------- | ------ | --------------------------------- |
+ * | 0                | 0     | s0,s1,s2,s3,s4,s5,s6,s7         | 0      | 0                                 |
+ * | 0                | 1     | s8,s9,s10,s11,s12,s13,s14,s15   | 0      | \sum_{i=0}^7 16^i * s_{7 - i}     |
+ * | 0                | 2     | s16,s17,s18,s19,s20,s21,s22,s23 | 0      | \sum_{i=0}^15 16^i * s_{15 - i}   |
+ * | 1                | 3     | s24,s25,s26,s27,s28,s29,s30,s31 | s_skew | \sum_{i=0}^23 16^i * s_{23 - i}   |
  *
  * The value of the input scalar is equal to the following:
  *
- * scalar = 2^16 * scalar_sum + 2^12 * s28 + 2^8 * s29 + 2^4 * s30 + s31 - s_skew
+ * scalar = 2^32 * scalar_sum + 2^28*s24 + ... + s31 - s_skew
  *
  * We use a multiset equality check in `ecc_set_relation.hpp` to validate the above value maps to the correct input
  * scalar for a given value of `pc` (i.e., for a given non-trivial EC point). In other words, this constrains that the
@@ -44,8 +40,21 @@ template <typename FF_> class ECCVMWnafRelationImpl {
   public:
     using FF = FF_;
 
-    static constexpr std::array<size_t, 23> SUBRELATION_PARTIAL_LENGTHS{ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-                                                                         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 };
+    // 35 subrelations:
+    // 0-7:   range checks for slices 0-7 (degree 5)
+    // 8:     scalar sum consistency (degree 5)
+    // 9-12:  round/PC transition logic (degree 5)
+    // 13:    skew validation (degree 5)
+    // 14-17: slice-zero checks for w0-w3 (degree 5)
+    // 18-19: round/pc zero when inactive (degree 5)
+    // 20:    s1hi MSB positive at transitions (degree 5)
+    // 21:    q_transition zero when inactive (degree 5)
+    // 22:    precompute_select monotonicity (degree 5)
+    // 23-30: range checks for slices 8-15 (degree 5)
+    // 31-34: slice-zero checks for w4-w7 (degree 5)
+    static constexpr std::array<size_t, 35> SUBRELATION_PARTIAL_LENGTHS{
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    };
 
     template <typename ContainerOverSubrelations, typename AllEntities, typename Parameters>
     static void accumulate(ContainerOverSubrelations& accumulator,
diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation_impl.hpp
index 4aba841aa36a..e9d325955dc5 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_wnaf_relation_impl.hpp
@@ -11,35 +11,17 @@ namespace bb {
 /**
  * @brief ECCVMWnafRelationImpl evaluates relations that convert scalar multipliers into 4-bit WNAF slices
  * @details Each WNAF slice is a 4-bit slice representing one of 16 integers { -15, -13, ..., 15 }
- * Each WNAF slice is represented via two 2-bit columns (precompute_s1hi, ..., precompute_s4lo)
- * One 128-bit scalar multiplier is processed across 8 rows, indexed by a round variable.
- * The following table describes the structure for one scalar.
+ * Each WNAF slice is represented via two 2-bit columns (precompute_s1hi, ..., precompute_s8lo)
+ * One 128-bit scalar multiplier is processed across 4 rows (8 digits/row), indexed by a round variable.
  *
- * | point_transition | round | slices          | skew   | scalar_sum                      |
- * | ---------------- | ----- | --------------- | ------ | ------------------------------- |
- * | 0                | 0     | s0,s1,s2,s3     | 0      | 0                               |
- * | 0                | 1     | s4,s5,s6,s7     | 0      | \sum_{i=0}^3 16^i * s_{3 - i}   |
- * | 0                | 2     | s8,s9,s10,s11   | 0      | \sum_{i=0}^7 16^i * s_{7 - i}   |
- * | 0                | 3     | s12,s13,s14,s15 | 0      | \sum_{i=0}^11 16^i * s_{11 - i} |
- * | 0                | 4     | s16,s17,s18,s19 | 0      | \sum_{i=0}^15 16^i * s_{15 - i} |
- * | 0                | 5     | s20,s21,s22,s23 | 0      | \sum_{i=0}^19 16^i * s_{19 - i} |
- * | 0                | 6     | s24,s25,s26,s27 | 0      | \sum_{i=0}^23 16^i * s_{23 - i} |
- * | 1                | 7     | s28,s29,s30,s31 | s_skew | \sum_{i=0}^27 16^i * s_{27 - i} |
+ * | point_transition | round | slices                          | skew   | scalar_sum                        |
+ * | ---------------- | ----- | ------------------------------- | ------ | --------------------------------- |
+ * | 0                | 0     | s0,s1,s2,s3,s4,s5,s6,s7         | 0      | 0                                 |
+ * | 0                | 1     | s8,s9,s10,s11,s12,s13,s14,s15   | 0      | \sum_{i=0}^7 16^i * s_{7 - i}     |
+ * | 0                | 2     | s16,s17,s18,s19,s20,s21,s22,s23 | 0      | \sum_{i=0}^15 16^i * s_{15 - i}   |
+ * | 1                | 3     | s24,s25,s26,s27,s28,s29,s30,s31 | s_skew | \sum_{i=0}^23 16^i * s_{23 - i}   |
  *
- * The value of the input scalar is equal to the following:
- *
- * scalar = 2^16 * scalar_sum + 2^12 * s28 + 2^8 * s29 + 2^4 * s30 + s31 - s_skew
- *
- * We use a multiset equality check in `ecc_set_relation.hpp` to validate the above value maps to the correct input
- * scalar for a given value of `pc` (i.e., for a given non-trivial EC point). In other words, this constrains that the
- * wNAF expansion is correct. Note that, from the perpsective of the Precomputed table, we only add the tuple (pc,
- * round, slice) to the multiset when point_transition == 1.
- *
- * Furthermore, as the column `point_transition` is committed to by the Prover, we must constrain it is correctly
- * computed (see also `ECCVMPointTableRelationImpl` for a description of what the table looks like.)
- *
- * @tparam FF
- * @tparam AccumulatorTypes
+ * scalar = 2^32 * scalar_sum + 2^28*s24 + ... + s31 - s_skew
  */
 template <typename FF>
 template <typename ContainerOverSubrelations, typename AllEntities, typename Parameters>
@@ -56,30 +38,24 @@ void ECCVMWnafRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulato
     auto q_transition = View(in.precompute_point_transition);
     auto round = View(in.precompute_round);
     auto round_shift = View(in.precompute_round_shift);
-    auto pc = View(in.precompute_pc); // note that this is a _point-counter_.
+    auto pc = View(in.precompute_pc);
     auto pc_shift = View(in.precompute_pc_shift);
-    // precompute_select is a boolean column that is 0 at the initial row and 1 at all subsequent active rows in the
-    // precompute table. We only evaluate the ecc_wnaf_relation if `precompute_select=1`. As a reminder, this latter is
-    // 0 at the initial row and then 1 at the rest of the (active) rows of the Precomputed table. The fact that
-    // `precompute_select` is correctly computed is mediated by the set relation.
     auto precompute_select = View(in.precompute_select);
-
     auto precompute_select_shift = View(in.precompute_select_shift);
-
     const auto& precompute_skew = View(in.precompute_skew);
 
-    const std::array<View, 8> slices{
+    // 16 two-bit slices encoding 8 wNAF digits
+    const std::array<View, 16> slices{
         View(in.precompute_s1hi), View(in.precompute_s1lo), View(in.precompute_s2hi), View(in.precompute_s2lo),
         View(in.precompute_s3hi), View(in.precompute_s3lo), View(in.precompute_s4hi), View(in.precompute_s4lo),
+        View(in.precompute_s5hi), View(in.precompute_s5lo), View(in.precompute_s6hi), View(in.precompute_s6lo),
+        View(in.precompute_s7hi), View(in.precompute_s7lo), View(in.precompute_s8hi), View(in.precompute_s8lo),
     };
 
     const auto range_constraint_slice_to_2_bits = [&scaling_factor](const View& s, auto& acc) {
         acc += ((s - 1).sqr() - 1) * ((s - 2).sqr() - 1) * scaling_factor;
     };
 
-    // given two 2-bit numbers `hi`, `lo`, convert to a wNAF digit (in {-15, -13, ..., 13, 15}) via the formula:
-    // `2(4*hi + lo) - 15`. (Here, `4*hi + lo` represents the 4-bit number corresponding to the concatenation of `hi`
-    // and `lo`.)
     const auto convert_to_wnaf = [](const View& hi, const View& lo) {
         auto t = hi + hi;
         t += t;
@@ -89,16 +65,10 @@ void ECCVMWnafRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulato
     };
 
     const auto scaled_transition = q_transition * scaling_factor;
-    const auto scaled_transition_is_zero =
-        -scaled_transition + scaling_factor; // `scaling_factor * (1 - q_transition)`, i.e., is the scaling_factor if we
-                                             // are _not_ at a transition, else 0.
+    const auto scaled_transition_is_zero = -scaled_transition + scaling_factor;
+    const auto scaled_lagrange_first = scaling_factor * lagrange_first;
 
-    const auto scaled_lagrange_first = scaling_factor * lagrange_first; // for edge-case handling
-    /**
-     * @brief Constrain each of our scalar slice chunks (s1, ..., s8) to be 2 bits.
-     * Doing range checks this way vs permutation-based range check removes need to create sorted list + grand product
-     * polynomial. Probably cheaper even if we have to split each 4-bit WNAF slice into 2-bit chunks.
-     */
+    // Range-check all 16 two-bit slices
     range_constraint_slice_to_2_bits(slices[0], std::get<0>(accumulator));
     range_constraint_slice_to_2_bits(slices[1], std::get<1>(accumulator));
     range_constraint_slice_to_2_bits(slices[2], std::get<2>(accumulator));
@@ -107,45 +77,35 @@ void ECCVMWnafRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulato
     range_constraint_slice_to_2_bits(slices[5], std::get<5>(accumulator));
     range_constraint_slice_to_2_bits(slices[6], std::get<6>(accumulator));
     range_constraint_slice_to_2_bits(slices[7], std::get<7>(accumulator));
-
-    /**
-     * @brief If we are processing a new scalar (q_transition = 1), validate that the first slice is positive.
-     *        This requires us to validate slice1 is in the range [8, ... 15].
-     *        (when converted into wnaf form this maps to the range [1, 3, ..., 15]).
-     *        We do this to ensure the final scalar sum is positive.
-     *        We already know slice1 is in the range [0, ..., 15]
-     *        To check the range [8, ..., 15] we validate the most significant 2 bits (s1) are >=2
-     */
+    range_constraint_slice_to_2_bits(slices[8], std::get<23>(accumulator));
+    range_constraint_slice_to_2_bits(slices[9], std::get<24>(accumulator));
+    range_constraint_slice_to_2_bits(slices[10], std::get<25>(accumulator));
+    range_constraint_slice_to_2_bits(slices[11], std::get<26>(accumulator));
+    range_constraint_slice_to_2_bits(slices[12], std::get<27>(accumulator));
+    range_constraint_slice_to_2_bits(slices[13], std::get<28>(accumulator));
+    range_constraint_slice_to_2_bits(slices[14], std::get<29>(accumulator));
+    range_constraint_slice_to_2_bits(slices[15], std::get<30>(accumulator));
+
+    // Validate first slice is positive at transitions
     const auto s1hi_shift = View(in.precompute_s1hi_shift);
     const auto s1hi_shift_msb_set = (s1hi_shift - 2) * (s1hi_shift - 3);
     const auto scaled_transition_plus_lagrange_first = scaled_transition + scaled_lagrange_first;
-    // away from row zero, add `scaled_transition * precompute_select_shift * s1hi_shift_msb_set`. however,
-    // `q_transition[0] == 0`, so this constraint will not turn on at the 0th row unless we add
-    // `scaled_lagrange_first`.
     std::get<20>(accumulator) += scaled_transition_plus_lagrange_first * precompute_select_shift * s1hi_shift_msb_set;
-    /**
-     * @brief Convert each pair of 2-bit scalar slices into a 4-bit windowed-non-adjacent-form slice.
-     * Conversion from binary -> wnaf = 2 * binary - 15.
-     * Converts a value in [0, ..., 15] into [-15, -13, -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11 , 13, 15].
-     * We use WNAF representation to avoid case where we are conditionally adding a point in our MSM algo.
-     */
+
+    // Convert 16 two-bit slices into 8 wNAF digits
     const auto w0 = convert_to_wnaf(slices[0], slices[1]);
     const auto w1 = convert_to_wnaf(slices[2], slices[3]);
     const auto w2 = convert_to_wnaf(slices[4], slices[5]);
     const auto w3 = convert_to_wnaf(slices[6], slices[7]);
-
-    /**
-     * @brief Slice consistency check.
-     * We require that `scalar_sum` on the next row correctly accumulates the 4  WNAF slices present on the current row
-     * (i.e. 16 WNAF bits).
-     * i.e. next_scalar_sum - 2^{16} * current_scalar_sum - 2^12 * w_0 - 2^8 * w_1 - 2^4 * w_2 - w_3 = 0
-     * @note We only perform slice_consistency check when next row is processing the same scalar as the current row!
-     *       i.e. when q_transition  = 0
-     * Note(@zac-williamson): improve WNAF use (#2224)
-     */
-    auto row_slice = w0; // row_slice will eventually contain the truncated scalar corresponding to the current row,
-                         // which is 2^12 * w_0 + 2^8 * w_1 + 2^4 * w_2 + w_3. (If one just looks at the wNAF digits in
-                         // this row, this is the resulting odd number. Note that it is not necessarily positive.)
+    const auto w4 = convert_to_wnaf(slices[8], slices[9]);
+    const auto w5 = convert_to_wnaf(slices[10], slices[11]);
+    const auto w6 = convert_to_wnaf(slices[12], slices[13]);
+    const auto w7 = convert_to_wnaf(slices[14], slices[15]);
+
+    // Scalar sum consistency: accumulate 8 wNAF digits via Horner's method
+    // row_slice = 2^28*w0 + 2^24*w1 + ... + 2^4*w6 + w7
+    auto row_slice = w0;
+    // Multiply by 16 (shift left 4 bits), then add next digit. Repeat 7 times.
     row_slice += row_slice;
     row_slice += row_slice;
     row_slice += row_slice;
@@ -161,107 +121,68 @@ void ECCVMWnafRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulato
     row_slice += row_slice;
     row_slice += row_slice;
     row_slice += w3;
-    auto sum_delta = scalar_sum * FF(1ULL << 16) + row_slice;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += w4;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += w5;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += w6;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += row_slice;
+    row_slice += w7;
+    // Shift by 2^32 (8 digits * 4 bits each)
+    auto sum_delta = scalar_sum * FF(1ULL << 32) + row_slice;
     const auto check_sum = scalar_sum_shift - sum_delta;
     std::get<8>(accumulator) += precompute_select * check_sum * scaled_transition_is_zero;
-    // We must constrain `precompute_select` to be of the correct shape: 0 1 1 ... 1 0 ...0. In other words, after the
-    // first row, it is monotonically non-decreasing. In other words, a malicious prover cannot inject the value '0' in
-    // the middle.
-    const auto scaled_lagrange_first_minus_one =
-        scaled_lagrange_first - scaling_factor; // (if not at the first row, is -1, else 0) * scaling_factor
+
+    // precompute_select monotonicity
+    const auto scaled_lagrange_first_minus_one = scaled_lagrange_first - scaling_factor;
     const auto precompute_select_check = precompute_select_shift * (precompute_select - 1);
     std::get<22>(accumulator) += scaled_lagrange_first_minus_one * precompute_select_check;
-    /**
-     * @brief Transition logic with `round` and `q_transition`.
-     * Goal: `round` is an integer in [0, ... 7] that tracks how many slices we have processed for a given scalar.
-     * i.e., the number of 4-bit WNAF slices processed = round * 4.
-     * We must ensure that `q_transition` is well-formed and that `round` is correctly constrained. Recall that `pc`
-     * stands for point-counter.
-     *
-     * For the former, we force the following:
-     *      1. When `q_transition == 1`, then `scalar_sum_shift == 0`, `round_shift == 0`, `round == 7`, and `pc_shift
-     *      == pc - 1`.
-     *      2. When `q_transition == 0`, then `round_shift - round == 1` and `pc_shift == pc`
-     *
-     * For the latter: note that we don't actually range-constrain `round` (expensive if we don't need to!). We
-     * nonetheless can correctly constrain `round`, because of the multiset checks. There are two multiset equality
-     * checks that we perform that implicate the wNAF relation:
-     *      1. (pc, msm_round, wnaf_slice)
-     *      2. (pc, P.x, P.y, scalar-multiplier)
-     * The first is used to communicate with the MSM table, to validate that the slice * point values the MSM tables use
-     * are indeed what we have precomputed. The second facilitates communication with the Transcript table, to ensure
-     * that the wNAF expansion of the scalar is indeed correct. Moreover, the second is only "sent" to the multiset when
-     * `q_transition == 1`. (It is helpful to recall that `pc` is monotonic: one per each point involved in a
-     * non-trivial scalar multiplication.)
-     *
-     * Here is the logic. We must ensure that `round` can never be set to a value > 7. If this were possible at row `i`,
-     * then `q_transition == 0` for all subsequent rows by the incrementing logic. There are (at least) two problems.
-     *
-     * 1. The implicit MSM round (accounted for in (1)) is between `4 * round` and `4 * round + 3` (in fact `4 *
-     * round + 4` iff we are at a skew). As the `round` must increment, this means that the `msm_round` will be
-     * larger than 32, which can't happen due to the internal constraints in the MSM table. In particular, the multiset
-     * equality check will fail, as the MSM tables can never send an entry with a round larger than 32.
-     *
-     * 2. This forces `precompute_pc` to be constant from here on out. This will violate the multiset equalities both
-     * of terms (1) _and_ (2). For the former, we will write too many entries with the given `pc`. (However, we've
-     * already shown how this multset equality fails due to `round`.) More importantly, for the latter, we will _never_
-     * "send" the tuple (pc, P.x, P.x, scalar-multiplier) to the multiset, for this value of `pc` and all potentially
-     * subsequent values. We explicate this latter failure. The transcript table will certainly fill _some_ values in
-     * for (pc, P.x, P.y, scalar-multipler) (at least with correct pc and scalar-multiplier values), which will cause
-     * the multiset equality check to fail.
-     *
-     * As always, we are relying on the monotonicity of the `pc` in these arguments.
-     *
-     */
 
-    // We combine two checks into a single relation
-    // q_transition * (round - 7) + (-q_transition + 1) * (round_shift - round - 1)
-    // => q_transition * (round - 7 - round_shift + round + 1) + (round_shift - round - 1)
-    // => q_transition * (2 * round - round_shift - 6) + (round_shift - round - 1)
+    // Round transition logic: round now goes 0-3 (was 0-7)
+    // Combined check: q_transition * (round - 3) + (-q_transition + 1) * (round_shift - round - 1)
     const auto round_check = round_shift - round - 1;
-    // This selector is 1 at row 0 (via lagrange_first) and at transition rows where precompute_select == 1.
-    // It's used to constrain shifted values (like round_shift, scalar_sum_shift) that need to be checked
-    // both at the first active row AND at subsequent transitions between scalars.
     const auto precompute_select_transition_plus_lagrange_first =
         precompute_select * scaled_transition + scaled_lagrange_first;
+    // The last round index is (NUM_WNAF_DIGITS_PER_SCALAR / WNAF_DIGITS_PER_ROW) - 1.
+    // With 4 digits per row this was 32/4 - 1 = 7; with 8 digits per row it is 32/8 - 1 = 3.
+    constexpr size_t MAX_ROUND = 3; // 32/8 - 1
     std::get<9>(accumulator) +=
-        precompute_select * (scaled_transition * (round - round_check - 7) + scaling_factor * round_check);
-    // At a transition (or at row 0 via lagrange_first), the next round must be 0.
+        precompute_select *
+        (scaled_transition * (round - round_check - static_cast<int>(MAX_ROUND)) + scaling_factor * round_check);
     std::get<10>(accumulator) += precompute_select_transition_plus_lagrange_first * round_shift;
 
-    /**
-     * @brief Scalar transition/PC checks.
-     * 1: if q_transition = 1 or if lagrange_first = 1, scalar_sum_new = 0. (note that q_transition[0] == 0.)
-     * 2: if q_transition = 0, pc at next row = pc at current row
-     * 3: if q_transition = 1, pc at next row = pc at current row - 1 (decrements by 1)
-     * (we combine 2 and 3 into a single relation)
-     */
+    // Scalar transition / PC checks
     std::get<11>(accumulator) += precompute_select_transition_plus_lagrange_first * scalar_sum_shift;
-    // (2, 3 combined): q_transition * (pc - pc_shift - 1) + (-q_transition + 1) * (pc_shift - pc)
-    // => q_transition * (-2 * (pc_shift - pc) - 1) + (pc_shift - pc)
     const auto pc_delta = pc_shift - pc;
     std::get<12>(accumulator) +=
         precompute_select * (scaled_transition * ((-pc_delta - pc_delta - 1)) + pc_delta * scaling_factor);
 
-    /**
-     * @brief Validate skew is 0 or 7
-     * 7 is the wnaf representation of -1.
-     * We have one skew variable per scalar multiplier. We can only represent odd integers in WNAF form.
-     * If input scalar is even, we must subtract 1 from WNAF scalar sum to get actual value (i.e. where skew = 7)
-     * We use skew in two places.
-     * 1: when validating sum of wnaf slices matches input scalar (we add skew to scalar_sum in ecc_set_relation)
-     * 2: in ecc_msm_relation. Final MSM round uses skew to conditionally subtract a point from the accumulator
-     */
+    // Validate skew is 0 or 7
     std::get<13>(accumulator) += precompute_select * (precompute_skew * (precompute_skew - 7)) * scaling_factor;
 
-    // Set slices (a.k.a. compressed digits), pc, and round all to zero when `precompute_select == 0`.
-    // (this is for one of the multiset equality checks.) Defensively, we also set precompute_point_transition to 0 when
-    // precompute_select == 0.
+    // Set slices, pc, round, q_transition to zero when precompute_select == 0
     const auto precompute_select_zero = (-precompute_select + 1) * scaling_factor;
     std::get<14>(accumulator) += precompute_select_zero * (w0 + 15);
     std::get<15>(accumulator) += precompute_select_zero * (w1 + 15);
     std::get<16>(accumulator) += precompute_select_zero * (w2 + 15);
     std::get<17>(accumulator) += precompute_select_zero * (w3 + 15);
+    std::get<31>(accumulator) += precompute_select_zero * (w4 + 15);
+    std::get<32>(accumulator) += precompute_select_zero * (w5 + 15);
+    std::get<33>(accumulator) += precompute_select_zero * (w6 + 15);
+    std::get<34>(accumulator) += precompute_select_zero * (w7 + 15);
 
     std::get<18>(accumulator) += precompute_select_zero * round;
     std::get<19>(accumulator) += precompute_select_zero * pc;

From f797494dde18c5d735eceb085fb66b771d5c5ab0 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 14:08:17 +0000
Subject: [PATCH 09/24] feat(eccvm): widen set relation to 8 slice fingerprints
 and 8 add-gated tuples

Update ECCVMSetRelation for 8-wide precompute and MSM tables:

Numerator changes:
- 8 slice fingerprints instead of 4, with round encoding 8*round+j
- Scalar reconstruction uses 8 wNAF digits with 2^32 shift (was 4, 2^16)
- Skew tuple uses round offset 8 (was 4)
- eccvm_set_permutation_delta comment updated for 8-term product

Denominator changes:
- 8 add-gated (pc, round, slice) tuples instead of 4
- PC offsets 0..7 (was 0..3) for msm_add1..msm_add8

SUBRELATION_PARTIAL_LENGTHS updated to {29, 3} (was {22, 3}) to
accommodate the higher degree from the 8-wide grand product.
---
 .../relations/ecc_vm/ecc_set_relation.hpp     |   2 +-
 .../ecc_vm/ecc_set_relation_impl.hpp          | 197 ++++++++++++++----
 2 files changed, 161 insertions(+), 38 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation.hpp
index c0f18ac081b8..7eb63c879be4 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation.hpp
@@ -27,7 +27,7 @@ template <typename FF_> class ECCVMSetRelationImpl {
     static constexpr uint64_t THIRD_TERM_TAG = 3;  // (pc, P.x, P.y, msm_size)
 
     static constexpr std::array<size_t, 2> SUBRELATION_PARTIAL_LENGTHS{
-        22, // grand product construction sub-relation
+        29, // grand product construction sub-relation (8-wide: denom degree 28 + 1)
         3   // left-shiftable polynomial sub-relation
     };
     // prover optimization to allow for skipping the computation of sub-relations at certain points in sumcheck.
diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp
index 369b963f2e17..a9c3ccdfea0e 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp
@@ -29,7 +29,7 @@ namespace bb {
  * @note This ensures the following:
  *   * every WNAF slice computed during scalar decomposition must be used exactly once during the MSM computation.
  * @warning There is a subtlety in this table, which slightly complicates the abstraction of multiset-equality testing.
- * On the denominator side, when `addX == 0` for all `X ∈ {1, 2, 3, 4}` (automatically forced by `add1 == 0`), we
+ * On the denominator side, when `addX == 0` for all `X ∈ {1, ..., 8}` (automatically forced by `add1 == 0`), we
  * multiply by 1. On the numerator side, to balance this out, this means that when `precompute_select == 0`, we must
  * multiply by an additional `eccvm_set_permutation_delta`, which is the _inverse_ of the fingerprint of the tuple `(0,
  * 0, 0)`. (This corresponds to "removing" the tuple `(0, 0, 0)` from the left multiset when `precompute_select == 0`).
@@ -94,14 +94,15 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
      * part of ECCVMWnafRelation.
      *
      * @details
-     * There are 4 tuple entries per row of the Precompute table. Moreover, the element that "increments" is
-     * 4 * `precompute_round`, due to the fact that the Precompute columns contain four "digits"/slices per row.
+     * There are 8 tuple entries per row of the Precompute table. Moreover, the element that "increments" is
+     * 8 * `precompute_round`, due to the fact that the Precompute columns contain eight "digits"/slices per row.
      *
      * @note
      * We only add this tuple if `precompute_select == 1`. Otherwise, we add a the tuple (0, 0, 0).
      */
 
-    // OPTIMIZE(@zac-williamson #2226) optimize degrees
+    // precompute_round8 = 8 * precompute_round (each row holds 8 digits)
+    const auto precompute_round8 = precompute_round4 + precompute_round4;
 
     Accumulator numerator(1); // degree-0
     {
@@ -113,7 +114,7 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
         wnaf_slice += s1;
 
         const auto wnaf_slice_input0 =
-            wnaf_slice + gamma + precompute_pc * beta + precompute_round4 * beta_sqr + first_term_tag;
+            wnaf_slice + gamma + precompute_pc * beta + precompute_round8 * beta_sqr + first_term_tag;
         numerator *= wnaf_slice_input0; // degree-1
     }
     {
@@ -125,7 +126,7 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
         wnaf_slice += s1;
 
         const auto wnaf_slice_input1 =
-            wnaf_slice + gamma + precompute_pc * beta + (precompute_round4 + 1) * beta_sqr + first_term_tag;
+            wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 1) * beta_sqr + first_term_tag;
         numerator *= wnaf_slice_input1; // degree-2
     }
     {
@@ -137,7 +138,7 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
         wnaf_slice += s1;
 
         const auto wnaf_slice_input2 =
-            wnaf_slice + gamma + precompute_pc * beta + (precompute_round4 + 2) * beta_sqr + first_term_tag;
+            wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 2) * beta_sqr + first_term_tag;
         numerator *= wnaf_slice_input2; // degree-3
     }
     {
@@ -147,27 +148,76 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
         auto wnaf_slice = s0 + s0;
         wnaf_slice += wnaf_slice;
         wnaf_slice += s1;
+
         const auto wnaf_slice_input3 =
-            wnaf_slice + gamma + precompute_pc * beta + (precompute_round4 + 3) * beta_sqr + first_term_tag;
+            wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 3) * beta_sqr + first_term_tag;
         numerator *= wnaf_slice_input3; // degree-4
     }
+    {
+        const auto& s0 = View(in.precompute_s5hi);
+        const auto& s1 = View(in.precompute_s5lo);
+
+        auto wnaf_slice = s0 + s0;
+        wnaf_slice += wnaf_slice;
+        wnaf_slice += s1;
+
+        const auto wnaf_slice_input4 =
+            wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 4) * beta_sqr + first_term_tag;
+        numerator *= wnaf_slice_input4; // degree-5
+    }
+    {
+        const auto& s0 = View(in.precompute_s6hi);
+        const auto& s1 = View(in.precompute_s6lo);
+
+        auto wnaf_slice = s0 + s0;
+        wnaf_slice += wnaf_slice;
+        wnaf_slice += s1;
+
+        const auto wnaf_slice_input5 =
+            wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 5) * beta_sqr + first_term_tag;
+        numerator *= wnaf_slice_input5; // degree-6
+    }
+    {
+        const auto& s0 = View(in.precompute_s7hi);
+        const auto& s1 = View(in.precompute_s7lo);
+
+        auto wnaf_slice = s0 + s0;
+        wnaf_slice += wnaf_slice;
+        wnaf_slice += s1;
+
+        const auto wnaf_slice_input6 =
+            wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 6) * beta_sqr + first_term_tag;
+        numerator *= wnaf_slice_input6; // degree-7
+    }
+    {
+        const auto& s0 = View(in.precompute_s8hi);
+        const auto& s1 = View(in.precompute_s8lo);
+
+        auto wnaf_slice = s0 + s0;
+        wnaf_slice += wnaf_slice;
+        wnaf_slice += s1;
+
+        const auto wnaf_slice_input7 =
+            wnaf_slice + gamma + precompute_pc * beta + (precompute_round8 + 7) * beta_sqr + first_term_tag;
+        numerator *= wnaf_slice_input7; // degree-8
+    }
     {
         // skew product if relevant
         const auto& skew = View(in.precompute_skew);
         const auto& precompute_point_transition = View(in.precompute_point_transition);
         const auto skew_input = precompute_point_transition * (skew + gamma + precompute_pc * beta +
-                                                               (precompute_round4 + 4) * beta_sqr + first_term_tag) +
+                                                               (precompute_round8 + 8) * beta_sqr + first_term_tag) +
                                 (-precompute_point_transition + 1);
-        numerator *= skew_input; // degree-6
+        numerator *= skew_input; // degree-10
     }
     {
         // in `EccvmProver` and `ECCVMVerifier`, we see that `eccvm_set_permutation_delta` is initially computed as
-        // (γ+t·β⁴)·(γ+β²+t·β⁴)·(γ+2β²+t·β⁴)·(γ+3β²+t·β⁴) (where t = FIRST_TERM_TAG) and _then_ inverted.
+        // the product of 8 terms (γ+j·β²+t·β⁴) for j=0..7 (where t = FIRST_TERM_TAG) and _then_ inverted.
         const auto& eccvm_set_permutation_delta = params.eccvm_set_permutation_delta;
         // if `precompute_select == 1`, don't change the numerator. if it is 0, then to get the grand product argument
         // to work (as we have zero-padded the rows of the MSM table), we must multiply by the inverse of the
         // fingerprint of (0, 0, 0).
-        numerator *= precompute_select * (-eccvm_set_permutation_delta + 1) + eccvm_set_permutation_delta; // degree-7
+        numerator *= precompute_select * (-eccvm_set_permutation_delta + 1) + eccvm_set_permutation_delta; // degree-11
     }
 
     /**
@@ -205,7 +255,12 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
         const auto w1 = convert_to_wnaf<Accumulator>(View(in.precompute_s2hi), View(in.precompute_s2lo));
         const auto w2 = convert_to_wnaf<Accumulator>(View(in.precompute_s3hi), View(in.precompute_s3lo));
         const auto w3 = convert_to_wnaf<Accumulator>(View(in.precompute_s4hi), View(in.precompute_s4lo));
+        const auto w4 = convert_to_wnaf<Accumulator>(View(in.precompute_s5hi), View(in.precompute_s5lo));
+        const auto w5 = convert_to_wnaf<Accumulator>(View(in.precompute_s6hi), View(in.precompute_s6lo));
+        const auto w6 = convert_to_wnaf<Accumulator>(View(in.precompute_s7hi), View(in.precompute_s7lo));
+        const auto w7 = convert_to_wnaf<Accumulator>(View(in.precompute_s8hi), View(in.precompute_s8lo));
 
+        // row_slice = 2^28*w0 + 2^24*w1 + ... + 2^4*w6 + w7 (Horner for 8 digits)
         auto row_slice = w0;
         row_slice += row_slice;
         row_slice += row_slice;
@@ -221,26 +276,62 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
         row_slice += row_slice;
         row_slice += row_slice;
         row_slice += row_slice;
-        row_slice += w3; // row_slice = 2^12 w_0 + 2^8 w_1 + 2^4 w_2 + 2^0 w_3
+        row_slice += w3;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += w4;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += w5;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += w6;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += row_slice;
+        row_slice += w7;
 
+        // scalar_sum_full = 2^32 * wnaf_scalar_sum + row_slice + adjusted_skew
         auto scalar_sum_full = wnaf_scalar_sum + wnaf_scalar_sum;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full += scalar_sum_full;
-        scalar_sum_full +=
-            row_slice + adjusted_skew; // scalar_sum_full = 2^16 * wnaf_scalar_sum + row_slice + adjusted_skew
+        scalar_sum_full += scalar_sum_full; // 4x
+        scalar_sum_full += scalar_sum_full; // 8x
+        scalar_sum_full += scalar_sum_full; // 16x
+        scalar_sum_full += scalar_sum_full; // 32x
+        scalar_sum_full += scalar_sum_full; // 64x
+        scalar_sum_full += scalar_sum_full; // 128x
+        scalar_sum_full += scalar_sum_full; // 256x
+        scalar_sum_full += scalar_sum_full; // 512x
+        scalar_sum_full += scalar_sum_full; // 1024x
+        scalar_sum_full += scalar_sum_full; // 2048x
+        scalar_sum_full += scalar_sum_full; // 4096x
+        scalar_sum_full += scalar_sum_full; // 8192x
+        scalar_sum_full += scalar_sum_full; // 16384x
+        scalar_sum_full += scalar_sum_full; // 32768x
+        scalar_sum_full += scalar_sum_full; // 65536x
+        scalar_sum_full += scalar_sum_full; // 2^17
+        scalar_sum_full += scalar_sum_full; // 2^18
+        scalar_sum_full += scalar_sum_full; // 2^19
+        scalar_sum_full += scalar_sum_full; // 2^20
+        scalar_sum_full += scalar_sum_full; // 2^21
+        scalar_sum_full += scalar_sum_full; // 2^22
+        scalar_sum_full += scalar_sum_full; // 2^23
+        scalar_sum_full += scalar_sum_full; // 2^24
+        scalar_sum_full += scalar_sum_full; // 2^25
+        scalar_sum_full += scalar_sum_full; // 2^26
+        scalar_sum_full += scalar_sum_full; // 2^27
+        scalar_sum_full += scalar_sum_full; // 2^28
+        scalar_sum_full += scalar_sum_full; // 2^29
+        scalar_sum_full += scalar_sum_full; // 2^30
+        scalar_sum_full += scalar_sum_full; // 2^31
+        scalar_sum_full += scalar_sum_full; // 2^32
+        scalar_sum_full += row_slice + adjusted_skew;
 
         auto precompute_point_transition = View(in.precompute_point_transition);
 
@@ -306,7 +397,7 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
 {
     using View = typename Accumulator::View;
 
-    // OPTIMIZE(@zac-williamson). The degree of this contribution is 17! makes overall relation degree 19.
+    // OPTIMIZE(@zac-williamson). The degree of the denominator is 28, making overall relation degree ~29.
     // Can potentially optimize by refining the algebra.
     const auto& gamma = params.gamma;
     const auto& beta = params.beta;
@@ -324,7 +415,7 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
 
     /**
      * @brief First term: tuple of (pc, round, wnaf_slice), used to determine which points we extract from lookup tables
-     * when evaluaing MSMs in ECCVMMsmRelation.
+     * when evaluating MSMs in ECCVMMsmRelation.
      * These values must be equivalent to the values computed in the 1st term of `compute_grand_product_numerator`
      */
     Accumulator denominator(1); // degree-0
@@ -363,6 +454,38 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
             (-add4 + 1);
         denominator *= wnaf_slice_output4; // degree-8
     }
+    {
+        const auto& add5 = View(in.msm_add5);
+        const auto& msm_slice5 = View(in.msm_slice5);
+        auto wnaf_slice_output5 =
+            add5 * (msm_slice5 + gamma + (msm_pc - msm_count - 4) * beta + msm_round * beta_sqr + first_term_tag) +
+            (-add5 + 1);
+        denominator *= wnaf_slice_output5; // degree-10
+    }
+    {
+        const auto& add6 = View(in.msm_add6);
+        const auto& msm_slice6 = View(in.msm_slice6);
+        auto wnaf_slice_output6 =
+            add6 * (msm_slice6 + gamma + (msm_pc - msm_count - 5) * beta + msm_round * beta_sqr + first_term_tag) +
+            (-add6 + 1);
+        denominator *= wnaf_slice_output6; // degree-12
+    }
+    {
+        const auto& add7 = View(in.msm_add7);
+        const auto& msm_slice7 = View(in.msm_slice7);
+        auto wnaf_slice_output7 =
+            add7 * (msm_slice7 + gamma + (msm_pc - msm_count - 6) * beta + msm_round * beta_sqr + first_term_tag) +
+            (-add7 + 1);
+        denominator *= wnaf_slice_output7; // degree-14
+    }
+    {
+        const auto& add8 = View(in.msm_add8);
+        const auto& msm_slice8 = View(in.msm_slice8);
+        auto wnaf_slice_output8 =
+            add8 * (msm_slice8 + gamma + (msm_pc - msm_count - 7) * beta + msm_round * beta_sqr + first_term_tag) +
+            (-add8 + 1);
+        denominator *= wnaf_slice_output8; // degree-16
+    }
 
     /**
      * @brief Second term: tuple of  the form `(transcript_pc, transcript_Px, transcript_Py, z1)` OR `(transcript_pc,
@@ -419,7 +542,7 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
 
         // point_table_init_write = degree 7
         auto point_table_init_write = transcript_mul * transcript_product + (-transcript_mul + 1);
-        denominator *= point_table_init_write; // degree 17
+        denominator *= point_table_init_write; // degree-25
     }
     /**
      * @brief Third term: tuple of (pc, P.x, P.y, msm-size) from ECCVMTranscriptRelation.
@@ -454,7 +577,7 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
         auto msm_result_read = transcript_pc_shift + transcript_msm_x * beta + transcript_msm_y * beta_sqr +
                                full_msm_count * beta_cube + third_term_tag;
         msm_result_read = transcript_msm_transition * (msm_result_read + gamma) + (-transcript_msm_transition + 1);
-        denominator *= msm_result_read; // degree-20
+        denominator *= msm_result_read; // degree-28
     }
     return denominator;
 }
@@ -480,10 +603,10 @@ void ECCVMSetRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulator
     using View = typename Accumulator::View;
     using ShortView = typename std::tuple_element_t<1, ContainerOverSubrelations>::View;
 
-    // degree-11
+    // degree-15 (8 slices + skew + delta + second term + third term)
     Accumulator numerator_evaluation = compute_grand_product_numerator<Accumulator>(in, params);
 
-    // degree-20
+    // degree-27 (8 add-gated tuples + second term + third term)
     Accumulator denominator_evaluation = compute_grand_product_denominator<Accumulator>(in, params);
 
     const auto& lagrange_first = View(in.lagrange_first);
@@ -494,7 +617,7 @@ void ECCVMSetRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulator
     const auto& z_perm_shift = View(in.z_perm_shift);
     const auto& z_perm_shift_short = ShortView(in.z_perm_shift);
 
-    // degree-21
+    // degree-28
     std::get<0>(accumulator) +=
         ((z_perm + lagrange_first) * numerator_evaluation - (z_perm_shift + lagrange_last) * denominator_evaluation) *
         scaling_factor;

From fed78c736860c2f40a9dfa177ba4a8565b854c03 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 14:18:42 +0000
Subject: [PATCH 10/24] feat(eccvm): widen lookup relation to 8 reads and 4
 table terms

Update ECCVMLookupRelation for 8-wide MSM and 2 precomputed points
per precompute row:

- NUM_LOOKUP_TERMS: 4 -> 8 (msm_add1..msm_add8 gated reads)
- NUM_TABLE_TERMS: 2 -> 4 (positive/negative for each of 2 points)
- LENGTH: 9 -> 15

Table term structure (4 terms covering all 16 slice values):
  - table_index 0: point 1 positive, slice = 15 - 2*round -> {15,13,11,9}
  - table_index 1: point 1 negative, slice = 2*round      -> {0,2,4,6}
  - table_index 2: point 2 positive, slice = 14 - 2*round -> {14,12,10,8}
  - table_index 3: point 2 negative, slice = 2*round + 1  -> {1,3,5,7}

Lookup read counts expanded from 2 to 4 columns
(lookup_read_counts_0..3) to match the 4 table terms.
---
 .../relations/ecc_vm/ecc_lookup_relation.hpp  | 164 ++++++++++--------
 1 file changed, 91 insertions(+), 73 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation.hpp
index aa1438379f6b..b0d299cd1d9d 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation.hpp
@@ -17,10 +17,10 @@ namespace bb {
 template <typename FF_> class ECCVMLookupRelationImpl {
   public:
     using FF = FF_;
-    static constexpr size_t NUM_LOOKUP_TERMS = 4;
-    static constexpr size_t NUM_TABLE_TERMS = 2;
+    static constexpr size_t NUM_LOOKUP_TERMS = 8;
+    static constexpr size_t NUM_TABLE_TERMS = 4;
     // 1 + polynomial degree of this relation
-    static constexpr size_t LENGTH = NUM_LOOKUP_TERMS + NUM_TABLE_TERMS + 3; // 9
+    static constexpr size_t LENGTH = NUM_LOOKUP_TERMS + NUM_TABLE_TERMS + 3; // 15
 
     static constexpr std::array<size_t, 2> SUBRELATION_PARTIAL_LENGTHS{
         LENGTH, // grand product construction sub-relation
@@ -65,6 +65,12 @@ template <typename FF_> class ECCVMLookupRelationImpl {
         if constexpr (index == 1) {
             return Accumulator(View(in.lookup_read_counts_1));
         }
+        if constexpr (index == 2) {
+            return Accumulator(View(in.lookup_read_counts_2));
+        }
+        if constexpr (index == 3) {
+            return Accumulator(View(in.lookup_read_counts_3));
+        }
         return Accumulator(1);
     }
 
@@ -86,6 +92,18 @@ template <typename FF_> class ECCVMLookupRelationImpl {
         if constexpr (lookup_index == 3) {
             return Accumulator(View(in.msm_add4));
         }
+        if constexpr (lookup_index == 4) {
+            return Accumulator(View(in.msm_add5));
+        }
+        if constexpr (lookup_index == 5) {
+            return Accumulator(View(in.msm_add6));
+        }
+        if constexpr (lookup_index == 6) {
+            return Accumulator(View(in.msm_add7));
+        }
+        if constexpr (lookup_index == 7) {
+            return Accumulator(View(in.msm_add8));
+        }
         return Accumulator(1);
     }
 
@@ -95,9 +113,12 @@ template <typename FF_> class ECCVMLookupRelationImpl {
         using View = typename Accumulator::View;
         // anytime `precompute_select` is on, we "turn on" the table predicate. This concretely means that the sP, where
         // s is a WNAF slice and P is the point being processed, are "written" to the lookup table, i.e., may be
-        // read/looked up later. `table_index == 0` corresponds to positive WNAF entries, `table_index == 1` corresponds
-        // to negative WNAF entries.
-        if constexpr (table_index == 0 || table_index == 1) {
+        // read/looked up later.
+        // table_index 0: point 1 positive WNAF entries
+        // table_index 1: point 1 negative WNAF entries
+        // table_index 2: point 2 positive WNAF entries
+        // table_index 3: point 2 negative WNAF entries
+        if constexpr (table_index < NUM_TABLE_TERMS) {
             return Accumulator(View(in.precompute_select));
         }
         return Accumulator(1);
@@ -105,6 +126,19 @@ template <typename FF_> class ECCVMLookupRelationImpl {
     /**
      * @brief Returns the fingerprint of `(precompute_pc, compressed_slice, (2 * compressed_slice - 15)[P])`, where [P]
      * is the point corresponding to `precompute_pc` and `compressed_slice`∈{0, ..., 15}.
+     *
+     * @details With 2 points per precompute row (tx/ty and tx2/ty2), we have 4 table terms:
+     *   table_index 0: point 1 positive — slice = 15 - 2*round, covers {15,13,11,9}
+     *   table_index 1: point 1 negative — slice = 2*round,      covers {0,2,4,6}
+     *   table_index 2: point 2 positive — slice = 14 - 2*round, covers {14,12,10,8}
+     *   table_index 3: point 2 negative — slice = 2*round + 1,  covers {1,3,5,7}
+     *
+     * Together these cover all 16 slice values {0, ..., 15}.
+     *
+     * Point 1 (tx, ty) at row `round` is the entry with slice 15 - 2*round, i.e. (15 - 4*round)[P]:
+     *   round 0: 15P, round 1: 11P, round 2: 7P, round 3: 3P
+     * Point 2 (tx2, ty2) at row `round` is the entry with slice 14 - 2*round, i.e. (13 - 4*round)[P]:
+     *   round 0: 13P, round 1: 9P, round 2: 5P, round 3: P
      */
     template <typename Accumulator, size_t table_index, typename AllEntities, typename Parameters>
     static Accumulator compute_table_term(const AllEntities& in, const Parameters& params)
@@ -112,58 +146,41 @@ template <typename FF_> class ECCVMLookupRelationImpl {
         using View = typename Accumulator::View;
 
         static_assert(table_index < NUM_TABLE_TERMS);
-        // table_index == 0 means our wNAF digit is positive (i.e., ∈{1, 3..., 15}).
-        // table_index == 1 means our wNAF digit is negative (i.e., ∈{-15, -13..., -1})
-
-        // round starts at 0 and increments to 7
-        // point starts at 15[P] and decrements to [P]
-        // a slice value of 0 maps to -15[P]
-
-        // we have computed `(15 - 2 * round)[P] =: (precompute_tx, precompute_ty)`.
-        // `round`∈{0, 1..., 7}
-        // if table_index == 0, we want to write (pc, 15 - 2 * round, precompute_tx, precompute_ty)
-        // if table_index == 1, we want to write (pc, round, precompute_tx, -precompute_ty)
-        // to sum up, both:
-        //      (pc, round, precompute_tx, -precompute_ty) _and_
-        //      (pc, 15 - 2 * round, precompute_tx, precompute_ty)
-        // will be written to the lookup table.
-        //
-        // therefore, if `pc` corresponds to the elliptic curve point [P], we will write:
-        // | pc | 0  | -15[P].x | -15[P].y |
-        // | pc | 1  | -13[P].x | -13[P].y |
-        // | pc | 2  | -11[P].x | -11[P].y |
-        // | pc | 3  | -9[P].x  | -9[P].y  |
-        // | pc | 4  | -7[P].x  | -7[P].y  |
-        // | pc | 5  | -5[P].x  | -5[P].y  |
-        // | pc | 6  | -3[P].x  | -3[P].y  |
-        // | pc | 7  | -1[P].x  | -1[P].y  |
-        // | pc | 8  |   [P].x  |   [P].y  |
-        // | pc | 9  |  3[P].x  |  3[P].y  |
-        // | pc | 10 |  5[P].x  |  5[P].y  |
-        // | pc | 11 |  7[P].x  |  7[P].y  |
-        // | pc | 12 |  9[P].x  |  9[P].y  |
-        // | pc | 13 | 11[P].x  | 11[P].y  |
-        // | pc | 14 | 13[P].x  | 13[P].y  |
-        // | pc | 15 | 15[P].x  | 15[P].y  |
 
         const auto& precompute_pc = View(in.precompute_pc);
-        const auto& tx = View(in.precompute_tx);
-        const auto& ty = View(in.precompute_ty);
         const auto& precompute_round = View(in.precompute_round);
         const auto& gamma = params.gamma;
         const auto& beta = params.beta;
         const auto& beta_sqr = params.beta_sqr;
         const auto& beta_cube = params.beta_cube;
+        const auto precompute_round2 = precompute_round + precompute_round;
 
         if constexpr (table_index == 0) {
-            const auto positive_slice_value = -(precompute_round) + 15;
-            const auto positive_term =
-                precompute_pc + gamma + positive_slice_value * beta + tx * beta_sqr + ty * beta_cube;
-            return positive_term; // degree 1
+            // Point 1 positive: slice = 15 - 2*round
+            const auto& tx = View(in.precompute_tx);
+            const auto& ty = View(in.precompute_ty);
+            const auto positive_slice_value = -(precompute_round2) + 15;
+            return precompute_pc + gamma + positive_slice_value * beta + tx * beta_sqr + ty * beta_cube; // degree 1
         }
         if constexpr (table_index == 1) {
-            const auto negative_term = precompute_pc + gamma + precompute_round * beta + tx * beta_sqr - ty * beta_cube;
-            return negative_term; // degree 1
+            // Point 1 negative: slice = 2*round
+            const auto& tx = View(in.precompute_tx);
+            const auto& ty = View(in.precompute_ty);
+            return precompute_pc + gamma + precompute_round2 * beta + tx * beta_sqr - ty * beta_cube; // degree 1
+        }
+        if constexpr (table_index == 2) {
+            // Point 2 positive: slice = 14 - 2*round
+            const auto& tx2 = View(in.precompute_tx2);
+            const auto& ty2 = View(in.precompute_ty2);
+            const auto positive_slice_value2 = -(precompute_round2) + 14;
+            return precompute_pc + gamma + positive_slice_value2 * beta + tx2 * beta_sqr + ty2 * beta_cube; // degree 1
+        }
+        if constexpr (table_index == 3) {
+            // Point 2 negative: slice = 2*round + 1
+            const auto& tx2 = View(in.precompute_tx2);
+            const auto& ty2 = View(in.precompute_ty2);
+            const auto negative_slice_value2 = precompute_round2 + 1;
+            return precompute_pc + gamma + negative_slice_value2 * beta + tx2 * beta_sqr - ty2 * beta_cube; // degree 1
         }
         return Accumulator(1);
     }
@@ -182,45 +199,46 @@ template <typename FF_> class ECCVMLookupRelationImpl {
         const auto& beta_cube = params.beta_cube;
         const auto& msm_pc = View(in.msm_pc);
         const auto& msm_count = View(in.msm_count);
-        const auto& msm_slice1 = View(in.msm_slice1);
-        const auto& msm_slice2 = View(in.msm_slice2);
-        const auto& msm_slice3 = View(in.msm_slice3);
-        const auto& msm_slice4 = View(in.msm_slice4);
-        const auto& msm_x1 = View(in.msm_x1);
-        const auto& msm_x2 = View(in.msm_x2);
-        const auto& msm_x3 = View(in.msm_x3);
-        const auto& msm_x4 = View(in.msm_x4);
-        const auto& msm_y1 = View(in.msm_y1);
-        const auto& msm_y2 = View(in.msm_y2);
-        const auto& msm_y3 = View(in.msm_y3);
-        const auto& msm_y4 = View(in.msm_y4);
 
         // Recall that `pc` stands for point-counter. We recall how to compute the current pc.
         //
         // row pc = value of pc after msm
         // msm_count = number of (128-bit) multiplications processed so far in current MSM round (NOT INCLUDING current
-        // row) current_pc = msm_pc - msm_count next_pc = current_pc - {0, 1, 2, 3}, depending on how many adds are
+        // row). current_pc = msm_pc - msm_count; next_pc = current_pc - {0, 1, ..., 7}, depending on how many adds are
         // performed in the current row.
         const auto current_pc = msm_pc - msm_count;
 
         if constexpr (lookup_index == 0) {
-            const auto lookup_term1 = (current_pc) + gamma + msm_slice1 * beta + msm_x1 * beta_sqr + msm_y1 * beta_cube;
-            return lookup_term1; // degree 1
+            return (current_pc) + gamma + View(in.msm_slice1) * beta + View(in.msm_x1) * beta_sqr +
+                   View(in.msm_y1) * beta_cube; // degree 1
         }
         if constexpr (lookup_index == 1) {
-            const auto lookup_term2 =
-                (current_pc - 1) + gamma + msm_slice2 * beta + msm_x2 * beta_sqr + msm_y2 * beta_cube;
-            return lookup_term2; // degree 1
+            return (current_pc - 1) + gamma + View(in.msm_slice2) * beta + View(in.msm_x2) * beta_sqr +
+                   View(in.msm_y2) * beta_cube; // degree 1
         }
         if constexpr (lookup_index == 2) {
-            const auto lookup_term3 =
-                (current_pc - 2) + gamma + msm_slice3 * beta + msm_x3 * beta_sqr + msm_y3 * beta_cube;
-            return lookup_term3; // degree 1
+            return (current_pc - 2) + gamma + View(in.msm_slice3) * beta + View(in.msm_x3) * beta_sqr +
+                   View(in.msm_y3) * beta_cube; // degree 1
         }
         if constexpr (lookup_index == 3) {
-            const auto lookup_term4 =
-                (current_pc - 3) + gamma + msm_slice4 * beta + msm_x4 * beta_sqr + msm_y4 * beta_cube;
-            return lookup_term4; // degree 1
+            return (current_pc - 3) + gamma + View(in.msm_slice4) * beta + View(in.msm_x4) * beta_sqr +
+                   View(in.msm_y4) * beta_cube; // degree 1
+        }
+        if constexpr (lookup_index == 4) {
+            return (current_pc - 4) + gamma + View(in.msm_slice5) * beta + View(in.msm_x5) * beta_sqr +
+                   View(in.msm_y5) * beta_cube; // degree 1
+        }
+        if constexpr (lookup_index == 5) {
+            return (current_pc - 5) + gamma + View(in.msm_slice6) * beta + View(in.msm_x6) * beta_sqr +
+                   View(in.msm_y6) * beta_cube; // degree 1
+        }
+        if constexpr (lookup_index == 6) {
+            return (current_pc - 6) + gamma + View(in.msm_slice7) * beta + View(in.msm_x7) * beta_sqr +
+                   View(in.msm_y7) * beta_cube; // degree 1
+        }
+        if constexpr (lookup_index == 7) {
+            return (current_pc - 7) + gamma + View(in.msm_slice8) * beta + View(in.msm_x8) * beta_sqr +
+                   View(in.msm_y8) * beta_cube; // degree 1
         }
         return Accumulator(1);
     }
@@ -231,8 +249,8 @@ template <typename FF_> class ECCVMLookupRelationImpl {
      * Table writes: ECCVMPointTable columns: we define Straus point table:
      * { {0, -15[P]}, {1, -13[P]}, ..., {15, 15[P]} }
      * write source: { precompute_round, precompute_tx, precompute_ty }
-     * Table reads: ECCVMMSM columns. Each row adds up to 4 points into MSM accumulator
-     * read source: { msm_slice1, msm_x1, msm_y1 }, ..., { msm_slice4, msm_x4, msm_y4 }
+     * Table reads: ECCVMMSM columns. Each row adds up to 8 points into MSM accumulator
+     * read source: { msm_slice1, msm_x1, msm_y1 }, ..., { msm_slice8, msm_x8, msm_y8 }
      * @param accumulator transformed to `evals + C(in(X)...)*scaling_factor`
      * @param in an std::array containing the fully extended Accumulator edges.
      * @param relation_params contains beta, gamma, and public_input_delta, ....

From 762be817b24fbef90c9abad1542b8b2f74a36be5 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 14:23:14 +0000
Subject: [PATCH 11/24] feat(eccvm): add 2nd precomputed point constraint to
 point table relation

Update ECCVMPointTableRelation for 2 precomputed points per row
(Tx/Ty and Tx2/Ty2):

SUBRELATION_PARTIAL_LENGTHS expanded from 6 to 8 entries:
- Subrelations 0-1: Doubling constraint, now uses (Tx2, Ty2) as the base
  point (on the transition row (Tx2, Ty2) = P, so (Dx, Dy) = 2P)
- Subrelations 2-3: Dx/Dy continuity (unchanged)
- Subrelations 4-5: NEW intra-row addition
  (Tx, Ty) = (Tx2, Ty2) + (Dx, Dy), gated by precompute_select.
  Validates first point = second point + 2P.
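- Subrelations 6-7: inter-row addition
  (Tx2, Ty2) = (Tx_shift, Ty_shift) + (Dx, Dy), gated by not-transition
  and not-first-row. This is the former subrelation 4-5 constraint with
  (Tx2, Ty2) in place of (Tx, Ty) as the sum. Validates second point of
  row i equals first point of row i+1 plus 2P.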

Row layout example for point P:
  round 0: Tx=15P, Tx2=13P | round 1: Tx=11P, Tx2=9P
  round 2: Tx=7P,  Tx2=5P  | round 3: Tx=3P,  Tx2=P
---
 .../ecc_vm/ecc_point_table_relation.hpp       |   3 +-
 .../ecc_vm/ecc_point_table_relation_impl.hpp  | 173 +++++++-----------
 2 files changed, 72 insertions(+), 104 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.hpp
index b5e1ed1e8dd4..eaa4b529f588 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation.hpp
@@ -25,7 +25,8 @@ template <typename FF_> class ECCVMPointTableRelationImpl {
   public:
     using FF = FF_;
 
-    static constexpr std::array<size_t, 6> SUBRELATION_PARTIAL_LENGTHS{ 6, 6, 6, 6, 6, 6 };
+    // 8 subrelations: 0-1 doubling, 2-3 Dx/Dy continuity, 4-5 intra-row add (Tx=Tx2+D), 6-7 inter-row add (Tx2=Tx'+D)
+    static constexpr std::array<size_t, 8> SUBRELATION_PARTIAL_LENGTHS{ 6, 6, 6, 6, 6, 6, 6, 6 };
 
     template <typename ContainerOverSubrelations, typename AllEntities, typename Parameters>
     static void accumulate(ContainerOverSubrelations& accumulator,
diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp
index 071860f39a92..942c801db6df 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp
@@ -47,83 +47,43 @@ void ECCVMPointTableRelationImpl<FF>::accumulate(ContainerOverSubrelations& accu
     const auto& lagrange_first = View(in.lagrange_first);
 
     /**
-     * @brief Row structure
+     * @brief Row structure (8-wide: 2 precomputed points per row, 4 rows per scalar)
      *
      * Consider the set of (128-bit scalar multiplier, point, pc) tuples in the transcript columns.
-     * The point table columns process one tuple every 8 rows. The tuple with the largest pc value is first.
+     * The point table columns process one tuple every 4 rows. The tuple with the largest pc value is first.
      * When transitioning between tuple elements, pc decrements by 1.
      *
-     * The following table gives an example for two points.
-     * In the table, the point associated with `pc = 1` is labelled P.
-     *               the point associated with `pc = 0` is labelled Q.
-     *
-     * | precompute_pc | precompute_point_transition  | precompute_round    |  Tx    | Ty    | Dx   | Dy   |
-     * | ------------- | ---------------------------- | ------------------- |  ----- | ----- | ---- | ---- |
-     * | 1             | 0                            |                   0 | 15P.x  | 15P.y | 2P.x | 2P.y |
-     * | 1             | 0                            |                   1 | 13P.x  | 13P.y | 2P.x | 2P.y |
-     * | 1             | 0                            |                   2 | 11P.x  | 11P.y | 2P.x | 2P.y |
-     * | 1             | 0                            |                   3 |  9P.x  |  9P.y | 2P.x | 2P.y |
-     * | 1             | 0                            |                   4 |  7P.x  |  7P.y | 2P.x | 2P.y |
-     * | 1             | 0                            |                   5 |  5P.x  |  5P.y | 2P.x | 2P.y |
-     * | 1             | 0                            |                   6 |  3P.x  |  3P.y | 2P.x | 2P.y |
-     * | 1             | 1                            |                   7 |   P.x  |   P.y | 2P.x | 2P.y |
-     * | 0             | 0                            |                   0 | 15Q.x  | 15Q.y | 2Q.x | 2Q.y |
-     * | 0             | 0                            |                   1 | 13Q.x  | 13Q.y | 2Q.x | 2Q.y |
-     * | 0             | 0                            |                   2 | 11Q.x  | 11Q.y | 2Q.x | 2Q.y |
-     * | 0             | 0                            |                   3 |  9Q.x  |  9Q.y | 2Q.x | 2Q.y |
-     * | 0             | 0                            |                   4 |  7Q.x  |  7Q.y | 2Q.x | 2Q.y |
-     * | 0             | 0                            |                   5 |  5Q.x  |  5Q.y | 2Q.x | 2Q.y |
-     * | 0             | 0                            |                   6 |  3Q.x  |  3Q.y | 2Q.x | 2Q.y |
-     * | 0             | 1                            |                   7 |   Q.x  |   Q.y | 2Q.x | 2Q.y |
-     *
-     * We apply the following relations to constrain the above table:
-     *
-     * 1. If precompute_point_transition = 0, (Dx, Dy) = (Dx_shift, Dy_shift)
-     * 2. If precompute_point_transition = 1, (Dx, Dy) = 2 (Px, Py)
-     * 3. If precompute_point_transition = 0, (Tx, Ty) = (Tx_shift, Ty_shift) + (Dx, Dy)
-     *
-     * The relations that constrain `precompute_point_transition` and `precompute_pc` are in `ecc_wnaf_relation.hpp`
-     *
-     * When precompute_point_transition = 1, the next row corresponds to the beginning of the processing of a new point.
-     * We use a multiset-equality check, `ecc_set_relation.hpp` to validate (pc, Tx, Ty, scalar-multiplier) is the same
-     * as something derived from the transcript columns. In other words, the multiset equality check allows the tables
-     * to communicate, and in particular validates that we are populating our PointTable with precomputed values that
-     * indeed arise from the Transcript columns. (Formerly, we referred to this as a "strict" lookup protocol = every
-     * item in the table must be read from once, and only once)
-     *
-     * For every row, we use a lookup protocol in `ecc_lookup_relation.hpp` to write the following tuples into a lookup
-     * table:
-     * 1. (pc, 15 - precompute_round, Tx, Ty)
-     * 2. (pc, precompute_round, Tx, -Ty)
-     *
-     * The value `15 - precompute_round` describes the multiplier applied to P at the current row.
-     * (this can be expanded into a wnaf value by taking `2x - 15` where `x = 15 - precompute_round`) .
-     * The value `precompute_round` describes the *negative multiplier* applied to P at the current row.
-     * This is also expanded into a wnaf value by taking `2x - 15` where `x = precompute_round`.
-     *
-     * The following table describes how taking (15 - precompute_round) for positive values and (precompute_round) for
-     * negative values produces the WNAF slice values that correspond to the multipliers for (Tx, Ty) and (Tx, -Ty):
-     *
-     * | Tx    | Ty    | x = 15 - precompute_round | 2x - 15 | y = precompute_round | 2y - 15 |
-     * | ----- | ----- | --------------------      | ------- | ---------------      | ------- |
-     * | 15P.x | 15P.y | 15                        |      15 |                    0 |     -15 |
-     * | 13P.x | 13P.y | 14                        |      13 |                    1 |     -13 |
-     * | 11P.x | 11P.y | 13                        |      11 |                    2 |     -11 |
-     * |  9P.x |  9P.y | 12                        |       9 |                    3 |      -9 |
-     * |  7P.x |  7P.y | 11                        |       7 |                    4 |      -7 |
-     * |  5P.x |  5P.y | 10                        |       5 |                    5 |      -5 |
-     * |  3P.x |  3P.y |  9                        |       3 |                    6 |      -3 |
-     * |   P.x |   P.y |  8                        |       1 |                    7 |      -1 |
+     * Each row stores two precomputed points:
+     *   (Tx, Ty)   = table[15 - 2*round]   (first point, odd table index)
+     *   (Tx2, Ty2) = table[14 - 2*round]   (second point, even table index)
+     *
+     * | pc | transition | round | Tx    | Ty    | Tx2   | Ty2   | Dx   | Dy   |
+     * | -- | ---------- | ----- | ----- | ----- | ----- | ----- | ---- | ---- |
+     * | 1  | 0          | 0     | 15P.x | 15P.y | 13P.x | 13P.y | 2P.x | 2P.y |
+     * | 1  | 0          | 1     | 11P.x | 11P.y |  9P.x |  9P.y | 2P.x | 2P.y |
+     * | 1  | 0          | 2     |  7P.x |  7P.y |  5P.x |  5P.y | 2P.x | 2P.y |
+     * | 1  | 1          | 3     |  3P.x |  3P.y |    P.x |   P.y | 2P.x | 2P.y |
+     *
+     * We apply the following relations:
+     * 1. If precompute_point_transition = 1, (Dx, Dy) = 2(Tx2, Ty2) [doubling at transition, Tx2=P at last row]
+     * 2. If precompute_point_transition = 0, (Dx, Dy) = (Dx_shift, Dy_shift) [continuity]
+     * 3. (Tx, Ty) = (Tx2, Ty2) + (Dx, Dy) [intra-row: first point = second point + 2P]
+     * 4. If precompute_point_transition = 0, (Tx2, Ty2) = (Tx_shift, Ty_shift) + (Dx, Dy)
+     *    [inter-row: second point = next row's first point + 2P]
      */
 
+    const auto& Tx2 = View(in.precompute_tx2);
+    const auto& Tx2_shift = View(in.precompute_tx2_shift);
+    const auto& Ty2 = View(in.precompute_ty2);
+    const auto& Ty2_shift = View(in.precompute_ty2_shift);
+
     /**
-     * @brief Validate Dx, Dy correctness relation
+     * @brief Validate Dx, Dy correctness (doubling relation)
      *
-     * When computing a point table for point [P] = (Px, Py), we require [D] (Dx, Dy) = 2.[P]
-     * If all other relations are satisfied, we know that (Tx, Ty) = (Px, Py)
-     * i.e. (Dx, Dy) = 2(Px, Py) when precompute_round_transition = 1.
+     * When precompute_point_transition = 1, the current row is the last row for this point.
+     * At the last row (round=3), Tx2 = P (the base point). So (Dx, Dy) = 2(Tx2, Ty2) = 2P.
      *
-     * Double formula:
+     * Double formula (for curve a=0, using 3x^2 shortcut since a=0 => slope = 3x^2/(2y)):
      * x_3 = 9x^4 / 4y^2 - 2x
      * y_3 = (3x^2 / 2y) * (x - x_3) - y
      *
@@ -131,20 +91,19 @@ void ECCVMPointTableRelationImpl<FF>::accumulate(ContainerOverSubrelations& accu
      * (x_3 + 2x) * 4y^2 - 9x^4 = 0
      * (y3 + y) * 2y - 3x^2 * (x - x_3) = 0
      */
-    auto two_x = Tx + Tx;
-    auto three_x = two_x + Tx;
-    auto three_xx = Tx * three_x;
-    auto nine_xxxx = three_xx.sqr();
-    auto two_y = Ty + Ty;
-    auto four_yy = two_y.sqr();
-    auto x_double_check = (Dx + two_x) * four_yy - nine_xxxx;
-    auto y_double_check = (Ty + Dy) * two_y + three_xx * (Dx - Tx);
+    auto two_x2 = Tx2 + Tx2;
+    auto three_x2 = two_x2 + Tx2;
+    auto three_x2x2 = Tx2 * three_x2;
+    auto nine_x2x2x2x2 = three_x2x2.sqr();
+    auto two_y2 = Ty2 + Ty2;
+    auto four_y2y2 = two_y2.sqr();
+    auto x_double_check = (Dx + two_x2) * four_y2y2 - nine_x2x2x2x2;
+    auto y_double_check = (Ty2 + Dy) * two_y2 + three_x2x2 * (Dx - Tx2);
     std::get<0>(accumulator) += precompute_point_transition * x_double_check * scaling_factor;
     std::get<1>(accumulator) += precompute_point_transition * y_double_check * scaling_factor;
 
     /**
-     * @brief If precompute_round_transition = 0, (Dx_shift, Dy_shift) = (Dx, Dy)
-     *
+     * @brief If precompute_point_transition = 0, (Dx_shift, Dy_shift) = (Dx, Dy) [continuity]
      * 1st row is empty => don't apply if lagrange_first == 1
      */
     std::get<2>(accumulator) +=
@@ -153,37 +112,45 @@ void ECCVMPointTableRelationImpl<FF>::accumulate(ContainerOverSubrelations& accu
         (-lagrange_first + 1) * (-precompute_point_transition + 1) * (Dy - Dy_shift) * scaling_factor;
 
     /**
-     * @brief Valdiate (Tx, Ty) is correctly computed from (Tx_shift, Ty_shift), (Dx, Dy).
-     *        If precompute_round_transition = 0, [T] = [T_shift] + [D].
+     * @brief Intra-row addition: (Tx, Ty) = (Tx2, Ty2) + (Dx, Dy)
      *
-     * Add formula:
-     * x_3 = (y_2 - y_1)^2 / (x_2 - x_1)^2 - x_2 - x_1
-     * y_3 = ((y_2 - y_1) / (x_2 - x_1)) * (x_1 - x_3) - y_1
+     * The first precomputed point = second precomputed point + 2P.
+     * E.g., at round 0: 15P = 13P + 2P.
      *
-     * Expanding into relations:
+     * This is gated by precompute_select (active when processing a scalar).
+     *
+     * Add formula (denominator form):
      * (x_3 + x_2 + x_1) * (x_2 - x_1)^2 - (y_2 - y_1)^2 = 0
      * (y_3 + y_1) * (x_2 - x_1) + (x_3 - x_1) * (y_2 - y_1) = 0
+     */
+    {
+        const auto& precompute_select = View(in.precompute_select);
+        const auto lambda_num_intra = Dy - Ty2;
+        const auto lambda_den_intra = Dx - Tx2;
+        auto x_add_check_intra = (Tx + Dx + Tx2) * lambda_den_intra.sqr() - lambda_num_intra.sqr();
+        auto y_add_check_intra = (Ty + Ty2) * lambda_den_intra + (Tx - Tx2) * lambda_num_intra;
+        std::get<4>(accumulator) += precompute_select * x_add_check_intra * scaling_factor;
+        std::get<5>(accumulator) += precompute_select * y_add_check_intra * scaling_factor;
+    }
+
+    /**
+     * @brief Inter-row addition: (Tx2, Ty2) = (Tx_shift, Ty_shift) + (Dx, Dy)
+     *
+     * The second precomputed point of row i = first precomputed point of row i+1 + 2P.
+     * E.g., row 0 Tx2 = 13P = row 1 Tx (11P) + 2P.
      *
-     * We don't need to check for incomplete point addition edge case (x_1 == x_2); the only cases this would correspond
-     * to are y2 == y1 or y2 == -y1. Both of these cases may be ruled out as follows.
-     *      1. y2 == y1. Then 2P == kP, where k∈{1, ..., 13}, which of course cannot happen because the order r of E(Fₚ)
-     *      is a large prime and P is already assumed to not be the neutral element.
-     *      2. y2 == -y1. Again, then -2P == kP, k∈{1, ..., 13}, and we get the same contradiction.
+     * Gated by: not first row, not transition (same as old inter-row constraint).
      */
-    const auto& x1 = Tx_shift;
-    const auto& y1 = Ty_shift;
-    const auto& x2 = Dx;
-    const auto& y2 = Dy;
-    const auto& x3 = Tx;
-    const auto& y3 = Ty;
-    const auto lambda_numerator = y2 - y1;
-    const auto lambda_denominator = x2 - x1;
-    auto x_add_check = (x3 + x2 + x1) * lambda_denominator.sqr() - lambda_numerator.sqr();
-    auto y_add_check = (y3 + y1) * lambda_denominator + (x3 - x1) * lambda_numerator;
-    std::get<4>(accumulator) +=
-        (-lagrange_first + 1) * (-precompute_point_transition + 1) * x_add_check * scaling_factor;
-    std::get<5>(accumulator) +=
-        (-lagrange_first + 1) * (-precompute_point_transition + 1) * y_add_check * scaling_factor;
+    {
+        const auto lambda_num_inter = Dy - Ty_shift;
+        const auto lambda_den_inter = Dx - Tx_shift;
+        auto x_add_check_inter = (Tx2 + Dx + Tx_shift) * lambda_den_inter.sqr() - lambda_num_inter.sqr();
+        auto y_add_check_inter = (Ty2 + Ty_shift) * lambda_den_inter + (Tx2 - Tx_shift) * lambda_num_inter;
+        std::get<6>(accumulator) +=
+            (-lagrange_first + 1) * (-precompute_point_transition + 1) * x_add_check_inter * scaling_factor;
+        std::get<7>(accumulator) +=
+            (-lagrange_first + 1) * (-precompute_point_transition + 1) * y_add_check_inter * scaling_factor;
+    }
 }
 
 } // namespace bb

From b89332527c4d86d0735561320f2897fd45ae889e Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 14:27:57 +0000
Subject: [PATCH 12/24] feat(eccvm): update eccvm_set_permutation_delta to
 product of 8 terms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

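With 8 wNAF digits per precompute row, eccvm_set_permutation_delta (the
factor that removes the zero-tuple fingerprints contributed by rows with
precompute_select == 0) must be the inverse of the product of 8 terms
(γ + j·β² + t·β⁴) for j = 0..7, where t = FIRST_TERM_TAG, rather than
of 4 terms for j = 0..3.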

Updated in all three locations:
- eccvm_prover.cpp
- eccvm_verifier.cpp
- eccvm_trace_checker.cpp
---
 .../cpp/src/barretenberg/eccvm/eccvm_prover.cpp       | 11 ++++++-----
 .../src/barretenberg/eccvm/eccvm_trace_checker.cpp    |  7 ++++---
 .../cpp/src/barretenberg/eccvm/eccvm_verifier.cpp     |  8 +++++---
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp
index 065cb9c4b3e6..1d0e0dbed0f3 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp
@@ -94,12 +94,13 @@ void ECCVMProver::execute_log_derivative_commitments_round()
     // constrain (pc, round, wnaf_slice) to match between the MSM table and the Precomputed table. The number of rows we
     // add per short scalar `mul` is slightly less in the Precomputed table as in the MSM table, so to get the
     // permutation argument to work out, when `precompute_select == 0`, we must implicitly _remove_ (0, 0, 0) as a tuple
-    // on the wNAF side. This corresponds to dividing by
-    // (γ+t·β⁴)·(γ+β²+t·β⁴)·(γ+2β²+t·β⁴)·(γ+3β²+t·β⁴), where t = FIRST_TERM_TAG.
+    // on the wNAF side. With 8 digits per row, we have 8 zero-tuple fingerprints to remove:
+    // product of (γ + j·β² + t·β⁴) for j = 0..7, where t = FIRST_TERM_TAG.
     auto first_term_tag = beta_quartic; // FIRST_TERM_TAG (= 1) * beta_quartic
-    relation_parameters.eccvm_set_permutation_delta = (gamma + first_term_tag) * (gamma + beta_sqr + first_term_tag) *
-                                                      (gamma + beta_sqr + beta_sqr + first_term_tag) *
-                                                      (gamma + beta_sqr + beta_sqr + beta_sqr + first_term_tag);
+    relation_parameters.eccvm_set_permutation_delta = FF(1);
+    for (size_t j = 0; j < 8; ++j) {
+        relation_parameters.eccvm_set_permutation_delta *= (gamma + FF(j) * beta_sqr + first_term_tag);
+    }
     relation_parameters.eccvm_set_permutation_delta = relation_parameters.eccvm_set_permutation_delta.invert();
     // Compute inverse polynomial for our logarithmic-derivative lookup method
     compute_logderivative_inverse<typename Flavor::FF,
diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_trace_checker.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_trace_checker.cpp
index bc78b150ae2c..ea1d54d426f5 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_trace_checker.cpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_trace_checker.cpp
@@ -23,9 +23,10 @@ bool ECCVMTraceChecker::check(Builder& builder,
     const FF beta_cube = beta_sqr * beta;
     const FF beta_quartic = beta_sqr * beta_sqr;
     auto first_term_tag = beta_quartic; // FIRST_TERM_TAG (= 1) * beta_quartic
-    auto eccvm_set_permutation_delta = (gamma + first_term_tag) * (gamma + beta_sqr + first_term_tag) *
-                                       (gamma + beta_sqr + beta_sqr + first_term_tag) *
-                                       (gamma + beta_sqr + beta_sqr + beta_sqr + first_term_tag);
+    FF eccvm_set_permutation_delta = FF(1);
+    for (size_t j = 0; j < 8; ++j) {
+        eccvm_set_permutation_delta *= (gamma + FF(j) * beta_sqr + first_term_tag);
+    }
     eccvm_set_permutation_delta = eccvm_set_permutation_delta.invert();
     bb::RelationParameters<typename Flavor::FF> params{
         .eta = 0,
diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_verifier.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_verifier.cpp
index 9bd2765ab2f5..ff64027efba7 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_verifier.cpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_verifier.cpp
@@ -56,10 +56,12 @@ typename ECCVMVerifier_<Flavor>::ReductionResult ECCVMVerifier_<Flavor>::reduce_
     relation_parameters.beta_sqr = beta_sqr;
     relation_parameters.beta_cube = beta_sqr * beta;
     relation_parameters.beta_quartic = beta_quartic;
+    // Product of 8 zero-tuple fingerprints (γ + j·β² + t·β⁴) for j = 0..7, then inverted.
     auto first_term_tag = beta_quartic; // FIRST_TERM_TAG (= 1) * beta_quartic
-    relation_parameters.eccvm_set_permutation_delta = (gamma + first_term_tag) * (gamma + beta_sqr + first_term_tag) *
-                                                      (gamma + beta_sqr + beta_sqr + first_term_tag) *
-                                                      (gamma + beta_sqr + beta_sqr + beta_sqr + first_term_tag);
+    relation_parameters.eccvm_set_permutation_delta = FF(1);
+    for (size_t j = 0; j < 8; ++j) {
+        relation_parameters.eccvm_set_permutation_delta *= (gamma + FF(j) * beta_sqr + first_term_tag);
+    }
     relation_parameters.eccvm_set_permutation_delta = relation_parameters.eccvm_set_permutation_delta.invert();
 
     // Get commitment to permutation and lookup grand products

From d4c16a45aa5f2b63bd20ba0625e200717f381ced Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 14:53:11 +0000
Subject: [PATCH 13/24] chore(eccvm): add ECCVM univariate benchmarks for
 sumcheck prover

Add benchmarks for ECCVM relation evaluation using Sumcheck
univariates (prover-side work), in addition to the existing
values-based benchmarks (verifier-side work).
---
 .../benchmark/relations_bench/relations.bench.cpp     | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
index 1a2acbf93f2e..e58f9bb6665e 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
@@ -86,7 +86,7 @@ BENCHMARK(execute_relation_for_values<TranslatorFlavor, TranslatorDeltaRangeCons
 BENCHMARK(execute_relation_for_values<TranslatorFlavor, TranslatorNonNativeFieldRelation<Fr>>);
 BENCHMARK(execute_relation_for_values<TranslatorFlavor, TranslatorPermutationRelation<Fr>>);
 
-// ECCVM
+// ECCVM (verifier work)
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMLookupRelation<Fq>>);
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMMSMRelation<Fq>>);
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMPointTableRelation<Fq>>);
@@ -95,6 +95,15 @@ BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMTranscriptRelation<Fq>>)
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMWnafRelation<Fq>>);
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMBoolsRelation<Fq>>);
 
+// ECCVM (Sumcheck prover work — univariate accumulation)
+BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMLookupRelation<Fq>>);
+BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMMSMRelation<Fq>>);
+BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMPointTableRelation<Fq>>);
+BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMSetRelation<Fq>>);
+BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMTranscriptRelation<Fq>>);
+BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMWnafRelation<Fq>>);
+BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMBoolsRelation<Fq>>);
+
 } // namespace bb::benchmark::relations
 
 BENCHMARK_MAIN();

From da2fbef80a7b74ecc251e12677fccceb4b8d28b5 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 15:02:16 +0000
Subject: [PATCH 14/24] fix(eccvm): fix MSM relation partial lengths and remove
 unused vars

- Set all MSM relation SUBRELATION_PARTIAL_LENGTHS to 12 (was mixed
  8/12). Required because the View type is derived from the max
  partial length subrelation (index 0 = 12), so all intermediate
  Univariates are 12-wide and can only be accumulated into 12-wide
  accumulators.
- Fixed element count: the initializer listed 68 values for a
  std::array<size_t, 67>; it now lists 67, matching the declaration.
- Removed unused Tx2_shift/Ty2_shift variables from point table
  relation (the inter-row constraint uses Tx_shift/Ty_shift, not
  the shifted versions of the second point).
---
 .../relations/ecc_vm/ecc_msm_relation.hpp     | 27 ++++++++++---------
 .../ecc_vm/ecc_point_table_relation_impl.hpp  |  2 --
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp
index 4626847f4dc9..5324c44e5f95 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_msm_relation.hpp
@@ -47,19 +47,22 @@ template <typename FF_> class ECCVMMSMRelationImpl {
     // The degree of y_t8 is 10 (degree doubles through chaining: first_add gives deg 3 y, then each subsequent
     // add increments by ~1). With q_add gating, the final degree is 12 (rounded up for safety).
     // Most subrelations remain degree <= 8. The new addition-chain subrelations (47-50, 51-54) have higher degree.
+    // All subrelations use partial length 12 because the View type (used for all wire reads)
+    // is Univariate<FF, 12> (derived from the max-degree subrelation in this relation).
+    // The 8-chained addition/skew outputs reach degree ~10 at the accumulator output (indices 0,1,4).
     static constexpr std::array<size_t, 67> SUBRELATION_PARTIAL_LENGTHS{
-        12, 12, 8,  8,  12, 8, // 0-5: ADD acc(x,y), slope1; SKEW acc(x,y), slope1
-        8,  8,  8,  8,  8,  8, // 6-11: collision1-4; DOUBLE acc(x,y)
-        8,  8,  8,  8,  8,  8, // 12-17: DOUBLE slope1; slice-zero 1-4; mutual excl
-        8,  8,  8,  8,  8,  8, 8,  8,
-        8,  8,  8,  8,  8,  8, 12, // 18-31: round tx, selectors, count, continuity
-        8,  8,  8,  8,  8,  8, 8,  8,
-        8,  8,  8,  8,  8,  8, 8, // 32-46: add1=q_add+q_skew, skew ctrls, ADD/DOUBLE/SKEW slopes 2-4, no-op
-        12, 12, 12, 12,           // 47-50: ADD slopes 5-8
-        12, 12, 12, 12,           // 51-54: SKEW slopes 5-8
-        8,  8,  8,  8,            // 55-58: collision 5-8
-        8,  8,  8,  8,            // 59-62: slice-zero 5-8
-        8,  8,  8,  8             // 63-66: continuity add5-8
+        12, 12, 12, 12, 12, 12,         // 0-5
+        12, 12, 12, 12, 12, 12,         // 6-11
+        12, 12, 12, 12, 12, 12,         // 12-17
+        12, 12, 12, 12, 12, 12, 12,     // 18-24
+        12, 12, 12, 12, 12, 12, 12,     // 25-31
+        12, 12, 12, 12, 12, 12, 12, 12, // 32-39
+        12, 12, 12, 12, 12, 12, 12,     // 40-46
+        12, 12, 12, 12,                 // 47-50: ADD slopes 5-8
+        12, 12, 12, 12,                 // 51-54: SKEW slopes 5-8
+        12, 12, 12, 12,                 // 55-58: collision 5-8
+        12, 12, 12, 12,                 // 59-62: slice-zero 5-8
+        12, 12, 12, 12                  // 63-66: continuity add5-8
     };
 
     template <typename ContainerOverSubrelations, typename AllEntities, typename Parameters>
diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp
index 942c801db6df..238eabc87f8c 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_point_table_relation_impl.hpp
@@ -73,9 +73,7 @@ void ECCVMPointTableRelationImpl<FF>::accumulate(ContainerOverSubrelations& accu
      */
 
     const auto& Tx2 = View(in.precompute_tx2);
-    const auto& Tx2_shift = View(in.precompute_tx2_shift);
     const auto& Ty2 = View(in.precompute_ty2);
-    const auto& Ty2_shift = View(in.precompute_ty2_shift);
 
     /**
      * @brief Validate Dx, Dy correctness (doubling relation)

From 80589aedf259ffe286cde29cb2f7b89f63339341 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 15:39:31 +0000
Subject: [PATCH 15/24] fix(eccvm): resolve crashes in MSM builder for 8-wide
 layout

Two bugs fixed:

1. batch_normalize crash on zero z-coordinates: With ADDITIONS_PER_ROW=8
   and DOUBLINGS_PER_ROW=4, doubling rows only use 4 of 8 trace slots.
   The unused slots had default Element{} with z=0 (point at infinity),
   causing batch_normalize to fail when inverting z-coordinates. Fix:
   fill the unused slots with copies of the current accumulator (dummy
   points that avoid z=0 as long as the accumulator itself is not the
   point at infinity) and track which slots are used via an is_used
   vector so that the padding is skipped during collision_inverse
   computation.

2. Signed integer overflow in precomputed_tables_builder: With 8 wNAF
   digits per row, row_chunk = slice0 * (1<<28) can reach ~4 billion,
   exceeding INT_MAX. This was undefined behavior causing incorrect
   scalar_sum values. Fix: use int64_t for row_chunk computation.
---
 .../src/barretenberg/eccvm/msm_builder.hpp    | 22 ++++++++++++++++---
 .../eccvm/precomputed_tables_builder.hpp      |  9 +++++---
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
index e49244b2258f..a5c097939812 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
@@ -263,6 +263,9 @@ class ECCVMMSMMBuilder {
         // doubling. if it is `true`, then we are doubling (i.e., the condition is that `p3 = p1.dbl()`), else we are
         // adding (i.e., the condition is that `p3 = p1 + p2`).
         std::vector<bool> is_double_or_add(num_point_adds_and_doubles);
+        // `is_used` records whether a trace entry is actually used (as opposed to padding for ADDITIONS_PER_ROW >
+        // DOUBLINGS_PER_ROW in doubling rows). Unused entries must be skipped during batch inversion.
+        std::vector<bool> is_used(num_point_adds_and_doubles, false);
         // accumulator_trace tracks the value of the ECCVM accumulator for each row
         std::span<Element> accumulator_trace(&points_to_normalize[num_point_adds_and_doubles * 3], num_accumulators);
 
@@ -328,6 +331,7 @@ class ECCVMMSMMBuilder {
                         p2_trace[trace_index] = p2;
                         p3_trace[trace_index] = accumulator;
                         is_double_or_add[trace_index] = false;
+                        is_used[trace_index] = true;
                         trace_index++;
                     }
                     // Now, `row.add_state` has been fully processed and we fill in the rest of the members of `row`.
@@ -366,10 +370,17 @@ class ECCVMMSMMBuilder {
                         accumulator = accumulator.dbl();
                         p3_trace[trace_index] = accumulator;
                         is_double_or_add[trace_index] = true;
+                        is_used[trace_index] = true;
+                        trace_index++;
+                    }
+                    // Fill unused trace slots with dummy non-infinity points so batch_normalize doesn't
+                    // fail on z=0. These entries are not used for any relation computation.
+                    for (size_t pad = 0; pad < (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW); ++pad) {
+                        p1_trace[trace_index] = accumulator;
+                        p2_trace[trace_index] = accumulator;
+                        p3_trace[trace_index] = accumulator;
                         trace_index++;
                     }
-                    // Skip unused trace slots for this row (ADDITIONS_PER_ROW allocated, only DOUBLINGS_PER_ROW used)
-                    trace_index += (ADDITIONS_PER_ROW - DOUBLINGS_PER_ROW);
                     accumulator_trace[msm_row_index] = accumulator;
                     msm_row_index++;
                 } else // process `wnaf_skew`, i.e., the skew digit.
@@ -402,6 +413,7 @@ class ECCVMMSMMBuilder {
                             p2_trace[trace_index] = add_state.point;
                             p3_trace[trace_index] = accumulator;
                             is_double_or_add[trace_index] = false;
+                            is_used[trace_index] = true;
                             trace_index++;
                         }
                         row.q_add = false;
@@ -427,7 +439,11 @@ class ECCVMMSMMBuilder {
         std::vector<FF> inverse_trace(num_point_adds_and_doubles);
         parallel_for_range(num_point_adds_and_doubles, [&](size_t start, size_t end) {
             for (size_t operation_idx = start; operation_idx < end; ++operation_idx) {
-                if (is_double_or_add[operation_idx]) {
+                if (!is_used[operation_idx]) {
+                    // Unused trace slots (padding for ADDITIONS_PER_ROW > DOUBLINGS_PER_ROW in doubling rows).
+                    // Set to 1 so batch_invert doesn't fail on zero.
+                    inverse_trace[operation_idx] = 1;
+                } else if (is_double_or_add[operation_idx]) {
                     inverse_trace[operation_idx] = (p1_trace[operation_idx].y + p1_trace[operation_idx].y);
                 } else {
                     inverse_trace[operation_idx] = (p2_trace[operation_idx].x - p1_trace[operation_idx].x);
diff --git a/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp
index 72926f8cb041..d678664a4251 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/precomputed_tables_builder.hpp
@@ -130,9 +130,12 @@ class ECCVMPointTablePrecomputationBuilder {
 
                     // N.B. we apply a constraint that requires slice1 to be positive for the 1st row of each scalar
                     // sum. This ensures we do not have WNAF representations of negative values
-                    const int row_chunk = slice7 + (slice6 * (1 << 4)) + (slice5 * (1 << 8)) + (slice4 * (1 << 12)) +
-                                          (slice3 * (1 << 16)) + (slice2 * (1 << 20)) + (slice1 * (1 << 24)) +
-                                          (slice0 * (1 << 28));
+                    // Use int64_t to avoid signed overflow: with 8 digits, slice0*(1<<28) can exceed INT_MAX
+                    const int64_t row_chunk =
+                        static_cast<int64_t>(slice7) + (static_cast<int64_t>(slice6) << 4) +
+                        (static_cast<int64_t>(slice5) << 8) + (static_cast<int64_t>(slice4) << 12) +
+                        (static_cast<int64_t>(slice3) << 16) + (static_cast<int64_t>(slice2) << 20) +
+                        (static_cast<int64_t>(slice1) << 24) + (static_cast<int64_t>(slice0) << 28);
 
                     bool chunk_negative = row_chunk < 0;
 

From 17d669a6e3c07d88bf31c1380a6524548009f0d6 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 15:52:23 +0000
Subject: [PATCH 16/24] fix(eccvm): update lookup read counts and set relation
 for 2-point-per-row layout

Three fixes:

1. Lookup read counts: Reworked MSM builder to return 4 read count
   columns (was 2). With 2 precomputed points per row and 4 table terms
   in the lookup relation, each compressed slice value maps to one of 4
   tables based on parity and magnitude:
   - Table 0: odd slices >= 8 (point 1 positive)
   - Table 1: even slices < 8 (point 1 negative)
   - Table 2: even slices >= 8 (point 2 positive)
   - Table 3: odd slices < 8 (point 2 negative)
   ProverPolynomials now wires all 4 read count columns.

2. Set relation second term: Changed base point reference from
   precompute_tx/ty to precompute_tx2/ty2. In the 2-point-per-row
   layout, the base point P is stored in tx2/ty2 at the transition
   row (round=3), not in tx/ty (which holds 3P).

3. Removed debug trace code from trace checker.
---
 .../src/barretenberg/eccvm/eccvm_flavor.hpp   |  10 +-
 .../src/barretenberg/eccvm/msm_builder.hpp    | 101 ++++++++----------
 .../ecc_vm/ecc_set_relation_impl.hpp          |   6 +-
 3 files changed, 51 insertions(+), 66 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
index a85ddeb9e705..17fda703f0a5 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
@@ -689,14 +689,12 @@ class ECCVMFlavor {
             lagrange_third.at(2) = 1;
             lagrange_last.at(unmasked_witness_size - 1) = 1;
             for (size_t i = 0; i < point_table_read_counts[0].size(); ++i) {
-                // Explanation of off-by-one offset:
-                // When computing the WNAF slice for a point at point counter value `pc` and a round index `round`, the
-                // row number that computes the slice can be derived. This row number is then mapped to the index of
-                // `lookup_read_counts`. We do this mapping in `ecc_msm_relation`. We are off-by-one because we add an
-                // empty row at the start of the WNAF columns that is not accounted for (index of lookup_read_counts
-                // maps to the row in our WNAF columns that computes a slice for a given value of pc and round)
+                // Off-by-one: the first precompute row is empty (for shifted polynomials), so read counts
+                // are stored at index i+1. Each read count column has ROWS_PER_POINT (= 4) entries per point.
                 lookup_read_counts_0.at(i + 1) = point_table_read_counts[0][i];
                 lookup_read_counts_1.at(i + 1) = point_table_read_counts[1][i];
+                lookup_read_counts_2.at(i + 1) = point_table_read_counts[2][i];
+                lookup_read_counts_3.at(i + 1) = point_table_read_counts[3][i];
             }
 
             // compute polynomials for transcript columns
diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
index a5c097939812..f2bf17edeceb 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
@@ -89,72 +89,57 @@ class ECCVMMSMMBuilder {
      * @param num_msm_rows
      * @return std::vector<MSMRow>
      */
-    static std::tuple<std::vector<MSMRow>, std::array<std::vector<size_t>, 2>> compute_rows(
+    static constexpr size_t NUM_READ_COUNT_COLUMNS = 4;
+    static constexpr size_t ROWS_PER_POINT = eccvm::NUM_WNAF_DIGITS_PER_SCALAR / eccvm::WNAF_DIGITS_PER_ROW; // 4
+
+    static std::tuple<std::vector<MSMRow>, std::array<std::vector<size_t>, NUM_READ_COUNT_COLUMNS>> compute_rows(
         const std::vector<MSM>& msms, const uint32_t total_number_of_muls, const size_t num_msm_rows)
     {
-        // To perform a scalar multiplication of a point P by a scalar x, we precompute a table of points
-        //                           -15P, -13P, ..., -3P, -P, P, 3P, ..., 15P
-        // When we perform a scalar multiplication, we decompose x into base-16 wNAF digits then look these precomputed
-        // values up with digit-by-digit. As we are performing lookups with the log-derivative argument, we have to
-        // record read counts. We record read counts in a table with the following structure:
-        //   1st write column = positive wNAF digits
-        //   2nd write column = negative wNAF digits
-        // the row number is a function of pc and wnaf digit:
-        //   point_idx = total_number_of_muls - pc
-        //   row      = point_idx * rows_per_point_table + (some function of the slice value)
-        //
-        // Illustration:
-        //   Block Structure:
-        //      | 0 | 1 |
-        //      | - | - |
-        //    1 | # | # | -1
-        //    3 | # | # | -3
-        //    5 | # | # | -5
-        //    7 | # | # | -7
-        //    9 | # | # | -9
-        //   11 | # | # | -11
-        //   13 | # | # | -13
-        //   15 | # | # | -15
+        // With 2 precomputed points per row and 4 rows per point, the point table has 4 table terms:
+        //   table 0 (read_counts_0): point 1 positive — compressed slices {15,13,11,9} at rounds 0,1,2,3
+        //   table 1 (read_counts_1): point 1 negative — compressed slices {0,2,4,6} at rounds 0,1,2,3
+        //   table 2 (read_counts_2): point 2 positive — compressed slices {14,12,10,8} at rounds 0,1,2,3
+        //   table 3 (read_counts_3): point 2 negative — compressed slices {1,3,5,7} at rounds 0,1,2,3
         //
-        //   Table structure:
-        //    | Block_{0}                      | <-- pc = total_number_of_muls
-        //    | Block_{1}                      | <-- pc = total_number_of_muls-(num muls in msm 0)
-        //    |   ...                          | ...
-        //    | Block_{total_number_of_muls-1} | <-- pc = num muls in last msm
-
-        const size_t num_rows_in_read_counts_table =
-            static_cast<size_t>(total_number_of_muls) *
-            (eccvm::POINT_TABLE_SIZE >> 1); // `POINT_TABLE_SIZE` is 2ʷ, where in our case w = 4. As noted above, with
-                                            // respect to *read counts*, we are record looking up the positive and
-                                            // negative odd multiples of [P] in two separate columns, each of size 2ʷ⁻¹.
-        std::array<std::vector<size_t>, 2> point_table_read_counts;
-        point_table_read_counts[0].reserve(num_rows_in_read_counts_table);
-        point_table_read_counts[1].reserve(num_rows_in_read_counts_table);
-        for (size_t i = 0; i < num_rows_in_read_counts_table; ++i) {
-            point_table_read_counts[0].emplace_back(0);
-            point_table_read_counts[1].emplace_back(0);
+        // Each read count column has ROWS_PER_POINT (= 4) entries per point.
+        // Row index = point_idx * ROWS_PER_POINT + round.
+
+        const size_t num_rows_in_read_counts_table = static_cast<size_t>(total_number_of_muls) * ROWS_PER_POINT;
+        std::array<std::vector<size_t>, NUM_READ_COUNT_COLUMNS> point_table_read_counts;
+        for (auto& col : point_table_read_counts) {
+            col.resize(num_rows_in_read_counts_table, 0);
         }
 
         const auto update_read_count = [&point_table_read_counts](const size_t point_idx, const int slice) {
-            /**
-             * AUDITTODO: verify and correct the point table ordering described below.
-             * The wNAF digits for base 16 lie in the range -15, -13, ..., 13, 15.
-             * The *point table* format is the following:
-             * (for positive point table) T[0] =  P, T[1] =  3P, ..., T[7]  =  15P
-             * (for negative point table) T[0] = -P, T[1] = -3P, ..., T[15] = -15P
-             * i.e. if the slice value is negative, we can use the compressed WNAF directly as the table index
-             *      if the slice value is positive, we must take 15 - (compressed wNAF) to get the table index
-             */
-            const size_t row_index_offset = point_idx * 8;
-            if (slice < 0) {
-                // negative table: T[0] = -15P, T[1] = -13P, ..., T[7] = -P
-                const auto table_index = static_cast<size_t>((slice + 15) / 2);
-                point_table_read_counts[1][row_index_offset + table_index]++;
+            // `slice` is the wNAF digit in {-15, -13, ..., 13, 15}.
+            // `compressed` is the compressed form in {0, 1, ..., 15}.
+            const size_t compressed = static_cast<size_t>((slice + 15) / 2);
+
+            // Determine which table term and round this compressed slice maps to:
+            //   table 0: compressed ∈ {15,13,11,9} (odd, ≥8)  → round = (15-compressed)/2
+            //   table 1: compressed ∈ {0,2,4,6}    (even, <8) → round = compressed/2
+            //   table 2: compressed ∈ {14,12,10,8}  (even, ≥8) → round = (14-compressed)/2
+            //   table 3: compressed ∈ {1,3,5,7}    (odd, <8)  → round = (compressed-1)/2
+            size_t table_idx;
+            size_t round;
+            const bool is_positive = (compressed >= 8);
+            const bool is_odd = (compressed & 1) != 0;
+            if (is_positive && is_odd) {
+                table_idx = 0;
+                round = (15 - compressed) / 2;
+            } else if (!is_positive && !is_odd) {
+                table_idx = 1;
+                round = compressed / 2;
+            } else if (is_positive && !is_odd) {
+                table_idx = 2;
+                round = (14 - compressed) / 2;
             } else {
-                // positive table: T[0] = 15P, T[1] = 13P, ..., T[7] = P
-                const auto table_index = static_cast<size_t>((15 - slice) / 2);
-                point_table_read_counts[0][row_index_offset + table_index]++;
+                table_idx = 3;
+                round = (compressed - 1) / 2;
             }
+
+            const size_t row_index = point_idx * ROWS_PER_POINT + round;
+            point_table_read_counts[table_idx][row_index]++;
         };
 
         // compute which row index each multiscalar multiplication will start at.
diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp
index a9c3ccdfea0e..630758c9fbfc 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp
@@ -234,8 +234,10 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
      * We only add the tuple to the multiset if `precompute_point_transition == 1`.
      */
     {
-        const auto& table_x = View(in.precompute_tx);
-        const auto& table_y = View(in.precompute_ty);
+        // At the transition row (round=3), Tx2/Ty2 = P (the base point).
+        // The old layout had Tx = P at transition, but the 2-point-per-row layout puts P in Tx2.
+        const auto& table_x = View(in.precompute_tx2);
+        const auto& table_y = View(in.precompute_ty2);
 
         const auto& precompute_skew = View(in.precompute_skew);
         const auto negative_inverse_seven = []() {

From 7dd5ce968019fe8fed26469c3e8678525bfc52e5 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 16:03:39 +0000
Subject: [PATCH 17/24] fix(eccvm): update test infrastructure for 8-wide
 layout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- eccvm.test.cpp: Fix eccvm_set_permutation_delta in
  complete_proving_key_for_test() to be the inverse of the product of
  8 zero-tuple fingerprints (γ + j·β² + t·β⁴) for j=0..7, where
  t = FIRST_TERM_TAG = 1, matching the prover/verifier/trace_checker.
  Previously the product only had 4 terms (j=0..3), causing the
  CommittedSumcheck test to fail.

- eccvm_transcript.test.cpp: Update hardcoded prover manifest in
  construct_eccvm_honk_manifest() with all new wire columns added
  for the 8-wide layout:
  - PRECOMPUTE_S5HI through PRECOMPUTE_S8LO (8 columns)
  - MSM_ADD5 through MSM_ADD8 (4 columns)
  - MSM_X5/Y5 through MSM_X8/Y8 (8 columns)
  - MSM_COLLISION_X5 through MSM_COLLISION_X8 (4 columns)
  - MSM_LAMBDA5 through MSM_LAMBDA8 (4 columns)
  - MSM_SLICE5 through MSM_SLICE8 (4 columns)
  - LOOKUP_READ_COUNTS_2, LOOKUP_READ_COUNTS_3 (2 columns)
  - PRECOMPUTE_TX2, PRECOMPUTE_TY2 (2 columns)

All 41 eccvm_tests now pass.
---
 .../cpp/src/barretenberg/eccvm/eccvm.test.cpp |  8 ++--
 .../eccvm/eccvm_transcript.test.cpp           | 44 +++++++++++++++++++
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm.test.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm.test.cpp
index 1a955be8bba8..fa309c8ec90d 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm.test.cpp
@@ -141,9 +141,11 @@ void complete_proving_key_for_test(bb::RelationParameters<FF>& relation_paramete
     relation_parameters.beta_cube = beta_sqr * beta;
     relation_parameters.beta_quartic = beta_quartic;
     auto first_term_tag = beta_quartic; // FIRST_TERM_TAG (= 1) * beta_quartic
-    relation_parameters.eccvm_set_permutation_delta = (gamma + first_term_tag) * (gamma + beta_sqr + first_term_tag) *
-                                                      (gamma + beta_sqr + beta_sqr + first_term_tag) *
-                                                      (gamma + beta_sqr + beta_sqr + beta_sqr + first_term_tag);
+    // Product of 8 zero-tuple fingerprints (γ + j·β² + t·β⁴) for j = 0..7, then inverted.
+    relation_parameters.eccvm_set_permutation_delta = FF(1);
+    for (size_t j = 0; j < 8; ++j) {
+        relation_parameters.eccvm_set_permutation_delta *= (gamma + FF(j) * beta_sqr + first_term_tag);
+    }
     relation_parameters.eccvm_set_permutation_delta = relation_parameters.eccvm_set_permutation_delta.invert();
 
     const size_t unmasked_witness_size = pk->circuit_size - NUM_DISABLED_ROWS_IN_SUMCHECK;
diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_transcript.test.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_transcript.test.cpp
index 9db51a2bf07e..bce2a8835c1e 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_transcript.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_transcript.test.cpp
@@ -104,6 +104,47 @@ class ECCVMTranscriptTests : public ::testing::Test {
         manifest_expected.add_entry(round, "TRANSCRIPT_MSM_X_INVERSE", frs_per_G);
         manifest_expected.add_entry(round, "TRANSCRIPT_MSM_COUNT_ZERO_AT_TRANSITION", frs_per_G);
         manifest_expected.add_entry(round, "TRANSCRIPT_MSM_COUNT_AT_TRANSITION_INVERSE", frs_per_G);
+        // --- 8-wide additions: new precompute slice columns ---
+        manifest_expected.add_entry(round, "PRECOMPUTE_S5HI", frs_per_G);
+        manifest_expected.add_entry(round, "PRECOMPUTE_S5LO", frs_per_G);
+        manifest_expected.add_entry(round, "PRECOMPUTE_S6HI", frs_per_G);
+        manifest_expected.add_entry(round, "PRECOMPUTE_S6LO", frs_per_G);
+        manifest_expected.add_entry(round, "PRECOMPUTE_S7HI", frs_per_G);
+        manifest_expected.add_entry(round, "PRECOMPUTE_S7LO", frs_per_G);
+        manifest_expected.add_entry(round, "PRECOMPUTE_S8HI", frs_per_G);
+        manifest_expected.add_entry(round, "PRECOMPUTE_S8LO", frs_per_G);
+        // --- 8-wide additions: new MSM add selectors ---
+        manifest_expected.add_entry(round, "MSM_ADD5", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_ADD6", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_ADD7", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_ADD8", frs_per_G);
+        // --- 8-wide additions: new MSM point coordinates ---
+        manifest_expected.add_entry(round, "MSM_X5", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_Y5", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_X6", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_Y6", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_X7", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_Y7", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_X8", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_Y8", frs_per_G);
+        // --- 8-wide additions: new MSM collision inverses ---
+        manifest_expected.add_entry(round, "MSM_COLLISION_X5", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_COLLISION_X6", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_COLLISION_X7", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_COLLISION_X8", frs_per_G);
+        // --- 8-wide additions: new MSM lambdas ---
+        manifest_expected.add_entry(round, "MSM_LAMBDA5", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_LAMBDA6", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_LAMBDA7", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_LAMBDA8", frs_per_G);
+        // --- 8-wide additions: new MSM slices ---
+        manifest_expected.add_entry(round, "MSM_SLICE5", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_SLICE6", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_SLICE7", frs_per_G);
+        manifest_expected.add_entry(round, "MSM_SLICE8", frs_per_G);
+        // --- 8-wide additions: additional lookup read counts ---
+        manifest_expected.add_entry(round, "LOOKUP_READ_COUNTS_2", frs_per_G);
+        manifest_expected.add_entry(round, "LOOKUP_READ_COUNTS_3", frs_per_G);
         manifest_expected.add_entry(round, "TRANSCRIPT_MUL", frs_per_G);
         manifest_expected.add_entry(round, "TRANSCRIPT_MSM_COUNT", frs_per_G);
         manifest_expected.add_entry(round, "PRECOMPUTE_SCALAR_SUM", frs_per_G);
@@ -126,6 +167,9 @@ class ECCVMTranscriptTests : public ::testing::Test {
         manifest_expected.add_entry(round, "TRANSCRIPT_PC", frs_per_G);
         manifest_expected.add_entry(round, "PRECOMPUTE_ROUND", frs_per_G);
         manifest_expected.add_entry(round, "PRECOMPUTE_SELECT", frs_per_G);
+        // --- 8-wide additions: 2nd precomputed point ---
+        manifest_expected.add_entry(round, "PRECOMPUTE_TX2", frs_per_G);
+        manifest_expected.add_entry(round, "PRECOMPUTE_TY2", frs_per_G);
         manifest_expected.add_entry(round, "TRANSCRIPT_ACCUMULATOR_NOT_EMPTY", frs_per_G);
         manifest_expected.add_entry(round, "TRANSCRIPT_ACCUMULATOR_X", frs_per_G);
         manifest_expected.add_entry(round, "TRANSCRIPT_ACCUMULATOR_Y", frs_per_G);

From cedc13d091d569a884a308228faa98f08fa9a760 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Tue, 17 Mar 2026 16:19:52 +0000
Subject: [PATCH 18/24] chore(eccvm): update recursive verifier gate count for
 8-wide layout

The ECCVM recursive verifier gate count increased from 224,657 to
269,130 due to the wider relation columns and higher-degree
subrelations in the 8-wide ECCVM layout. The recursive flavor
inherits all entity/relation changes automatically from the native
ECCVMFlavor via templates, so no code changes were needed in the
stdlib recursive verifier itself.

Dependent constants are updated to match: CHONK_RECURSION_GATES
(1,493,584 -> 1,538,056), ECCVMFlavor::PROOF_LENGTH (608 -> 756), and
ChonkProof::PROOF_LENGTH (1330 -> 1478).
---
 .../src/barretenberg/dsl/acir_format/gate_count_constants.hpp | 4 ++--
 .../dsl/acir_format/mock_verifier_inputs.test.cpp             | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/dsl/acir_format/gate_count_constants.hpp b/barretenberg/cpp/src/barretenberg/dsl/acir_format/gate_count_constants.hpp
index 8077eca18fe5..fecc60544e8d 100644
--- a/barretenberg/cpp/src/barretenberg/dsl/acir_format/gate_count_constants.hpp
+++ b/barretenberg/cpp/src/barretenberg/dsl/acir_format/gate_count_constants.hpp
@@ -113,7 +113,7 @@ constexpr std::tuple<size_t, size_t> HONK_RECURSION_CONSTANTS(
 // ========================================
 
 // Gate count for Chonk recursive verification (Ultra with RollupIO)
-inline constexpr size_t CHONK_RECURSION_GATES = 1493584;
+inline constexpr size_t CHONK_RECURSION_GATES = 1538056;
 
 // ========================================
 // Hypernova Recursion Constants
@@ -147,7 +147,7 @@ inline constexpr size_t HIDING_KERNEL_ULTRA_OPS = 124;
 // ========================================
 
 // Gate count for ECCVM recursive verifier (Ultra-arithmetized)
-inline constexpr size_t ECCVM_RECURSIVE_VERIFIER_GATE_COUNT = 224657;
+inline constexpr size_t ECCVM_RECURSIVE_VERIFIER_GATE_COUNT = 269130;
 
 // ========================================
 // Goblin AVM Recursive Verifier Constants
diff --git a/barretenberg/cpp/src/barretenberg/dsl/acir_format/mock_verifier_inputs.test.cpp b/barretenberg/cpp/src/barretenberg/dsl/acir_format/mock_verifier_inputs.test.cpp
index 659c3cc0986c..bd739a485a12 100644
--- a/barretenberg/cpp/src/barretenberg/dsl/acir_format/mock_verifier_inputs.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/dsl/acir_format/mock_verifier_inputs.test.cpp
@@ -23,7 +23,7 @@ static_assert(HIDING_KERNEL_PUBLIC_INPUTS_SIZE == 28,
 
 // Component proof lengths (used in Noir)
 static_assert(MERGE_PROOF_SIZE == 42, "MERGE_PROOF_SIZE changed - update constants.nr");
-static_assert(ECCVMFlavor::PROOF_LENGTH == 608, "ECCVM proof size changed - update constants.nr");
+static_assert(ECCVMFlavor::PROOF_LENGTH == 756, "ECCVM proof size changed - update constants.nr");
 static_assert(IPA_PROOF_LENGTH == 64, "IPA_PROOF_LENGTH changed - update constants.nr");
 static_assert(TranslatorFlavor::PROOF_LENGTH == 483, "Translator proof size changed - update constants.nr");
 
@@ -32,7 +32,7 @@ static_assert(
     ProofLength::Honk<UltraFlavor>::expected_proof_size<stdlib::recursion::honk::DefaultIO<UltraCircuitBuilder>>(
         UltraFlavor::VIRTUAL_LOG_N) == 449,
     "RECURSIVE_PROOF_LENGTH changed - update constants.nr");
-static_assert(ChonkProof::PROOF_LENGTH == 1330, "CHONK_PROOF_LENGTH changed - update constants.nr");
+static_assert(ChonkProof::PROOF_LENGTH == 1478, "CHONK_PROOF_LENGTH changed - update constants.nr");
 static_assert(ProofLength::MultilinearBatching<MultilinearBatchingFlavor>::LENGTH == 121,
               "MultilinearBatching proof size changed - update constants.nr");
 

From 911f5bd8e2cf7b84e71844983ac4ff580f5a54a0 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Wed, 18 Mar 2026 09:48:17 +0000
Subject: [PATCH 19/24] chore(eccvm): update stale comments for 8-wide layout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update comments across msm_builder.hpp, eccvm_flavor.hpp, and
ecc_lookup_relation_impl.hpp to reflect the new 8-wide layout:
- "4 point-additions per row" → 8
- "size-4 array" → size-8
- "result of four EC additions" → eight
- Document msm_x/y/add/lambda/slice/collision_x 1..8
- Document precompute_s1..s8 (8 slices per row)
- Document precompute_tx2/ty2 (2 points per row)
- Document all 4 lookup_read_counts columns
---
 .../src/barretenberg/eccvm/eccvm_flavor.hpp   | 50 ++++++-------------
 .../src/barretenberg/eccvm/msm_builder.hpp    | 14 +++---
 .../ecc_vm/ecc_lookup_relation_impl.hpp       |  4 +-
 3 files changed, 25 insertions(+), 43 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
index 17fda703f0a5..792f42b54bdd 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_flavor.hpp
@@ -588,13 +588,13 @@ class ECCVMFlavor {
          *          precompute_round: round counter for Straus precomputation algorithm
          *          precompute_scalar_sum: accumulating sum of Straus scalar slices
          *          precompute_s1hi/lo: 2-bit hi/lo components of a Straus 4-bit scalar slice
-         *          precompute_s2hilo/precompute_s3hi/loprecompute_s4hi/lo: same as above but for a total of 4 Straus
-         4-bit scalar slices
+         *          precompute_s2hi/lo through precompute_s8hi/lo: same as above, for a total of 8 Straus
+         4-bit scalar slices per row
          *          precompute_skew: Straus WNAF skew parameter for a single scalar multiplier
-         *          precompute_tx: x-coordinate of point accumulator used to generate Straus lookup table for an input
-         point (from transcript)
-         *          precompute_ty: y-coordinate of point accumulator used to generate Straus lookup table for an input
-         point (from transcript)
+         *          precompute_tx: x-coordinate of first precomputed point in row (Straus lookup table entry)
+         *          precompute_ty: y-coordinate of first precomputed point in row
+         *          precompute_tx2: x-coordinate of second precomputed point in row (2 points per row)
+         *          precompute_ty2: y-coordinate of second precomputed point in row
          *          precompute_dx: x-coordinate of D = 2 * input point we are evaluating Straus over
          *          precompute_dy: y-coordinate of D
          *          msm_pc: point counter for Straus MSM columns
@@ -606,34 +606,16 @@ class ECCVMFlavor {
          *          msm_size_of_msm: size of multiscalar multiplication current row is a part of
          *          msm_round: describes which round of the Straus MSM algorithm the current row represents
          *          msm_count: number of points processed for the round indicated by `msm_round`
-         *          msm_x1: x-coordinate of potential point in Straus MSM round
-         *          msm_y1: y-coordinate of potential point in Straus MSM round
-         *          msm_x2: x-coordinate of potential point in Straus MSM round
-         *          msm_y2: y-coordinate of potential point in Straus MSM round
-         *          msm_x3: x-coordinate of potential point in Straus MSM round
-         *          msm_y3: y-coordinate of potential point in Straus MSM round
-         *          msm_x4: x-coordinate of potential point in Straus MSM round
-         *          msm_y4: y-coordinate of potential point in Straus MSM round
-         *          msm_add1: are we adding msm_x1/msm_y1 into accumulator at current round?
-         *          msm_add2: are we adding msm_x2/msm_y2 into accumulator at current round?
-         *          msm_add3: are we adding msm_x3/msm_y3 into accumulator at current round?
-         *          msm_add4: are we adding msm_x4/msm_y4 into accumulator at current round?
-         *          msm_lambda1: temp variable used for ecc point addition algorithm if msm_add1 = 1
-         *          msm_lambda2: temp variable used for ecc point addition algorithm if msm_add2 = 1
-         *          msm_lambda3: temp variable used for ecc point addition algorithm if msm_add3 = 1
-         *          msm_lambda4: temp variable used for ecc point addition algorithm if msm_add4 = 1
-         *          msm_slice1: wNAF digit/slice for first add
-         *          msm_slice2: wNAF digit/slice for second add
-         *          msm_slice3: wNAF digit/slice for third add
-         *          msm_slice4: wNAF digit/slice for fourth add
-         *          msm_collision_x1: used to ensure incomplete ecc addition exceptions not triggered if msm_add1 = 1
-         *          msm_collision_x2: used to ensure incomplete ecc addition exceptions not triggered if msm_add2 = 1
-         *          msm_collision_x3: used to ensure incomplete ecc addition exceptions not triggered if msm_add3 = 1
-         *          msm_collision_x4: used to ensure incomplete ecc addition exceptions not triggered if msm_add4 = 1
-         *          lookup_read_counts_0: stores number of times a point has been read from a Straus precomputation
-         table (reads come from msm_x/y1, msm_x/y2)
-         *          lookup_read_counts_1: stores number of times a point has been read from a Straus precomputation
-         table (reads come from msm_x/y3, msm_x/y4)
+         *          msm_x1..msm_x8: x-coordinates of potential points in Straus MSM round (8 per row)
+         *          msm_y1..msm_y8: y-coordinates of potential points in Straus MSM round (8 per row)
+         *          msm_add1..msm_add8: are we adding msm_xi/msm_yi into accumulator at current round?
+         *          msm_lambda1..msm_lambda8: temp variables for ecc point addition algorithm
+         *          msm_slice1..msm_slice8: wNAF digit/slice for each of the 8 additions per row
+         *          msm_collision_x1..msm_collision_x8: ensure incomplete ecc addition exceptions not triggered
+         *          lookup_read_counts_0: read counts for Straus lookup table term 0 (point 1 positive slices)
+         *          lookup_read_counts_1: read counts for Straus lookup table term 1 (point 1 negative slices)
+         *          lookup_read_counts_2: read counts for Straus lookup table term 2 (point 2 positive slices)
+         *          lookup_read_counts_3: read counts for Straus lookup table term 3 (point 2 negative slices)
          * @return ProverPolynomials
          */
         ProverPolynomials(const CircuitBuilder& builder)
diff --git a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
index f2bf17edeceb..186794216244 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
+++ b/barretenberg/cpp/src/barretenberg/eccvm/msm_builder.hpp
@@ -45,13 +45,13 @@ class ECCVMMSMMBuilder {
         bool q_double = false;
         bool q_skew = false;
 
-        // Each row in the MSM portion of the ECCVM can handle (up to) 4 point-additions.
-        // For each row in the VM we represent the point addition data via a size-4 array of
+        // Each row in the MSM portion of the ECCVM can handle (up to) 8 point-additions.
+        // For each row in the VM we represent the point addition data via a size-8 array of
         // AddState objects.
         struct AddState {
             bool add = false; // are we adding a point at this location in the VM?
-                              // e.g if the MSM is of size-2 then the 3rd and 4th AddState objects will have this set
-                              // to `false`.
+                              // e.g if the MSM is of size-2 then the 3rd through 8th AddState objects will have this
+                              // set to `false`.
             int slice = 0; // wNAF slice value. This has values in {0, ..., 15} and corresponds to an odd number in the
                            // range {-15, -13, ..., 15} via the monotonic bijection.
             AffineElement point{ 0, 0 }; // point being added into the accumulator. (This is of the form nP,
@@ -67,8 +67,8 @@ class ECCVMMSMMBuilder {
             arr.fill(AddState{ false, 0, { 0, 0 }, 0, 0 });
             return arr;
         }();
-        // The accumulator here is, in general, the result of four EC additions: A + Q_1 + Q_2 + Q_3 + Q_4.
-        // We do not explicitly store the intermediate values A + Q_1, A + Q_1 + Q_2, and A + Q_1 + Q_2 + Q_3, although
+        // The accumulator here is, in general, the result of eight EC additions: A + Q_1 + ... + Q_8.
+        // We do not explicitly store the intermediate values A + Q_1, ..., A + Q_1 + ... + Q_7, although
         // these values are implicitly used in the values of `AddState.lambda` and `AddState.collision_inverse`.
 
         FF accumulator_x = 0; // `(accumulator_x, accumulator_y)` is the accumulator to which I potentially want to add
@@ -294,7 +294,7 @@ class ECCVMMSMMBuilder {
                         auto& add_state = row.add_state[point_idx];
                         add_state.add = num_points_in_row > point_idx;
                         int slice = add_state.add ? msm[offset + point_idx].wnaf_digits[digit_idx] : 0;
-                        // In the MSM columns in the ECCVM circuit, we can add up to 4 points per row.
+                        // In the MSM columns in the ECCVM circuit, we can add up to 8 points per row.
                         // if `row.add_state[point_idx].add = 1`, this indicates that we want to add the
                         // `point_idx`'th point in the MSM columns into the MSM accumulator `add_state.slice` = A
                         // 4-bit WNAF slice of the scalar multiplier associated with the point we are adding (the
diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation_impl.hpp
index be82d88d3ef1..a41f5ecc9f59 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_lookup_relation_impl.hpp
@@ -18,8 +18,8 @@ namespace bb {
  * Table writes: ECCVMPointTable columns: we define Straus point table:
  * { {0, -15[P]}, {1, -13[P]}, ..., {15, 15[P]} }
  * write source: { precompute_round, precompute_tx, precompute_ty }
- * Table reads: ECCVMMSM columns. Each row adds up to 4 points into MSM accumulator
- * read source: { msm_slice1, msm_x1, msm_y1 }, ..., { msm_slice4, msm_x4, msm_y4 }
+ * Table reads: ECCVMMSM columns. Each row adds up to 8 points into MSM accumulator
+ * read source: { msm_slice1, msm_x1, msm_y1 }, ..., { msm_slice8, msm_x8, msm_y8 }
  * @param evals transformed to `evals + C(in(X)...)*scaling_factor`
  * @param in an std::array containing the fully extended Accumulator edges.
  * @param parameters contains beta, gamma, and public_input_delta, ....

From 4b9154d0938d81a43ab3a01f8b7fe7c8ca744f78 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Wed, 18 Mar 2026 09:58:21 +0000
Subject: [PATCH 20/24] chore(eccvm): revert benchmark file changes

Remove ECCVM univariate benchmark additions from relations.bench.cpp
to keep this PR focused on the 8-wide layout change.
---
 .../benchmark/relations_bench/relations.bench.cpp     | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
index e58f9bb6665e..1a2acbf93f2e 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
@@ -86,7 +86,7 @@ BENCHMARK(execute_relation_for_values<TranslatorFlavor, TranslatorDeltaRangeCons
 BENCHMARK(execute_relation_for_values<TranslatorFlavor, TranslatorNonNativeFieldRelation<Fr>>);
 BENCHMARK(execute_relation_for_values<TranslatorFlavor, TranslatorPermutationRelation<Fr>>);
 
-// ECCVM (verifier work)
+// ECCVM
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMLookupRelation<Fq>>);
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMMSMRelation<Fq>>);
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMPointTableRelation<Fq>>);
@@ -95,15 +95,6 @@ BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMTranscriptRelation<Fq>>)
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMWnafRelation<Fq>>);
 BENCHMARK(execute_relation_for_values<ECCVMFlavor, ECCVMBoolsRelation<Fq>>);
 
-// ECCVM (Sumcheck prover work — univariate accumulation)
-BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMLookupRelation<Fq>>);
-BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMMSMRelation<Fq>>);
-BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMPointTableRelation<Fq>>);
-BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMSetRelation<Fq>>);
-BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMTranscriptRelation<Fq>>);
-BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMWnafRelation<Fq>>);
-BENCHMARK(execute_relation_for_univariates<ECCVMFlavor, ECCVMBoolsRelation<Fq>>);
-
 } // namespace bb::benchmark::relations
 
 BENCHMARK_MAIN();

From 393dfc054329227305a2ef444df27c29b716e14b Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Wed, 18 Mar 2026 11:05:34 +0000
Subject: [PATCH 21/24] chore(eccvm): fix stale degree comments and design doc
 proof size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix ECCVM proof size in design doc: ~716 → 756 Fr (confirmed by
  static_assert in proof_compression.hpp)
- Correct set relation degree comments: denominator sub-products are
  16 (8 add-gated tuples) + 6 (transcript z1/z2) + 4 (MSM output) = 26,
  not the previously claimed 28. Full GP subrelation degree = 27,
  partial length upper bound = 29.
- Fix duplicate comment blocks in set relation numerator/denominator
  third term docstrings
- Update inline cumulative degree annotations throughout
  compute_grand_product_numerator/denominator
---
 .../barretenberg/eccvm/WIDE_SHORT_DESIGN.md   | 188 ++++++++++++++++++
 .../ecc_vm/ecc_set_relation_impl.hpp          |  37 ++--
 2 files changed, 203 insertions(+), 22 deletions(-)
 create mode 100644 barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md
new file mode 100644
index 000000000000..cc45cc9a5e46
--- /dev/null
+++ b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md
@@ -0,0 +1,188 @@
+# ECCVM Wider-and-Shorter: Design Spec
+
+## Goal
+
+Halve the Precomputed and MSM table heights by doubling their width (`WNAF_DIGITS_PER_ROW` 4→8, `ADDITIONS_PER_ROW` 4→8). This halves the number of rows consumed per scalar, so with `CONST_ECCVM_LOG_N` staying at 15 we can handle twice as many scalar multiplications — i.e. **double the stack depth**.
+
+Alternatively, if stack depth is sufficient, `CONST_ECCVM_LOG_N` could drop from 15→14, halving the IPA MSM and speeding up native proving and root rollup verification.
+
+## Impact
+
+Here, "split" refers to the option of splitting the single multiset-equality grand product into three independent grand products (each with its own `z_perm` wire), which would lower the relation degree. However, this does not seem to be especially critical.
+
+| Metric | Before | After (no split) | After (with split) |
+|--------|:------:|:-----------------:|:------------------:|
+| NUM_WIRES | 85 | 121 (+36) | 123 (+38) |
+| Precompute rows/scalar | 8 | 4 | 4 |
+| MSM ADD rows/digit | ⌈m/4⌉ | ⌈m/8⌉ | ⌈m/8⌉ |
+| MAX_PARTIAL_RELATION_LENGTH | 22 | 29 | 17 |
+| ECCVM proof size (Fr) | 608 | 756 (+148) | ~764 (+156) |
+
+The split adds +2 columns (z_perm_b, z_perm_c) and +8 proof elements but drops degree from 29→17.
+
+Note: proof size = 756 was confirmed via `static_assert` in `proof_compression.hpp`.
+
+## Implementation plan
+
+### Step 1: Widen tables (ADDITIONS_PER_ROW + WNAF_DIGITS_PER_ROW, together)
+
+These **cannot** be separated: the lookup relation's table terms reference `Tx2/Ty2` (from WNAF widening) but also need `NUM_LOOKUP_TERMS = 8` (from additions widening). Ship as one diff.
+
+#### 1a. Constants
+
+**`eccvm_builder_types.hpp`:**
+```cpp
+WNAF_DIGITS_PER_ROW = 8;        // was 4
+ADDITIONS_PER_ROW = 8;           // was 4
+DOUBLINGS_PER_ROW = NUM_WNAF_DIGIT_BITS; // NEW, always 4
+```
+
+#### 1b. Builders
+
+**`msm_builder.hpp`:**
+- `MSMRow::add_state`: `std::array<AddState, 4>` → `<AddState, ADDITIONS_PER_ROW>` (line 64)
+- DOUBLE loop (line 358): change bound from `ADDITIONS_PER_ROW` to `DOUBLINGS_PER_ROW`
+- Trace sizing (lines 243, 288): replace hardcoded `* 4` with `* ADDITIONS_PER_ROW`
+- All other loops already use `ADDITIONS_PER_ROW` — auto-adjust
+
+**`precomputed_tables_builder.hpp`:**
+- Remove `static_assert(WNAF_DIGITS_PER_ROW == 4)` (line 66) and `num_rows_per_scalar == POINT_TABLE_SIZE / 2` (line 57)
+- `PointTablePrecomputationRow`: add `s9..s16` (8 more 2-bit slices), add `precompute_accumulator2`
+- `num_rows_per_scalar` = 32/8 = 4. Each row stores 2 precomputed points: `table[POINT_TABLE_SIZE - 1 - 2*i]` and `table[POINT_TABLE_SIZE - 2 - 2*i]`
+- Digit loop: 8 digits → 16 two-bit slices (s1–s16)
+- Horner: `prev_sum * 2^32 + row_chunk` (was `2^16`)
+
+#### 1c. Flavor columns
+
+**`eccvm_flavor.hpp` — add to `WireNonShiftedEntities` (+36 columns):**
+
+| Group | New columns | Count |
+|-------|------------|:-----:|
+| Precompute slices | `precompute_s5hi..s8lo` | +8 |
+| Precompute 2nd point | `precompute_tx2, precompute_ty2` | +2 |
+| MSM adds | `msm_add5..add8` | +4 |
+| MSM points | `msm_x5..x8, msm_y5..y8` | +8 |
+| MSM collision | `msm_collision_x5..x8` | +4 |
+| MSM lambdas | `msm_lambda5..lambda8` | +4 |
+| MSM slices | `msm_slice5..slice8` | +4 |
+| Lookup read counts | `lookup_read_counts_2, _3` | +2 |
+
+Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows existing patterns at lines 706–713).
+
+#### 1d. Relations
+
+**`ecc_msm_relation_impl.hpp`:**
+- Extend addition chain 4→8, skew chain 4→8. Doubling chain **unchanged** (4, lambda1–4).
+- 4 new add slope subrelations + 4 skew slope subrelations (separate to prevent cancellation)
+- Extend collision checks, slice-zero, addition continuity, count update to 8
+- Cross-row: `(-add8 + 1) * add1_shift`. Max partial length: 8→**12**.
+
+**`ecc_set_relation_impl.hpp` (ECCVMSetRelation — unchanged structure):**
+- Numerator: 8 slice fingerprints instead of 4 (degree 7→11), round encoding `8 * precompute_round + j`
+- Denominator: 8 add-gated tuples instead of 4 (degree 8→16 in slice sub-product, total denominator degree 16+6+4 = 26; full GP subrelation degree 27)
+- `eccvm_set_permutation_delta`: product of 8 terms instead of 4 (update in prover + verifier)
+- `SUBRELATION_PARTIAL_LENGTHS = {29, 3}` (was `{22, 3}`)
+
+**`ecc_lookup_relation.hpp`:**
+- `NUM_LOOKUP_TERMS = 8`, `NUM_TABLE_TERMS = 4`, `LENGTH = 15`
+- 4 table terms: positive/negative for each of 2 points per precompute row
+- Coverage: `{0..15}` fully covered (verify in tests)
+
+**`ecc_wnaf_relation_impl.hpp`:**
+- 8 extra range checks (s5hi–s8lo), Horner for 8 digits, `* 2^32` shift
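+- Round: 0–3 (was 0–7), replace the hardcoded final-round value `7` with `NUM_WNAF_DIGITS_PER_SCALAR / WNAF_DIGITS_PER_ROW - 1` (= 3). Note that `WNAF_DIGITS_PER_ROW - 1` would still evaluate to 7 after widening — TODO confirm against the relation code.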
+
+**`ecc_point_table_relation_impl.hpp`:**
+- Add intra-row constraint: `(Tx, Ty) = (Tx2, Ty2) + (Dx, Dy)`
+- Doubling target: `2 * (Tx2, Ty2)` at transitions (Tx2 = 1·P, not Tx = 3·P)
+- Inter-row addition links via Tx2. Subrelations: 6→8.
+
+**`ecc_bools_relation_impl.hpp`:** +4 boolean checks for `msm_add5..8`.
+
+#### 1e. Post-widening MAX_PARTIAL_RELATION_LENGTH (no split)
+
+| Relation | Partial length |
+|----------|:---:|
+| Set (combined GP) | **29** |
+| Lookup | 15 |
+| MSM | 12 |
+| WNAF | 5 |
+| Point Table | 6 |
+| Bools | 3 |
+| **MAX** | **29** |
+
+### Step 2: Constants, VKs, recursive verifier
+
+- Update `SUBRELATION_PARTIAL_LENGTHS` in `ecc_set_relation.hpp` to `{29, 3}`
+- Update static_asserts in `mock_verifier_inputs.test.cpp`
+- Check `proof_compression.hpp` for hardcoded offsets
+- Update `constants.nr`, run `yarn remake-constants`
+- Regenerate VKs: `./test_chonk_standalone_vks_havent_changed.sh --update_inputs`
+- Recursive verifier: relation changes auto-propagate via templates. Flavor entity changes (new columns) need manual mirroring in `stdlib/eccvm_verifier/`
+
+### Step 3: Test and measure
+
+- `eccvm_tests` after widening
+- `chonk_tests`, `goblin_tests` after full integration
+- Measure actual workload row counts → decide if `CONST_ECCVM_LOG_N` can drop 15→14
+- If yes: update `constants.hpp`, cascade to Noir/TS
+
+### Step 4 (optional): Grand product split
+
+Split the single `ECCVMSetRelation` grand product into 3 independent ones to drop `MAX_PARTIAL_RELATION_LENGTH` from 29→17. Only worth doing if degree 29 causes measurable performance issues.
+
+**Key constraint: `compute_grand_products<Flavor>()` supports exactly one GP per relation class** — it calls `get_grand_product_polynomial()` (singular) and `compute_grand_product_numerator/denominator()` (each returning a single scalar). There is no index-templated overload. So you must create **3 separate relation classes**, not one relation with 3 sub-products.
+
+**New relation classes** (replace `ECCVMSetRelation`):
+- `ECCVMSetRelationSlices` — GP A: `(pc, round, wnaf_slice)` multiset. N = slice fingerprints + skew + delta. D = add-gated `(pc, round, slice)` from MSM.
+- `ECCVMSetRelationPointTable` — GP B: `(pc, Px, Py, scalar)` multiset. N = point-table tuples at `point_transition`. D = transcript `z1/z2` scalar tuples.
+- `ECCVMSetRelationMSMOutput` — GP C: `(pc, acc_x, acc_y, msm_size)` multiset. N = MSM accumulator at `msm_transition`. D = transcript MSM output.
+
+Each class provides its own `get_grand_product_polynomial()`, `compute_grand_product_numerator/denominator()`, `accumulate()`, and `skip()`.
+
+**Post-split degrees (with widening):**
+
+| GP | Numerator degree | Denominator degree | Partial length |
+|----|:---:|:---:|:---:|
+| A (slices) | 15 | 16 | **17** |
+| B (point-table) | 9 | 7 | **10** |
+| C (MSM output) | 11 | 4 | **12** |
+
+**Flavor changes:**
+- Add `z_perm_b`, `z_perm_c` to `DerivedWitnessEntities`
+- Add `z_perm_b_shift`, `z_perm_c_shift` to shifted entities + `get_to_be_shifted()`
+- Update `GrandProductRelations` tuple: `std::tuple<ECCVMSetRelationSlices<FF>, ECCVMSetRelationPointTable<FF>, ECCVMSetRelationMSMOutput<FF>>`
+
+**Each new relation's `skip()`:** Must check its own z_perm, not the others.
+
+**Constants cascade:** +2 commitments × 2 Fr + 4 evaluations × 1 Fr = ~8 Fr added to proof. Update static_asserts, Noir constants, VKs.
+
+## Why the grand product split is optional
+
+The original plan required splitting the grand product to "keep relation degrees sane." Analysis shows this isn't load-bearing:
+
+**Without split, after widening:** `MAX_PARTIAL_RELATION_LENGTH` goes from 22 → **29**. The combined denominator grows because the slice sub-product doubles (degree 8→16), making the cumulative denominator degree 16+6+4 = 26. The full accumulate expression `(z_perm + lagrange_first) * numerator - (z_perm_shift + lagrange_last) * denominator` has degree max(1+16, 1+26) = 27, partial length 28. We set `SUBRELATION_PARTIAL_LENGTHS[0] = 29` as a conservative upper bound. (Sub-product degrees: 16 for 8 add-gated tuples, 6 for transcript z1/z2, 4 for MSM output.)
+
+**Why degree 29 is acceptable:**
+
+1. **Sumcheck is ~40% of ECCVM proving (~500ms).** Higher degree means the prover sends a larger univariate polynomial per sumcheck round (29 evaluations instead of 22). This is a ~30% increase in sumcheck work — so maybe +150ms. That's noise compared to the IPA MSM savings from dropping `log_N` (which saves seconds in the recursive verifier).
+
+2. **Per-round communication doesn't change structurally.** The prover still sends one univariate per round for `log2(N)` rounds. Each univariate is just a few more field elements. Total sumcheck proof overhead: `(29-22) × log2(N) = ~100 extra Fr` — dwarfed by the IPA MSM savings.
+
+3. **The recursive verifier is only 215K gates total.** A higher-degree sumcheck means ~7 extra multiplications per round to evaluate the univariate (Horner's method, degree 28 vs 21). Over `log2(N) = 14` rounds that's ~98 extra gates — 0.05% of the 215K total. Negligible.
+
+4. **The GP split adds complexity for little benefit:** 3 new relation classes, 2 extra z_perm polynomials (+2 commitments, +4 evaluations in the proof), flavor/prover/verifier changes, and more surface area for bugs. All to save ~150ms of sumcheck time.
+
+**Bottom line:** Ship the widening without the split. If degree 29 turns out to matter later, the split can be done as a follow-up.
+
+## Corrections to the original spec / previous version of this plan
+
+1. **"Grand product split is required alongside the widening"** — It isn't. Sumcheck is ~40% of ECCVM proving (~500ms). Going from degree 22 to 29 adds ~30% to sumcheck cost (~150ms), which is negligible vs. the IPA MSM savings from dropping `log_N`. The recursive verifier is only 215K gates — a few extra gates per sumcheck round for higher-degree univariates doesn't matter. The split is a nice-to-have, not a prerequisite.
+
+2. **"One relation with 3 indexed grand products"** — The `compute_grand_product` library doesn't support this. `get_grand_product_polynomial()` returns a single polynomial; `compute_grand_product_numerator/denominator()` each return a single scalar. One GP per relation class. Must create **3 separate relation classes** in `GrandProductRelations`.
+
+3. **"Steps 2 and 3 (additions vs digits) can be done independently"** — They can't. The lookup table terms reference `Tx2/Ty2` (from WNAF widening) but need `NUM_LOOKUP_TERMS = 8` (from additions widening). Circular dependency. Ship together.
+
+4. **"Step 4: Wire up builders → flavor" as separate step** — Not a real step. You can't add flavor columns without wiring the builder. These are part of the same diff.
+
+5. **The degree analysis (corrected sub-product breakdown).** Current denominator is degree 18 (sub-products: 8 + 6 + 4 = 18). After widening without split, slice sub-product doubles (degree 8→16), making total denominator degree 16+6+4 = 26. The full GP subrelation degree is max(1+numerator, 1+denominator) = max(17, 27) = 27, partial length 28. We set `SUBRELATION_PARTIAL_LENGTHS[0] = 29` as a conservative upper bound. After widening with split, each GP carries only its own sub-product, so the bottleneck is GP A.
diff --git a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp
index 630758c9fbfc..ee84a33e599e 100644
--- a/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/relations/ecc_vm/ecc_set_relation_impl.hpp
@@ -342,15 +342,13 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
         point_table_init_read =
             precompute_point_transition * (point_table_init_read + gamma) + (-precompute_point_transition + 1);
 
-        numerator *= point_table_init_read; // degree-9
+        numerator *= point_table_init_read; // degree-13 (cumulative: 11 + 2)
     }
     /**
-     * @brief Third term: tuple of (pc, P.x, P.y, msm-size) from ECCVMMSMRelation.
      * @brief Third term: tuple of (pc, P.x, P.y, msm-size) from ECCVMMSMRelation.
      *        (P.x, P.y) is the output of a multi-scalar-multiplication evaluated in ECCVMMSMRelation.
      *        We need to validate that the same values (P.x, P.y) are present in the Transcript columns and describe a
      *        multi-scalar multiplication of size `msm-size`, starting at `pc`.
-     *        multi-scalar multiplication of size `msm-size`, starting at `pc`.
      *
      *        If `msm_transition_shift == 1`, this indicates the current row is the last row of a multiscalar
      *        multiplication evaluation. The output of the MSM will be present on `(msm_accumulator_x_shift,
@@ -388,7 +386,7 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_numerator(const AllE
 
         // msm_result_write = degree 2
         msm_result_write = msm_transition_shift * (msm_result_write + gamma) + (-msm_transition_shift + 1);
-        numerator *= msm_result_write; // degree-11
+        numerator *= msm_result_write; // degree-16 (cumulative: 13 + 3; msm_transition_shift is degree 2)
     }
     return numerator;
 }
@@ -399,8 +397,8 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
 {
     using View = typename Accumulator::View;
 
-    // OPTIMIZE(@zac-williamson). The degree of the denominator is 28, making overall relation degree ~29.
-    // Can potentially optimize by refining the algebra.
+    // OPTIMIZE(@zac-williamson). The degree of the denominator is 26, making overall relation degree 27
+    // (partial length upper bound = 29). Can potentially optimize by refining the algebra.
     const auto& gamma = params.gamma;
     const auto& beta = params.beta;
     const auto& beta_sqr = params.beta_sqr;
@@ -518,7 +516,7 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
         auto transcript_input1 = transcript_pc + transcript_Px * beta + transcript_Py * beta_sqr + z1 * beta_cube +
                                  second_term_tag; // degree = 1
         auto transcript_input2 = (transcript_pc - lookup_first) + transcript_Px * cube_root_unity * beta -
-                                 transcript_Py * beta_sqr + z2 * beta_cube + second_term_tag; // degree = 2
+                                 transcript_Py * beta_sqr + z2 * beta_cube + second_term_tag; // degree = 1
 
         // The following diagram expresses a fingerprint of part of the tuple. It does not include `transcript_pc` and
         // has not weighted the X and Y with beta and beta_sqr respectively. The point is nonetheless to show exactly
@@ -537,14 +535,14 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
         // | 1     | 1       | 0       |             1 | 1                           |
         // | 1     | 1       | 1       |             1 | 1                           |
         transcript_input1 = (transcript_input1 + gamma) * lookup_first + (-lookup_first + 1);   // degree 2
-        transcript_input2 = (transcript_input2 + gamma) * lookup_second + (-lookup_second + 1); // degree 3
+        transcript_input2 = (transcript_input2 + gamma) * lookup_second + (-lookup_second + 1); // degree 2
 
-        // transcript_product = degree 6
+        // transcript_product = degree 5 (deg2 * deg2 * deg1 + deg1)
         auto transcript_product = (transcript_input1 * transcript_input2) * (-base_infinity + 1) + base_infinity;
 
-        // point_table_init_write = degree 7
+        // point_table_init_write = degree 6 (deg1 * deg5 + deg1)
         auto point_table_init_write = transcript_mul * transcript_product + (-transcript_mul + 1);
-        denominator *= point_table_init_write; // degree-25
+        denominator *= point_table_init_write; // degree-22 (cumulative: 16 + 6)
     }
     /**
      * @brief Third term: tuple of (pc, P.x, P.y, msm-size) from ECCVMTranscriptRelation.
@@ -554,12 +552,6 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
      *        `transcript_pc` and has size `transcript_msm_count`.
      * @note  In the case of an honest prover, `(transcript_msm_output_x, transcript_msm_output_y)` is the value of the
      *        just-completed MSM + `OFFSET` (as this is what the MSM table computes with to avoid branch logic.)
-     *
-     *        in `transcript_msm_output_x, transcript_msm_output_y`, for a given multi-scalar multiplication starting at
-     *        `transcript_pc` and has size `transcript_msm_count`.
-     * @note  In the case of an honest prover, `(transcript_msm_output_x, transcript_msm_output_y)` is the value of the
-     *        just-completed MSM + `OFFSET` (as this is what the MSM table computes with to avoid branch logic.)
-     *
      */
     {
         const auto& transcript_pc_shift = View(in.transcript_pc_shift);
@@ -575,11 +567,12 @@ Accumulator ECCVMSetRelationImpl<FF>::compute_grand_product_denominator(const Al
         // do not add to count if point at infinity!
         auto full_msm_count =
             transcript_msm_count + transcript_mul * ((-z1_zero + 1) + (-z2_zero + 1)) * (-base_infinity + 1);
-        // msm_result_read = degree 2
+        // msm_result_read = degree 3 (dominated by full_msm_count which is degree 3)
         auto msm_result_read = transcript_pc_shift + transcript_msm_x * beta + transcript_msm_y * beta_sqr +
                                full_msm_count * beta_cube + third_term_tag;
+        // after gating by transcript_msm_transition (degree 1): degree 1 * degree 3 + degree 1 = degree 4
         msm_result_read = transcript_msm_transition * (msm_result_read + gamma) + (-transcript_msm_transition + 1);
-        denominator *= msm_result_read; // degree-28
+        denominator *= msm_result_read; // degree-26 (cumulative: 22 + 4)
     }
     return denominator;
 }
@@ -605,10 +598,10 @@ void ECCVMSetRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulator
     using View = typename Accumulator::View;
     using ShortView = typename std::tuple_element_t<1, ContainerOverSubrelations>::View;
 
-    // degree-15 (8 slices + skew + delta + second term + third term)
+    // numerator degree = 16 (8 slice fingerprints + skew + delta + second term + third term)
     Accumulator numerator_evaluation = compute_grand_product_numerator<Accumulator>(in, params);
 
-    // degree-27 (8 add-gated tuples + second term + third term)
+    // denominator degree = 26 (16 from 8 add-gated tuples + 6 from second term + 4 from third term)
     Accumulator denominator_evaluation = compute_grand_product_denominator<Accumulator>(in, params);
 
     const auto& lagrange_first = View(in.lagrange_first);
@@ -619,7 +612,7 @@ void ECCVMSetRelationImpl<FF>::accumulate(ContainerOverSubrelations& accumulator
     const auto& z_perm_shift = View(in.z_perm_shift);
     const auto& z_perm_shift_short = ShortView(in.z_perm_shift);
 
-    // degree-28
+    // full expression degree = max(1+16, 1+26) = 27; partial length upper bound = 29
     std::get<0>(accumulator) +=
         ((z_perm + lagrange_first) * numerator_evaluation - (z_perm_shift + lagrange_last) * denominator_evaluation) *
         scaling_factor;

From 913c4eb113a82cbe1dca40d7f861148dbb63cc18 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Wed, 18 Mar 2026 13:32:32 +0000
Subject: [PATCH 22/24] chore(eccvm): make MaxCapacityPassing test compute max
 apps from LOG_N

Instead of hardcoding 17 apps, compute the max number of app circuits
that fit in the ECCVM based on CONST_ECCVM_LOG_N. Each app adds ~1104
ECCVM rows with ~1494 base overhead. At LOG_N=15: 28 apps; LOG_N=14: 13.
---
 .../cpp/src/barretenberg/chonk/chonk.test.cpp | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/chonk/chonk.test.cpp b/barretenberg/cpp/src/barretenberg/chonk/chonk.test.cpp
index 3ec89c9fdf1e..839b54d4970d 100644
--- a/barretenberg/cpp/src/barretenberg/chonk/chonk.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/chonk/chonk.test.cpp
@@ -398,14 +398,31 @@ TEST_F(ChonkTests, VKIndependenceFromCircuitSize)
 };
 
 /**
- * @brief Test to establish the "max" number of apps that can be accumulated due to limitations on the ECCVM size
- *
+ * @brief Test to establish the "max" number of apps that can be accumulated due to limitations on the ECCVM size.
+ * @details With 8-wide layout (ADDITIONS_PER_ROW = 8, WNAF_DIGITS_PER_ROW = 8), each app adds ~1104 ECCVM rows with a
+ * base overhead of ~1494 rows. At CONST_ECCVM_LOG_N = 15 (32768 rows): max apps = floor((32768 - 4 - 1494) / 1104) =
+ * 28. At CONST_ECCVM_LOG_N = 14 (16384 rows): max apps = 13.
  */
 HEAVY_TEST(ChonkKernelCapacity, MaxCapacityPassing)
 {
     bb::srs::init_file_crs_factory(bb::srs::bb_crs_path());
 
-    const size_t NUM_APP_CIRCUITS = 17;
+    // Each app adds ~1104 ECCVM rows; base overhead ~1494 rows.
+    // LOG_N=15: floor((32768 - 4 - 1494) / 1104) = 28
+    // LOG_N=14: floor((16384 - 4 - 1494) / 1104) = 13
+    constexpr size_t ECCVM_ROWS_PER_APP = 1104;
+    constexpr size_t ECCVM_BASE_ROWS = 1494;
+    constexpr size_t ECCVM_FIXED_SIZE = 1UL << CONST_ECCVM_LOG_N;
+    constexpr size_t MAX_USABLE_ROWS = ECCVM_FIXED_SIZE - NUM_DISABLED_ROWS_IN_SUMCHECK;
+    const size_t NUM_APP_CIRCUITS = (MAX_USABLE_ROWS - ECCVM_BASE_ROWS) / ECCVM_ROWS_PER_APP;
+
+    info("MaxCapacityPassing: LOG_N=",
+         CONST_ECCVM_LOG_N,
+         ", ECCVM_FIXED_SIZE=",
+         ECCVM_FIXED_SIZE,
+         ", max apps=",
+         NUM_APP_CIRCUITS);
+
     auto [proof, vk] = ChonkTests::accumulate_and_prove_ivc(NUM_APP_CIRCUITS);
 
     bool verified = ChonkTests::verify_chonk(proof, vk);

From f19f2852cdd5ec932fbc23752d1f746881492365 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Wed, 18 Mar 2026 13:53:21 +0000
Subject: [PATCH 23/24] =?UTF-8?q?chore(eccvm):=20add=20Step=203=20to=20des?=
 =?UTF-8?q?ign=20doc=20=E2=80=94=20double=20the=20doublings=20per=20row?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Plan to pack 2 doubling rounds into 1 MSM row (DOUBLINGS_PER_ROW 4→8),
cutting doubling rows from 31 to 16 per MSM. Reuses lambda5..8 on
doubling rows (free since q_add/q_double are mutually exclusive).
No new columns needed. MSM formula: 33*ceil(m/8)+16 (was +31).
---
 .../barretenberg/eccvm/WIDE_SHORT_DESIGN.md   | 99 ++++++++++++++++++-
 1 file changed, 94 insertions(+), 5 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md
index cc45cc9a5e46..6f44b45a8884 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md
+++ b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md
@@ -15,6 +15,8 @@ Here, "split" refers to the option of splitting up the multiset-equality check i
 | NUM_WIRES | 85 | 121 (+36) | 123 (+38) |
 | Precompute rows/scalar | 8 | 4 | 4 |
 | MSM ADD rows/digit | ⌈m/4⌉ | ⌈m/8⌉ | ⌈m/8⌉ |
+| MSM doubling rows | 31 | 16 (with Step 3) | 16 |
+| MSM row formula | `33·⌈m/4⌉+31` | `33·⌈m/8⌉+16` | `33·⌈m/8⌉+16` |
 | MAX_PARTIAL_RELATION_LENGTH | 22 | 29 | 17 |
 | ECCVM proof size (Fr) | 608 | 756 (+148) | ~764 (+156) |
 
@@ -72,7 +74,7 @@ Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows
 #### 1d. Relations
 
 **`ecc_msm_relation_impl.hpp`:**
-- Extend addition chain 4→8, skew chain 4→8. Doubling chain **unchanged** (4, lambda1–4).
+- Extend addition chain 4→8, skew chain 4→8. Doubling chain unchanged in Step 1 (4, lambda1–4); widened to 8 in Step 3.
 - 4 new add slope subrelations + 4 skew slope subrelations (separate to prevent cancellation)
 - Extend collision checks, slice-zero, addition continuity, count update to 8
 - Cross-row: `(-add8 + 1) * add1_shift`. Max partial length: 8→**12**.
@@ -120,14 +122,101 @@ Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows
 - Regenerate VKs: `./test_chonk_standalone_vks_havent_changed.sh --update_inputs`
 - Recursive verifier: relation changes auto-propagate via templates. Flavor entity changes (new columns) need manual mirroring in `stdlib/eccvm_verifier/`
 
-### Step 3: Test and measure
+### Step 3: Double the doublings per row (DOUBLINGS_PER_ROW 4→8)
 
-- `eccvm_tests` after widening
-- `chonk_tests`, `goblin_tests` after full integration
+Pack 2 doubling rounds into 1 row, cutting doubling rows from 31 to 16 per MSM. This requires **no new columns**: on doubling rows, `lambda5..lambda8` are unused (they're only for addition slots 5-8 on add rows), and `q_add`/`q_double`/`q_skew` are mutually exclusive. So we reuse `lambda5..lambda8` for the second set of 4 doublings on doubling rows.
+
+**Why this matters:** Without this, the 8-wide change only achieves ~1.65x capacity (28 apps) instead of the expected 2x. The 31 doubling rows per MSM are a fixed cost that doesn't benefit from the additions widening. With this fix, the MSM row formula changes:
+
+| | Old (4-wide) | After Step 1 (8-wide, 4 dbl/row) | After Step 3 (8-wide, 8 dbl/row) |
+|--|:--:|:--:|:--:|
+| MSM rows | `33·⌈m/4⌉ + 31` | `33·⌈m/8⌉ + 31` | `33·⌈m/8⌉ + 16` |
+| Max apps (LOG_N=15) | 17 | 28 | ~30 |
+
+#### 3a. Constants
+
+**`eccvm_builder_types.hpp`:**
+```cpp
+DOUBLINGS_PER_ROW = 2 * NUM_WNAF_DIGIT_BITS; // was NUM_WNAF_DIGIT_BITS (4), now 8
+```
+
+#### 3b. Builder (`msm_builder.hpp`)
+
+The doubling loop (currently iterating `DOUBLINGS_PER_ROW = 4` times per doubling row) now iterates 8 times. Each doubling row performs 8 point doublings (= 2 rounds of 4 doublings each = multiply accumulator by 2^8 = 256).
+
+- The doubling row generation loop at line ~346 already uses `DOUBLINGS_PER_ROW` — it will auto-adjust to 8.
+- The number of doubling rows changes: currently `NUM_WNAF_DIGITS_PER_SCALAR - 1 = 31` rows. With 2 rounds per row: `ceil(31/2) = 16` rows. But 31 is odd, so the last doubling row only does 4 doublings (1 round), not 8. Must handle this: either pad to 32 rounds (adding an extra no-op doubling at the start), or track a "half-doubling" flag for the last row.
+
+**Simplest approach:** Keep 31 rounds, emit `ceil(31/2) = 16` doubling rows. The first 15 rows each do 8 doublings (2 rounds). The last row does 4 doublings (1 round) with `lambda5..8` unused/zeroed. The relation must handle this — use the existing `add_state[i].point` slots for "is this doubling slot active" or simply check if `point_idx < actual_doublings_this_row`.
+
+Actually, even simpler: the relation doesn't need to know whether a doubling slot is "active" — the doubling chain is purely sequential. If we always do 8 doublings, the last doubling row would do an extra 4 doublings that shouldn't happen. So we need a selector or convention:
+
+**Option A (recommended):** Change from 31 to 30 doubling rounds by adjusting the Straus algorithm: use `NUM_WNAF_DIGITS_PER_SCALAR = 32` digit slots but start the MSM at digit 31 instead of digit 32. This makes the number of inter-digit doublings 30 (even), giving exactly 15 doubling rows with 8 doublings each. This requires a small tweak to the scalar decomposition — the leading digit is constrained to be in a smaller range (no change to security, just a tighter range check on the most significant digit).
+
+**Option B:** Keep 31 doubling rounds, emit 16 rows. The last row uses only 4 doublings (lambda1..4). Gate the second set of 4 doublings with a new boolean column `q_double_second` (or reuse a spare signal, e.g. the last doubling row has `msm_round` that distinguishes it). Alternatively, the relation just checks: if `round == 0` (first round after the leading digit), only 4 doublings; otherwise, 8.
+
+**Option C (simplest):** Add one extra dummy doubling round (32 total inter-digit gaps by starting from an identity-like state), making it 32 rounds = 16 rows × 8 doublings. The extra doubling at the end is a no-op since the skew round follows.
+
+Recommend **Option B** — it's the most straightforward and doesn't change the scalar decomposition. The relation already has `q_double` and `round` available. On a "half" doubling row, constrain `acc_x_shift = x_d4` (after 4 doublings) instead of `acc_x_shift = x_d8` (after 8). The condition is: this is the last doubling row, i.e., the row where `round` transitions from digit 0 to the skew round. In practice, every other doubling row can be detected by checking if the *next* doubling row follows (via `q_double_shift`) or if an add/skew row follows.
+
+#### 3c. Relation (`ecc_msm_relation_impl.hpp`)
+
+Currently the doubling chain does:
+```
+[x_d1, y_d1] = dbl(acc_x, acc_y, lambda1)
+[x_d2, y_d2] = dbl(x_d1, y_d1, lambda2)
+[x_d3, y_d3] = dbl(x_d2, y_d2, lambda3)
+[x_d4, y_d4] = dbl(x_d3, y_d3, lambda4)
+constrain: acc_x_shift = x_d4, acc_y_shift = y_d4
+```
+
+Extend to:
+```
+[x_d5, y_d5] = dbl(x_d4, y_d4, lambda5)
+[x_d6, y_d6] = dbl(x_d5, y_d5, lambda6)
+[x_d7, y_d7] = dbl(x_d6, y_d6, lambda7)
+[x_d8, y_d8] = dbl(x_d7, y_d7, lambda8)
+```
+
+For a "full" doubling row (2 rounds): `acc_shift = (x_d8, y_d8)`
+For a "half" doubling row (1 round, last one): `acc_shift = (x_d4, y_d4)`
+
+The output constraint becomes:
+```
+q_double * q_double_shift * (acc_x_shift - x_d8) = 0  // full: next row is also double
+q_double * (-q_double_shift + 1) * (acc_x_shift - x_d4) = 0  // half: next row is NOT double
+```
+(Same for y.) This adds 2 subrelations and replaces the existing 2 output subrelations (indices 10, 11). Max degree: `q_double * q_double_shift * (acc_x_shift - x_d8)` = degree 1+1+1 = 3. No increase to MSM relation max partial length (still 12).
+
+New doubling slope subrelations for `lambda5..8`: 4 new subrelations (same structure as existing `double_slope_relation1..4`). Total MSM subrelations: 67 + 4 + 2 = ~73. (The +2 is for splitting the output constraint into full/half cases; the original 2 are replaced.)
+
+#### 3d. Row tracker (`eccvm_row_tracker.hpp`)
+
+Update `num_eccvm_msm_rows`:
+```cpp
+const size_t num_double_rounds = eccvm::NUM_WNAF_DIGITS_PER_SCALAR - 1; // 31
+const size_t num_double_rows = (num_double_rounds + 1) / 2; // ceil(31/2) = 16
+```
+
+#### 3e. Capacity impact
+
+With `DOUBLINGS_PER_ROW = 8`:
+- MSM rows per MSM: `33 * ceil(m/8) + 16` (was `33 * ceil(m/8) + 31`)
+- Per-app saving: ~15 fewer doubling rows per MSM × ~2 MSMs per app ≈ ~30 rows/app
+- Expected max apps at LOG_N=15: ~30 (up from 28, closer to the theoretical 2x of 34)
+
+#### 3f. No new columns needed
+
+This is key: `lambda5..lambda8` already exist in the flavor for additions 5-8. On doubling rows (`q_double = 1`), additions are inactive (`q_add = 0`), so `lambda5..8` are free to be repurposed for doublings 5-8. The relation just needs to read them in both the addition and doubling sections, gated by the respective selectors.
+
+### Step 4: Test and measure
+
+- `eccvm_tests` after doubling widening
+- `chonk_tests` MaxCapacityPassing — verify increased capacity
 - Measure actual workload row counts → decide if `CONST_ECCVM_LOG_N` can drop 15→14
 - If yes: update `constants.hpp`, cascade to Noir/TS
 
-### Step 4 (optional): Grand product split
+### Step 5 (optional): Grand product split
 
 Split the single `ECCVMSetRelation` grand product into 3 independent ones to drop `MAX_PARTIAL_RELATION_LENGTH` from 29→17. Only worth doing if degree 29 causes measurable performance issues.
 

From fcd3c7660d5a520d82eeaac6aba07f658c491114 Mon Sep 17 00:00:00 2001
From: notnotraju <raju@aztec-labs.com>
Date: Wed, 18 Mar 2026 14:08:36 +0000
Subject: [PATCH 24/24] chore(eccvm): document why doubling widening is
 infeasible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 31 doubling rows per MSM cannot be halved because each occurs
between consecutive digit-slot ADD phases in the Straus algorithm.
Combining two DBL rounds into one row would require 8-bit digits
(point table size 256), which is impractical. The 8-wide change
achieves ~1.65x capacity (17→28 apps), not 2x.
---
 .../barretenberg/eccvm/WIDE_SHORT_DESIGN.md   | 99 +++----------------
 1 file changed, 14 insertions(+), 85 deletions(-)

diff --git a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md
index 6f44b45a8884..a2fff219be0c 100644
--- a/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md
+++ b/barretenberg/cpp/src/barretenberg/eccvm/WIDE_SHORT_DESIGN.md
@@ -15,8 +15,8 @@ Here, "split" refers to the option of splitting up the multiset-equality check i
 | NUM_WIRES | 85 | 121 (+36) | 123 (+38) |
 | Precompute rows/scalar | 8 | 4 | 4 |
 | MSM ADD rows/digit | ⌈m/4⌉ | ⌈m/8⌉ | ⌈m/8⌉ |
-| MSM doubling rows | 31 | 16 (with Step 3) | 16 |
-| MSM row formula | `33·⌈m/4⌉+31` | `33·⌈m/8⌉+16` | `33·⌈m/8⌉+16` |
+| MSM doubling rows | 31 | 31 (unchanged) | 31 |
+| MSM row formula | `33·⌈m/4⌉+31` | `33·⌈m/8⌉+31` | `33·⌈m/8⌉+31` |
 | MAX_PARTIAL_RELATION_LENGTH | 22 | 29 | 17 |
 | ECCVM proof size (Fr) | 608 | 756 (+148) | ~764 (+156) |
 
@@ -74,7 +74,7 @@ Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows
 #### 1d. Relations
 
 **`ecc_msm_relation_impl.hpp`:**
-- Extend addition chain 4→8, skew chain 4→8. Doubling chain unchanged in Step 1 (4, lambda1–4); widened to 8 in Step 3.
+- Extend addition chain 4→8, skew chain 4→8. Doubling chain **unchanged** (4, lambda1–4).
 - 4 new add slope subrelations + 4 skew slope subrelations (separate to prevent cancellation)
 - Extend collision checks, slice-zero, addition continuity, count update to 8
 - Cross-row: `(-add8 + 1) * add1_shift`. Max partial length: 8→**12**.
@@ -122,101 +122,30 @@ Wire up in `eccvm_circuit_builder.hpp` `ProverPolynomials` constructor (follows
 - Regenerate VKs: `./test_chonk_standalone_vks_havent_changed.sh --update_inputs`
 - Recursive verifier: relation changes auto-propagate via templates. Flavor entity changes (new columns) need manual mirroring in `stdlib/eccvm_verifier/`
 
-### Step 3: Double the doublings per row (DOUBLINGS_PER_ROW 4→8)
+### Step 3 (infeasible): Double the doublings per row
 
-Pack 2 doubling rounds into 1 row, cutting doubling rows from 31 to 16 per MSM. This requires **no new columns**: on doubling rows, `lambda5..lambda8` are unused (they're only for addition slots 5-8 on add rows), and `q_add`/`q_double`/`q_skew` are mutually exclusive. So we reuse `lambda5..lambda8` for the second set of 4 doublings on doubling rows.
+~~Pack 2 doubling rounds into 1 row, cutting doubling rows from 31 to 16.~~
 
-**Why this matters:** Without this, the 8-wide change only achieves ~1.65x capacity (28 apps) instead of the expected 2x. The 31 doubling rows per MSM are a fixed cost that doesn't benefit from the additions widening. With this fix, the MSM row formula changes:
+**This doesn't work.** The 31 doubling rounds are structurally tied to the Straus algorithm: each one occurs between two consecutive digit-slot ADD phases. The sequence is:
 
-| | Old (4-wide) | After Step 1 (8-wide, 4 dbl/row) | After Step 3 (8-wide, 8 dbl/row) |
-|--|:--:|:--:|:--:|
-| MSM rows | `33·⌈m/4⌉ + 31` | `33·⌈m/8⌉ + 31` | `33·⌈m/8⌉ + 16` |
-| Max apps (LOG_N=15) | 17 | 28 | ~30 |
-
-#### 3a. Constants
-
-**`eccvm_builder_types.hpp`:**
-```cpp
-DOUBLINGS_PER_ROW = 2 * NUM_WNAF_DIGIT_BITS; // was NUM_WNAF_DIGIT_BITS (4), now 8
 ```
-
-#### 3b. Builder (`msm_builder.hpp`)
-
-The doubling loop (currently iterating `DOUBLINGS_PER_ROW = 4` times per doubling row) now iterates 8 times. Each doubling row performs 8 point doublings (= 2 rounds of 4 doublings each = multiply accumulator by 2^8 = 256).
-
-- The doubling row generation loop at line ~346 already uses `DOUBLINGS_PER_ROW` — it will auto-adjust to 8.
-- The number of doubling rows changes: currently `NUM_WNAF_DIGITS_PER_SCALAR - 1 = 31` rows. With 2 rounds per row: `ceil(31/2) = 16` rows. But 31 is odd, so the last doubling row only does 4 doublings (1 round), not 8. Must handle this: either pad to 32 rounds (adding an extra no-op doubling at the start), or track a "half-doubling" flag for the last row.
-
-**Simplest approach:** Keep 31 rounds, emit `ceil(31/2) = 16` doubling rows. The first 15 rows each do 8 doublings (2 rounds). The last row does 4 doublings (1 round) with `lambda5..8` unused/zeroed. The relation must handle this — use the existing `add_state[i].point` slots for "is this doubling slot active" or simply check if `point_idx < actual_doublings_this_row`.
-
-Actually, even simpler: the relation doesn't need to know whether a doubling slot is "active" — the doubling chain is purely sequential. If we always do 8 doublings, the last doubling row would do an extra 4 doublings that shouldn't happen. So we need a selector or convention:
-
-**Option A (recommended):** Change from 31 to 30 doubling rounds by adjusting the Straus algorithm: use `NUM_WNAF_DIGITS_PER_SCALAR = 32` digit slots but start the MSM at digit 31 instead of digit 32. This makes the number of inter-digit doublings 30 (even), giving exactly 15 doubling rows with 8 doublings each. This requires a small tweak to the scalar decomposition — the leading digit is constrained to be in a smaller range (no change to security, just a tighter range check on the most significant digit).
-
-**Option B:** Keep 31 doubling rounds, emit 16 rows. The last row uses only 4 doublings (lambda1..4). Gate the second set of 4 doublings with a new boolean column `q_double_second` (or reuse a spare signal, e.g. the last doubling row has `msm_round` that distinguishes it). Alternatively, the relation just checks: if `round == 0` (first round after the leading digit), only 4 doublings; otherwise, 8.
-
-**Option C (simplest):** Add one extra dummy doubling round (32 total inter-digit gaps by starting from an identity-like state), making it 32 rounds = 16 rows × 8 doublings. The extra doubling at the end is a no-op since the skew round follows.
-
-Recommend **Option B** — it's the most straightforward and doesn't change the scalar decomposition. The relation already has `q_double` and `round` available. On a "half" doubling row, constrain `acc_x_shift = x_d4` (after 4 doublings) instead of `acc_x_shift = x_d8` (after 8). The condition is: this is the last doubling row, i.e., the row where `round` transitions from digit 0 to the skew round. In practice, every other doubling row can be detected by checking if the *next* doubling row follows (via `q_double_shift`) or if an add/skew row follows.
-
-#### 3c. Relation (`ecc_msm_relation_impl.hpp`)
-
-Currently the doubling chain does:
+ADD(d0) → DBL(×16) → ADD(d1) → DBL(×16) → ADD(d2) → ... → ADD(d31) → SKEW
 ```
-[x_d1, y_d1] = dbl(acc_x, acc_y, lambda1)
-[x_d2, y_d2] = dbl(x_d1, y_d1, lambda2)
-[x_d3, y_d3] = dbl(x_d2, y_d2, lambda3)
-[x_d4, y_d4] = dbl(x_d3, y_d3, lambda4)
-constrain: acc_x_shift = x_d4, acc_y_shift = y_d4
-```
-
-Extend to:
-```
-[x_d5, y_d5] = dbl(x_d4, y_d4, lambda5)
-[x_d6, y_d6] = dbl(x_d5, y_d5, lambda6)
-[x_d7, y_d7] = dbl(x_d6, y_d6, lambda7)
-[x_d8, y_d8] = dbl(x_d7, y_d7, lambda8)
-```
-
-For a "full" doubling row (2 rounds): `acc_shift = (x_d8, y_d8)`
-For a "half" doubling row (1 round, last one): `acc_shift = (x_d4, y_d4)`
-
-The output constraint becomes:
-```
-q_double * q_double_shift * (acc_x_shift - x_d8) = 0  // full: next row is also double
-q_double * (-q_double_shift + 1) * (acc_x_shift - x_d4) = 0  // half: next row is NOT double
-```
-(Same for y.) This adds 2 subrelations and replaces the existing 2 output subrelations (indices 10, 11). Max degree: `q_double * q_double_shift * (acc_x_shift - x_d8)` = degree 1+1+1 = 3. No increase to MSM relation max partial length (still 12).
-
-New doubling slope subrelations for `lambda5..8`: 4 new subrelations (same structure as existing `double_slope_relation1..4`). Total MSM subrelations: 67 + 4 + 2 = ~73. (The +2 is for splitting the output constraint into full/half cases; the original 2 are replaced.)
-
-#### 3d. Row tracker (`eccvm_row_tracker.hpp`)
-
-Update `num_eccvm_msm_rows`:
-```cpp
-const size_t num_double_rounds = eccvm::NUM_WNAF_DIGITS_PER_SCALAR - 1; // 31
-const size_t num_double_rows = (num_double_rounds + 1) / 2; // ceil(31/2) = 16
-```
-
-#### 3e. Capacity impact
 
-With `DOUBLINGS_PER_ROW = 8`:
-- MSM rows per MSM: `33 * ceil(m/8) + 16` (was `33 * ceil(m/8) + 31`)
-- Per-app saving: ~15 fewer doubling rows per MSM × ~2 MSMs per app ≈ ~30 rows/app
-- Expected max apps at LOG_N=15: ~30 (up from 28, closer to the theoretical 2x of 34)
+Each DBL must happen before the next ADD phase, because the additions for digit `d_{j+1}` operate on the accumulator after it has been shifted by ×16. Two consecutive DBL rounds therefore cannot be merged into one row without also removing the ADD phase that sits between them.
 
-#### 3f. No new columns needed
+To remove the intervening ADD, you'd need to process each digit pair `(d_j, d_{j+1})` in a single ADD phase and do ×256 between phases. But that is the same as changing the scalar decomposition from 4-bit to 8-bit digits, which requires a point table of size 256 per point (currently 16). A 16× larger table per point is impractical.
 
-This is key: `lambda5..lambda8` already exist in the flavor for additions 5-8. On doubling rows (`q_double = 1`), additions are inactive (`q_add = 0`), so `lambda5..8` are free to be repurposed for doublings 5-8. The relation just needs to read them in both the addition and doubling sections, gated by the respective selectors.
+**Bottom line:** The 31 doubling rows per MSM are an inherent cost of the 4-bit wNAF structure. The 8-wide change achieves ~1.65x capacity improvement (17→28 apps at LOG_N=15), not the theoretical 2x, because doubling rows don't benefit from addition widening.
 
-### Step 4: Test and measure
+### Step 3 (actual): Test and measure
 
-- `eccvm_tests` after doubling widening
-- `chonk_tests` MaxCapacityPassing — verify increased capacity
+- `eccvm_tests` after widening
+- `chonk_tests`, `goblin_tests` after full integration
 - Measure actual workload row counts → decide if `CONST_ECCVM_LOG_N` can drop 15→14
 - If yes: update `constants.hpp`, cascade to Noir/TS
 
-### Step 5 (optional): Grand product split
+### Step 4 (optional): Grand product split
 
 Split the single `ECCVMSetRelation` grand product into 3 independent ones to drop `MAX_PARTIAL_RELATION_LENGTH` from 29→17. Only worth doing if degree 29 causes measurable performance issues.