From 8cbdf189a16a65502614bfc968634eafe428fc61 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Tue, 9 Jun 2026 22:53:52 -0300 Subject: [PATCH 01/10] fix(e2e): emulate self-advancing L1 in HA suite to deflake distribute-work test The HA compose suite runs anvil without interval mining, so L1 chain time advances only when blocks are mined on demand, while the sequencers' test clock free-runs. The one-shot per-iteration mine(8) overshot the test clock by ~1.5 slots, making every iteration's first propose race its slot boundary on L1, and once a race was lost the suite deadlocked: the archiver-sync gate deadline runs on the test clock while nothing advances L1 during waitForTx. Replace the blind mine with a gap-closing one that never overshoots the test clock, make the per-iteration clock alignment forward-only so it cannot rewind below L1 time, and nudge L1 chain time plus archiver sync once per L1 slot while awaiting trigger txs, emulating a self-advancing L1. --- .../src/composed/ha/e2e_ha_full.test.ts | 121 +++++++++++------- 1 file changed, 76 insertions(+), 45 deletions(-) diff --git a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts index 6c3a52616b8d..a88f640244d7 100644 --- a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts +++ b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts @@ -21,7 +21,7 @@ import { Buffer32 } from '@aztec/foundation/buffer'; import { SecretValue } from '@aztec/foundation/config'; import { withLoggerBindings } from '@aztec/foundation/log/server'; import { retryUntil } from '@aztec/foundation/retry'; -import { sleep } from '@aztec/foundation/sleep'; +import { InterruptibleSleep, sleep } from '@aztec/foundation/sleep'; import type { TestDateProvider } from '@aztec/foundation/timer'; import { GovernanceProposerAbi } from '@aztec/l1-artifacts/GovernanceProposerAbi'; import { TestContract } from '@aztec/noir-test-contracts.js/Test'; @@ -122,6 
+122,7 @@ async function waitForTriggerTx(node: AztecNode, txHash: TxHash): Promise { @@ -130,22 +131,15 @@ async function setDateProviderToNextBlockSlot( throw new Error('Could not load latest block for HA trigger tx'); } - const nextBlockTimestamp = latestBlock.header.globalVariables.timestamp + BigInt(aztecSlotDuration); - dateProvider.setTime(Number(nextBlockTimestamp) * 1000); -} - -async function sendTriggerTx( - wallet: TestWallet, - node: AztecNode, - testContract: TestContract, - from: AztecAddress, - syncL1Data: () => Promise, - alignTimeToNextBlockSlot: () => Promise, -): Promise { - await alignTimeToNextBlockSlot(); - const txHash = await submitTriggerTx(wallet, testContract, from); - await syncL1Data(); - return await waitForTriggerTx(node, txHash); + // Jump to the next L2 slot boundary that also covers the L1 chain clock. The compose anvil mines + // blocks only on demand (no interval mining), so its chain timestamp moves independently of the test + // clock and may sit several slots past the latest L2 block. Aligning blindly to `latest block + 1 + // slot` can rewind the test clock below L1 time, making sequencers (which schedule on the test + // clock) build proposals for slots that have already expired on L1. + const latestBlockTimestamp = Number(latestBlock.header.globalVariables.timestamp); + const nextL1Timestamp = await ethCheatCodes.nextBlockTimestamp(); + const slotsAhead = Math.max(1, Math.ceil((nextL1Timestamp - latestBlockTimestamp) / aztecSlotDuration)); + dateProvider.setTime((latestBlockTimestamp + slotsAhead * aztecSlotDuration) * 1000); } // TODO: re-enable once HA block building is reconciled with the always-enforced timetable (#23821). @@ -200,9 +194,25 @@ describe.skip('HA Full Setup', () => { logger.info('All HA peer sequencers started'); }; + /** + * Mines as many L1 blocks as needed to bring anvil's chain clock up to the test clock, never past + * it. 
The compose anvil runs in automine with a +ethereumSlotDuration timestamp interval per block + * and no interval mining, so L1 chain time stands still unless a tx lands or we mine here — this is + * the suite's only L1 heartbeat. Overshooting the test clock is as harmful as falling behind: + * sequencers schedule proposals on the test clock, and a proposal mined after its target slot has + * expired on L1 is silently dropped, pruning the pending block it carried. + */ + const advanceL1ChainTimeToTestClock = async () => { + const nextL1Timestamp = await ethCheatCodes.nextBlockTimestamp(); + const testClockNow = Math.floor(dateProvider.now() / 1000); + const blocksToMine = Math.floor((testClockNow - nextL1Timestamp) / config.ethereumSlotDuration) + 1; + if (blocksToMine > 0) { + await ethCheatCodes.mine(blocksToMine); + } + }; + const syncHAL1Data = async () => { - const l1BlocksPerSyncNudge = Math.ceil((config.aztecSlotDuration * 2) / config.ethereumSlotDuration); - await ethCheatCodes.mine(l1BlocksPerSyncNudge); + await advanceL1ChainTimeToTestClock(); await Promise.all( haNodeServices.map(async service => { try { @@ -220,7 +230,49 @@ describe.skip('HA Full Setup', () => { }; const alignDateProviderToNextBlockSlot = async () => { - await setDateProviderToNextBlockSlot(aztecNode, dateProvider, config.aztecSlotDuration); + await setDateProviderToNextBlockSlot(aztecNode, ethCheatCodes, dateProvider, config.aztecSlotDuration); + }; + + /** + * Waits for the trigger tx to be checkpointed while emulating a self-advancing L1, nudging chain + * time and archiver sync once per L1 slot of wall time. Without the heartbeat, L1 time freezes + * while the test thread is blocked here (nothing mines on the on-demand compose anvil), the + * proposers' archiver-sync gate — whose deadline runs on the free-running test clock — can then + * never pass, and a single missed slot becomes an unrecoverable stall until the jest timeout. 
+ */ + const waitForTriggerTxWithL1Heartbeat = async (txHash: TxHash): Promise => { + let waiting = true; + const heartbeatSleep = new InterruptibleSleep(); + const heartbeat = (async () => { + while (waiting) { + await heartbeatSleep.sleep(config.ethereumSlotDuration * 1000); + if (!waiting) { + break; + } + try { + await syncHAL1Data(); + } catch (error) { + logger.debug('Error advancing L1 time while awaiting trigger tx', { + error: error instanceof Error ? error.message : String(error), + }); + } + } + })(); + + try { + return await waitForTriggerTx(aztecNode, txHash); + } finally { + waiting = false; + heartbeatSleep.interrupt(); + await heartbeat; + } + }; + + const sendTriggerTx = async (): Promise => { + await alignDateProviderToNextBlockSlot(); + const txHash = await submitTriggerTx(wallet, testContract, ownerAddress); + await syncHAL1Data(); + return await waitForTriggerTxWithL1Heartbeat(txHash); }; const stopHANode = async (nodeIndex: number) => { @@ -510,7 +562,7 @@ describe.skip('HA Full Setup', () => { dateProvider.setTime(dateProvider.now() - config.aztecSlotDuration * 1000); await startHASequencers(); await syncHAL1Data(); - const receipt = await waitForTriggerTx(aztecNode, txHash); + const receipt = await waitForTriggerTxWithL1Heartbeat(txHash); expect(receipt.blockNumber).toBeDefined(); logger.info(`Trigger tx checkpointed in block ${receipt.blockNumber}`); @@ -625,14 +677,7 @@ describe.skip('HA Full Setup', () => { // Send a transaction to trigger block building which will also trigger voting logger.info('Sending transaction to trigger block building...'); - const receipt = await sendTriggerTx( - wallet, - aztecNode, - testContract, - ownerAddress, - syncHAL1Data, - alignDateProviderToNextBlockSlot, - ); + const receipt = await sendTriggerTx(); expect(receipt.blockNumber).toBeDefined(); logger.info(`Transaction mined in block ${receipt.blockNumber}`); @@ -815,14 +860,7 @@ describe.skip('HA Full Setup', () => { verifyNodeAttesters(i, i < 3 ? 
groupB : groupA, i < 3 ? 'group B (swapped)' : 'group A (swapped)'); } - const receipt = await sendTriggerTx( - wallet, - aztecNode, - testContract, - ownerAddress, - syncHAL1Data, - alignDateProviderToNextBlockSlot, - ); + const receipt = await sendTriggerTx(); expect(receipt.blockNumber).toBeDefined(); const [block] = await aztecNode.getBlocks(receipt.blockNumber!, 1, { includeL1PublishInfo: true, @@ -865,14 +903,7 @@ describe.skip('HA Full Setup', () => { logger.info(`\n=== Producing block ${i + 1}/${blockCount} ===`); logger.info(`Active nodes: ${haNodeServices.length - killedNodes.length}/${NODE_COUNT}`); - const receipt = await sendTriggerTx( - wallet, - aztecNode, - testContract, - ownerAddress, - syncHAL1Data, - alignDateProviderToNextBlockSlot, - ); + const receipt = await sendTriggerTx(); expect(receipt.blockNumber).toBeDefined(); From 337e8e2cf840cbb3d6409b75101e0cea61941c48 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Tue, 9 Jun 2026 23:12:25 -0300 Subject: [PATCH 02/10] fix(e2e): keep L1 heartbeat during HA governance signal poll A governance signal lands on L1 only when its block's timestamp falls within the slot it was signed for. When the HA duty race flushes the signal standalone instead of bundled with the timestamp-aligned propose tx, the per-slot retries need L1 chain time to keep tracking the test clock; with L1 frozen during the poll, every retry signs a slot the chain never reaches and the 120s poll times out. 
--- .../src/composed/ha/e2e_ha_full.test.ts | 110 ++++++++++-------- 1 file changed, 60 insertions(+), 50 deletions(-) diff --git a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts index a88f640244d7..0deb520d7765 100644 --- a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts +++ b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts @@ -234,25 +234,26 @@ describe.skip('HA Full Setup', () => { }; /** - * Waits for the trigger tx to be checkpointed while emulating a self-advancing L1, nudging chain - * time and archiver sync once per L1 slot of wall time. Without the heartbeat, L1 time freezes - * while the test thread is blocked here (nothing mines on the on-demand compose anvil), the - * proposers' archiver-sync gate — whose deadline runs on the free-running test clock — can then - * never pass, and a single missed slot becomes an unrecoverable stall until the jest timeout. + * Runs `fn` while emulating a self-advancing L1, nudging chain time and archiver sync once per L1 + * slot of wall time. Without the heartbeat, L1 time freezes while the test thread is blocked + * (nothing mines on the on-demand compose anvil) and any retry loop scheduled on the free-running + * test clock — the proposers' archiver-sync gate, or per-slot governance signals that must mine in + * a block whose timestamp matches the signed slot — can then never succeed, turning a single missed + * slot into an unrecoverable stall until the jest timeout. 
*/ - const waitForTriggerTxWithL1Heartbeat = async (txHash: TxHash): Promise => { - let waiting = true; + const withL1Heartbeat = async (fn: () => Promise): Promise => { + let running = true; const heartbeatSleep = new InterruptibleSleep(); const heartbeat = (async () => { - while (waiting) { + while (running) { await heartbeatSleep.sleep(config.ethereumSlotDuration * 1000); - if (!waiting) { + if (!running) { break; } try { await syncHAL1Data(); } catch (error) { - logger.debug('Error advancing L1 time while awaiting trigger tx', { + logger.debug('Error advancing L1 time on heartbeat', { error: error instanceof Error ? error.message : String(error), }); } @@ -260,14 +261,17 @@ describe.skip('HA Full Setup', () => { })(); try { - return await waitForTriggerTx(aztecNode, txHash); + return await fn(); } finally { - waiting = false; + running = false; heartbeatSleep.interrupt(); await heartbeat; } }; + const waitForTriggerTxWithL1Heartbeat = (txHash: TxHash): Promise => + withL1Heartbeat(() => waitForTriggerTx(aztecNode, txHash)); + const sendTriggerTx = async (): Promise => { await alignDateProviderToNextBlockSlot(); const txHash = await submitTriggerTx(wallet, testContract, ownerAddress); @@ -719,44 +723,50 @@ describe.skip('HA Full Setup', () => { const govProposerAddr = deployL1ContractsValues.l1ContractAddresses.governanceProposerAddress.toString() as `0x${string}`; - const { l1VoteCount, lastSignalSlot, payloadWithMostSignals } = await retryUntil( - async () => { - const snapshotBlock = await deployL1ContractsValues.l1Client.getBlockNumber(); - const [roundData, l1VoteCountBig] = await Promise.all([ - deployL1ContractsValues.l1Client.readContract({ - address: govProposerAddr, - abi: GovernanceProposerAbi, - functionName: 'getRoundData', - args: [rollupAddr, round], - blockNumber: snapshotBlock, - }), - deployL1ContractsValues.l1Client.readContract({ - address: govProposerAddr, - abi: GovernanceProposerAbi, - functionName: 'signalCount', - args: [rollupAddr, 
round, mockGovernancePayload.toString() as `0x${string}`], - blockNumber: snapshotBlock, - }), - ]); - const lastSignalSlot = Number(roundData.lastSignalSlot); - const l1VoteCount = Number(l1VoteCountBig); - logger.info( - `L1 round ${round}: lastSignalSlot=${lastSignalSlot}, l1VoteCount=${l1VoteCount}, ` + - `payloadWithMostSignals=${roundData.payloadWithMostSignals} ` + - `(snapshot at L1 block ${snapshotBlock})`, - ); - if (l1VoteCount === 0) { - return undefined; - } - return { - l1VoteCount, - lastSignalSlot, - payloadWithMostSignals: roundData.payloadWithMostSignals, - }; - }, - `L1 governance round to land >= 1 signal`, - 120, - 0.5, + // Run the poll under the L1 heartbeat: a signal can only land when its L1 block's timestamp falls + // within the slot it was signed for, so if the bundled propose+signal publish lost the duty race + // (signal flushed standalone one block early), the per-slot retries need L1 chain time to keep + // tracking the test clock or they would sign slots a frozen L1 never reaches. 
+ const { l1VoteCount, lastSignalSlot, payloadWithMostSignals } = await withL1Heartbeat(() => + retryUntil( + async () => { + const snapshotBlock = await deployL1ContractsValues.l1Client.getBlockNumber(); + const [roundData, l1VoteCountBig] = await Promise.all([ + deployL1ContractsValues.l1Client.readContract({ + address: govProposerAddr, + abi: GovernanceProposerAbi, + functionName: 'getRoundData', + args: [rollupAddr, round], + blockNumber: snapshotBlock, + }), + deployL1ContractsValues.l1Client.readContract({ + address: govProposerAddr, + abi: GovernanceProposerAbi, + functionName: 'signalCount', + args: [rollupAddr, round, mockGovernancePayload.toString() as `0x${string}`], + blockNumber: snapshotBlock, + }), + ]); + const lastSignalSlot = Number(roundData.lastSignalSlot); + const l1VoteCount = Number(l1VoteCountBig); + logger.info( + `L1 round ${round}: lastSignalSlot=${lastSignalSlot}, l1VoteCount=${l1VoteCount}, ` + + `payloadWithMostSignals=${roundData.payloadWithMostSignals} ` + + `(snapshot at L1 block ${snapshotBlock})`, + ); + if (l1VoteCount === 0) { + return undefined; + } + return { + l1VoteCount, + lastSignalSlot, + payloadWithMostSignals: roundData.payloadWithMostSignals, + }; + }, + `L1 governance round to land >= 1 signal`, + 120, + 0.5, + ), ); // Outcome 1: the round leader payload is the one we configured all HA nodes to vote for. 
From d7a594b8a5dcd982cb2e9bf6bc6cf77504fa728a Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Wed, 10 Jun 2026 00:14:53 -0300 Subject: [PATCH 03/10] fix(sequencer): send vote-only bundles at the target slot start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Governance and slashing signal signatures bind the L2 slot the tx mines in, so the one-L1-slot mempool warm-up in sendRequestsAt — meant to get proposes into the first L1 block of their slot — makes a vote-only bundle eligible for inclusion in the L1 block right before its slot starts, where signature verification fails silently inside Multicall3. Environments that mine txs on arrival (anvil automine) turn that risk into a certainty. Keep the warm-up only when the bundle contains a propose, and submit vote-only bundles at the slot boundary, matching the documented intent at the call sites. --- .../src/publisher/sequencer-publisher.test.ts | 94 +++++++++++++++++++ .../src/publisher/sequencer-publisher.ts | 19 ++-- 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts index bf7b7f2fafb8..667946a7af98 100644 --- a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts +++ b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts @@ -929,4 +929,98 @@ describe('SequencerPublisher', () => { expect(governanceProposerContract.hasActiveProposalWithPayload).toHaveBeenCalledTimes(2); }); + + describe('sendRequestsAt timing', () => { + const aztecSlotDuration = 36; + const ethereumSlotDuration = 12; + const targetSlot = SlotNumber(2); + + let dateProvider: TestDateProvider; + let timingPublisher: SequencerPublisher; + let targetSlotStartMs: number; + let sleptMs: number; + + beforeEach(() => { + dateProvider = new TestDateProvider(); + // Anchor the L1 genesis on a whole second so slot 
boundaries are exact in ms. + const genesisMs = Math.ceil(dateProvider.now() / 1000) * 1000; + dateProvider.setTime(genesisMs); + targetSlotStartMs = genesisMs + Number(targetSlot) * aztecSlotDuration * 1000; + + epochCache.getL1Constants.mockReturnValue({ + ...EmptyL1RollupConstants, + l1GenesisTime: BigInt(genesisMs / 1000), + slotDuration: aztecSlotDuration, + ethereumSlotDuration, + }); + + timingPublisher = new SequencerPublisher( + { l1ChainId: 1, aztecSlotDuration, ethereumSlotDuration } as unknown as ConstructorParameters< + typeof SequencerPublisher + >[0], + { + blobClient, + rollupContract: rollup, + l1TxUtils, + epochCache, + slashingProposerContract, + governanceProposerContract, + dateProvider, + metrics: l1Metrics, + lastActions: {}, + }, + ); + + // Capture the wake-up delay instead of really sleeping, and skip the actual send. + sleptMs = 0; + jest.spyOn((timingPublisher as any).interruptibleSleep, 'sleep').mockImplementation((...args: unknown[]) => { + sleptMs = args[0] as number; + return Promise.resolve(); + }); + jest.spyOn(timingPublisher, 'sendRequests').mockResolvedValue(undefined); + }); + + const getWakeTimeMs = async () => { + const nowMs = dateProvider.now(); + await timingPublisher.sendRequestsAt(targetSlot); + return nowMs + sleptMs; + }; + + const enqueueGovernanceSignal = async () => { + const { govPayload } = mockGovernancePayload(); + expect( + await timingPublisher.enqueueGovernanceCastSignal( + govPayload, + targetSlot, + EthAddress.fromString(testHarnessAttesterAccount.address), + msg => testHarnessAttesterAccount.signTypedData(msg), + ), + ).toEqual(true); + }; + + it('waits for the target slot start before sending vote-only bundles', async () => { + // Governance/slashing signal signatures bind the slot the tx mines in, so a vote landing in the + // L1 block right before the L2 slot starts fails signature verification silently. 
Submitting at + // the slot boundary guarantees the mined block's timestamp is within the signed slot even in + // environments that mine txs on arrival (anvil automine). + await enqueueGovernanceSignal(); + const wakeTimeMs = await getWakeTimeMs(); + expect(wakeTimeMs).toBeGreaterThanOrEqual(targetSlotStartMs - 50); + expect(wakeTimeMs).toBeLessThan(targetSlotStartMs + 1000); + }); + + it('sends bundles containing a propose one L1 slot before the target slot start', async () => { + await enqueueGovernanceSignal(); + (timingPublisher as any).addRequest({ + action: 'propose', + request: { to: EthAddress.random().toString(), data: '0x' }, + lastValidL2Slot: targetSlot, + checkSuccess: () => true, + }); + const wakeTimeMs = await getWakeTimeMs(); + const expectedWakeMs = targetSlotStartMs - ethereumSlotDuration * 1000; + expect(wakeTimeMs).toBeGreaterThanOrEqual(expectedWakeMs - 50); + expect(wakeTimeMs).toBeLessThan(expectedWakeMs + 1000); + }); + }); }); diff --git a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts index 96754bfeea39..89815f14e0b4 100644 --- a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts +++ b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts @@ -617,19 +617,24 @@ export class SequencerPublisher { /* * Schedules sending all enqueued requests at (or after) the start of the given L2 slot. - * Sleeps until one L1 slot before the L2 slot boundary so the tx has a chance of being - * picked up by the first L1 block of the L2 slot. - * NB: there is a known correctness risk — being included in the L1 block right before the - * L2 slot starts would revert propose with HeaderLib__InvalidSlotNumber. + * Bundles containing a propose sleep until one L1 slot before the L2 slot boundary, so the tx is + * already in the mempool when the slot's first L1 block is built. 
NB: this warm-up carries a known + * correctness risk — being included in the L1 block right before the L2 slot starts would revert + * propose with HeaderLib__InvalidSlotNumber. + * Vote-only bundles wait for the slot boundary itself: governance/slashing signal signatures bind + * the slot the tx mines in, so a vote mined in the L1 block right before the L2 slot starts fails + * signature verification silently inside Multicall3 — and environments that mine txs on arrival + * (anvil automine) turn the early send into exactly that. * Uses InterruptibleSleep so it can be cancelled via interrupt(). */ public async sendRequestsAt(targetSlot: SlotNumber): Promise { const l1Constants = this.epochCache.getL1Constants(); // Start of the target L2 slot, in ms (getTimestampForSlot returns seconds). const startOfTargetSlotMs = Number(getTimestampForSlot(targetSlot, l1Constants)) * 1000; - // Aim to be in the mempool one L1 slot before the L2 slot starts, so we have a chance of - // being picked up by the first L1 block of the L2 slot. - const submitAfterMs = startOfTargetSlotMs - Number(this.ethereumSlotDuration) * 1000; + const hasPropose = this.requests.some(request => request.action === 'propose'); + const submitAfterMs = hasPropose + ? startOfTargetSlotMs - Number(this.ethereumSlotDuration) * 1000 + : startOfTargetSlotMs; const sleepMs = submitAfterMs - this.dateProvider.now(); if (sleepMs > 0) { this.log.debug(`Sleeping ${sleepMs}ms before sending requests`, { From cb4b0569cbefbda473ac9aacc669e5dbbc8362b6 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Wed, 10 Jun 2026 00:15:25 -0300 Subject: [PATCH 04/10] test(e2e): re-enable HA full suite The suite was skipped in #23976 while the HA block-building interaction with the always-enforced timetable (#23821) was diagnosed; the preceding commits fix that interaction. 
--- yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts index 0deb520d7765..d5acd2a8aab9 100644 --- a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts +++ b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts @@ -142,8 +142,7 @@ async function setDateProviderToNextBlockSlot( dateProvider.setTime((latestBlockTimestamp + slotsAhead * aztecSlotDuration) * 1000); } -// TODO: re-enable once HA block building is reconciled with the always-enforced timetable (#23821). -describe.skip('HA Full Setup', () => { +describe('HA Full Setup', () => { jest.setTimeout(20 * 60 * 1000); // 20 minutes let logger: Logger; From a91d246a7d544b106b6b397156e693a2bf9d6c93 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Wed, 10 Jun 2026 10:35:56 -0300 Subject: [PATCH 05/10] Revert "fix(sequencer): send vote-only bundles at the target slot start" This reverts commit c93af0b43a96afd56999c2aba2a879bc27ac4a3c (d7a594b8a5dcd982cb2e9bf6bc6cf77504fa728a in this series, PATCH 03/10).
--- .../src/publisher/sequencer-publisher.test.ts | 94 ------------------- .../src/publisher/sequencer-publisher.ts | 19 ++-- 2 files changed, 7 insertions(+), 106 deletions(-) diff --git a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts index 667946a7af98..bf7b7f2fafb8 100644 --- a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts +++ b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts @@ -929,98 +929,4 @@ describe('SequencerPublisher', () => { expect(governanceProposerContract.hasActiveProposalWithPayload).toHaveBeenCalledTimes(2); }); - - describe('sendRequestsAt timing', () => { - const aztecSlotDuration = 36; - const ethereumSlotDuration = 12; - const targetSlot = SlotNumber(2); - - let dateProvider: TestDateProvider; - let timingPublisher: SequencerPublisher; - let targetSlotStartMs: number; - let sleptMs: number; - - beforeEach(() => { - dateProvider = new TestDateProvider(); - // Anchor the L1 genesis on a whole second so slot boundaries are exact in ms. - const genesisMs = Math.ceil(dateProvider.now() / 1000) * 1000; - dateProvider.setTime(genesisMs); - targetSlotStartMs = genesisMs + Number(targetSlot) * aztecSlotDuration * 1000; - - epochCache.getL1Constants.mockReturnValue({ - ...EmptyL1RollupConstants, - l1GenesisTime: BigInt(genesisMs / 1000), - slotDuration: aztecSlotDuration, - ethereumSlotDuration, - }); - - timingPublisher = new SequencerPublisher( - { l1ChainId: 1, aztecSlotDuration, ethereumSlotDuration } as unknown as ConstructorParameters< - typeof SequencerPublisher - >[0], - { - blobClient, - rollupContract: rollup, - l1TxUtils, - epochCache, - slashingProposerContract, - governanceProposerContract, - dateProvider, - metrics: l1Metrics, - lastActions: {}, - }, - ); - - // Capture the wake-up delay instead of really sleeping, and skip the actual send. 
- sleptMs = 0; - jest.spyOn((timingPublisher as any).interruptibleSleep, 'sleep').mockImplementation((...args: unknown[]) => { - sleptMs = args[0] as number; - return Promise.resolve(); - }); - jest.spyOn(timingPublisher, 'sendRequests').mockResolvedValue(undefined); - }); - - const getWakeTimeMs = async () => { - const nowMs = dateProvider.now(); - await timingPublisher.sendRequestsAt(targetSlot); - return nowMs + sleptMs; - }; - - const enqueueGovernanceSignal = async () => { - const { govPayload } = mockGovernancePayload(); - expect( - await timingPublisher.enqueueGovernanceCastSignal( - govPayload, - targetSlot, - EthAddress.fromString(testHarnessAttesterAccount.address), - msg => testHarnessAttesterAccount.signTypedData(msg), - ), - ).toEqual(true); - }; - - it('waits for the target slot start before sending vote-only bundles', async () => { - // Governance/slashing signal signatures bind the slot the tx mines in, so a vote landing in the - // L1 block right before the L2 slot starts fails signature verification silently. Submitting at - // the slot boundary guarantees the mined block's timestamp is within the signed slot even in - // environments that mine txs on arrival (anvil automine). 
- await enqueueGovernanceSignal(); - const wakeTimeMs = await getWakeTimeMs(); - expect(wakeTimeMs).toBeGreaterThanOrEqual(targetSlotStartMs - 50); - expect(wakeTimeMs).toBeLessThan(targetSlotStartMs + 1000); - }); - - it('sends bundles containing a propose one L1 slot before the target slot start', async () => { - await enqueueGovernanceSignal(); - (timingPublisher as any).addRequest({ - action: 'propose', - request: { to: EthAddress.random().toString(), data: '0x' }, - lastValidL2Slot: targetSlot, - checkSuccess: () => true, - }); - const wakeTimeMs = await getWakeTimeMs(); - const expectedWakeMs = targetSlotStartMs - ethereumSlotDuration * 1000; - expect(wakeTimeMs).toBeGreaterThanOrEqual(expectedWakeMs - 50); - expect(wakeTimeMs).toBeLessThan(expectedWakeMs + 1000); - }); - }); }); diff --git a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts index 89815f14e0b4..96754bfeea39 100644 --- a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts +++ b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts @@ -617,24 +617,19 @@ export class SequencerPublisher { /* * Schedules sending all enqueued requests at (or after) the start of the given L2 slot. - * Bundles containing a propose sleep until one L1 slot before the L2 slot boundary, so the tx is - * already in the mempool when the slot's first L1 block is built. NB: this warm-up carries a known - * correctness risk — being included in the L1 block right before the L2 slot starts would revert - * propose with HeaderLib__InvalidSlotNumber. - * Vote-only bundles wait for the slot boundary itself: governance/slashing signal signatures bind - * the slot the tx mines in, so a vote mined in the L1 block right before the L2 slot starts fails - * signature verification silently inside Multicall3 — and environments that mine txs on arrival - * (anvil automine) turn the early send into exactly that. 
+ * Sleeps until one L1 slot before the L2 slot boundary so the tx has a chance of being + * picked up by the first L1 block of the L2 slot. + * NB: there is a known correctness risk — being included in the L1 block right before the + * L2 slot starts would revert propose with HeaderLib__InvalidSlotNumber. * Uses InterruptibleSleep so it can be cancelled via interrupt(). */ public async sendRequestsAt(targetSlot: SlotNumber): Promise { const l1Constants = this.epochCache.getL1Constants(); // Start of the target L2 slot, in ms (getTimestampForSlot returns seconds). const startOfTargetSlotMs = Number(getTimestampForSlot(targetSlot, l1Constants)) * 1000; - const hasPropose = this.requests.some(request => request.action === 'propose'); - const submitAfterMs = hasPropose - ? startOfTargetSlotMs - Number(this.ethereumSlotDuration) * 1000 - : startOfTargetSlotMs; + // Aim to be in the mempool one L1 slot before the L2 slot starts, so we have a chance of + // being picked up by the first L1 block of the L2 slot. + const submitAfterMs = startOfTargetSlotMs - Number(this.ethereumSlotDuration) * 1000; const sleepMs = submitAfterMs - this.dateProvider.now(); if (sleepMs > 0) { this.log.debug(`Sleeping ${sleepMs}ms before sending requests`, { From c2171e72c0475561bde38dd4f0bbfc487b713103 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Wed, 10 Jun 2026 10:50:26 -0300 Subject: [PATCH 06/10] fix(e2e): run HA full e2e suite against in-proc interval-mining anvil The HA compose suite was the only block-building suite running against an L1 with no self-advancing clock: an external anvil container in automine mode, excluded from the TestDateProvider sync that locally-spawned anvils get. After #23821 removed the AnvilTestWatcher that coupled the clocks, the test emulated L1 time with clock warps and cheat-mining nudges, which raced proposals against slot boundaries and starved the proposers' archiver-sync gate whenever the test thread blocked on a tx wait. 
Drop the anvil container and ETHEREUM_HOSTS so setup() spawns anvil in-proc with interval mining and the stdout dateProvider sync, exactly like e2e_epochs and the sibling web3signer compose suite. Delete all time scaffolding from the test; tests now submit txs and wait in real time. Deploy L1 contracts under temporary automine (automineL1Setup). --- .../end-to-end/scripts/ha/docker-compose.yml | 17 +- ...l.test.ts => e2e_ha_full.parallel.test.ts} | 283 +++++------------- 2 files changed, 82 insertions(+), 218 deletions(-) rename yarn-project/end-to-end/src/composed/ha/{e2e_ha_full.test.ts => e2e_ha_full.parallel.test.ts} (83%) diff --git a/yarn-project/end-to-end/scripts/ha/docker-compose.yml b/yarn-project/end-to-end/scripts/ha/docker-compose.yml index eb8ecad5d320..cb700f840159 100644 --- a/yarn-project/end-to-end/scripts/ha/docker-compose.yml +++ b/yarn-project/end-to-end/scripts/ha/docker-compose.yml @@ -29,12 +29,6 @@ services: volumes: - web3signer_keys:/keys - anvil: - image: aztecprotocol/build:3.0 - cpus: 1 - mem_limit: 2G - entrypoint: 'anvil --silent -p 8545 --host 0.0.0.0 --chain-id 31337' - end-to-end: image: aztecprotocol/build:3.0 cpus: 4 @@ -51,7 +45,8 @@ services: environment: JEST_CACHE_DIR: /tmp-jest LOG_LEVEL: ${LOG_LEVEL:-verbose} - ETHEREUM_HOSTS: http://anvil:8545 + TEST: ${TEST:-./src/composed/ha/e2e_ha_full.parallel.test.ts} + TEST_NAME: ${TEST_NAME:-} L1_CHAIN_ID: 31337 DATABASE_URL: postgresql://aztec:aztec@postgres:5432/aztec_ha_test WEB3_SIGNER_URL: http://web3signer:9000 @@ -70,10 +65,6 @@ services: while ! nc -z web3signer 9000; do sleep 1; done; echo "Web3Signer is ready" - # Wait for anvil to be ready - while ! nc -z anvil 8545; do sleep 1; done; - echo "Anvil is ready" - # Run database migrations echo "Running database migrations..." 
cd /root/aztec-packages/yarn-project/aztec @@ -84,7 +75,7 @@ services: cd /root/aztec-packages/yarn-project/end-to-end # Run the test - setsid ./scripts/test_simple.sh ${TEST:-./src/composed/ha/e2e_ha_sequencer.test.ts} & + setsid ./scripts/test_simple.sh "$${TEST}" "$${TEST_NAME}" & pid=$$! pgid=$$(($$(ps -o pgid= -p $$pid))) trap "kill -SIGTERM -$$pgid" SIGTERM @@ -96,8 +87,6 @@ services: condition: service_healthy web3signer: condition: service_started - anvil: - condition: service_started volumes: postgres_data: diff --git a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.parallel.test.ts similarity index 83% rename from yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts rename to yarn-project/end-to-end/src/composed/ha/e2e_ha_full.parallel.test.ts index d5acd2a8aab9..0d85eab45f78 100644 --- a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.test.ts +++ b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.parallel.test.ts @@ -15,13 +15,12 @@ import type { Logger } from '@aztec/aztec.js/log'; import { type AztecNode, waitForTx } from '@aztec/aztec.js/node'; import { GovernanceProposerContract } from '@aztec/ethereum/contracts'; import type { DeployAztecL1ContractsReturnType } from '@aztec/ethereum/deploy-aztec-l1-contracts'; -import type { EthCheatCodes } from '@aztec/ethereum/test'; import { BlockNumber, CheckpointNumber, SlotNumber } from '@aztec/foundation/branded-types'; import { Buffer32 } from '@aztec/foundation/buffer'; import { SecretValue } from '@aztec/foundation/config'; import { withLoggerBindings } from '@aztec/foundation/log/server'; import { retryUntil } from '@aztec/foundation/retry'; -import { InterruptibleSleep, sleep } from '@aztec/foundation/sleep'; +import { sleep } from '@aztec/foundation/sleep'; import type { TestDateProvider } from '@aztec/foundation/timer'; import { GovernanceProposerAbi } from '@aztec/l1-artifacts/GovernanceProposerAbi'; import { TestContract 
} from '@aztec/noir-test-contracts.js/Test'; @@ -70,16 +69,6 @@ const NODE_COUNT = 5; const VALIDATOR_COUNT = 4; const COMMITTEE_SIZE = 4; -type SyncImmediateBlockSource = { - syncImmediate: () => Promise; -}; - -function hasSyncImmediate(value: unknown): value is SyncImmediateBlockSource { - return ( - typeof value === 'object' && value !== null && 'syncImmediate' in value && typeof value.syncImmediate === 'function' - ); -} - async function getHardcodedAccountData(secret: Fr, salt: Fr): Promise { const contract = new SchnorrHardcodedKeyAccountContract(); const address = await getAccountContractAddress(contract, secret, salt); @@ -120,28 +109,6 @@ async function waitForTriggerTx(node: AztecNode, txHash: TxHash): Promise { - const latestBlock = await node.getBlockData('latest'); - if (!latestBlock) { - throw new Error('Could not load latest block for HA trigger tx'); - } - - // Jump to the next L2 slot boundary that also covers the L1 chain clock. The compose anvil mines - // blocks only on demand (no interval mining), so its chain timestamp moves independently of the test - // clock and may sit several slots past the latest L2 block. Aligning blindly to `latest block + 1 - // slot` can rewind the test clock below L1 time, making sequencers (which schedule on the test - // clock) build proposals for slots that have already expired on L1. 
- const latestBlockTimestamp = Number(latestBlock.header.globalVariables.timestamp); - const nextL1Timestamp = await ethCheatCodes.nextBlockTimestamp(); - const slotsAhead = Math.max(1, Math.ceil((nextL1Timestamp - latestBlockTimestamp) / aztecSlotDuration)); - dateProvider.setTime((latestBlockTimestamp + slotsAhead * aztecSlotDuration) * 1000); -} - describe('HA Full Setup', () => { jest.setTimeout(20 * 60 * 1000); // 20 minutes @@ -151,7 +118,6 @@ describe('HA Full Setup', () => { let testContract: TestContract; let aztecNode: AztecNode; let config: AztecNodeConfig; - let ethCheatCodes: EthCheatCodes; let teardown: () => Promise = async () => {}; let dateProvider: TestDateProvider; let genesis: GenesisData | undefined; @@ -193,89 +159,10 @@ describe('HA Full Setup', () => { logger.info('All HA peer sequencers started'); }; - /** - * Mines as many L1 blocks as needed to bring anvil's chain clock up to the test clock, never past - * it. The compose anvil runs in automine with a +ethereumSlotDuration timestamp interval per block - * and no interval mining, so L1 chain time stands still unless a tx lands or we mine here — this is - * the suite's only L1 heartbeat. Overshooting the test clock is as harmful as falling behind: - * sequencers schedule proposals on the test clock, and a proposal mined after its target slot has - * expired on L1 is silently dropped, pruning the pending block it carried. 
- */ - const advanceL1ChainTimeToTestClock = async () => { - const nextL1Timestamp = await ethCheatCodes.nextBlockTimestamp(); - const testClockNow = Math.floor(dateProvider.now() / 1000); - const blocksToMine = Math.floor((testClockNow - nextL1Timestamp) / config.ethereumSlotDuration) + 1; - if (blocksToMine > 0) { - await ethCheatCodes.mine(blocksToMine); - } - }; - - const syncHAL1Data = async () => { - await advanceL1ChainTimeToTestClock(); - await Promise.all( - haNodeServices.map(async service => { - try { - const blockSource = service.getBlockSource(); - if (hasSyncImmediate(blockSource)) { - await blockSource.syncImmediate(); - } - } catch (error) { - logger.debug('Skipping HA L1 sync nudge for stopped node', { - error: error instanceof Error ? error.message : String(error), - }); - } - }), - ); - }; - - const alignDateProviderToNextBlockSlot = async () => { - await setDateProviderToNextBlockSlot(aztecNode, ethCheatCodes, dateProvider, config.aztecSlotDuration); - }; - - /** - * Runs `fn` while emulating a self-advancing L1, nudging chain time and archiver sync once per L1 - * slot of wall time. Without the heartbeat, L1 time freezes while the test thread is blocked - * (nothing mines on the on-demand compose anvil) and any retry loop scheduled on the free-running - * test clock — the proposers' archiver-sync gate, or per-slot governance signals that must mine in - * a block whose timestamp matches the signed slot — can then never succeed, turning a single missed - * slot into an unrecoverable stall until the jest timeout. - */ - const withL1Heartbeat = async (fn: () => Promise): Promise => { - let running = true; - const heartbeatSleep = new InterruptibleSleep(); - const heartbeat = (async () => { - while (running) { - await heartbeatSleep.sleep(config.ethereumSlotDuration * 1000); - if (!running) { - break; - } - try { - await syncHAL1Data(); - } catch (error) { - logger.debug('Error advancing L1 time on heartbeat', { - error: error instanceof Error ? 
error.message : String(error), - }); - } - } - })(); - - try { - return await fn(); - } finally { - running = false; - heartbeatSleep.interrupt(); - await heartbeat; - } - }; - - const waitForTriggerTxWithL1Heartbeat = (txHash: TxHash): Promise => - withL1Heartbeat(() => waitForTriggerTx(aztecNode, txHash)); - const sendTriggerTx = async (): Promise => { - await alignDateProviderToNextBlockSlot(); + await startHASequencers(); const txHash = await submitTriggerTx(wallet, testContract, ownerAddress); - await syncHAL1Data(); - return await waitForTriggerTxWithL1Heartbeat(txHash); + return await waitForTriggerTx(aztecNode, txHash); }; const stopHANode = async (nodeIndex: number) => { @@ -337,39 +224,39 @@ describe('HA Full Setup', () => { const initialValidators = createInitialValidatorsFromPrivateKeys(attesterPrivateKeys); const hardcodedAccountData = await getHardcodedAccountData(Fr.random(), Fr.random()); - ({ teardown, logger, wallet, aztecNode, config, ethCheatCodes, dateProvider, deployL1ContractsValues, genesis } = - await setup( - 0, - { - ...PIPELINING_SETUP_OPTS, - initialFundedAccounts: [hardcodedAccountData], - initialValidators, - sequencerPublisherPrivateKeys: [new SecretValue(publisherPrivateKeys[0])], - aztecTargetCommitteeSize: COMMITTEE_SIZE, - // The full HA docker/Web3Signer stack can still be joining and syncing after the shared - // 12s pipelining preset's 2.5s start window has closed. Keep real sequencing, but give - // HA validators enough time to pass the enforced build-start gate in CI. - aztecSlotDuration: 16, - // This suite validates HA coordination on tx-bearing checkpoints. Requiring one tx avoids a startup empty - // checkpoint from occupying the shared HA publisher while the trigger tx is still being prepared. 
- minTxsPerBlock: 1, - archiverPollingIntervalMS: 200, - sequencerPollingIntervalMS: 200, - worldStateBlockCheckIntervalMS: 200, - blockCheckIntervalMS: 200, - startProverNode: true, - // The bootstrap node is only an RPC/P2P anchor. HA validators are the first block producers in this suite. - disableValidator: true, - skipAccountDeployment: true, - // Enable P2P for transaction gossip - p2pEnabled: true, - // Enable slashing for testing governance + slashing vote coordination - slasherEnabled: true, - slashingRoundSizeInEpochs: 1, // 32 slots (1 epoch) - slashingQuorum: 17, // >50% of 32 slots for tally quorum, - }, - { syncChainTip: 'proven' }, - )); + ({ teardown, logger, wallet, aztecNode, config, dateProvider, deployL1ContractsValues, genesis } = await setup( + 0, + { + ...PIPELINING_SETUP_OPTS, + automineL1Setup: true, + initialFundedAccounts: [hardcodedAccountData], + initialValidators, + sequencerPublisherPrivateKeys: [new SecretValue(publisherPrivateKeys[0])], + aztecTargetCommitteeSize: COMMITTEE_SIZE, + // The full HA docker/Web3Signer stack can still be joining and syncing after the shared + // 12s pipelining preset's 2.5s start window has closed. Keep real sequencing, but give + // HA validators enough time to pass the enforced build-start gate in CI. + aztecSlotDuration: 16, + // This suite validates HA coordination on tx-bearing checkpoints. Requiring one tx avoids a startup empty + // checkpoint from occupying the shared HA publisher while the trigger tx is still being prepared. + minTxsPerBlock: 1, + archiverPollingIntervalMS: 200, + sequencerPollingIntervalMS: 200, + worldStateBlockCheckIntervalMS: 200, + blockCheckIntervalMS: 200, + startProverNode: true, + // The bootstrap node is only an RPC/P2P anchor. HA validators are the first block producers in this suite. 
+ disableValidator: true, + skipAccountDeployment: true, + // Enable P2P for transaction gossip + p2pEnabled: true, + // Enable slashing for testing governance + slashing vote coordination + slasherEnabled: true, + slashingRoundSizeInEpochs: 1, // 32 slots (1 epoch) + slashingQuorum: 17, // >50% of 32 slots for tally quorum, + }, + { syncChainTip: 'proven' }, + )); ownerAddress = await registerHardcodedAccount(wallet, hardcodedAccountData); testContract = await registerTestContract(wallet); @@ -494,7 +381,7 @@ describe('HA Full Setup', () => { // Stop all HA peer nodes in parallel with a per-node deadline. A single stuck node can otherwise // block the serial loop long enough to blow the jest hook timeout — e.g. a sequencer.stop() that - // awaits an L1 publish whose tx-timeout was computed on a test-warped clock and never fires. + // hangs awaiting an L1 publish that never lands. if (haNodeServices) { const STOP_DEADLINE_MS = 30_000; await Promise.allSettled( @@ -559,13 +446,8 @@ describe('HA Full Setup', () => { // so HA validators are the first block producers exercised by this suite. logger.info(`Sending trigger tx from ${ownerAddress}`); const txHash = await submitTriggerTx(wallet, testContract, ownerAddress); - // HA nodes cold-start with their archivers synced through the previous L2 slot. Move the - // test clock back one slot before starting their sequencers so the first HA proposal builds - // the next slot their local sync gate permits, instead of immediately chasing a future slot. 
- dateProvider.setTime(dateProvider.now() - config.aztecSlotDuration * 1000); await startHASequencers(); - await syncHAL1Data(); - const receipt = await waitForTriggerTxWithL1Heartbeat(txHash); + const receipt = await waitForTriggerTx(aztecNode, txHash); expect(receipt.blockNumber).toBeDefined(); logger.info(`Trigger tx checkpointed in block ${receipt.blockNumber}`); @@ -722,50 +604,44 @@ describe('HA Full Setup', () => { const govProposerAddr = deployL1ContractsValues.l1ContractAddresses.governanceProposerAddress.toString() as `0x${string}`; - // Run the poll under the L1 heartbeat: a signal can only land when its L1 block's timestamp falls - // within the slot it was signed for, so if the bundled propose+signal publish lost the duty race - // (signal flushed standalone one block early), the per-slot retries need L1 chain time to keep - // tracking the test clock or they would sign slots a frozen L1 never reaches. - const { l1VoteCount, lastSignalSlot, payloadWithMostSignals } = await withL1Heartbeat(() => - retryUntil( - async () => { - const snapshotBlock = await deployL1ContractsValues.l1Client.getBlockNumber(); - const [roundData, l1VoteCountBig] = await Promise.all([ - deployL1ContractsValues.l1Client.readContract({ - address: govProposerAddr, - abi: GovernanceProposerAbi, - functionName: 'getRoundData', - args: [rollupAddr, round], - blockNumber: snapshotBlock, - }), - deployL1ContractsValues.l1Client.readContract({ - address: govProposerAddr, - abi: GovernanceProposerAbi, - functionName: 'signalCount', - args: [rollupAddr, round, mockGovernancePayload.toString() as `0x${string}`], - blockNumber: snapshotBlock, - }), - ]); - const lastSignalSlot = Number(roundData.lastSignalSlot); - const l1VoteCount = Number(l1VoteCountBig); - logger.info( - `L1 round ${round}: lastSignalSlot=${lastSignalSlot}, l1VoteCount=${l1VoteCount}, ` + - `payloadWithMostSignals=${roundData.payloadWithMostSignals} ` + - `(snapshot at L1 block ${snapshotBlock})`, - ); - if (l1VoteCount 
=== 0) { - return undefined; - } - return { - l1VoteCount, - lastSignalSlot, - payloadWithMostSignals: roundData.payloadWithMostSignals, - }; - }, - `L1 governance round to land >= 1 signal`, - 120, - 0.5, - ), + const { l1VoteCount, lastSignalSlot, payloadWithMostSignals } = await retryUntil( + async () => { + const snapshotBlock = await deployL1ContractsValues.l1Client.getBlockNumber(); + const [roundData, l1VoteCountBig] = await Promise.all([ + deployL1ContractsValues.l1Client.readContract({ + address: govProposerAddr, + abi: GovernanceProposerAbi, + functionName: 'getRoundData', + args: [rollupAddr, round], + blockNumber: snapshotBlock, + }), + deployL1ContractsValues.l1Client.readContract({ + address: govProposerAddr, + abi: GovernanceProposerAbi, + functionName: 'signalCount', + args: [rollupAddr, round, mockGovernancePayload.toString() as `0x${string}`], + blockNumber: snapshotBlock, + }), + ]); + const lastSignalSlot = Number(roundData.lastSignalSlot); + const l1VoteCount = Number(l1VoteCountBig); + logger.info( + `L1 round ${round}: lastSignalSlot=${lastSignalSlot}, l1VoteCount=${l1VoteCount}, ` + + `payloadWithMostSignals=${roundData.payloadWithMostSignals} ` + + `(snapshot at L1 block ${snapshotBlock})`, + ); + if (l1VoteCount === 0) { + return undefined; + } + return { + l1VoteCount, + lastSignalSlot, + payloadWithMostSignals: roundData.payloadWithMostSignals, + }; + }, + `L1 governance round to land >= 1 signal`, + 120, + 0.5, ); // Outcome 1: the round leader payload is the one we configured all HA nodes to vote for. 
@@ -1066,7 +942,6 @@ describe('HA Full Setup', () => { ); expect(equivocationOffenses).toEqual([]); - dateProvider.reset(); await Promise.all(haNodeServices.map((_, nodeIndex) => stopHANode(nodeIndex))); }); @@ -1146,7 +1021,7 @@ describe('HA Full Setup', () => { } }); - it('should not delete recent duties when node clock is ahead (using cleanupOldDuties)', async () => { + it('should not delete recent duties via cleanupOldDuties when node clock is ahead', async () => { const spDb = new PostgresSlashingProtectionDatabase(mainPool); // Ensure clean slate for this test @@ -1208,7 +1083,7 @@ describe('HA Full Setup', () => { expect(result.rows.length).toBe(1); }); - it('should delete old duties based on DB time, not node time (using cleanupOldDuties)', async () => { + it('should delete old duties via cleanupOldDuties based on DB time, not node time', async () => { const spDb = new PostgresSlashingProtectionDatabase(mainPool); // Ensure clean slate for this test @@ -1277,7 +1152,7 @@ describe('HA Full Setup', () => { expect(result.rows.length).toBe(0); }); - it('should not delete recent stuck duties when node clock is ahead (using cleanupOwnStuckDuties)', async () => { + it('should not delete recent stuck duties via cleanupOwnStuckDuties when node clock is ahead', async () => { const spDb = new PostgresSlashingProtectionDatabase(mainPool); // Create a signing duty (stuck, not completed) using our actual method From 448816f4a583a581a0c18fc51eb9ee0d00271d7f Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Wed, 10 Jun 2026 10:50:36 -0300 Subject: [PATCH 07/10] chore(e2e): run each HA full suite test as its own CI job Rename the suite to .parallel.test.ts and expand it in bootstrap.sh into one job per test name, each in its own compose stack. run_test.sh forwards the test name into the container (jest --testNamePattern) and namespaces the docker compose project per test so concurrent jobs on one host don't collide. 
sendTriggerTx starts the HA sequencers idempotently since tests no longer share state, and three clock-skew titles lose their parentheses, which testNamePattern would parse as regex groups and match nothing. --- .test_patterns.yml | 2 +- yarn-project/end-to-end/bootstrap.sh | 8 +++++++- yarn-project/end-to-end/scripts/run_test.sh | 6 ++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.test_patterns.yml b/.test_patterns.yml index b740808ec72b..498fc75e22ad 100644 --- a/.test_patterns.yml +++ b/.test_patterns.yml @@ -371,7 +371,7 @@ tests: owners: - *palla - - regex: "yarn-project/end-to-end/scripts/run_test.sh ha src/composed/ha/e2e_ha_full.test.ts" + - regex: "yarn-project/end-to-end/scripts/run_test.sh ha src/composed/ha/e2e_ha_full.parallel.test.ts" owners: - *spyros diff --git a/yarn-project/end-to-end/bootstrap.sh b/yarn-project/end-to-end/bootstrap.sh index 18fd910e9372..3c9806c4985c 100755 --- a/yarn-project/end-to-end/bootstrap.sh +++ b/yarn-project/end-to-end/bootstrap.sh @@ -96,7 +96,13 @@ function test_cmds { ) for test in "${tests[@]}"; do # We must set ONLY_TERM_PARENT=1 to allow the script to fully control cleanup process. 
- echo "$hash:ONLY_TERM_PARENT=1:TIMEOUT=30m $run_test_script ha $test" + if [[ "$test" == *.parallel.test.ts ]]; then + while IFS= read -r test_name; do + echo "$hash:ONLY_TERM_PARENT=1:TIMEOUT=30m $run_test_script ha $test \"$test_name\"" + done < <(extract_test_names "$test") + else + echo "$hash:ONLY_TERM_PARENT=1:TIMEOUT=30m $run_test_script ha $test" + fi done #echo "$hash:ONLY_TERM_PARENT=1 $run_test_script simple src/e2e_multi_validator/e2e_multi_validator_node.test.ts" diff --git a/yarn-project/end-to-end/scripts/run_test.sh b/yarn-project/end-to-end/scripts/run_test.sh index 7f5ca7b8219a..5977af5ec27b 100755 --- a/yarn-project/end-to-end/scripts/run_test.sh +++ b/yarn-project/end-to-end/scripts/run_test.sh @@ -25,7 +25,9 @@ case "$type" in TEST=$test exec run_compose_test $test end-to-end $PWD/web3signer ;; "ha") - # Remove volumes on cleanup for HA tests to ensure clean database state on retries - TEST=$test REMOVE_COMPOSE_VOLUMES=1 exec run_compose_test $test end-to-end $PWD/ha + # Remove volumes on cleanup for HA tests to ensure clean database state on retries. + # NAME_POSTFIX namespaces the compose project per test so parallel per-test jobs don't collide. + postfix=$(echo "$test_name" | sed 's/[^a-zA-Z0-9]/_/g') + TEST=$test TEST_NAME=$test_name NAME_POSTFIX=${postfix:+_$postfix} REMOVE_COMPOSE_VOLUMES=1 exec run_compose_test $test end-to-end $PWD/ha ;; esac From 532dcd5d3c953a311a4b90c023f4bd4f74edbe85 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Wed, 10 Jun 2026 11:00:36 -0300 Subject: [PATCH 08/10] fix(e2e): lowercase HA compose project name postfix Docker compose project names must be lowercase; test titles are not. 
--- yarn-project/end-to-end/scripts/run_test.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yarn-project/end-to-end/scripts/run_test.sh b/yarn-project/end-to-end/scripts/run_test.sh index 5977af5ec27b..c5a06472e5e5 100755 --- a/yarn-project/end-to-end/scripts/run_test.sh +++ b/yarn-project/end-to-end/scripts/run_test.sh @@ -27,7 +27,8 @@ case "$type" in "ha") # Remove volumes on cleanup for HA tests to ensure clean database state on retries. # NAME_POSTFIX namespaces the compose project per test so parallel per-test jobs don't collide. - postfix=$(echo "$test_name" | sed 's/[^a-zA-Z0-9]/_/g') + # Compose project names must be lowercase alphanumerics, hyphens, and underscores. + postfix=$(echo "$test_name" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]/_/g') TEST=$test TEST_NAME=$test_name NAME_POSTFIX=${postfix:+_$postfix} REMOVE_COMPOSE_VOLUMES=1 exec run_compose_test $test end-to-end $PWD/ha ;; esac From 937b9f986574ea61afbc9d27bd23f46773621230 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Wed, 10 Jun 2026 12:52:52 -0300 Subject: [PATCH 09/10] fix(e2e): stop HA nodes fully before resetting the test clock The afterAll hook abandoned node stops after 30s and reset the shared TestDateProvider before stopping anything. Resetting rewinds the clock from chain time to wall time (minutes apart after the automine deploy burst), so vote submissions armed against the rewound clock blocked sequencer shutdown until wall time caught up, and the abandoned nodes outlived the jest environment, keeping the worker alive until the CI job timeout. Stop sequencers first, await every stop, reset the clock last. 
--- .../composed/ha/e2e_ha_full.parallel.test.ts | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.parallel.test.ts b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.parallel.test.ts index 0d85eab45f78..87c7617c5844 100644 --- a/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.parallel.test.ts +++ b/yarn-project/end-to-end/src/composed/ha/e2e_ha_full.parallel.test.ts @@ -377,27 +377,34 @@ describe('HA Full Setup', () => { }); afterAll(async () => { - dateProvider?.reset(); - - // Stop all HA peer nodes in parallel with a per-node deadline. A single stuck node can otherwise - // block the serial loop long enough to blow the jest hook timeout — e.g. a sequencer.stop() that - // hangs awaiting an L1 publish that never lands. + // Stop all sequencers before tearing down the nodes: a sequencer stop awaits its in-flight + // iteration, which can spend tens of seconds finishing a vote or checkpoint publish on L1. + // Stops must be awaited fully — jest runs without forceExit, so a node abandoned mid-stop + // outlives the test environment and keeps the worker process alive until the CI job timeout. + // The dateProvider reset must wait until nodes are stopped: it rewinds the shared clock from + // chain time to wall time (minutes apart after the automine deploy burst), and any publisher + // deadline armed against the rewound clock would block shutdown until wall time catches up. 
if (haNodeServices) { - const STOP_DEADLINE_MS = 30_000; await Promise.allSettled( - haNodeServices.map((_, i) => { - return Promise.race([ - stopHANode(i).catch(error => { - logger.error(`Failed to stop HA peer node ${i}: ${error}`); - }), - sleep(STOP_DEADLINE_MS).then(() => { - logger.error(`HA peer node ${i} stop did not return within ${STOP_DEADLINE_MS}ms; abandoning`); - }), - ]); + haNodeServices.map(async (service, i) => { + try { + await service.getSequencer()?.stop(); + } catch (error) { + logger.error(`Failed to stop sequencer of HA peer node ${i}: ${error}`); + } }), ); + await Promise.allSettled( + haNodeServices.map((_, i) => + stopHANode(i).catch(error => { + logger.error(`Failed to stop HA peer node ${i}: ${error}`); + }), + ), + ); } + dateProvider?.reset(); + // Cleanup HA keystore temp directories if (haKeystoreDirs) { for (let i = 0; i < haKeystoreDirs.length; i++) { From e1711135e39be92aa52ff5760190a958289304c8 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Wed, 10 Jun 2026 12:53:22 -0300 Subject: [PATCH 10/10] fix: interrupt publisher send-at-slot sleep on sequencer stop (#23990) Port of #23990 from merge-train/spartan. Propagates CheckpointProposalJob.interrupt() to its SequencerPublisher so the publisher's sendRequestsAt slot-deadline sleep is cancelled on sequencer stop, and checks interrupted before sleeping since InterruptibleSleep.interrupt() only resolves sleeps already in flight. The e2e_ha_full teardown changes from the original PR are superseded by the afterAll rework in this branch and are not ported. 
--- .../src/publisher/sequencer-publisher.test.ts | 22 ++++++++++++ .../src/publisher/sequencer-publisher.ts | 3 ++ .../sequencer/checkpoint_proposal_job.test.ts | 34 +++++++++++++++++++ .../src/sequencer/checkpoint_proposal_job.ts | 3 +- 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts index bf7b7f2fafb8..519fbdb6dfed 100644 --- a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts +++ b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.test.ts @@ -720,6 +720,28 @@ describe('SequencerPublisher', () => { expect((publisher as any).requests.length).toEqual(0); }); + it('does not sleep in sendRequestsAt if interrupted beforehand', async () => { + // A target slot far enough in the future that sendRequestsAt would sleep for ~1 hour + // (EmptyL1RollupConstants has slotDuration 1s and l1GenesisTime 0, so slot N starts at N seconds). 
+ const targetSlot = SlotNumber(Math.ceil(Date.now() / 1000) + 3600); + publisher.interrupt(); + + let timeout: NodeJS.Timeout | undefined; + try { + const result = await Promise.race([ + publisher.sendRequestsAt(targetSlot), + new Promise<'timed-out'>(resolve => { + timeout = setTimeout(() => resolve('timed-out'), 1000); + }), + ]); + expect(result).toBeUndefined(); + } finally { + if (timeout) { + clearTimeout(timeout); + } + } + }); + it('does not send requests if no valid requests are found', async () => { publisher.addRequest({ action: 'propose', diff --git a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts index 96754bfeea39..738e83d5d5b5 100644 --- a/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts +++ b/yarn-project/sequencer-client/src/publisher/sequencer-publisher.ts @@ -630,6 +630,9 @@ export class SequencerPublisher { // Aim to be in the mempool one L1 slot before the L2 slot starts, so we have a chance of // being picked up by the first L1 block of the L2 slot. 
const submitAfterMs = startOfTargetSlotMs - Number(this.ethereumSlotDuration) * 1000; + if (this.interrupted) { + return undefined; + } const sleepMs = submitAfterMs - this.dateProvider.now(); if (sleepMs > 0) { this.log.debug(`Sleeping ${sleepMs}ms before sending requests`, { diff --git a/yarn-project/sequencer-client/src/sequencer/checkpoint_proposal_job.test.ts b/yarn-project/sequencer-client/src/sequencer/checkpoint_proposal_job.test.ts index 176e6d4ea83a..f1da2c37dcee 100644 --- a/yarn-project/sequencer-client/src/sequencer/checkpoint_proposal_job.test.ts +++ b/yarn-project/sequencer-client/src/sequencer/checkpoint_proposal_job.test.ts @@ -14,6 +14,7 @@ import { TimeoutError } from '@aztec/foundation/error'; import { EthAddress } from '@aztec/foundation/eth-address'; import { Signature } from '@aztec/foundation/eth-signature'; import { createLogger } from '@aztec/foundation/log'; +import { promiseWithResolvers } from '@aztec/foundation/promise'; import { TestDateProvider } from '@aztec/foundation/timer'; import type { TypedEventEmitter } from '@aztec/foundation/types'; import { type P2P, P2PClientState } from '@aztec/p2p'; @@ -1735,6 +1736,39 @@ describe('CheckpointProposalJob', () => { } }); + it('interrupts a pending L1 submission sleeping in the publisher', async () => { + const { txs, block } = await setupTxsAndBlock(p2p, globalVariables, 1, chainId); + checkpointBuilder.seedBlocks([block], [txs]); + validatorClient.collectAttestations.mockResolvedValue(getAttestations(block)); + + // Simulate sendRequestsAt sleeping until the target slot: the promise only resolves once + // the publisher itself is interrupted. 
+ const sendDeferred = promiseWithResolvers(); + publisher.sendRequestsAt.mockReturnValue(sendDeferred.promise); + publisher.interrupt.mockImplementation(() => sendDeferred.resolve(undefined)); + + const checkpoint = await job.execute(); + expect(checkpoint).toBeDefined(); + + const pendingSubmission = job.awaitPendingSubmission().then(() => 'stopped' as const); + job.interrupt(); + + let timeout: NodeJS.Timeout | undefined; + try { + const result = await Promise.race([ + pendingSubmission, + new Promise<'timed-out'>(resolve => { + timeout = setTimeout(() => resolve('timed-out'), 1000); + }), + ]); + expect(result).toBe('stopped'); + } finally { + if (timeout) { + clearTimeout(timeout); + } + } + }); + it('aborts checkpoint when syncing proposed block to archiver fails', async () => { const { txs, block } = await setupTxsAndBlock(p2p, globalVariables, 1, chainId); checkpointBuilder.seedBlocks([block], [txs]); diff --git a/yarn-project/sequencer-client/src/sequencer/checkpoint_proposal_job.ts b/yarn-project/sequencer-client/src/sequencer/checkpoint_proposal_job.ts index eb9f24087950..613d5080749a 100644 --- a/yarn-project/sequencer-client/src/sequencer/checkpoint_proposal_job.ts +++ b/yarn-project/sequencer-client/src/sequencer/checkpoint_proposal_job.ts @@ -189,10 +189,11 @@ export class CheckpointProposalJob implements Traceable { await this.pendingL1Submission; } - /** Interrupts job-owned waits so shutdown can finish. */ + /** Interrupts job-owned waits, including the publisher's send-at-slot sleep, so shutdown can finish. */ public interrupt(): void { this.interrupted = true; this.interruptibleSleep.interrupt(true); + this.publisher.interrupt(); } private async awaitInterruptibleSleep(ms: number): Promise {