From da96f5a2899579b14e71ec5369eb4275d7e4d20c Mon Sep 17 00:00:00 2001
From: David Garske
Date: Tue, 12 May 2026 13:15:17 -0700
Subject: [PATCH 1/2] Fix Vorago VA416x0 IRAM shadow update (use word-aligned stores)

The VA416xx code RAM (IRAM at 0x00000000-0x0003FFFF) silently drops
8-bit and 16-bit stores when ROM_PROT.WREN=1 - only word-aligned 32-bit
stores are honored, because the ECC machinery computes parity per 32-bit
word and rejects sub-word writes (no fault is raised).

wolfBoot's local memcpy()/memset() are byte-wise (ldrb/strb), so the IRAM
shadow update in ext_flash_write/read/erase appeared to succeed but
actually left the destination unchanged. The FRAM content was updated
correctly, but after the swap wolfBoot branched into the partition at
0xB800 in IRAM where the stale image still lived, so the running app was
the OLD version even though the partition header (read from FRAM)
reported the new version.

Add iram_write/iram_fill helpers that use 32-bit stores for the bulk of
an aligned region and read-modify-write the containing word for any
unaligned head/tail, and use them in ext_flash_write/read/erase in place
of memcpy/memset for the shadow update. Partition addresses are
sector-aligned so in practice the path is always pure 32-bit stores.

Verified end-to-end on VA41630-EVK with build_test.sh update: app v1
boots, trigger sets, swap runs sector-by-sector, post-swap boot prints
"Booting version: 0x2" and the running app prints v2-specific output in
TESTING state. After wolfBoot_success() and a manual reset the second
boot reports state CONFIRMED.
--- hal/va416x0.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/hal/va416x0.c b/hal/va416x0.c index b9fb78e441..753c4bb0dc 100644 --- a/hal/va416x0.c +++ b/hal/va416x0.c @@ -361,6 +361,77 @@ void ext_flash_unlock(void) VOR_SYSCONFIG->ROM_PROT |= SYSCONFIG_ROM_PROT_WREN_Msk; } +/* The VA416xx code RAM (IRAM, 0x00000000-0x0003FFFF) silently drops 8/16-bit + * stores when WREN=1 - only word-aligned 32-bit stores stick (the ECC + * machinery computes parity per word and rejects sub-word writes without + * fault). A generic byte-wise memcpy/memset on the IRAM shadow appears + * to succeed but leaves the destination unchanged. These helpers copy/fill + * with 32-bit stores so the IRAM shadow update actually takes effect. + * + * Partition addresses (boot/update/swap) are sector-aligned (0x800) and the + * swap engine moves whole sector-sized blocks, so in practice the IRAM + * shadow path is always word-aligned. The unaligned head/tail fallbacks + * exist for defense-in-depth. */ +static void iram_write(void *dst, const void *src, int len) +{ + uintptr_t d = (uintptr_t)dst; + uintptr_t s = (uintptr_t)src; + /* Word-aligned bulk copy */ + if (((d | s) & 3u) == 0u) { + uint32_t *wd = (uint32_t *)dst; + const uint32_t *ws = (const uint32_t *)src; + while (len >= 4) { + *wd++ = *ws++; + len -= 4; + } + /* Fall through with byte tail (typically zero on this target) */ + dst = wd; + src = ws; + } + /* Byte tail/unaligned: do read-modify-write of the containing word + * (sub-word stores are dropped by the hardware). 
*/ + while (len > 0) { + uintptr_t addr = (uintptr_t)dst & ~3u; + uint32_t off = (uintptr_t)dst & 3u; + uint32_t word = *(volatile uint32_t *)addr; + uint8_t *wp = (uint8_t *)&word; + while (len > 0 && off < 4u) { + wp[off++] = *(const uint8_t *)src; + src = (const uint8_t *)src + 1; + dst = (uint8_t *)dst + 1; + len--; + } + *(volatile uint32_t *)addr = word; + } +} + +static void iram_fill(void *dst, uint8_t val, int len) +{ + uint32_t pattern = ((uint32_t)val << 24) | ((uint32_t)val << 16) | + ((uint32_t)val << 8) | (uint32_t)val; + uintptr_t d = (uintptr_t)dst; + if ((d & 3u) == 0u) { + uint32_t *wd = (uint32_t *)dst; + while (len >= 4) { + *wd++ = pattern; + len -= 4; + } + dst = wd; + } + while (len > 0) { + uintptr_t addr = (uintptr_t)dst & ~3u; + uint32_t off = (uintptr_t)dst & 3u; + uint32_t word = *(volatile uint32_t *)addr; + uint8_t *wp = (uint8_t *)&word; + while (len > 0 && off < 4u) { + wp[off++] = val; + dst = (uint8_t *)dst + 1; + len--; + } + *(volatile uint32_t *)addr = word; + } +} + int ext_flash_write(uintptr_t address, const uint8_t *data, int len) { hal_status_t status; @@ -370,8 +441,8 @@ int ext_flash_write(uintptr_t address, const uint8_t *data, int len) #endif status = FRAM_Write(ROM_SPI_BANK, address, (uint8_t*)data, len); if (status == hal_status_ok) { - /* update the shadow IRAM */ - memcpy((void*)address, data, len); + /* update the shadow IRAM (word-aligned stores; see iram_write) */ + iram_write((void*)address, data, len); } else { return -(int)status; /* convert to negative error code */ @@ -388,8 +459,8 @@ int ext_flash_read(uintptr_t address, uint8_t *data, int len) #endif status = FRAM_Read(ROM_SPI_BANK, address, data, len); if (status == hal_status_ok) { - /* update the shadow IRAM */ - memcpy((void*)address, data, len); + /* update the shadow IRAM (word-aligned stores; see iram_write) */ + iram_write((void*)address, data, len); } else { return -(int)status; /* convert to negative error code */ @@ -405,8 +476,8 @@ int 
ext_flash_erase(uintptr_t address, int len) #endif status = FRAM_Erase(ROM_SPI_BANK, address, len); if (status == hal_status_ok) { - /* update the shadow IRAM */ - memset((void*)address, 0xFF, len); + /* update the shadow IRAM (word-aligned stores; see iram_fill) */ + iram_fill((void*)address, 0xFF, len); } else { return -(int)status; /* convert to negative error code */ @@ -539,7 +610,6 @@ void hal_init(void) #ifdef TEST_EXT_FLASH test_ext_flash(); #endif - } void hal_prepare_boot(void) From b46c5f6364200b0cc4318e10409760b0f1114e89 Mon Sep 17 00:00:00 2001 From: David Garske Date: Tue, 12 May 2026 13:47:43 -0700 Subject: [PATCH 2/2] Vorago VA416x0: harden iram_write/iram_fill per review Two follow-ups to the IRAM shadow update fix (da96f5a2): 1. Mark the bulk-copy destination as volatile uint32_t * so the compiler cannot lower a 32-bit assignment into byte/halfword stores. Sub-word stores are silently dropped by the IRAM ECC, so a future codegen change could have broken the workaround. 2. Use (uintptr_t)3u / ~(uintptr_t)3u for alignment masking. On targets where uintptr_t is wider than unsigned int the bare ~3u would zero the high half of the address; harmless on Cortex-M4 today, but the helper pattern is easy to copy elsewhere. The int len type is kept to match the wolfBoot ext_flash_* HAL signature convention across all ports. Verified with arm-none-eabi-objdump: aligned bulk path emits only 32-bit str instructions (no strb/strh). --- hal/va416x0.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/hal/va416x0.c b/hal/va416x0.c index 753c4bb0dc..a88b1435f9 100644 --- a/hal/va416x0.c +++ b/hal/va416x0.c @@ -376,23 +376,25 @@ static void iram_write(void *dst, const void *src, int len) { uintptr_t d = (uintptr_t)dst; uintptr_t s = (uintptr_t)src; - /* Word-aligned bulk copy */ - if (((d | s) & 3u) == 0u) { - uint32_t *wd = (uint32_t *)dst; + /* Word-aligned bulk copy. 
The destination is marked volatile so the + * compiler cannot lower a 32-bit assignment into byte/halfword stores + * (which would be silently dropped by the IRAM ECC machinery). */ + if (((d | s) & (uintptr_t)3u) == 0u) { + volatile uint32_t *wd = (volatile uint32_t *)dst; const uint32_t *ws = (const uint32_t *)src; while (len >= 4) { *wd++ = *ws++; len -= 4; } /* Fall through with byte tail (typically zero on this target) */ - dst = wd; + dst = (void *)wd; src = ws; } /* Byte tail/unaligned: do read-modify-write of the containing word * (sub-word stores are dropped by the hardware). */ while (len > 0) { - uintptr_t addr = (uintptr_t)dst & ~3u; - uint32_t off = (uintptr_t)dst & 3u; + uintptr_t addr = (uintptr_t)dst & ~(uintptr_t)3u; + uint32_t off = (uint32_t)((uintptr_t)dst & (uintptr_t)3u); uint32_t word = *(volatile uint32_t *)addr; uint8_t *wp = (uint8_t *)&word; while (len > 0 && off < 4u) { @@ -410,17 +412,18 @@ static void iram_fill(void *dst, uint8_t val, int len) uint32_t pattern = ((uint32_t)val << 24) | ((uint32_t)val << 16) | ((uint32_t)val << 8) | (uint32_t)val; uintptr_t d = (uintptr_t)dst; - if ((d & 3u) == 0u) { - uint32_t *wd = (uint32_t *)dst; + /* Word-aligned bulk fill via volatile to guarantee 32-bit stores. */ + if ((d & (uintptr_t)3u) == 0u) { + volatile uint32_t *wd = (volatile uint32_t *)dst; while (len >= 4) { *wd++ = pattern; len -= 4; } - dst = wd; + dst = (void *)wd; } while (len > 0) { - uintptr_t addr = (uintptr_t)dst & ~3u; - uint32_t off = (uintptr_t)dst & 3u; + uintptr_t addr = (uintptr_t)dst & ~(uintptr_t)3u; + uint32_t off = (uint32_t)((uintptr_t)dst & (uintptr_t)3u); uint32_t word = *(volatile uint32_t *)addr; uint8_t *wp = (uint8_t *)&word; while (len > 0 && off < 4u) {