diff --git a/.gdbinit b/.gdbinit index ef7d89f91f..94871a406c 100644 --- a/.gdbinit +++ b/.gdbinit @@ -4,4 +4,3 @@ add-symbol-file test-app/image.elf set pagination off foc c - diff --git a/.github/workflows/test-build-riscv.yml b/.github/workflows/test-build-riscv.yml index c195d756ed..b742f7e12c 100644 --- a/.github/workflows/test-build-riscv.yml +++ b/.github/workflows/test-build-riscv.yml @@ -19,7 +19,6 @@ jobs: build: runs-on: ubuntu-24.04 timeout-minutes: 30 - steps: - uses: actions/checkout@v4 with: @@ -80,26 +79,30 @@ jobs: - name: Update repository run: sudo apt-get update -o Acquire::Retries=3 - - name: Download and install RISC-V toolchains + # ============================================================ + # Prebuilt RISC-V toolchains from riscv-collab + # https://github.com/riscv-collab/riscv-gnu-toolchain/releases + # Extracts to: riscv/bin/riscv{32,64}-unknown-elf-* + # ============================================================ + - name: Download and install RISC-V toolchain (riscv32) if: ${{ inputs.arch == 'riscv' }} run: | - # Download SiFive prebuilt toolchain with newlib - wget -q https://static.dev.sifive.com/dev-tools/freedom-tools/v2020.12/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14.tar.gz - tar xzf riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14.tar.gz - echo "$GITHUB_WORKSPACE/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14/bin" >> $GITHUB_PATH - - # Download latest prebuilt RISC-V toolchains - #wget -q https://github.com/RISCV-Tools/riscv-gnu-toolchain/releases/latest/download/riscv32-elf-ubuntu-24.04-gcc.tar.xz - #tar -xf riscv32-elf-ubuntu-24.04-gcc.tar.xz - #echo "$GITHUB_WORKSPACE/riscv/bin" >> $GITHUB_PATH + wget -q https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/2025.01.17/riscv32-elf-ubuntu-24.04-gcc-nightly-2025.01.17-nightly.tar.xz + tar -xf riscv32-elf-ubuntu-24.04-gcc-nightly-2025.01.17-nightly.tar.xz + echo "$GITHUB_WORKSPACE/riscv/bin" >> 
$GITHUB_PATH + $GITHUB_WORKSPACE/riscv/bin/riscv32-unknown-elf-gcc --version - - name: Download and install RISC-V toolchains + - name: Download and install RISC-V toolchain (riscv64) if: ${{ inputs.arch == 'riscv64' }} run: | - wget -q https://github.com/RISCV-Tools/riscv-gnu-toolchain/releases/latest/download/riscv64-elf-ubuntu-24.04-gcc.tar.xz - tar -xf riscv64-elf-ubuntu-24.04-gcc.tar.xz + wget -q https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/2025.01.17/riscv64-elf-ubuntu-24.04-gcc-nightly-2025.01.17-nightly.tar.xz + tar -xf riscv64-elf-ubuntu-24.04-gcc-nightly-2025.01.17-nightly.tar.xz echo "$GITHUB_WORKSPACE/riscv/bin" >> $GITHUB_PATH + $GITHUB_WORKSPACE/riscv/bin/riscv64-unknown-elf-gcc --version + # ============================================================ + # Build wolfboot + # ============================================================ - name: make clean run: | make distclean @@ -115,10 +118,9 @@ jobs: - name: Build wolfboot (riscv32) if: ${{ inputs.arch == 'riscv' }} run: | - # using riscv64 for now since riscv32-unknown-elf- is missing "zicsr" extension - make CROSS_COMPILE=riscv64-unknown-elf- FREEDOM_E_SDK=$GITHUB_WORKSPACE/freedom-e-sdk ${{inputs.make-args}} + make CROSS_COMPILE=riscv32-unknown-elf- FREEDOM_E_SDK=$GITHUB_WORKSPACE/freedom-e-sdk ${{inputs.make-args}} - - name: Build wolfboot (riscv64)) + - name: Build wolfboot (riscv64) if: ${{ inputs.arch == 'riscv64' }} run: | make CROSS_COMPILE=riscv64-unknown-elf- ${{inputs.make-args}} diff --git a/.github/workflows/test-configs.yml b/.github/workflows/test-configs.yml index e1323e41e9..296bdac64c 100644 --- a/.github/workflows/test-configs.yml +++ b/.github/workflows/test-configs.yml @@ -19,9 +19,6 @@ jobs: with: arch: riscv config-file: ./config/examples/hifive1.config - # Only building wolfBoot - not test app for now (cross compiler cannot find suitable multilib set for '-march=rv32imafdc_zicsr_zmmul_zaamo_zalrsc_zca_zcd_zcf'/'-mabi=ilp32d') - # Consider building 
cached RISCV64 toolchain for this target - make-args: wolfboot.bin sama5d3_test: uses: ./.github/workflows/test-build.yml @@ -266,9 +263,16 @@ jobs: with: arch: riscv64 config-file: ./config/examples/polarfire_mpfs250.config - # Only building wolfBoot - not test app for now (cross compiler cannot find suitable multilib set for '-march=rv64imafd_zicsr_zmmul_zaamo_zalrsc'/'-mabi=lp64d') - # Consider building cached RISCV64 toolchain for this target - make-args: wolfboot.bin + microchip_mpfs250_qspi_test: + uses: ./.github/workflows/test-build-riscv.yml + with: + arch: riscv64 + config-file: ./config/examples/polarfire_mpfs250_qspi.config + microchip_mpfs250_m_qspi_test: + uses: ./.github/workflows/test-build-riscv.yml + with: + arch: riscv64 + config-file: ./config/examples/polarfire_mpfs250_m_qspi.config raspi3_test: uses: ./.github/workflows/test-build.yml diff --git a/.gitignore b/.gitignore index 2bb7f203b1..4e54ac6d80 100644 --- a/.gitignore +++ b/.gitignore @@ -327,6 +327,7 @@ language.settings.xml # PolarFire SoC Device Tree Binary hal/mpfs.dtb +bootmode1/ # Emulator test: generated linker scripts test-app/emu-test-apps/*/target.ld diff --git a/Makefile b/Makefile index b993abb6dd..c25413327f 100644 --- a/Makefile +++ b/Makefile @@ -521,7 +521,8 @@ $(LSCRIPT): $(LSCRIPT_IN) FORCE sed -e "s/@WOLFBOOT_LOAD_BASE@/$(WOLFBOOT_LOAD_BASE)/g" | \ sed -e "s/@BOOTLOADER_START@/$(BOOTLOADER_START)/g" | \ sed -e "s/@IMAGE_HEADER_SIZE@/$(IMAGE_HEADER_SIZE)/g" | \ - sed -e "s/@FSP_S_LOAD_BASE@/$(FSP_S_LOAD_BASE)/g" \ + sed -e "s/@FSP_S_LOAD_BASE@/$(FSP_S_LOAD_BASE)/g" | \ + sed -e "s/@WOLFBOOT_L2LIM_SIZE@/$(WOLFBOOT_L2LIM_SIZE)/g" \ > $@ hex: wolfboot.hex diff --git a/arch.mk b/arch.mk index 4fbd75d4e8..849fa0360f 100644 --- a/arch.mk +++ b/arch.mk @@ -564,10 +564,24 @@ endif ## RISCV (32-bit) ifeq ($(ARCH),RISCV) CROSS_COMPILE?=riscv32-unknown-elf- - ARCH_FLAGS=-march=rv32imac -mabi=ilp32 -mcmodel=medany + # GCC 12+ separated _zicsr (CSR instructions) and _zifencei 
(fence.i) + # from the base "I" extension, requiring them to be listed explicitly in + # the -march string. Detect with compile-only tests (-c). + # + # IMPORTANT: these extensions are added to CFLAGS only (compilation). + # They must NOT appear in LDFLAGS because GCC 15 decomposes the ISA + # string (m→zmmul, a→zaamo+zalrsc, c→zca) producing an expanded + # -march that has no matching multilib, causing a fatal error at + # link time. The base march is multilib-safe for all GCC versions. + RISCV32_ZICSR := $(shell echo "void _start(void){}" | \ + $(CROSS_COMPILE)gcc -march=rv32imac_zicsr -mabi=ilp32 -c -x c - -o /dev/null 2>/dev/null && echo _zicsr) + RISCV32_ZIFENCEI := $(shell echo "void _start(void){}" | \ + $(CROSS_COMPILE)gcc -march=rv32imac_zifencei -mabi=ilp32 -c -x c - -o /dev/null 2>/dev/null && echo _zifencei) CFLAGS+=-fno-builtin-printf -DUSE_M_TIME -g -nostartfiles -DARCH_RISCV - CFLAGS+=$(ARCH_FLAGS) - LDFLAGS+=$(ARCH_FLAGS) + # Compilation: extended march so assembler accepts CSR/fence.i + CFLAGS+=-march=rv32imac$(RISCV32_ZICSR)$(RISCV32_ZIFENCEI) -mabi=ilp32 -mcmodel=medany + # Linking: base march (no extension suffixes) for multilib lookup + LDFLAGS+=-march=rv32imac -mabi=ilp32 -mcmodel=medany MATH_OBJS += $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_c32.o # Prune unused functions and data @@ -582,7 +596,24 @@ endif ## RISCV64 (64-bit) ifeq ($(ARCH),RISCV64) CROSS_COMPILE?=riscv64-unknown-elf- - CFLAGS+=-DMMU -DWOLFBOOT_DUALBOOT + + # M-mode vs S-mode configuration + ifeq ($(RISCV_MMODE),1) + # Machine Mode: Running directly from eNVM/L2 SRAM + CFLAGS+=-DWOLFBOOT_RISCV_MMODE -DWOLFBOOT_DUALBOOT + # Use M-mode specific linker script + LSCRIPT_IN:=hal/$(TARGET)-m.ld + else + # Supervisor Mode: Running under HSS + CFLAGS+=-DWOLFBOOT_DUALBOOT + ifeq ($(MPFS_L2LIM),1) + # L2-LIM mode: wolfBoot in on-chip SRAM, loaded by HSS (no DDR) + LSCRIPT_IN:=hal/$(TARGET)-hss.ld + else + # DDR mode (default): full MMU and FDT support + CFLAGS+=-DMMU + endif + endif # If 
SD card or eMMC is enabled use update_disk loader with GPT support ifneq ($(filter 1,$(DISK_SDCARD) $(DISK_EMMC)),) @@ -596,10 +627,30 @@ ifeq ($(ARCH),RISCV64) UPDATE_OBJS?=src/update_ram.o endif - ARCH_FLAGS=-march=rv64imafd -mabi=lp64d -mcmodel=medany + # GCC 12+ extension detection (see RISCV 32-bit section above). + # Extensions go in CFLAGS only; LDFLAGS uses the base march to avoid + # GCC 15 multilib lookup failures from ISA string decomposition. + RISCV64_ZICSR := $(shell echo "void _start(void){}" | \ + $(CROSS_COMPILE)gcc -march=rv64imafd_zicsr -mabi=lp64d -c -x c - -o /dev/null 2>/dev/null && echo _zicsr) + RISCV64_ZIFENCEI := $(shell echo "void _start(void){}" | \ + $(CROSS_COMPILE)gcc -march=rv64imafd_zifencei -mabi=lp64d -c -x c - -o /dev/null 2>/dev/null && echo _zifencei) + + ifeq ($(RISCV_MMODE),1) + # E51 core: rv64imac (no FPU, no crypto extensions) + CFLAGS+=-march=rv64imac$(RISCV64_ZICSR)$(RISCV64_ZIFENCEI) -mabi=lp64 -mcmodel=medany + LDFLAGS+=-march=rv64imac -mabi=lp64 -mcmodel=medany + else + # U54 cores: rv64gc (with FPU) + CFLAGS+=-march=rv64imafd$(RISCV64_ZICSR)$(RISCV64_ZIFENCEI) -mabi=lp64d -mcmodel=medany + LDFLAGS+=-march=rv64imafd -mabi=lp64d -mcmodel=medany + + # FDT support for DDR S-mode (not needed for L2-LIM bare-metal boot) + ifneq ($(MPFS_L2LIM),1) + CFLAGS+=-DWOLFBOOT_FDT + OBJS+=src/fdt.o + endif + endif CFLAGS+=-fno-builtin-printf -DUSE_M_TIME -g -nostartfiles -DARCH_RISCV -DARCH_RISCV64 - CFLAGS+=$(ARCH_FLAGS) - LDFLAGS+=$(ARCH_FLAGS) # Prune unused functions and data CFLAGS +=-ffunction-sections -fdata-sections @@ -608,9 +659,6 @@ ifeq ($(ARCH),RISCV64) # Unified RISC-V boot code (32/64-bit via __riscv_xlen) OBJS+=src/boot_riscv_start.o src/boot_riscv.o src/vector_riscv.o - CFLAGS+=-DWOLFBOOT_FDT - OBJS+=src/fdt.o - ifeq ($(SPMATH),1) MATH_OBJS += $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_c64.o endif @@ -1632,6 +1680,13 @@ ifeq ($(DEBUG_UART),1) endif endif +# UART QSPI programmer (PolarFire SoC MPFS): receive binary over 
UART and +# write it directly to QSPI flash. Requires EXT_FLASH=1 and DEBUG_UART=1. +# Use tools/scripts/mpfs_qspi_prog.py on the host side. +ifeq ($(UART_QSPI_PROGRAM),1) + CFLAGS+=-DUART_QSPI_PROGRAM +endif + ifeq ($(NXP_CUSTOM_DCD),1) CFLAGS+=-DNXP_CUSTOM_DCD OBJS+=$(NXP_CUSTOM_DCD_OBJS) diff --git a/config/examples/polarfire_mpfs250.config b/config/examples/polarfire_mpfs250.config index 5c3ca74059..083623a689 100644 --- a/config/examples/polarfire_mpfs250.config +++ b/config/examples/polarfire_mpfs250.config @@ -68,7 +68,7 @@ CFLAGS_EXTRA+=-DBOOT_PART_B=2 # Speed up disk partition read (512KB chunks - max DMA size) CFLAGS_EXTRA+=-DDISK_BLOCK_SIZE=0x80000 -# DTS (Device Tree) +# DTS (Device Tree) load address WOLFBOOT_LOAD_DTS_ADDRESS?=0x8A000000 # Optional Encryption @@ -78,7 +78,7 @@ WOLFBOOT_LOAD_DTS_ADDRESS?=0x8A000000 #OBJS_EXTRA=src/my_custom_encrypt_key.o # Optional EMMC_SD debugging logs -#CFLAGS_EXTRA+=-DDEBUG_MMC +#CFLAGS_EXTRA+=-DDEBUG_SDHCI # Optional disk debugging logs #CFLAGS_EXTRA+=-DDEBUG_DISK #CFLAGS_EXTRA+=-DDISK_TEST diff --git a/config/examples/polarfire_mpfs250_hss_l2lim.config b/config/examples/polarfire_mpfs250_hss_l2lim.config new file mode 100644 index 0000000000..db69755240 --- /dev/null +++ b/config/examples/polarfire_mpfs250_hss_l2lim.config @@ -0,0 +1,92 @@ +ARCH?=RISCV64 +TARGET?=mpfs250 + +# ECC P384 + SHA384 +SIGN?=ECC384 +HASH?=SHA384 +IMAGE_HEADER_SIZE=512 + +WOLFBOOT_VERSION?=1 +ARMORED?=0 +DEBUG?=0 +DEBUG_SYMBOLS?=1 +DEBUG_UART?=1 +VTOR?=1 + +NO_XIP?=1 + +NVM_FLASH_WRITEONCE?=0 +UART_FLASH?=0 +V?=0 +NO_MPU?=1 +RAM_CODE?=0 +SPMATH?=1 +DUALBANK_SWAP?=0 +PKA?=0 +ENCRYPT=0 +WOLFTPM?=0 +ELF?=0 + +# U54 cores lack RISC-V crypto extensions (Zknh); use portable C implementations +NO_ASM?=1 + +# QSPI Flash Configuration +# Using Micron MT25QL01GBBB (128MB, 64KB sectors) +EXT_FLASH?=1 +SPI_FLASH?=0 + +# SPI Flash Controller Selection: +# MPFS_SC_SPI: Use SC QSPI Controller (0x37020100) for fabric-connected flash. 
+# Direct register access to System Controller's QSPI instance. +# DEFAULT: Use MSS QSPI Controller (0x21000000) for external flash +# on MSS QSPI pins. +CFLAGS_EXTRA+=-DMPFS_SC_SPI + +# L2-LIM mode: wolfBoot loaded by HSS to L2-LIM (no DDR) +# HSS runs on E51 from eNVM, loads wolfBoot to L2-LIM on U54 (S-mode) +# wolfBoot loads application from SC ext SPI flash to L2-LIM +MPFS_L2LIM?=1 + +# wolfBoot origin in L2-LIM (above HSS resident region) +# NOTE: Adjust based on HSS L2-LIM footprint. HSS typically uses ~200-400KB +# from 0x08000000. Check HSS linker map for actual end address. +WOLFBOOT_ORIGIN?=0x08040000 + +# Application loaded from QSPI to L2-LIM (above wolfBoot, 128KB offset) +WOLFBOOT_LOAD_ADDRESS?=0x08060000 + +# Stack at top of 1.5MB L2-LIM (grows downward) +WOLFBOOT_STACK_TOP?=0x08180000 + +# L2-LIM size available for wolfBoot (STACK_TOP - ORIGIN) +WOLFBOOT_L2LIM_SIZE?=0x140000 + +# Flash geometry (64 KB sector) +WOLFBOOT_SECTOR_SIZE?=0x10000 + +# Partition layout for 128MB QSPI flash +# Boot partition: 0x00020000 - 0x01FFFFFF (~32MB) +# Update partition: 0x02000000 - 0x03FFFFFF (~32MB) +# Swap partition: 0x04000000 - 0x0400FFFF (64KB) +# Remaining: 0x04010000 - 0x07FFFFFF (~64MB available) +WOLFBOOT_PARTITION_SIZE?=0x1FE0000 +WOLFBOOT_PARTITION_BOOT_ADDRESS?=0x20000 +WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0x2000000 +WOLFBOOT_PARTITION_SWAP_ADDRESS?=0x4000000 + +# Speed up reads from flash by using larger blocks +CFLAGS_EXTRA+=-DWOLFBOOT_SHA_BLOCK_SIZE=4096 + +# Optional Encryption +#CUSTOM_ENCRYPT_KEY=1 +#ENCRYPT=1 +#ENCRYPT_WITH_AES256=1 +#OBJS_EXTRA=src/my_custom_encrypt_key.o + +# Optional QSPI debugging +# Uncomment for verbose QSPI debug output +#CFLAGS_EXTRA+=-DDEBUG_QSPI + +# Optional QSPI flash test (erase/write/read on update partition) +# Uncomment to run test during hal_init() +#CFLAGS_EXTRA+=-DTEST_EXT_FLASH diff --git a/config/examples/polarfire_mpfs250_m_qspi.config b/config/examples/polarfire_mpfs250_m_qspi.config new file mode 100644 
index 0000000000..a4bd89c22d --- /dev/null +++ b/config/examples/polarfire_mpfs250_m_qspi.config @@ -0,0 +1,112 @@ +# PolarFire SoC MPFS250T M-Mode (Machine Mode) with SC QSPI Flash +# +# This configuration runs wolfBoot directly from eNVM in M-mode (Machine Mode), +# and boots a test application from SC QSPI flash to L2 Scratchpad (no DDR). +# +# Boot flow: +# 1. eNVM (0x20220100) -> L2_SCRATCH (0x0A000000) - wolfBoot starts +# 2. Load signed app image from SC QSPI flash to L2_SCRATCH (0x0A010200) +# 3. Verify signature and boot +# +# Flash using mpfsBootmodeProgrammer (bootmode 1): +# java -jar mpfsBootmodeProgrammer.jar --bootmode 1 --die MPFS250T \ +# --package FCG1152 --workdir $PWD wolfboot.elf + +ARCH?=RISCV64 +TARGET?=mpfs250 +SIGN?=ECC384 +HASH?=SHA384 +IMAGE_HEADER_SIZE=512 +WOLFBOOT_VERSION?=1 +ARMORED?=0 +DEBUG?=0 +DEBUG_SYMBOLS?=1 +DEBUG_UART?=1 +VTOR?=1 +NO_XIP?=1 +NVM_FLASH_WRITEONCE?=0 +UART_FLASH?=0 +V?=0 +NO_MPU?=1 +RAM_CODE?=0 +SPMATH?=0 +SPMATHALL?=1 +DUALBANK_SWAP?=0 +PKA?=0 +ENCRYPT=0 +WOLFTPM?=0 +ELF?=1 +#DEBUG_ELF?=1 + +OPTIMIZATION_LEVEL=1 + +# M-Mode Configuration +# Runs on E51 core in Machine Mode from L2 SRAM +RISCV_MMODE?=1 + +# Stack size per hart (reduced for L2 SRAM constraints) +CFLAGS_EXTRA+=-DSTACK_SIZE_PER_HART=8192 + +# E51 core lacks RISC-V crypto extensions (Zknh), use portable C implementations +NO_ASM?=1 + +# QSPI Flash Configuration +# Using Micron MT25QL01GBBB (128MB, 64KB sectors) +EXT_FLASH?=1 +SPI_FLASH?=0 + +# SPI Flash Controller Selection: +# MPFS_SC_SPI: Use SC QSPI Controller (0x37020100) for fabric-connected flash. +# Direct register access to System Controller's QSPI instance. +# DEFAULT: Use MSS QSPI Controller (0x21000000) for external flash +# on MSS QSPI pins. 
+CFLAGS_EXTRA+=-DMPFS_SC_SPI + +# No SD card or eMMC +DISK_SDCARD?=0 +DISK_EMMC?=0 + +# L2 SRAM Address for wolfBoot (256KB available) +# Stack grows down from end of L2_SCRATCH +WOLFBOOT_ORIGIN?=0x0A000000 + +# Load application to L2 Scratchpad (above wolfBoot code, below stack) +# wolfBoot occupies ~40KB at 0x0A000000, stack is 64KB at top of 256KB. +# Note: update_ram places header at (LOAD_ADDRESS - IMAGE_HEADER_SIZE), +# so offset by header size to keep header aligned. +# IMPORTANT: Strip debug symbols from test-app ELF before signing to keep +# the image small enough to fit in L2 Scratchpad (~150KB available). +WOLFBOOT_LOAD_ADDRESS?=0x0A010200 + +# Flash geometry (64 KB sector to match QSPI flash) +WOLFBOOT_SECTOR_SIZE?=0x10000 + +# Partition layout for 128MB QSPI flash +# Boot partition: 0x00020000 - 0x01FFFFFF (~32MB) +# Update partition: 0x02000000 - 0x03FFFFFF (~32MB) +# Swap partition: 0x04000000 - 0x0400FFFF (64KB) +WOLFBOOT_PARTITION_SIZE?=0x1FE0000 +WOLFBOOT_PARTITION_BOOT_ADDRESS?=0x20000 +WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0x2000000 +WOLFBOOT_PARTITION_SWAP_ADDRESS?=0x4000000 + +# Speed up reads from flash by using larger blocks +CFLAGS_EXTRA+=-DWOLFBOOT_SHA_BLOCK_SIZE=4096 + +# Debug options (useful for initial M-mode bring-up) +#CFLAGS_EXTRA+=-DDEBUG_BOOT + +# Optional QSPI debugging +# Uncomment for verbose QSPI debug output +#CFLAGS_EXTRA+=-DDEBUG_QSPI + +# Optional QSPI flash test (erase/write/read on update partition) +# Uncomment to run test during hal_init() +#CFLAGS_EXTRA+=-DTEST_EXT_FLASH + +# UART QSPI programmer (disabled by default) +# When enabled, wolfBoot prompts on UART at startup to receive a signed firmware +# image and write it to QSPI flash -- no Libero/JTAG tool required for updates. +# Use: python3 tools/scripts/mpfs_qspi_prog.py <port> <image.bin> [qspi_offset] +# Requires EXT_FLASH=1 (already set) and DEBUG_UART=1. 
+UART_QSPI_PROGRAM?=0 diff --git a/config/examples/polarfire_mpfs250_qspi.config b/config/examples/polarfire_mpfs250_qspi.config index 17741cc8b9..b12b3dc503 100644 --- a/config/examples/polarfire_mpfs250_qspi.config +++ b/config/examples/polarfire_mpfs250_qspi.config @@ -50,7 +50,6 @@ SPI_FLASH?=0 # on MSS QSPI pins. CFLAGS_EXTRA+=-DMPFS_SC_SPI - # Enable SD card temporarily (wolfBoot still loads from SD, apps from QSPI) # For pure QSPI boot, HSS would need to load wolfBoot from QSPI DISK_SDCARD?=0 @@ -101,3 +100,10 @@ CFLAGS_EXTRA+=-DWOLFBOOT_SHA_BLOCK_SIZE=4096 # Optional QSPI flash test (erase/write/read on update partition) # Uncomment to run test during hal_init() #CFLAGS_EXTRA+=-DTEST_EXT_FLASH + +# UART QSPI programmer (disabled by default) +# When enabled, wolfBoot prompts on UART at startup to receive a signed firmware +# image and write it to QSPI flash -- no Libero/JTAG tool required for updates. +# Use: python3 tools/scripts/mpfs_qspi_prog.py <port> <image.bin> [qspi_offset] +# Requires EXT_FLASH=1 (already set) and DEBUG_UART=1. 
+UART_QSPI_PROGRAM?=0 \ No newline at end of file diff --git a/docs/Targets.md b/docs/Targets.md index ac5973cfda..380ae474f9 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -795,13 +795,46 @@ The PolarFire SoC is a 64-bit RISC-V SoC featuring a five-core CPU cluster (1× * Low power consumption * External flash support +### Supported Boot Configurations + +Five ready-to-use config templates cover all supported boot mode / storage / memory combinations: + +| Configuration | Config File | Boot Mode | Storage | Memory | HSS | +|---------------|-------------|-----------|---------|--------|-----| +| **SDCard** | `polarfire_mpfs250.config` | S-mode (U54 via HSS) | SD Card | DDR | Yes | +| **eMMC** | `polarfire_mpfs250.config` + `DISK_EMMC=1` | S-mode (U54 via HSS) | eMMC | DDR | Yes | +| **QSPI (S-mode)** | `polarfire_mpfs250_qspi.config` | S-mode (U54 via HSS) | MSS or SC QSPI | DDR | Yes | +| **QSPI + L2-LIM** | `polarfire_mpfs250_hss_l2lim.config` | S-mode (U54 via HSS) | SC QSPI | L2-LIM (no DDR) | Yes | +| **M-Mode (no HSS)** | `polarfire_mpfs250_m_qspi.config` | M-mode (E51, no HSS) | SC QSPI | L2 Scratchpad | No | + +Key build settings that differ between configurations: + +| Setting | SDCard | eMMC | QSPI | L2-LIM | M-Mode | +|---------|--------|------|------|--------|--------| +| `WOLFBOOT_ORIGIN` | `0x80000000` | `0x80000000` | `0x80000000` | `0x08040000` | `0x0A000000` | +| `WOLFBOOT_LOAD_ADDRESS` | `0x8E000000` | `0x8E000000` | `0x8E000000` | `0x08060000` | `0x0A010200` | +| `EXT_FLASH` | 0 | 0 | 1 | 1 | 1 | +| `DISK_SDCARD` | 1 | 0 | 0 | 0 | 0 | +| `DISK_EMMC` | 0 | 1 | 0 | 0 | 0 | +| `MPFS_L2LIM` | – | – | – | 1 | – | +| `RISCV_MMODE` | – | – | – | – | 1 | +| Linker script | `mpfs250.ld` | `mpfs250.ld` | `mpfs250.ld` | `mpfs250-hss.ld` | `mpfs250-m.ld` | +| HSS YAML | `mpfs.yaml` | `mpfs.yaml` | `mpfs.yaml` | `mpfs-l2lim.yaml` | N/A | +| `ELF` output | 1 | 1 | 1 | 0 (raw .bin) | 1 | + +> **Note:** All configurations require `NO_ASM=1` because the 
MPFS250 U54/E51 cores lack RISC-V +> crypto extensions (Zknh); wolfBoot uses portable C implementations for all cryptographic operations. + ### PolarFire SoC Files -`hal/mpfs250.c` - Hardware abstraction layer implementation (UART and uSD) +`hal/mpfs250.c` - Hardware abstraction layer (UART, QSPI, SD/eMMC, multi-hart) `hal/mpfs250.h` - Register definitions and hardware interfaces -`hal/mpfs250.ld` - Linker script for the platform +`hal/mpfs250.ld` - Linker script for S-mode (HSS-based boot) +`hal/mpfs250-m.ld` - Linker script for M-mode (eNVM + L2 SRAM) +`hal/mpfs250-hss.ld` - Linker script for S-mode (HSS with L2-LIM) `hal/mpfs.dts` - Device tree source -`hal/mpfs.yaml` - HSS payload generator configuration +`hal/mpfs.yaml` - HSS payload generator configuration for use of DDR +`hal/mpfs-l2lim.yaml` - HSS payload generator for the use of L2-LIM `hal/mpfs250.its` - Example FIT image creation template ### PolarFire SoC Building wolfBoot @@ -903,6 +936,184 @@ Notes: - The MSS QSPI path expects external flash on the MSS QSPI pins; the SC QSPI path is for fabric-connected flash (design flash) accessed via the System Controller's QSPI instance. +### PolarFire SoC HSS S-Mode with L2-LIM (no DDR) + +wolfBoot can run in S-mode via HSS without DDR by targeting the on-chip **L2 Loosely Integrated +Memory (L2-LIM)**. HSS loads wolfBoot from SC QSPI flash into L2-LIM on a U54 application core, +and wolfBoot loads the signed application from SC QSPI into L2-LIM as well. This is useful for +early bring-up or power-constrained scenarios where DDR is not yet initialized. 
+ +**Features:** +* S-mode on U54 application core (hart 1), loaded by HSS +* wolfBoot and application both reside in L2-LIM (`0x08000000`, up to 1.5 MB) +* No DDR required +* SC QSPI flash for both wolfBoot payload and signed application image +* Raw binary output (`ELF=0`) required — ELF with debug symbols is too large for L2-LIM + +**Relevant files:** + +| File | Description | +|------|-------------| +| `config/examples/polarfire_mpfs250_hss_l2lim.config` | HSS S-mode + SC QSPI + L2-LIM | +| `hal/mpfs250-hss.ld` | Linker script for S-mode with L2-LIM | +| `hal/mpfs-l2lim.yaml` | HSS payload generator YAML for L2-LIM load target | + +**Build:** +```sh +cp config/examples/polarfire_mpfs250_hss_l2lim.config .config +make clean && make wolfboot.bin +dtc -I dts -O dtb hal/mpfs.dts -o hal/mpfs.dtb +hss-payload-generator -vvv -c ./hal/mpfs-l2lim.yaml wolfboot.bin +``` + +Flash the HSS payload to the eMMC/SD BIOS partition using HSS `USBDMSC`: +```sh +sudo dd if=wolfboot.bin of=/dev/sdc1 bs=512 && sudo cmp wolfboot.bin /dev/sdc1 +``` + +**Build and sign the test application:** +```sh +make test-app/image_v1_signed.bin +``` + +**Flash the signed application to QSPI:** +```sh +python3 tools/scripts/mpfs_qspi_prog.py /dev/ttyUSB1 \ + test-app/image_v1_signed.bin 0x20000 +``` + +**Notes:** +- `ELF=0` is required: the test-app linker script (`test-app/RISCV64-mpfs250.ld`) places `.init` + (containing `_reset()`) first so the raw binary entry point is at offset 0. The full ELF with + debug symbols exceeds L2-LIM capacity. +- wolfBoot is placed at `0x08040000` (above the HSS L2-LIM resident region) and the application + is loaded at `0x08060000`. The stack resides at the top of the 1.5 MB L2-LIM region. +- HSS must be built and programmed to eNVM separately (see [PolarFire Building Hart Software Services](#polarfire-building-hart-software-services-hss)). 
+- **LIM instruction fetch caveat:** Ensure `L2_WAY_ENABLE` leaves enough cache ways unallocated + to back the LIM SRAM region. See the M-mode section for a detailed explanation. +- UART output appears on MMUART1 (`/dev/ttyUSB1`), same as other S-mode configurations. + +### PolarFire SoC M-Mode (bare-metal eNVM boot) + +wolfBoot supports running directly in Machine Mode (M-mode) on PolarFire SoC, replacing the Hart +Software Services (HSS) as the first-stage bootloader. wolfBoot runs on the E51 monitor core from +eNVM and loads a signed application from SC QSPI flash into L2 Scratchpad (on-chip RAM) — no HSS +or DDR required. This is the simplest bring-up path. + +**Features:** +* Runs on E51 monitor core (hart 0) directly from eNVM +* Executes from L2 Scratchpad SRAM (256 KB at `0x0A000000`) +* Loads signed application from SC QSPI flash to L2 Scratchpad (`0x0A010200`) +* No HSS or DDR required — boots entirely from on-chip memory +* Wakes and manages secondary U54 harts via IPI +* Per-hart UART output (each hart uses its own MMUART) +* ECC384 + SHA384 signature verification + +**Relevant files:** + +| File | Description | +|------|-------------| +| `config/examples/polarfire_mpfs250_m_qspi.config` | M-mode + SC QSPI configuration | +| `hal/mpfs250-m.ld` | M-mode linker script (eNVM + L2 SRAM) | +| `hal/mpfs250.c` | HAL with QSPI driver, UART, L2 cache init | +| `src/boot_riscv_start.S` | M-mode assembly startup | + +**Boot flow:** +1. **eNVM reset vector** (`0x20220100`): CPU starts, startup code copies wolfBoot to L2 Scratchpad +2. **L2 Scratchpad execution** (`0x0A000000`): wolfBoot runs from scratchpad +3. **Hardware init**: L2 cache configuration, UART setup +4. **QSPI init**: SC QSPI controller (`0x37020100`), JEDEC ID read, 4-byte address mode +5. **Image load**: Read signed image from QSPI flash (`0x20000`) to L2 Scratchpad (`0x0A010200`) +6. 
**Verify & boot**: SHA384 integrity check, ECC384 signature verification, jump to app + +**Build:** +```sh +cp config/examples/polarfire_mpfs250_m_qspi.config .config +make clean && make wolfboot.elf +``` + +**Flash wolfBoot to eNVM** (requires SoftConsole / Libero SoC install): +```sh +export SC_INSTALL_DIR=/opt/Microchip/SoftConsole-v2022.2-RISC-V-747 + +$SC_INSTALL_DIR/eclipse/jre/bin/java -jar \ + $SC_INSTALL_DIR/extras/mpfs/mpfsBootmodeProgrammer.jar \ + --bootmode 1 --die MPFS250T --package FCG1152 --workdir $PWD wolfboot.elf +``` + +**Build and sign the test application:** +```sh +make test-app/image_v1_signed.bin +``` + +**Flash the signed application to QSPI** using the UART programmer (requires `EXT_FLASH=1` and +`UART_QSPI_PROGRAM=1` in `.config`, and `pyserial` installed): +```sh +python3 tools/scripts/mpfs_qspi_prog.py /dev/ttyUSB0 \ + test-app/image_v1_signed.bin 0x20000 +``` + +The script: +1. Waits for wolfBoot to print the `QSPI-PROG: Press 'P'` prompt (power-cycle the board) +2. Sends `P` to enter programming mode +3. Transfers the binary in 256-byte ACK-driven chunks +4. wolfBoot erases, writes, and then continues booting the new image + +Use `0x20000` for the boot partition and `0x02000000` for the update partition. + +**QSPI partition layout** (Micron MT25QL01G, 128 MB): + +| Region | Address | Size | +|--------|---------|------| +| Boot partition | `0x00020000` | ~32 MB | +| Update partition | `0x02000000` | ~32 MB | +| Swap partition | `0x04000000` | 64 KB | + +**UART mapping:** + +| Hart | Core | MMUART | USB device | +|------|------|--------|------------| +| 0 | E51 | MMUART0 | /dev/ttyUSB0 | +| 1 | U54_1 | MMUART1 | /dev/ttyUSB1 | +| 2 | U54_2 | MMUART2 | N/A | +| 3 | U54_3 | MMUART3 | N/A | +| 4 | U54_4 | MMUART4 | N/A | + +**Expected serial output on successful boot:** +``` +wolfBoot Version: 2.7.0 (...) 
+Running on E51 (hart 0) in M-mode +QSPI: Using SC QSPI Controller (0x37020100) +QSPI: Flash ID = 0x20 0xBA 0x21 +QSPI-PROG: Press 'P' within 3s to program flash +QSPI-PROG: No trigger (got 0x00 ...), booting +Versions: Boot 1, Update 0 +... +Firmware Valid +Booting at 0x... +``` + +**Notes:** +- The E51 is `rv64imac` (no FPU or crypto extensions). wolfBoot is compiled with `NO_ASM=1` to + use portable C crypto implementations and `-march=rv64imac -mabi=lp64` for correct code + generation. The `rdtime` CSR instruction is not available in bare-metal M-mode; wolfBoot uses a + calibrated busy-loop for all delays (`udelay()` in `hal/mpfs250.c`). +- `UART_QSPI_PROGRAM=1` adds a 3-second boot pause every time. Set to `0` once the flash + contents are stable. +- The config uses `WOLFBOOT_LOAD_ADDRESS=0x0A010200` to place the application in L2 Scratchpad + above wolfBoot code (~64 KB at `0x0A000000`), with the stack at the top of the 256 KB region. +- **LIM instruction fetch limitation:** The on-chip LIM (`0x08000000`, 2 MB) is backed by L2 + cache ways. When `L2_WAY_ENABLE` is set to `0x0B` (all cache ways 0–7 active for caching), + no ways remain for LIM backing SRAM. Data reads from LIM work through the L2 cache, but + instruction fetch silently hangs — the CPU stalls with no trap generated. For this reason the + application is loaded into L2 Scratchpad (`0x0A000000`), which is always accessible regardless + of `L2_WAY_ENABLE`. To use LIM, reduce `L2_WAY_ENABLE` to free cache ways for LIM backing. +- **Strip debug symbols** before signing the test-app ELF. The debug build is ~150 KB but the + stripped ELF is ~5 KB. L2 Scratchpad has ~150 KB available between wolfBoot code and the stack: + `riscv64-unknown-elf-strip --strip-debug test-app/image.elf` +- **DDR support:** DDR initialization is available on the `polarfire_ddr` branch for use cases + that require loading larger applications to DDR memory. 
+ ### PolarFire testing This section describes how to build the test-application, create a custom uSD with required partitions and copying signed test-application to uSD partitions. @@ -1363,13 +1574,6 @@ ML-DSA 87 verify 200 ops took 1.077 sec, avg 5.385 ms, 185.704 ops/ Benchmark complete ``` -### PolarFire TODO - -* Add support for full HSS replacement using wolfboot - - Machine level assembly startup - - DDR driver - - ## STM32F7 The STM32-F76x and F77x offer dual-bank hardware-assisted swapping. diff --git a/hal/mpfs-l2lim.yaml b/hal/mpfs-l2lim.yaml new file mode 100644 index 0000000000..05aa3348cc --- /dev/null +++ b/hal/mpfs-l2lim.yaml @@ -0,0 +1,21 @@ +# +# HSS Payload Generator - configuration file for wolfBoot on PolarFire SoC +# Boot target: L2-LIM (no DDR) via sNVM or QSPI HSS payload +# +# wolfBoot is loaded by HSS to L2-LIM at WOLFBOOT_ORIGIN (0x08040000). +# HSS remains resident in lower L2-LIM for SBI services (timer, IPI, etc.). +# wolfBoot runs on U54_1 in S-mode, loads application from SC QSPI flash. +# + +set-name: 'PolarFire-SoC-HSS::wolfBoot-L2LIM' +hart-entry-points: { + u54_1: '0x08040000', + u54_2: '0x08040000', + u54_3: '0x08040000', + u54_4: '0x08040000' +} +payloads: + wolfboot.elf: { + owner-hart: u54_1, + priv-mode: prv_s + } diff --git a/hal/mpfs250-hss.ld b/hal/mpfs250-hss.ld new file mode 100644 index 0000000000..fd628fbe27 --- /dev/null +++ b/hal/mpfs250-hss.ld @@ -0,0 +1,71 @@ +/* PolarFire SoC MPFS250 Linker Script for wolfBoot in L2-LIM (HSS boot) + * + * Boot sequence: HSS (eNVM/E51) loads wolfBoot into L2-LIM on U54 (S-mode). + * wolfBoot then loads application from SC ext SPI flash into L2-LIM. + * No DDR - HSS remains resident in lower L2-LIM for SBI services. 
+ * + * Memory layout (1.5MB L2-LIM: 0x08000000 - 0x0817FFFF): + * 0x08000000 - HSS resident runtime (~256KB) + * WOLFBOOT_ORIGIN (0x08040000) - wolfBoot code + data + * WOLFBOOT_LOAD_ADDRESS (0x08060000) - Application load area + * WOLFBOOT_STACK_TOP (0x08180000) - Stack grows downward + */ + +OUTPUT_ARCH( "riscv" ) + +ENTRY( _reset ) + +MEMORY +{ + /* L2-LIM on-chip SRAM (cache ways configured as LIM by HSS) + * HSS loads wolfBoot binary here directly - no VMA/LMA split needed */ + L2_LIM (rwx) : ORIGIN = @WOLFBOOT_ORIGIN@, LENGTH = @WOLFBOOT_L2LIM_SIZE@ +} + +PROVIDE(STACK_SIZE_PER_HART = 16k); + +SECTIONS +{ + .text : ALIGN(0x10) + { + _start_text = .; + KEEP(*(.init)) + . = ORIGIN(L2_LIM) + 0x100; + _start_vector = .; + KEEP(*(.isr_vector)) + . = ALIGN(0x10); + *(.text*) + *(.rodata*) + *(.srodata*) + . = ALIGN(4); + _end_text = .; + } > L2_LIM + + .data : ALIGN(0x10) + { + _start_data = .; + KEEP(*(.ramcode*)) + . = ALIGN(4); + KEEP(*(.keystore*)) + . = ALIGN(4); + *(.data*) + . = ALIGN(4); + _global_pointer = . + 0x800; + *(.sdata*) + . = ALIGN(4); + _end_data = .; + } > L2_LIM + + .bss (NOLOAD) : ALIGN(0x10) + { + _start_bss = .; + *(.bss*) + *(COMMON) + . = ALIGN(4); + _end_bss = .; + _end = .; + } > L2_LIM +} + +PROVIDE(_start_heap = _end); +PROVIDE(_end_stack = ORIGIN(L2_LIM) + LENGTH(L2_LIM)); diff --git a/hal/mpfs250-m.ld b/hal/mpfs250-m.ld new file mode 100644 index 0000000000..11d6f033d8 --- /dev/null +++ b/hal/mpfs250-m.ld @@ -0,0 +1,146 @@ +/* PolarFire SoC MPFS250 M-Mode Linker Script for wolfBoot + * + * This linker script is for running wolfBoot in Machine Mode (M-mode) + * directly from eNVM, executing from L2 SRAM. + * + * Boot flow: + * 1. CPU starts at eNVM reset vector (0x20220100) + * 2. Startup code in eNVM copies main code to L2_SCRATCH + * 3. Jumps to L2_SCRATCH for execution + * + * The first 0x100 bytes of eNVM are reserved for the boot ROM secure boot + * meta information added by mpfsBootmodeProgrammer. 
+ * + * Memory regions: + * FLASH_ENVM - Embedded NVM (128KB - 0x100 for header) + * L2_SCRATCH - L2 Scratchpad SRAM (256KB) - execution and data + */ + +OUTPUT_ARCH( "riscv" ) + +ENTRY( _reset ) + +MEMORY +{ + /* The first 0x100 bytes of eNVM are used for boot ROM secure boot meta information + * This offset is added by mpfsBootmodeProgrammer (bootmode 1) */ + FLASH_ENVM (rx) : ORIGIN = 0x20220100, LENGTH = 128k - 0x100 + + /* L2 Scratchpad SRAM - 256KB available + * Used for code execution, data, and stack in M-mode + * Address range: 0x0A000000 - 0x0A03FFFF */ + L2_SCRATCH (rwx) : ORIGIN = @WOLFBOOT_ORIGIN@, LENGTH = 256k +} + +/* Stack size for the boot hart (E51 in M-mode) + * ECC384 signature verification with sp_int needs significant stack + * for big number temporaries and point multiplication */ +PROVIDE(STACK_SIZE = 64k); + +SECTIONS +{ + /* + * Reset vector and early initialization code + * This section MUST be in eNVM (VMA = LMA) since CPU starts here. + * It copies the main code to L2_SCRATCH and jumps there. + */ + .init : ALIGN(0x10) + { + _start_text = .; + KEEP(*(.init)) + . = ALIGN(0x10); + } > FLASH_ENVM + + /* + * Main code section - runs from L2_SCRATCH, stored in FLASH_ENVM + * The .init code will copy this section to L2_SCRATCH before jumping here. + */ + .text : ALIGN(0x10) + { + _start_text_sram = .; + _start_vector = .; + KEEP(*(.isr_vector)) + KEEP(*(.trap_vector)) + . = ALIGN(0x10); + *(.text*) + *(.rodata*) + *(.srodata*) + . = ALIGN(8); + _end_text = .; + } > L2_SCRATCH AT > FLASH_ENVM + + /* Provide load address for copying from flash */ + _stored_text = LOADADDR(.text); + _stored_data = LOADADDR(.data); + + /* Initialized data section */ + .data : ALIGN(0x10) + { + _start_data = .; + KEEP(*(.ramcode*)) + . = ALIGN(4); + *(.data*) + . = ALIGN(4); + /* Global pointer is set to .sdata + 0x800 for efficient access + * to small data using gp-relative addressing (+/- 2KB range) */ + _global_pointer = . + 0x800; + *(.sdata*) + . 
= ALIGN(4); + /* Public key store - must be in a copied section so it's available + * in L2 SRAM after startup copies .data from eNVM */ + KEEP(*(.keystore*)) + . = ALIGN(4); + _end_data = .; + } > L2_SCRATCH AT > FLASH_ENVM + + /* Uninitialized data section (cleared to zero on startup) */ + .bss (NOLOAD) : ALIGN(0x10) + { + _start_bss = .; + *(.bss*) + *(.sbss*) + *(COMMON) + . = ALIGN(4); + _end_bss = .; + _end = .; + } > L2_SCRATCH +} + +/* Heap starts after BSS (between _end and stack) */ +PROVIDE(_start_heap = _end); + +/* Stack configuration for multi-hart boot + * Memory layout at end of L2_SCRATCH: + * [code/data/bss/heap] ... [secondary stacks] [main stack] + * + * Stack sizes (defined in config or header): + * STACK_SIZE_PER_HART = 8192 (8KB per hart) + * STACK_SIZE = 64K (64KB for main hart E51) + * + * Total stack area: STACK_SIZE + 4 * STACK_SIZE_PER_HART = 64KB + 32KB = 96KB + */ +PROVIDE(STACK_SIZE_PER_HART = 8192); + +/* End of L2 scratchpad */ +PROVIDE(_l2_scratch_end = ORIGIN(L2_SCRATCH) + LENGTH(L2_SCRATCH)); + +/* Main hart (E51) stack at very end, grows downward */ +PROVIDE(_end_stack = _l2_scratch_end); +PROVIDE(_main_hart_stack_top = _end_stack); +PROVIDE(_main_hart_stack_bottom = _main_hart_stack_top - STACK_SIZE); + +/* Main hart HLS location (at top of main stack minus 64 bytes) */ +PROVIDE(_main_hart_hls = _main_hart_stack_top - 64); + +/* Secondary hart stacks below main hart stack + * Hart 1 stack: _main_hart_stack_bottom - STACK_SIZE_PER_HART * 0 to - STACK_SIZE_PER_HART * 1 + * Hart 2 stack: _main_hart_stack_bottom - STACK_SIZE_PER_HART * 1 to - STACK_SIZE_PER_HART * 2 + * etc.
+ */ +PROVIDE(_secondary_hart_stack_base = _main_hart_stack_bottom - 4 * STACK_SIZE_PER_HART); + +/* Provide symbols for M-mode startup code */ +PROVIDE(__global_pointer$ = _global_pointer); + +/* Size of text section to copy (for startup code) */ +PROVIDE(_text_size = _end_text - _start_text_sram); diff --git a/hal/mpfs250.c b/hal/mpfs250.c index de65b6f672..1df72b5343 100644 --- a/hal/mpfs250.c +++ b/hal/mpfs250.c @@ -46,159 +46,211 @@ #include "gpt.h" #include "fdt.h" +/* UART base addresses for per-hart access (LO addresses, M-mode compatible) */ +const unsigned long MSS_UART_BASE_ADDR[5] = { + MSS_UART0_LO_BASE, /* Hart 0 (E51) */ + MSS_UART1_LO_BASE, /* Hart 1 (U54_1) */ + MSS_UART2_LO_BASE, /* Hart 2 (U54_2) */ + MSS_UART3_LO_BASE, /* Hart 3 (U54_3) */ + MSS_UART4_LO_BASE, /* Hart 4 (U54_4) */ +}; + #if defined(DISK_SDCARD) || defined(DISK_EMMC) #include "sdhci.h" + +/* Forward declaration of SDHCI IRQ handler */ +extern void sdhci_irq_handler(void); #endif +/* Video Kit DDR/Clock configuration is included in mpfs250.h */ + +/* Configure L2 cache: write 0x0B to WAY_ENABLE, clear the L2 shutdown register and set way masks for all masters */ +#ifdef WOLFBOOT_RISCV_MMODE +static void mpfs_config_l2_cache(void) +{ + L2_WAY_ENABLE = 0x0B; /* matches DDR demo config. NOTE(review): WayEnable is documented as the highest enabled way index (0x0B => ways 0-11), not a bitmask of ways 0,1,3 — confirm */ + SYSREG_L2_SHUTDOWN_CR = 0; + L2_WAY_MASK_DMA = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_AXI4_PORT0 = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_AXI4_PORT1 = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_AXI4_PORT2 = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_AXI4_PORT3 = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_E51_DCACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_E51_ICACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_1_DCACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_1_ICACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_2_DCACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_2_ICACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_3_DCACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_3_ICACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_4_DCACHE =
L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_4_ICACHE = L2_WAY_MASK_CACHE_ONLY; + __asm__ volatile("fence iorw, iorw" ::: "memory"); +} + +/* Busy-loop delay — MTIME not running in M-mode without HSS. + * E51 at 80 MHz reset: ~8 iters/us accounting for loop overhead. */ +static __attribute__((noinline)) void udelay(uint32_t us) +{ + volatile uint32_t i; + for (i = 0; i < us * 8; i++) + ; +} + +#endif /* WOLFBOOT_RISCV_MMODE */ + + +/* Multi-Hart Support */ +#ifdef WOLFBOOT_RISCV_MMODE + +extern uint8_t _main_hart_hls; /* linker-provided address symbol; typed as uint8_t to avoid size confusion */ + +/* CLINT MSIP register for IPI delivery */ +#define CLINT_MSIP_REG(hart) (*(volatile uint32_t*)(CLINT_BASE + (hart) * 4)) + +/* Signal secondary harts that E51 (main hart) is ready. */ +static void mpfs_signal_main_hart_started(void) +{ + HLS_DATA* hls = (HLS_DATA*)&_main_hart_hls; + hls->in_wfi_indicator = HLS_MAIN_HART_STARTED; + hls->my_hart_id = MPFS_FIRST_HART; + __asm__ volatile("fence iorw, iorw" ::: "memory"); +} + +/* Wake secondary U54 harts by sending software IPIs via CLINT MSIP. */ +int mpfs_wake_secondary_harts(void) +{ + int hart_id; + int woken_count = 0; + + wolfBoot_printf("Waking secondary harts...\n"); + for (hart_id = MPFS_FIRST_U54_HART; hart_id <= MPFS_LAST_U54_HART; hart_id++) { + CLINT_MSIP_REG(hart_id) = 0x01; + __asm__ volatile("fence iorw, iorw" ::: "memory"); + udelay(1000); + woken_count++; + } + wolfBoot_printf("Woke %d secondary harts\n", woken_count); + return woken_count; +} + +/* Secondary hart (U54) entry: init per-hart UART and spin in WFI for Linux/SBI. 
*/ +void secondary_hart_entry(unsigned long hartid, HLS_DATA* hls) +{ + char msg[] = "Hart X: Woken, waiting for Linux boot...\n"; + (void)hls; + uart_init_hart(hartid); + msg[5] = '0' + (char)hartid; + uart_write_hart(hartid, msg, sizeof(msg) - 1); + while (1) + __asm__ volatile("wfi"); +} +#endif /* WOLFBOOT_RISCV_MMODE */ + #if defined(EXT_FLASH) && defined(TEST_EXT_FLASH) && defined(__WOLFBOOT) static int test_ext_flash(void); #endif +#if defined(EXT_FLASH) && defined(UART_QSPI_PROGRAM) && defined(__WOLFBOOT) +static void qspi_uart_program(void); +#endif void hal_init(void) { -#if defined(DEBUG_UART) && defined(__WOLFBOOT) +#ifdef WOLFBOOT_RISCV_MMODE + mpfs_config_l2_cache(); + mpfs_signal_main_hart_started(); +#endif + +#ifdef DEBUG_UART + SYSREG_SUBBLK_CLOCK_CR |= (MSS_PERIPH_MMUART0 << DEBUG_UART_PORT); + SYSREG_SOFT_RESET_CR &= ~(MSS_PERIPH_MMUART0 << DEBUG_UART_PORT); + uart_init(); +#endif + #ifdef WOLFBOOT_REPRODUCIBLE_BUILD wolfBoot_printf("wolfBoot Version: %s\n", LIBWOLFBOOT_VERSION_STRING); #else wolfBoot_printf("wolfBoot Version: %s (%s %s)\n", - LIBWOLFBOOT_VERSION_STRING,__DATE__, __TIME__); + LIBWOLFBOOT_VERSION_STRING, __DATE__, __TIME__); #endif + +#ifdef WOLFBOOT_RISCV_MMODE + wolfBoot_printf("Running on E51 (hart 0) in M-mode\n"); #endif #ifdef EXT_FLASH if (qspi_init() != 0) { wolfBoot_printf("QSPI: Init failed\n"); - } + } else { #if defined(TEST_EXT_FLASH) && defined(__WOLFBOOT) - else { test_ext_flash(); - } #endif +#if defined(UART_QSPI_PROGRAM) && defined(__WOLFBOOT) + qspi_uart_program(); +#endif + } #endif /* EXT_FLASH */ } -/* ============================================================================ - * System Controller Mailbox Functions - * - * The MPFS system controller provides various system services via a mailbox - * interface. Commands are sent by writing the opcode to the control register - * and responses are read from the mailbox RAM. 
- * ============================================================================ */ - -/* Wait for SCB register bits to clear, with timeout */ -static int mpfs_scb_wait_clear(uint32_t reg_offset, uint32_t mask, - uint32_t timeout) -{ - while ((SCBCTRL_REG(reg_offset) & mask) && --timeout) - ; - return (timeout == 0) ? -1 : 0; -} - -int mpfs_scb_read_mailbox(uint8_t *out, uint32_t len) -{ - uint32_t i; - - if (out == NULL) { - return -1; - } - - for (i = 0; i < len; i++) { - out[i] = SCBMBOX_BYTE(i); - } - - return 0; -} +/* System Controller Mailbox */ -static void mpfs_scb_write_mailbox(const uint8_t *data, uint32_t len) +static int mpfs_scb_mailbox_busy(void) { - uint32_t i = 0; - - if (data == NULL || len == 0) { - return; - } - - /* Write full words (little-endian) */ - while (i + 4 <= len) { - uint32_t word = ((uint32_t)data[i]) | - ((uint32_t)data[i + 1] << 8) | - ((uint32_t)data[i + 2] << 16) | - ((uint32_t)data[i + 3] << 24); - SCBMBOX_REG(i) = word; - i += 4; - } - - /* Write remaining bytes */ - while (i < len) { - SCBMBOX_BYTE(i) = data[i]; - i++; - } + return (SCBCTRL_REG(SERVICES_SR_OFFSET) & SERVICES_SR_BUSY_MASK); } -int mpfs_scb_service_call(uint8_t opcode, const uint8_t *mb_data, - uint32_t mb_len, uint32_t timeout) +/* Read 16-byte device serial number via SCB system service (opcode 0x00). 
*/ +int mpfs_read_serial_number(uint8_t *serial) { - uint32_t cmd; - uint32_t status; + uint32_t cmd, status; + int i, timeout; - if (mpfs_scb_wait_clear(SERVICES_SR_OFFSET, SERVICES_SR_BUSY_MASK, 1)) { + if (serial == NULL) { return -1; } - if (mb_data && mb_len > 0) { - mpfs_scb_write_mailbox(mb_data, mb_len); + /* Check if mailbox is busy */ + if (mpfs_scb_mailbox_busy()) { + wolfBoot_printf("SCB mailbox busy\n"); + return -2; } - cmd = ((opcode & 0x7F) << SERVICES_CR_COMMAND_SHIFT) | + /* Send serial number request command (opcode 0x00) + * Command format: [31:16] = opcode, [0] = request bit */ + cmd = (SYS_SERV_CMD_SERIAL_NUMBER << SERVICES_CR_COMMAND_SHIFT) | SERVICES_CR_REQ_MASK; SCBCTRL_REG(SERVICES_CR_OFFSET) = cmd; - if (mpfs_scb_wait_clear(SERVICES_CR_OFFSET, SERVICES_CR_REQ_MASK, - timeout) < 0) { - return -2; + /* Wait for request bit to clear (command accepted) */ + timeout = MPFS_SCB_TIMEOUT; + while ((SCBCTRL_REG(SERVICES_CR_OFFSET) & SERVICES_CR_REQ_MASK) && timeout > 0) { + timeout--; } - - if (mpfs_scb_wait_clear(SERVICES_SR_OFFSET, SERVICES_SR_BUSY_MASK, - timeout) < 0) { + if (timeout == 0) { + wolfBoot_printf("SCB mailbox request timeout\n"); return -3; } - status = (SCBCTRL_REG(SERVICES_SR_OFFSET) >> SERVICES_SR_STATUS_SHIFT) - & 0xFFFF; - if (status != 0) { - return -4; + /* Wait for busy bit to clear (command completed) */ + timeout = MPFS_SCB_TIMEOUT; + while (mpfs_scb_mailbox_busy() && timeout > 0) { + timeout--; } - - return 0; -} - -/** - * mpfs_read_serial_number - Read the device serial number via system services - * @serial: Buffer to store the 16-byte device serial number - * - * This function sends a serial number request (opcode 0x00) to the system - * controller and reads the 16-byte response from the mailbox RAM. 
- * - * Returns: 0 on success, negative error code on failure - */ -int mpfs_read_serial_number(uint8_t *serial) -{ - int ret; - - if (serial == NULL) { - return -1; + if (timeout == 0) { + wolfBoot_printf("SCB mailbox busy timeout\n"); + return -4; } - ret = mpfs_scb_service_call(SYS_SERV_CMD_SERIAL_NUMBER, NULL, 0, - MPFS_SCB_TIMEOUT); - if (ret != 0) { - wolfBoot_printf("SCB mailbox error: %d\n", ret); - return ret; + /* Check status (upper 16 bits of status register) */ + status = (SCBCTRL_REG(SERVICES_SR_OFFSET) >> SERVICES_SR_STATUS_SHIFT) & 0xFFFF; + if (status != 0) { + wolfBoot_printf("SCB mailbox error: 0x%x\n", status); + return -5; } - /* Read serial number from mailbox RAM (16 bytes). */ - ret = mpfs_scb_read_mailbox(serial, DEVICE_SERIAL_NUMBER_SIZE); - if (ret != 0) { - return ret; + /* Read serial number from mailbox RAM (16 bytes) */ + for (i = 0; i < DEVICE_SERIAL_NUMBER_SIZE; i++) { + serial[i] = SCBMBOX_BYTE(i); } return 0; @@ -316,9 +368,10 @@ int hal_dts_fixup(void* dts_addr) return 0; } - void hal_prepare_boot(void) { + /* reset the eMMC/SD card? */ + } @@ -347,6 +400,16 @@ int RAMFUNCTION hal_flash_erase(uint32_t address, int len) return 0; } + +/* Wait for SCB register bits to clear, with timeout */ +static int mpfs_scb_wait_clear(uint32_t reg_offset, uint32_t mask, + uint32_t timeout) +{ + while ((SCBCTRL_REG(reg_offset) & mask) && --timeout) + ; + return (timeout == 0) ? 
-1 : 0; +} + #ifdef EXT_FLASH /* ========================================================================== * QSPI Flash Controller Implementation @@ -357,13 +420,14 @@ int RAMFUNCTION hal_flash_erase(uint32_t address, int len) * ========================================================================== */ /* Microsecond delay using RISC-V time CSR (1 MHz tick rate) */ +#ifndef WOLFBOOT_RISCV_MMODE static void udelay(uint32_t us) { uint64_t start = csr_read(time); while ((uint64_t)(csr_read(time) - start) < us) ; } - +#endif /* Forward declarations */ static int qspi_transfer_block(uint8_t read_mode, const uint8_t *cmd, uint32_t cmd_len, uint8_t *data, @@ -819,6 +883,186 @@ int ext_flash_erase(uintptr_t address, int len) return total; } +/* ============================================================================ + * UART QSPI Programmer + * + * Allows programming the QSPI flash over the debug UART without a JTAG/Libero + * tool. Enabled at build time with UART_QSPI_PROGRAM=1 in the .config. + * + * Protocol (after wolfBoot prints the "QSPI-PROG" prompt): + * 1. Host sends 'P' within the timeout window to enter programming mode + * 2. wolfBoot sends "READY\r\n" + * 3. Host sends [4-byte LE QSPI address][4-byte LE data length] + * 4. wolfBoot erases required sectors, sends "ERASED\r\n" + * 5. For each 256-byte chunk: + * wolfBoot sends ACK byte (0x06) -> host sends chunk -> wolfBoot writes + * 6. wolfBoot sends "DONE\r\n" and continues normal boot + * + * Host side: tools/scripts/mpfs_qspi_prog.py + * ============================================================================ */ +#if defined(UART_QSPI_PROGRAM) && defined(__WOLFBOOT) + +#define QSPI_PROG_CHUNK 256 +#define QSPI_PROG_ACK 0x06 +#define QSPI_RX_TIMEOUT_MS 5000U /* 5 s per byte — aborts if host disappears */ + +/* Returns 0-255 on success, -1 on timeout (so the boot path is never deadlocked). 
*/ +static int uart_qspi_rx(void) +{ + uint32_t t; + for (t = 0; t < QSPI_RX_TIMEOUT_MS; t++) { + if (MMUART_LSR(DEBUG_UART_BASE) & MSS_UART_DR) + return (int)(uint8_t)MMUART_RBR(DEBUG_UART_BASE); + udelay(1000); + } + return -1; /* timeout */ +} + +static void uart_qspi_tx(uint8_t c) +{ + while (!(MMUART_LSR(DEBUG_UART_BASE) & MSS_UART_THRE)) + ; + MMUART_THR(DEBUG_UART_BASE) = c; +} + +static void uart_qspi_puts(const char *s) +{ + while (*s) + uart_qspi_tx((uint8_t)*s++); +} + +static void qspi_uart_program(void) +{ + uint8_t ch = 0; + uint32_t addr, size, n_sectors, written, t; + uint32_t i, s; + uint8_t chunk[QSPI_PROG_CHUNK]; + + wolfBoot_printf("QSPI-PROG: Press 'P' within 3s to program flash\r\n"); + + /* Drain any stale RX bytes before opening the window */ + while (MMUART_LSR(DEBUG_UART_BASE) & MSS_UART_DR) + (void)MMUART_RBR(DEBUG_UART_BASE); + + /* Wait up to 3s: 3000 iterations of 1ms each */ + for (t = 0; t < 3000U; t++) { + udelay(1000); + if (MMUART_LSR(DEBUG_UART_BASE) & MSS_UART_DR) { + ch = MMUART_RBR(DEBUG_UART_BASE); + break; + } + } + + if (ch != 'P' && ch != 'p') { + wolfBoot_printf("QSPI-PROG: No trigger (got 0x%02x LSR=0x%02x), booting\r\n", + (unsigned)ch, + (unsigned)MMUART_LSR(DEBUG_UART_BASE)); + return; + } + + wolfBoot_printf("QSPI-PROG: Entering programmer mode\r\n"); + uart_qspi_puts("READY\r\n"); + + /* Receive destination address then data length (4 bytes LE each) */ + addr = 0; + for (i = 0; i < 4; i++) { + int b = uart_qspi_rx(); + if (b < 0) { + wolfBoot_printf("QSPI-PROG: RX timeout receiving addr\r\n"); + return; + } + addr |= ((uint32_t)(uint8_t)b << (i * 8)); + } + size = 0; + for (i = 0; i < 4; i++) { + int b = uart_qspi_rx(); + if (b < 0) { + wolfBoot_printf("QSPI-PROG: RX timeout receiving size\r\n"); + return; + } + size |= ((uint32_t)(uint8_t)b << (i * 8)); + } + + wolfBoot_printf("QSPI-PROG: addr=0x%x size=%u bytes\r\n", addr, size); + + if (size == 0 || size > 0x200000U) { + wolfBoot_printf("QSPI-PROG: Invalid size, 
aborting\r\n"); + return; + } + + /* Reject unaligned or out-of-partition ranges before any erase; addr/size come from the UART host, so the range test avoids addr + size, which could wrap uint32_t and pass the check */ + if ((addr & (FLASH_SECTOR_SIZE - 1U)) != 0U) { + wolfBoot_printf("QSPI-PROG: addr 0x%x not sector-aligned, aborting\r\n", addr); + return; + } + if (!((addr >= WOLFBOOT_PARTITION_BOOT_ADDRESS && size <= WOLFBOOT_PARTITION_SIZE && + addr - WOLFBOOT_PARTITION_BOOT_ADDRESS <= WOLFBOOT_PARTITION_SIZE - size) || + (addr >= WOLFBOOT_PARTITION_UPDATE_ADDRESS && size <= WOLFBOOT_PARTITION_SIZE && + addr - WOLFBOOT_PARTITION_UPDATE_ADDRESS <= WOLFBOOT_PARTITION_SIZE - size))) { + wolfBoot_printf("QSPI-PROG: addr 0x%x+%u outside allowed partitions, aborting\r\n", + addr, size); + return; + } + + /* Erase all required sectors (FLASH_SECTOR_SIZE = 64 KB) */ + n_sectors = (size + FLASH_SECTOR_SIZE - 1) / FLASH_SECTOR_SIZE; + wolfBoot_printf("QSPI-PROG: Erasing %u sector(s) at 0x%x...\r\n", + n_sectors, addr); + ext_flash_unlock(); + for (s = 0; s < n_sectors; s++) { + int ret = ext_flash_erase(addr + s * FLASH_SECTOR_SIZE, + FLASH_SECTOR_SIZE); + if (ret < 0) { + wolfBoot_printf("QSPI-PROG: Erase failed at 0x%x (ret %d)\r\n", + addr + s * FLASH_SECTOR_SIZE, ret); + ext_flash_lock(); + return; + } + } + + /* "ERASED\r\n" must be the last bytes before the first ACK (0x06). + * Do not insert any wolfBoot_printf between here and the transfer loop.
*/ + uart_qspi_puts("ERASED\r\n"); + + /* Chunk transfer: wolfBoot requests each 256-byte block with ACK 0x06 */ + written = 0; + while (written < size) { + int ret; + uint32_t chunk_len = size - written; + if (chunk_len > QSPI_PROG_CHUNK) + chunk_len = QSPI_PROG_CHUNK; + + uart_qspi_tx(QSPI_PROG_ACK); /* request next chunk */ + + for (i = 0; i < chunk_len; i++) { + int b = uart_qspi_rx(); + if (b < 0) { + wolfBoot_printf("QSPI-PROG: RX timeout at 0x%x+%u\r\n", + addr + written, i); + ext_flash_lock(); + return; + } + chunk[i] = (uint8_t)b; + } + + ret = ext_flash_write(addr + written, chunk, (int)chunk_len); + if (ret < 0) { + wolfBoot_printf("QSPI-PROG: Write failed at 0x%x (ret %d)\r\n", + addr + written, ret); + ext_flash_lock(); + return; + } + written += chunk_len; + } + ext_flash_lock(); + + wolfBoot_printf("QSPI-PROG: Wrote %u bytes to 0x%x\r\n", written, addr); + uart_qspi_puts("DONE\r\n"); + wolfBoot_printf("QSPI-PROG: Done, continuing boot\r\n"); +} + +#endif /* UART_QSPI_PROGRAM */ + /* Test for external QSPI flash erase/write/read */ #ifdef TEST_EXT_FLASH @@ -940,44 +1184,20 @@ void* hal_get_dts_address(void) } #endif -#if defined(DISK_SDCARD) || defined(DISK_EMMC) -/* ============================================================================ - * SDHCI Platform HAL Implementation - * ============================================================================ */ - -/* Register access functions for generic SDHCI driver */ -uint32_t sdhci_reg_read(uint32_t offset) +/* PLIC: E51(hart 0)->ctx 0 (M-mode only); U54(1-4)->ctx hart*2-1 (M), hart*2 (S) */ +#ifdef WOLFBOOT_RISCV_MMODE +uint32_t plic_get_context(void) { - return *((volatile uint32_t*)(EMMC_SD_BASE + offset)); + uint32_t hart_id; + __asm__ volatile("csrr %0, mhartid" : "=r"(hart_id)); + return (hart_id == 0) ? 
0 : (hart_id * 2) - 1; } - -void sdhci_reg_write(uint32_t offset, uint32_t val) -{ - *((volatile uint32_t*)(EMMC_SD_BASE + offset)) = val; -} -#endif /* DISK_SDCARD || DISK_EMMC */ - -/* ============================================================================ - * PLIC - Platform-Level Interrupt Controller (MPFS250-specific) - * - * Generic PLIC functions are in src/boot_riscv.c - * Platform must provide: - * - plic_get_context(): Map current hart to PLIC context - * - plic_dispatch_irq(): Dispatch IRQ to appropriate handler - * ============================================================================ */ - -/* Get the PLIC context for the current hart in S-mode */ +#else extern unsigned long get_boot_hartid(void); uint32_t plic_get_context(void) { - uint32_t hart_id = get_boot_hartid(); - /* Get S-mode context for a given hart (1-4 for U54 cores) */ - return hart_id * 2; + return (uint32_t)get_boot_hartid() * 2; } - -/* Forward declaration of SDHCI IRQ handler */ -#if defined(DISK_SDCARD) || defined(DISK_EMMC) -extern void sdhci_irq_handler(void); #endif /* Dispatch IRQ to appropriate platform handler */ @@ -996,18 +1216,13 @@ void plic_dispatch_irq(uint32_t irq) } #if defined(DISK_SDCARD) || defined(DISK_EMMC) -/* ============================================================================ - * SDHCI Platform HAL Functions - * ============================================================================ */ +/* SDHCI Platform HAL */ -/* Platform initialization - called from sdhci_init() */ void sdhci_platform_init(void) { - /* Release MMC controller from reset */ - SYSREG_SOFT_RESET_CR &= ~SYSREG_SOFT_RESET_CR_MMC; + SYSREG_SOFT_RESET_CR &= ~MSS_PERIPH_MMC; } -/* Platform interrupt setup - called from sdhci_init() */ void sdhci_platform_irq_init(void) { /* Set priority for MMC main interrupt */ @@ -1025,109 +1240,69 @@ void sdhci_platform_irq_init(void) #endif } -/* Platform bus mode selection - called from sdhci_init() */ void sdhci_platform_set_bus_mode(int 
is_emmc) { (void)is_emmc; - /* Nothing additional needed for MPFS - mode is set in generic driver */ } -#endif /* DISK_SDCARD || DISK_EMMC */ -/* ============================================================================ - * DEBUG UART Functions - * ============================================================================ */ +uint32_t sdhci_reg_read(uint32_t offset) +{ + return *((volatile uint32_t*)(EMMC_SD_BASE + offset)); +} -#ifdef DEBUG_UART +void sdhci_reg_write(uint32_t offset, uint32_t val) +{ + *((volatile uint32_t*)(EMMC_SD_BASE + offset)) = val; +} +#endif /* DISK_SDCARD || DISK_EMMC */ -#ifndef DEBUG_UART_BASE -#define DEBUG_UART_BASE MSS_UART1_LO_BASE -#endif +/* DEBUG UART */ +#ifdef DEBUG_UART -/* Configure baud divisors with fractional baud rate support. - * - * UART baud rate divisor formula: divisor = PCLK / (baudrate * 16) - * - * To support fractional divisors (6-bit, 0-63), we scale up the calculation: - * divisor_x128 = (PCLK * 8) / baudrate (128x scaled for rounding precision) - * divisor_x64 = divisor_x128 / 2 (64x scaled for 6-bit fractional) - * integer_div = divisor_x64 / 64 (integer portion of divisor) - * frac_div = divisor_x64 % 64 (fractional portion, 0-63) - * - * The fractional part is then adjusted using the x128 value for rounding. - */ -static void uart_config_clk(uint32_t baudrate) +/* Baud divisor: integer = PCLK/(baudrate*16), fractional (0-63) via 128x scaling. 
*/ +static void uart_config_baud(unsigned long base, uint32_t baudrate) { const uint64_t pclk = MSS_APB_AHB_CLK; - - /* Scale up for precision: (PCLK * 128) / (baudrate * 16) */ uint32_t div_x128 = (uint32_t)((8UL * pclk) / baudrate); uint32_t div_x64 = div_x128 / 2u; - - /* Extract integer and fractional parts */ uint32_t div_int = div_x64 / 64u; uint32_t div_frac = div_x64 - (div_int * 64u); - - /* Apply rounding correction from x128 calculation */ div_frac += (div_x128 - (div_int * 128u)) - (div_frac * 2u); - if (div_int > (uint32_t)UINT16_MAX) return; - - /* Write 16-bit divisor: set DLAB, write high/low bytes, clear DLAB */ - MMUART_LCR(DEBUG_UART_BASE) |= DLAB_MASK; - MMUART_DMR(DEBUG_UART_BASE) = (uint8_t)(div_int >> 8); - MMUART_DLR(DEBUG_UART_BASE) = (uint8_t)div_int; - MMUART_LCR(DEBUG_UART_BASE) &= ~DLAB_MASK; - - /* Enable fractional divisor if integer divisor > 1 */ + MMUART_LCR(base) |= DLAB_MASK; + MMUART_DMR(base) = (uint8_t)(div_int >> 8); + MMUART_DLR(base) = (uint8_t)div_int; + MMUART_LCR(base) &= ~DLAB_MASK; if (div_int > 1u) { - MMUART_MM0(DEBUG_UART_BASE) |= EFBR_MASK; - MMUART_DFR(DEBUG_UART_BASE) = (uint8_t)div_frac; - } - else { - MMUART_MM0(DEBUG_UART_BASE) &= ~EFBR_MASK; + MMUART_MM0(base) |= EFBR_MASK; + MMUART_DFR(base) = (uint8_t)div_frac; + } else { + MMUART_MM0(base) &= ~EFBR_MASK; } } -void uart_init(void) +static void uart_init_base(unsigned long base) { - /* Disable special modes: LIN, IrDA, SmartCard */ - MMUART_MM0(DEBUG_UART_BASE) &= ~ELIN_MASK; - MMUART_MM1(DEBUG_UART_BASE) &= ~EIRD_MASK; - MMUART_MM2(DEBUG_UART_BASE) &= ~EERR_MASK; - - /* Disable interrupts */ - MMUART_IER(DEBUG_UART_BASE) = 0u; - - /* Reset and configure FIFOs, enable RXRDYN/TXRDYN pins */ - MMUART_FCR(DEBUG_UART_BASE) = 0u; - MMUART_FCR(DEBUG_UART_BASE) |= CLEAR_RX_FIFO_MASK | CLEAR_TX_FIFO_MASK; - MMUART_FCR(DEBUG_UART_BASE) |= RXRDY_TXRDYN_EN_MASK; - - /* Disable loopback (local and remote) */ - MMUART_MCR(DEBUG_UART_BASE) &= ~(LOOP_MASK | RLOOP_MASK); - 
- /* Set LSB-first for TX/RX */ - MMUART_MM1(DEBUG_UART_BASE) &= ~(E_MSB_TX_MASK | E_MSB_RX_MASK); - - /* Disable AFM, single wire mode */ - MMUART_MM2(DEBUG_UART_BASE) &= ~(EAFM_MASK | ESWM_MASK); - - /* Disable TX time guard, RX timeout, fractional baud */ - MMUART_MM0(DEBUG_UART_BASE) &= ~(ETTG_MASK | ERTO_MASK | EFBR_MASK); - - /* Clear timing registers */ - MMUART_GFR(DEBUG_UART_BASE) = 0u; - MMUART_TTG(DEBUG_UART_BASE) = 0u; - MMUART_RTO(DEBUG_UART_BASE) = 0u; - - /* Configure baud rate (115200) */ - uart_config_clk(115200); + MMUART_MM0(base) &= ~ELIN_MASK; + MMUART_MM1(base) &= ~EIRD_MASK; + MMUART_MM2(base) &= ~EERR_MASK; + MMUART_IER(base) = 0u; + MMUART_FCR(base) = CLEAR_RX_FIFO_MASK | CLEAR_TX_FIFO_MASK | RXRDY_TXRDYN_EN_MASK; + MMUART_MCR(base) &= ~(LOOP_MASK | RLOOP_MASK); + MMUART_MM1(base) &= ~(E_MSB_TX_MASK | E_MSB_RX_MASK); + MMUART_MM2(base) &= ~(EAFM_MASK | ESWM_MASK); + MMUART_MM0(base) &= ~(ETTG_MASK | ERTO_MASK | EFBR_MASK); + MMUART_GFR(base) = 0u; + MMUART_TTG(base) = 0u; + MMUART_RTO(base) = 0u; + uart_config_baud(base, 115200); + MMUART_LCR(base) = MSS_UART_DATA_8_BITS | MSS_UART_NO_PARITY | MSS_UART_ONE_STOP_BIT; +} - /* Set line config: 8N1 */ - MMUART_LCR(DEBUG_UART_BASE) = MSS_UART_DATA_8_BITS | - MSS_UART_NO_PARITY | - MSS_UART_ONE_STOP_BIT; +void uart_init(void) +{ + uart_init_base(DEBUG_UART_BASE); } void uart_write(const char* buf, unsigned int sz) @@ -1135,7 +1310,7 @@ void uart_write(const char* buf, unsigned int sz) uint32_t pos = 0; while (sz-- > 0) { char c = buf[pos++]; - if (c == '\n') { /* handle CRLF */ + if (c == '\n') { while ((MMUART_LSR(DEBUG_UART_BASE) & MSS_UART_THRE) == 0); MMUART_THR(DEBUG_UART_BASE) = '\r'; } @@ -1144,3 +1319,41 @@ void uart_write(const char* buf, unsigned int sz) } } #endif /* DEBUG_UART */ + +#ifdef WOLFBOOT_RISCV_MMODE +/* Initialize UART for a secondary hart (1-4). Hart 0 uses uart_init(). 
*/ +void uart_init_hart(unsigned long hartid) +{ + unsigned long base; + if (hartid == 0 || hartid > 4) + return; + base = UART_BASE_FOR_HART(hartid); + /* MSS_PERIPH_MMUART0 = bit 5; shift by hartid selects MMUART1-4 */ + SYSREG_SUBBLK_CLOCK_CR |= (MSS_PERIPH_MMUART0 << hartid); + __asm__ volatile("fence iorw, iorw" ::: "memory"); + SYSREG_SOFT_RESET_CR &= ~(MSS_PERIPH_MMUART0 << hartid); + __asm__ volatile("fence iorw, iorw" ::: "memory"); + udelay(100); + uart_init_base(base); + udelay(10); +} + +/* Write to a specific hart's UART (hart 0-4). */ +void uart_write_hart(unsigned long hartid, const char* buf, unsigned int sz) +{ + unsigned long base; + uint32_t pos = 0; + if (hartid > 4) + return; + base = UART_BASE_FOR_HART(hartid); + while (sz-- > 0) { + char c = buf[pos++]; + if (c == '\n') { + while ((MMUART_LSR(base) & MSS_UART_THRE) == 0); + MMUART_THR(base) = '\r'; + } + while ((MMUART_LSR(base) & MSS_UART_THRE) == 0); + MMUART_THR(base) = c; + } +} +#endif /* WOLFBOOT_RISCV_MMODE */ diff --git a/hal/mpfs250.h b/hal/mpfs250.h index ffaccd2e67..497bee4a88 100644 --- a/hal/mpfs250.h +++ b/hal/mpfs250.h @@ -27,8 +27,28 @@ /* PolarFire SoC MPFS250T board specific configuration */ -/* APB/AHB Clock Frequency */ -#define MSS_APB_AHB_CLK 150000000 +/* APB/AHB Peripheral Bus Clock Frequency (used for UART baud rate divisor) + * M-mode (out of reset, no PLL): 40 MHz + * S-mode (after HSS configures PLL): 150 MHz + */ +#ifndef MSS_APB_AHB_CLK + #ifdef WOLFBOOT_RISCV_MMODE + #define MSS_APB_AHB_CLK 40000000 + #else + #define MSS_APB_AHB_CLK 150000000 + #endif +#endif + +/* CPU Core Clock Frequency (used for mcycle-based benchmarking) + * The E51 core runs at 2x the APB bus clock on reset. + * After HSS configures the PLL, CPU runs at 600 MHz. 
*/ +#ifndef MSS_CPU_CLK + #ifdef WOLFBOOT_RISCV_MMODE + #define MSS_CPU_CLK (MSS_APB_AHB_CLK * 2) + #else + #define MSS_CPU_CLK 600000000 + #endif +#endif /* Hardware Base Address */ #define SYSREG_BASE 0x20002000 @@ -44,21 +64,16 @@ /* Peripheral Soft Reset Control Register (offset 0x88) */ #define SYSREG_SOFT_RESET_CR (*((volatile uint32_t*)(SYSREG_BASE + 0x88))) -#define SYSREG_SOFT_RESET_CR_ENVM (1U << 0) -#define SYSREG_SOFT_RESET_CR_MMC (1U << 3) -#define SYSREG_SOFT_RESET_CR_MMUART0 (1U << 5) -#define SYSREG_SOFT_RESET_CR_MMUART1 (1U << 6) -#define SYSREG_SOFT_RESET_CR_MMUART2 (1U << 7) -#define SYSREG_SOFT_RESET_CR_MMUART3 (1U << 8) -#define SYSREG_SOFT_RESET_CR_MMUART4 (1U << 9) -#define SYSREG_SOFT_RESET_CR_SPI0 (1U << 10) -#define SYSREG_SOFT_RESET_CR_SPI1 (1U << 11) -#define SYSREG_SOFT_RESET_CR_QSPI (1U << 19) -#define SYSREG_SOFT_RESET_CR_GPIO0 (1U << 20) -#define SYSREG_SOFT_RESET_CR_GPIO1 (1U << 21) -#define SYSREG_SOFT_RESET_CR_GPIO2 (1U << 22) -#define SYSREG_SOFT_RESET_CR_DDRC (1U << 23) -#define SYSREG_SOFT_RESET_CR_ATHENA (1U << 28) /* Crypto hardware accelerator */ + +/* MSS Peripheral control bits (shared by SUBBLK_CLOCK_CR and SOFT_RESET_CR) */ +#define MSS_PERIPH_ENVM (1U << 0) +#define MSS_PERIPH_MMC (1U << 3) +#define MSS_PERIPH_MMUART0 (1U << 5) +#define MSS_PERIPH_MMUART1 (1U << 6) +#define MSS_PERIPH_MMUART2 (1U << 7) +#define MSS_PERIPH_MMUART3 (1U << 8) +#define MSS_PERIPH_MMUART4 (1U << 9) +#define MSS_PERIPH_QSPI (1U << 19) /* UART */ @@ -74,26 +89,51 @@ #define MSS_UART3_HI_BASE 0x28104000UL #define MSS_UART4_HI_BASE 0x28106000UL +/* UART base address table for per-hart access (LO addresses for M-mode) */ +#ifndef __ASSEMBLER__ +extern const unsigned long MSS_UART_BASE_ADDR[5]; +#define UART_BASE_FOR_HART(hart) (MSS_UART_BASE_ADDR[(hart) < 5 ? 
(hart) : 0]) +#endif /* __ASSEMBLER__ */ + +/* Debug UART port selection (0-4): M-mode defaults to UART0, S-mode to UART1 */ +#ifndef DEBUG_UART_PORT + #ifdef WOLFBOOT_RISCV_MMODE + #define DEBUG_UART_PORT 0 + #else + #define DEBUG_UART_PORT 1 + #endif +#endif + +/* Derive base address from port number */ +#if DEBUG_UART_PORT == 0 + #define DEBUG_UART_BASE MSS_UART0_LO_BASE +#elif DEBUG_UART_PORT == 1 + #define DEBUG_UART_BASE MSS_UART1_LO_BASE +#elif DEBUG_UART_PORT == 2 + #define DEBUG_UART_BASE MSS_UART2_LO_BASE +#elif DEBUG_UART_PORT == 3 + #define DEBUG_UART_BASE MSS_UART3_LO_BASE +#elif DEBUG_UART_PORT == 4 + #define DEBUG_UART_BASE MSS_UART4_LO_BASE +#else + #error "Invalid DEBUG_UART_PORT (must be 0-4)" +#endif + #define MMUART_RBR(base) *((volatile uint8_t*)((base)) + 0x00) /* Receiver buffer register */ #define MMUART_IER(base) *((volatile uint8_t*)((base)) + 0x04) /* Interrupt enable register */ #define MMUART_IIR(base) *((volatile uint8_t*)((base)) + 0x08) /* Interrupt ID register */ #define MMUART_LCR(base) *((volatile uint8_t*)((base)) + 0x0C) /* Line control register */ #define MMUART_MCR(base) *((volatile uint8_t*)((base)) + 0x10) /* Modem control register */ #define MMUART_LSR(base) *((volatile uint8_t*)((base)) + 0x14) /* Line status register */ -#define MMUART_MSR(base) *((volatile uint8_t*)((base)) + 0x18) /* Modem status register */ -#define MMUART_SCR(base) *((volatile uint8_t*)((base)) + 0x1C) /* Scratch register */ -#define MMUART_IEM(base) *((volatile uint8_t*)((base)) + 0x24) /* Interrupt enable mask */ -#define MMUART_IIM(base) *((volatile uint8_t*)((base)) + 0x28) /* multi-mode Interrupt ID register */ #define MMUART_MM0(base) *((volatile uint8_t*)((base)) + 0x30) /* Mode register 0 */ #define MMUART_MM1(base) *((volatile uint8_t*)((base)) + 0x34) /* Mode register 1 */ #define MMUART_MM2(base) *((volatile uint8_t*)((base)) + 0x38) /* Mode register 2 */ -#define MMUART_DFR(base) *((volatile uint8_t*)((base)) + 0x3C) /* Data frame register 
*/ +#define MMUART_DFR(base) *((volatile uint8_t*)((base)) + 0x3C) /* Fractional divisor register */ #define MMUART_GFR(base) *((volatile uint8_t*)((base)) + 0x44) /* Global filter register */ #define MMUART_TTG(base) *((volatile uint8_t*)((base)) + 0x48) /* TX time guard register */ #define MMUART_RTO(base) *((volatile uint8_t*)((base)) + 0x4C) /* RX timeout register */ -#define MMUART_ADR(base) *((volatile uint8_t*)((base)) + 0x50) /* Address register */ -#define MMUART_DLR(base) *((volatile uint8_t*)((base)) + 0x80) /* Divisor latch register */ -#define MMUART_DMR(base) *((volatile uint8_t*)((base)) + 0x84) /* Divisor mode register */ +#define MMUART_DLR(base) *((volatile uint8_t*)((base)) + 0x80) /* Divisor latch LSB */ +#define MMUART_DMR(base) *((volatile uint8_t*)((base)) + 0x84) /* Divisor latch MSB */ #define MMUART_THR(base) *((volatile uint8_t*)((base)) + 0x100) /* Transmitter holding register */ #define MMUART_FCR(base) *((volatile uint8_t*)((base)) + 0x104) /* FIFO control register */ @@ -104,8 +144,8 @@ #define MSS_UART_ONE_STOP_BIT ((uint8_t)0x00) /* LSR (Line Status Register) */ +#define MSS_UART_DR ((uint8_t)0x01) /* Data ready */ #define MSS_UART_THRE ((uint8_t)0x20) /* Transmitter holding register empty */ -#define MSS_UART_TEMT ((uint8_t)0x40) /* Transmit empty */ #define ELIN_MASK (1U << 3) /* Enable LIN header detection */ #define EIRD_MASK (1U << 2) /* Enable IrDA modem */ @@ -161,7 +201,6 @@ /* System Service command opcodes */ #define SYS_SERV_CMD_SERIAL_NUMBER 0x00u -#define SYS_SERV_CMD_SPI_COPY 0x50u /* SCB mailbox SPI copy service */ /* Device serial number size in bytes */ #define DEVICE_SERIAL_NUMBER_SIZE 16 @@ -190,70 +229,124 @@ int mpfs_scb_read_mailbox(uint8_t *out, uint32_t len); int mpfs_read_serial_number(uint8_t *serial); #endif /* __ASSEMBLER__ */ -/* Crypto Engine: Athena F5200 TeraFire Crypto Processor (1x), 200 MHz */ +/* Crypto Engine: Athena F5200 (200 MHz) */ #define ATHENA_BASE (SYSREG_BASE + 0x125000) +/* L2 Cache 
Controller (CACHE_CTRL @ 0x02010000) */ +#define L2_CACHE_BASE 0x02010000UL + +#define L2_CONFIG (*(volatile uint64_t*)(L2_CACHE_BASE + 0x000)) +#define L2_WAY_ENABLE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x008)) +#define L2_WAY_MASK_DMA (*(volatile uint64_t*)(L2_CACHE_BASE + 0x800)) +#define L2_WAY_MASK_AXI4_PORT0 (*(volatile uint64_t*)(L2_CACHE_BASE + 0x808)) +#define L2_WAY_MASK_AXI4_PORT1 (*(volatile uint64_t*)(L2_CACHE_BASE + 0x810)) +#define L2_WAY_MASK_AXI4_PORT2 (*(volatile uint64_t*)(L2_CACHE_BASE + 0x818)) +#define L2_WAY_MASK_AXI4_PORT3 (*(volatile uint64_t*)(L2_CACHE_BASE + 0x820)) +#define L2_WAY_MASK_E51_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x828)) +#define L2_WAY_MASK_E51_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x830)) +#define L2_WAY_MASK_U54_1_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x838)) +#define L2_WAY_MASK_U54_1_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x840)) +#define L2_WAY_MASK_U54_2_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x848)) +#define L2_WAY_MASK_U54_2_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x850)) +#define L2_WAY_MASK_U54_3_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x858)) +#define L2_WAY_MASK_U54_3_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x860)) +#define L2_WAY_MASK_U54_4_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x868)) +#define L2_WAY_MASK_U54_4_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x870)) + +/* L2 Shutdown Control Register */ +#define SYSREG_L2_SHUTDOWN_CR (*(volatile uint32_t*)(SYSREG_BASE + 0x174)) + +/* L2 Cache/Scratchpad constants */ +#define L2_NUM_CACHE_WAYS 8 /* Ways 0-7 are cache */ +#define L2_NUM_SCRATCH_WAYS 4 /* Ways 8-11 are scratchpad */ +#define L2_WAY_BYTE_LENGTH 0x20000 /* 128KB per way */ +#define L2_SCRATCH_BASE 0x0A000000UL +#define L2_SCRATCH_SIZE (L2_NUM_SCRATCH_WAYS * L2_WAY_BYTE_LENGTH) + +#define L2_WAY_ENABLE_RESET 0x01 +#define L2_WAY_ENABLE_ALL_CACHE 0xFF +#define L2_WAY_ENABLE_WITH_SCRATCH 0x0FFF +#define L2_WAY_MASK_CACHE_ONLY 0xFF + + 
+/* CLINT - Core Local Interruptor */ +#ifndef CLINT_BASE +#define CLINT_BASE 0x02000000UL +#endif -/* ============================================================================ - * PLIC - Platform-Level Interrupt Controller (MPFS250-specific configuration) - * Base Address: 0x0c000000, Size: 64MB - * - * Generic PLIC register access is provided by hal/riscv.h - * ============================================================================ */ -#define PLIC_BASE 0x0C000000UL -#define PLIC_SIZE 0x04000000UL /* 64MB */ +/* RTC Clock Frequency (1 MHz after divisor) */ +#define RTC_CLOCK_FREQ 1000000UL -/* Number of interrupt sources and contexts */ -#define PLIC_NUM_SOURCES 186 /* riscv,ndev = 0xBA = 186 */ -#define PLIC_NUM_HARTS 5 /* 1x E51 + 4x U54 */ -#define PLIC_NUM_CONTEXTS 10 /* 2 contexts per hart (M-mode + S-mode) */ +/* In M-mode CLINT MTIME is not running without HSS; use mcycle (CPU clock) instead. + * In S-mode MTIME runs at 1 MHz (default RISCV_SMODE_TIMER_FREQ). */ +#if defined(WOLFBOOT_RISCV_MMODE) && !defined(RISCV_SMODE_TIMER_FREQ) +#define RISCV_SMODE_TIMER_FREQ MSS_CPU_CLK +#endif -/* MSS Global Interrupt offset - PLIC interrupts 0-12 are local, 13+ are MSS */ -#define OFFSET_TO_MSS_GLOBAL_INTS 13 -/* PLIC Interrupt Sources (PLIC IRQ numbers) */ -#define PLIC_INT_MMC_MAIN 88 /* MMC/SD controller main interrupt */ -#define PLIC_INT_MMC_WAKEUP 89 /* MMC/SD controller wakeup interrupt */ +/* Hart Local Storage (HLS) - per-hart communication structure, 64 bytes at top of stack */ +#define HLS_DEBUG_AREA_SIZE 64 -/* PLIC Context IDs for each hart - * Hart 0 (E51): Context 0 = M-mode (no S-mode on E51) - * Hart 1 (U54): Context 1 = M-mode, Context 2 = S-mode - * Hart 2 (U54): Context 3 = M-mode, Context 4 = S-mode - * Hart 3 (U54): Context 5 = M-mode, Context 6 = S-mode - * Hart 4 (U54): Context 7 = M-mode, Context 8 = S-mode - */ -#define PLIC_CONTEXT_E51_M 0 -#define PLIC_CONTEXT_U54_1_M 1 -#define PLIC_CONTEXT_U54_1_S 2 -#define PLIC_CONTEXT_U54_2_M 
3 -#define PLIC_CONTEXT_U54_2_S 4 -#define PLIC_CONTEXT_U54_3_M 5 -#define PLIC_CONTEXT_U54_3_S 6 -#define PLIC_CONTEXT_U54_4_M 7 -#define PLIC_CONTEXT_U54_4_S 8 +#ifndef __ASSEMBLER__ +typedef struct { + volatile uint32_t in_wfi_indicator; /* 0x00: Hart status indicator */ + volatile uint32_t my_hart_id; /* 0x04: Hart ID */ + volatile uint32_t shared_mem_marker; /* 0x08: Init marker */ + volatile uint32_t shared_mem_status; /* 0x0C: Status */ + volatile uint64_t* shared_mem; /* 0x10: Shared memory pointer */ + volatile uint64_t reserved[5]; /* 0x18: Reserved/padding to 64 bytes */ +} HLS_DATA; /* 64 bytes total */ +#endif /* __ASSEMBLER__ */ + +#define HLS_MAIN_HART_STARTED 0x12344321UL +#define HLS_OTHER_HART_IN_WFI 0x12345678UL + +/* Number of harts on MPFS */ +#define MPFS_NUM_HARTS 5 +#define MPFS_FIRST_HART 0 /* E51 is hart 0 */ +#define MPFS_FIRST_U54_HART 1 /* First U54 is hart 1 */ +#define MPFS_LAST_U54_HART 4 /* Last U54 is hart 4 */ + +/* Stack configuration per hart */ +#ifndef STACK_SIZE_PER_HART +#define STACK_SIZE_PER_HART 8192 +#endif + +/* Multi-hart function declarations */ +#ifndef __ASSEMBLER__ +#ifdef DEBUG_UART +void uart_init(void); +void uart_write(const char* buf, unsigned int sz); +#endif +#ifdef WOLFBOOT_RISCV_MMODE +int mpfs_wake_secondary_harts(void); +void secondary_hart_entry(unsigned long hartid, HLS_DATA* hls); +void uart_init_hart(unsigned long hartid); +void uart_write_hart(unsigned long hartid, const char* buf, unsigned int sz); +#endif +#endif /* __ASSEMBLER__ */ + + + +/* PLIC - Platform-Level Interrupt Controller (base 0x0C000000, 64MB) */ +#define PLIC_BASE 0x0C000000UL +#define PLIC_SIZE 0x04000000UL +#define PLIC_NUM_SOURCES 186 +#define PLIC_NUM_HARTS 5 +#define PLIC_NUM_CONTEXTS 10 +#define OFFSET_TO_MSS_GLOBAL_INTS 13 + +#define PLIC_INT_MMC_MAIN 88 #ifdef EXT_FLASH -/* ========================================================================== - * QSPI Flash Controller Definitions - * - * PolarFire SoC has two CoreQSPI 
v2 controllers with identical registers: - * - * 1. System Controller QSPI (MPFS_SC_SPI=1, default): - * - SC QSPI at 0x37020100 (size 0x100) - * - For fabric-connected flash (design flash) - * - Direct register access (same register layout as MSS QSPI) - * - Supports read, write, and erase operations - * - Does NOT require MSS clock enable or soft reset +/* QSPI Flash Controller * - * 2. MSS QSPI Controller (MPFS_SC_SPI=0): - * - MSS QSPI at 0x21000000 (size 0x1000) - * - For external flash connected to MSS QSPI pins - * - Requires MSS QSPI clock enable and soft reset release - * - Supports read, write, and erase operations - * - * ========================================================================== */ + * Two CoreQSPI v2 controllers with identical register layouts: + * SC QSPI (MPFS_SC_SPI=1, default): 0x37020100 — fabric-connected flash + * MSS QSPI (MPFS_SC_SPI=0): 0x21000000 — MSS QSPI pins + */ /* QSPI Controller Base Address */ #ifndef QSPI_BASE @@ -291,13 +384,7 @@ int mpfs_read_serial_number(uint8_t *serial); #define QSPI_CTRL_EN (1u << QSPI_CTRL_EN_OFFSET) #define QSPI_CTRL_XIP (1u << QSPI_CTRL_XIP_OFFSET) #define QSPI_CTRL_CLKIDLE (1u << QSPI_CTRL_CLKIDLE_OFFSET) -#define QSPI_CTRL_SAMPLE_MASK (0x3u << QSPI_CTRL_SAMPLE_OFFSET) #define QSPI_CTRL_SAMPLE_SCK (0x0u << QSPI_CTRL_SAMPLE_OFFSET) -#define QSPI_CTRL_SAMPLE_HCLKF (0x1u << QSPI_CTRL_SAMPLE_OFFSET) -#define QSPI_CTRL_SAMPLE_HCLKR (0x2u << QSPI_CTRL_SAMPLE_OFFSET) -#define QSPI_CTRL_QMODE0 (1u << QSPI_CTRL_QMODE0_OFFSET) -#define QSPI_CTRL_QMODE12_MASK (0x3u << QSPI_CTRL_QMODE12_OFFSET) -#define QSPI_CTRL_CLKRATE_MASK (0xFu << QSPI_CTRL_CLKRATE_OFFSET) /* QSPI Frames Register Bits */ #define QSPI_FRAMES_TOTALBYTES_OFFSET 0 @@ -317,10 +404,7 @@ int mpfs_read_serial_number(uint8_t *serial); #define QSPI_STATUS_RXDONE (1u << 1) #define QSPI_STATUS_RXAVAIL (1u << 2) #define QSPI_STATUS_TXAVAIL (1u << 3) -#define QSPI_STATUS_RXEMPTY (1u << 4) -/* Bit 5 is reserved in CoreQSPI v2 */ #define 
QSPI_STATUS_READY (1u << 7) -#define QSPI_STATUS_FLAGSX4 (1u << 8) /* QSPI Clock Configuration */ #define QSPI_CLK_DIV_2 0x01u @@ -329,20 +413,7 @@ int mpfs_read_serial_number(uint8_t *serial); #define QSPI_CLK_DIV_8 0x04u #define QSPI_CLK_DIV_10 0x05u #define QSPI_CLK_DIV_12 0x06u -#define QSPI_CLK_DIV_30 0x0Fu /* Conservative: ~5MHz from 150MHz APB */ - -/* QSPI SPI Modes */ -#define QSPI_SPI_MODE0 0 /* CPOL=0, CPHA=0 */ -#define QSPI_SPI_MODE3 1 /* CPOL=1, CPHA=1 */ - -/* QSPI IO Formats */ -#define QSPI_IO_FORMAT_NORMAL 0 /* 1-bit SPI */ -#define QSPI_IO_FORMAT_DUAL_EX0 1 /* 2-bit with extended mode 0 */ -#define QSPI_IO_FORMAT_QUAD_EX0 2 /* 4-bit with extended mode 0 */ -#define QSPI_IO_FORMAT_DUAL_EX1 3 /* 2-bit with extended mode 1 */ -#define QSPI_IO_FORMAT_QUAD_EX1 4 /* 4-bit with extended mode 1 */ -#define QSPI_IO_FORMAT_DUAL_FULL 5 /* Full 2-bit mode */ -#define QSPI_IO_FORMAT_QUAD_FULL 6 /* Full 4-bit mode */ +#define QSPI_CLK_DIV_30 0x0Fu /* ~5 MHz from 150 MHz APB */ /* Micron MT25QL01G Flash Commands */ #define QSPI_CMD_READ_ID_OPCODE 0x9Fu /* JEDEC ID Read */ diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index 45b49b1845..27980b240e 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -19,6 +19,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ #include +#include #include "target.h" #include "printf.h" #include "image.h" /* for RAMFUNCTION */ diff --git a/hal/riscv.h b/hal/riscv.h index 64c790c702..71159b317b 100644 --- a/hal/riscv.h +++ b/hal/riscv.h @@ -22,104 +22,72 @@ #ifndef RISCV_H #define RISCV_H +/* RISC-V privilege mode: + * M-mode (direct boot from eNVM): WOLFBOOT_RISCV_MMODE + * S-mode (running under HSS/SBI): default + */ -/* ============================================================================ - * RISC-V Privilege Mode Selection - * - * - Machine mode (direct boot from eNVM) : WOLFBOOT_RISCV_MMODE - * - Supervisor mode (running under HSS/SBI) : default - * - * 
============================================================================ */ - - /* Initial stack pointer address (stack grows downward from here) */ #ifndef WOLFBOOT_STACK_TOP #ifdef WOLFBOOT_RISCV_MMODE - /* M-mode: Stack at end of L2 Scratchpad (256KB) */ - #define WOLFBOOT_STACK_TOP 0x0A040000 + #define WOLFBOOT_STACK_TOP 0x0A040000 /* end of L2 Scratchpad (256KB) */ #else - /* S-mode: Stack in DDR */ - #define WOLFBOOT_STACK_TOP 0x80200000 + #define WOLFBOOT_STACK_TOP 0x80200000 /* DDR */ #endif #endif -/* ============================================================================ - * Generic RISC-V definitions (32-bit and 64-bit) - * ============================================================================ */ - -/* ============================================================================ - * XLEN-Dependent Definitions - * ============================================================================ */ - +/* XLEN-dependent load/store mnemonics and register width */ #if __riscv_xlen == 64 - #define STORE sd - #define LOAD ld + #define STORE sd + #define LOAD ld #define REGBYTES 8 - #define VECTOR_ALIGN 3 /* 8-byte alignment for RV64 */ + #define VECTOR_ALIGN 3 #else - #define STORE sw - #define LOAD lw + #define STORE sw + #define LOAD lw #define REGBYTES 4 - #define VECTOR_ALIGN 2 /* 4-byte alignment for RV32 */ + #define VECTOR_ALIGN 2 #endif - -/* RISC-V S-mode timer frequency (1 MHz default, can be overridden by platform) */ -#ifndef RISCV_SMODE_TIMER_FREQ -#define RISCV_SMODE_TIMER_FREQ 1000000 /* 1 MHz */ +/* S-mode timer frequency (1 MHz default; platform may override). + * In M-mode, hal_get_timer() returns mcycle so the platform (e.g. mpfs250.h) + * sets RISCV_SMODE_TIMER_FREQ to the CPU clock; do not default it here. 
*/ +#if !defined(WOLFBOOT_RISCV_MMODE) && !defined(RISCV_SMODE_TIMER_FREQ) +#define RISCV_SMODE_TIMER_FREQ 1000000 #endif -/* ============================================================================ - * Machine Information Registers (CSRs) - * ============================================================================ */ -#define CSR_TIME 0xC01 /* Timer register (read-only) */ -#define CSR_TIMEH 0xC81 /* Timer register high (RV32 only) */ -#define CSR_MVENDORID 0xF11 /* Vendor ID */ -#define CSR_MARCHID 0xF12 /* Architecture ID */ -#define CSR_MIMPID 0xF13 /* Implementation ID */ -#define CSR_MHARTID 0xF14 /* Hardware thread ID */ - +/* Mode-prefixed CSR name helper */ #ifdef WOLFBOOT_RISCV_MMODE #define MODE_PREFIX(__suffix) m##__suffix #else #define MODE_PREFIX(__suffix) s##__suffix #endif -/* ============================================================================ - * CSR Access Macros - * ============================================================================ */ - -/* Read CSR using inline assembly */ -#define csr_read(csr) \ -({ \ - register unsigned long __v; \ - __asm__ __volatile__ ("csrr %0, " #csr : "=r"(__v) : ); \ - __v; \ +/* CSR access macros */ +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, " #csr : "=r"(__v) : ); \ + __v; \ }) -/* Write CSR using inline assembly */ -#define csr_write(csr, val) \ -({ \ - unsigned long __v = (unsigned long)(val); \ - __asm__ __volatile__ ("csrw " #csr ", %0" : : "rK"(__v)); \ +#define csr_write(csr, val) \ +({ \ + unsigned long __v = (unsigned long)(val); \ + __asm__ __volatile__ ("csrw " #csr ", %0" : : "rK"(__v)); \ }) -/* Set bits in CSR */ -#define csr_set(csr, val) \ -({ \ - unsigned long __v = (unsigned long)(val); \ - __asm__ __volatile__ ("csrs " #csr ", %0" : : "rK"(__v)); \ +#define csr_set(csr, val) \ +({ \ + unsigned long __v = (unsigned long)(val); \ + __asm__ __volatile__ ("csrs " #csr ", %0" : : "rK"(__v)); \ }) -/* Clear bits in CSR */ 
-#define csr_clear(csr, val) \ -({ \ - unsigned long __v = (unsigned long)(val); \ - __asm__ __volatile__ ("csrc " #csr ", %0" : : "rK"(__v)); \ +#define csr_clear(csr, val) \ +({ \ + unsigned long __v = (unsigned long)(val); \ + __asm__ __volatile__ ("csrc " #csr ", %0" : : "rK"(__v)); \ }) -/* ============================================================================ - * Cache / I-Cache Sync Helpers - * ============================================================================ */ #ifndef __ASSEMBLER__ static inline void riscv_icache_sync(void) { @@ -127,135 +95,101 @@ static inline void riscv_icache_sync(void) __asm__ __volatile__("fence.i" ::: "memory"); #endif } -#endif /* !__ASSEMBLER__ */ +#endif -/* ============================================================================ - * Interrupt Numbers (for SIE/SIP and MIE/MIP registers) - * ============================================================================ */ -#define IRQ_U_SOFT 0 /* User software interrupt */ -#define IRQ_S_SOFT 1 /* Supervisor software interrupt */ -#define IRQ_M_SOFT 3 /* Machine software interrupt */ -#define IRQ_U_TIMER 4 /* User timer interrupt */ -#define IRQ_S_TIMER 5 /* Supervisor timer interrupt */ -#define IRQ_M_TIMER 7 /* Machine timer interrupt */ -#define IRQ_U_EXT 8 /* User external interrupt */ -#define IRQ_S_EXT 9 /* Supervisor external interrupt */ -#define IRQ_M_EXT 11 /* Machine external interrupt */ - -/* ============================================================================ - * Status Register Bits (mstatus/sstatus) - * ============================================================================ */ -#define MSTATUS_MIE (1 << 3) /* Machine-mode global interrupt enable */ -#define MSTATUS_MPIE (1 << 7) /* Machine-mode previous interrupt enable */ -#define SSTATUS_SIE (1 << 1) /* Supervisor-mode global interrupt enable */ -#define SSTATUS_SPIE (1 << 5) /* Supervisor-mode previous interrupt enable */ - -/* 
============================================================================ - * Machine Interrupt Enable (MIE) Register Bits - * ============================================================================ */ -#define MIE_MSIE (1 << IRQ_M_SOFT) /* Machine software interrupt enable */ -#define MIE_MTIE (1 << IRQ_M_TIMER) /* Machine timer interrupt enable */ -#define MIE_MEIE (1 << IRQ_M_EXT) /* Machine external interrupt enable */ - -/* ============================================================================ - * Supervisor Interrupt Enable (SIE) Register Bits - * ============================================================================ */ -#define SIE_SSIE (1 << IRQ_S_SOFT) /* Supervisor software interrupt enable */ -#define SIE_STIE (1 << IRQ_S_TIMER) /* Supervisor timer interrupt enable */ -#define SIE_SEIE (1 << IRQ_S_EXT) /* Supervisor external interrupt enable */ - -/* ============================================================================ - * Exception Cause Register (MCAUSE/SCAUSE) Definitions - * ============================================================================ */ +/* Interrupt numbers (for MIE/MIP/SIE/SIP registers) */ +#define IRQ_S_SOFT 1 +#define IRQ_M_SOFT 3 +#define IRQ_S_TIMER 5 +#define IRQ_M_TIMER 7 +#define IRQ_S_EXT 9 +#define IRQ_M_EXT 11 + +/* Privilege levels */ +#define PRV_S 1 +#define PRV_M 3 + +/* MSTATUS register bits */ +#define MSTATUS_SIE (1UL << 1) +#define MSTATUS_MIE (1UL << 3) +#define MSTATUS_SPIE (1UL << 5) +#define MSTATUS_MPIE (1UL << 7) +#define MSTATUS_SPP (1UL << 8) +#define MSTATUS_MPP_SHIFT 11 +#define MSTATUS_MPP_MASK (3UL << MSTATUS_MPP_SHIFT) +#define MSTATUS_MPP_M (PRV_M << MSTATUS_MPP_SHIFT) +#define MSTATUS_MPP_S (PRV_S << MSTATUS_MPP_SHIFT) + +/* SSTATUS bits (S-mode visible subset) */ +#define SSTATUS_SIE (1UL << 1) +#define SSTATUS_SPIE (1UL << 5) + +/* MIP register bits (mip CSR: pending interrupt flags, written by hardware) */ +#define MIP_MSIP (1 << IRQ_M_SOFT) + +/* MIE register bits 
(mie CSR: interrupt enable, always written by software. + * Bit positions match MIP but use these names when targeting the mie register + * to avoid confusing the two CSRs.) */ +#define MIE_MSIE (1 << IRQ_M_SOFT) +#define MIE_MTIE (1 << IRQ_M_TIMER) +#define MIE_MEIE (1 << IRQ_M_EXT) + +/* SIE register bits */ +#define SIE_SSIE (1 << IRQ_S_SOFT) +#define SIE_STIE (1 << IRQ_S_TIMER) +#define SIE_SEIE (1 << IRQ_S_EXT) + +/* MCAUSE / SCAUSE interrupt bit and exception code mask */ #if __riscv_xlen == 64 -#define MCAUSE_INT 0x8000000000000000ULL /* Interrupt bit (MSB) */ -#define MCAUSE_CAUSE 0x7FFFFFFFFFFFFFFFULL /* Exception code mask */ +#define MCAUSE_INT 0x8000000000000000ULL +#define MCAUSE_CAUSE 0x7FFFFFFFFFFFFFFFULL #else -#define MCAUSE_INT 0x80000000UL /* Interrupt bit (MSB) */ -#define MCAUSE_CAUSE 0x7FFFFFFFUL /* Exception code mask */ +#define MCAUSE_INT 0x80000000UL +#define MCAUSE_CAUSE 0x7FFFFFFFUL #endif -/* Legacy aliases for compatibility */ -#define MCAUSE64_INT 0x8000000000000000ULL -#define MCAUSE64_CAUSE 0x7FFFFFFFFFFFFFFFULL -#define MCAUSE32_INT 0x80000000UL -#define MCAUSE32_CAUSE 0x7FFFFFFFUL - -/* ============================================================================ - * PLIC - Platform-Level Interrupt Controller (Generic) - * Reference: RISC-V Platform-Level Interrupt Controller Specification v1.0 - * ============================================================================ - * - * The PLIC is the standard external interrupt controller for RISC-V systems. - * It aggregates external interrupt sources and presents them to harts. +/* PLIC - Platform-Level Interrupt Controller + * Generic implementation, enabled when platform defines PLIC_BASE. 
* - * PLIC Memory Map (standard offsets from PLIC_BASE): - * 0x000000-0x000FFF: Priority registers (1 word per source, source 0 reserved) - * 0x001000-0x001FFF: Pending bits (1 bit per source, packed in 32-bit words) - * 0x002000-0x1FFFFF: Enable bits (per context, 1 bit per source, packed) - * 0x200000-0x3FFFFF: Context registers (threshold + claim/complete per context) + * Standard memory map offsets: + * 0x000000: Priority registers (1 word/source) + * 0x002000: Enable bits (per context, 1 bit/source) + * 0x200000: Context registers (threshold + claim/complete per context) * - * Each hart typically has 2 contexts: M-mode and S-mode. - * - * Platform must define before using PLIC functions: - * PLIC_BASE - Base address of PLIC registers - * PLIC_NUM_SOURCES - Number of interrupt sources (optional, for bounds check) - * ============================================================================ */ + * Platform must define PLIC_BASE (and optionally PLIC_NUM_SOURCES). */ -/* PLIC Register Offsets (usable in both C and assembly) */ #define PLIC_PRIORITY_OFFSET 0x000000UL -#define PLIC_PENDING_OFFSET 0x001000UL #define PLIC_ENABLE_OFFSET 0x002000UL #define PLIC_ENABLE_STRIDE 0x80UL #define PLIC_CONTEXT_OFFSET 0x200000UL #define PLIC_CONTEXT_STRIDE 0x1000UL -/* PLIC Priority Levels (standard values) */ -#define PLIC_PRIORITY_DISABLED 0 /* Priority 0 = interrupt disabled */ -#define PLIC_PRIORITY_MIN 1 /* Minimum active priority */ -#define PLIC_PRIORITY_MAX 7 /* Maximum priority (7 levels typical) */ -#define PLIC_PRIORITY_DEFAULT 4 /* Default/medium priority */ +#define PLIC_PRIORITY_DISABLED 0 +#define PLIC_PRIORITY_MIN 1 +#define PLIC_PRIORITY_MAX 7 +#define PLIC_PRIORITY_DEFAULT 4 -/* ============================================================================ - * PLIC Register Access Macros (C code only) - * ============================================================================ */ #ifndef __ASSEMBLER__ - -/* Priority registers: one 32-bit word per interrupt 
source (source 0 reserved) */ #define PLIC_PRIORITY_REG(base, irq) \ (*((volatile uint32_t*)((base) + PLIC_PRIORITY_OFFSET + ((irq) * 4)))) -/* Pending bits: 32 interrupts per 32-bit word */ -#define PLIC_PENDING_REG(base, irq) \ - (*((volatile uint32_t*)((base) + PLIC_PENDING_OFFSET + (((irq) / 32) * 4)))) -#define PLIC_PENDING_BIT(irq) (1U << ((irq) % 32)) - -/* Enable bits: per context, 32 interrupts per 32-bit word - * Each context has 0x80 bytes (32 words * 32 bits = 1024 sources max) */ #define PLIC_ENABLE_REG(base, ctx, irq) \ (*((volatile uint32_t*)((base) + PLIC_ENABLE_OFFSET + \ ((ctx) * PLIC_ENABLE_STRIDE) + (((irq) / 32) * 4)))) #define PLIC_ENABLE_BIT(irq) (1U << ((irq) % 32)) -/* Context registers: threshold and claim/complete - * Each context has 0x1000 bytes, with threshold at offset 0 and claim at offset 4 */ #define PLIC_THRESHOLD_REG(base, ctx) \ (*((volatile uint32_t*)((base) + PLIC_CONTEXT_OFFSET + \ ((ctx) * PLIC_CONTEXT_STRIDE) + 0x00))) #define PLIC_CLAIM_REG(base, ctx) \ (*((volatile uint32_t*)((base) + PLIC_CONTEXT_OFFSET + \ ((ctx) * PLIC_CONTEXT_STRIDE) + 0x04))) -/* Complete uses the same register as claim (write IRQ number to complete) */ #define PLIC_COMPLETE_REG(base, ctx) PLIC_CLAIM_REG(base, ctx) #endif /* !__ASSEMBLER__ */ -/* ============================================================================ - * PLIC Function Declarations (C code only, when PLIC_BASE is defined) - * - * These functions are implemented in boot_riscv.c when PLIC_BASE is defined. - * Platform must provide plic_get_context() to map current hart to PLIC context. - * ============================================================================ */ +/* PLIC function declarations (boot_riscv.c, when PLIC_BASE is defined). + * Platform must provide plic_get_context() and plic_dispatch_irq(). 
*/ #if defined(PLIC_BASE) && !defined(__ASSEMBLER__) #include @@ -263,31 +197,31 @@ static inline void riscv_icache_sync(void) /* Platform-provided: Get PLIC context ID for current hart * Returns the context number (e.g., hart 1 S-mode = context 2) */ extern uint32_t plic_get_context(void); - -/* Set priority for an interrupt source (0 = disabled, 1-7 = priority levels) */ void plic_set_priority(uint32_t irq, uint32_t priority); - -/* Enable an interrupt for the current hart's context */ void plic_enable_interrupt(uint32_t irq); - -/* Disable an interrupt for the current hart's context */ void plic_disable_interrupt(uint32_t irq); - -/* Set the priority threshold for the current hart's context - * Interrupts with priority <= threshold are masked */ void plic_set_threshold(uint32_t threshold); - -/* Claim the highest priority pending interrupt - * Returns IRQ number, or 0 if no interrupt pending */ uint32_t plic_claim(void); - -/* Signal completion of interrupt handling */ void plic_complete(uint32_t irq); - -/* Platform-provided: Dispatch IRQ to appropriate handler - * Called by generic external interrupt handler for each claimed IRQ */ extern void plic_dispatch_irq(uint32_t irq); #endif /* PLIC_BASE && !__ASSEMBLER__ */ +/* CLINT - Core Local Interruptor (M-mode only). + * Provides software IPIs (MSIP) and timer (MTIME/MTIMECMP). 
*/ +#ifdef WOLFBOOT_RISCV_MMODE + +#ifndef CLINT_BASE +#define CLINT_BASE 0x02000000UL +#endif + +#define CLINT_MSIP_OFFSET 0x0000UL + +#ifndef __ASSEMBLER__ +#define CLINT_MSIP(hart) \ + (*((volatile uint32_t*)(CLINT_BASE + CLINT_MSIP_OFFSET + ((hart) * 4)))) +#endif + +#endif /* WOLFBOOT_RISCV_MMODE */ + #endif /* RISCV_H */ diff --git a/include/printf.h b/include/printf.h index 860774d355..1163ad4eda 100644 --- a/include/printf.h +++ b/include/printf.h @@ -48,7 +48,9 @@ /* support for wolfBoot_printf logging */ #if defined(PRINTF_ENABLED) && !defined(WOLFBOOT_NO_PRINTF) -# include +# if !defined(DEBUG_UART) && !defined(DEBUG_ZYNQ) && !defined(WOLFBOOT_DEBUG_EFI) +# include +# endif # if defined(DEBUG_ZYNQ) && !defined(USE_QNX) && !defined(DEBUG_UART) # include "xil_printf.h" # define wolfBoot_printf(_f_, ...) xil_printf(_f_, ##__VA_ARGS__) diff --git a/src/boot_riscv.c b/src/boot_riscv.c index e472626e33..b7b814279c 100644 --- a/src/boot_riscv.c +++ b/src/boot_riscv.c @@ -42,56 +42,32 @@ extern void trap_entry(void); extern void trap_exit(void); -/* Linker symbols - use native pointer-sized types */ +/* Linker symbols */ #if __riscv_xlen == 64 -extern uint64_t _start_vector; -extern uint64_t _stored_data; -extern uint64_t _start_data; -extern uint64_t _end_data; -extern uint64_t _start_bss; -extern uint64_t _end_bss; -extern uint64_t _end_stack; -extern uint64_t _start_heap; +extern uint64_t _start_vector, _stored_data, _start_data, _end_data; +extern uint64_t _start_bss, _end_bss, _end_stack, _start_heap; extern uint64_t _global_pointer; extern void (* const trap_vector_table[])(void); #else -extern uint32_t _start_vector; -extern uint32_t _stored_data; -extern uint32_t _start_data; -extern uint32_t _end_data; -extern uint32_t _start_bss; -extern uint32_t _end_bss; -extern uint32_t _end_stack; -extern uint32_t _start_heap; +extern uint32_t _start_vector, _stored_data, _start_data, _end_data; +extern uint32_t _start_bss, _end_bss, _end_stack, _start_heap; extern 
uint32_t _global_pointer; extern void (* const IV[])(void); #endif extern void main(void); - -/* reloc_trap_vector is implemented in boot_riscv_start.S */ extern void reloc_trap_vector(const uint32_t *address); -/* ============================================================================ - * Trap Handling - * ============================================================================ */ - +/* Trap state saved for debugging */ #if __riscv_xlen == 64 -static uint64_t last_cause = 0; -static uint64_t last_epc = 0; -static uint64_t last_tval = 0; +static uint64_t last_cause = 0, last_epc = 0, last_tval = 0; #else -static uint32_t last_cause = 0; -static uint32_t last_epc = 0; -static uint32_t last_tval = 0; +static uint32_t last_cause = 0, last_epc = 0, last_tval = 0; #endif #ifdef PLIC_BASE -/* ============================================================================ - * PLIC - Platform-Level Interrupt Controller (Generic Implementation) - * ============================================================================ */ +/* PLIC generic implementation */ -/* Set priority for an interrupt source */ void plic_set_priority(uint32_t irq, uint32_t priority) { if (irq > 0 && priority <= PLIC_PRIORITY_MAX) { @@ -103,7 +79,6 @@ void plic_set_priority(uint32_t irq, uint32_t priority) } } -/* Enable an interrupt for the current hart's context */ void plic_enable_interrupt(uint32_t irq) { uint32_t ctx = plic_get_context(); @@ -116,7 +91,6 @@ void plic_enable_interrupt(uint32_t irq) } } -/* Disable an interrupt for the current hart's context */ void plic_disable_interrupt(uint32_t irq) { uint32_t ctx = plic_get_context(); @@ -129,7 +103,6 @@ void plic_disable_interrupt(uint32_t irq) } } -/* Set the priority threshold for the current hart's context */ void plic_set_threshold(uint32_t threshold) { uint32_t ctx = plic_get_context(); @@ -138,31 +111,25 @@ void plic_set_threshold(uint32_t threshold) } } -/* Claim the highest priority pending interrupt */ uint32_t 
plic_claim(void) { uint32_t ctx = plic_get_context(); return PLIC_CLAIM_REG(PLIC_BASE, ctx); } -/* Signal completion of interrupt handling */ void plic_complete(uint32_t irq) { uint32_t ctx = plic_get_context(); PLIC_COMPLETE_REG(PLIC_BASE, ctx) = irq; } -/* Handle external interrupts via PLIC */ static void handle_external_interrupt(void) { uint32_t irq; /* Claim and dispatch interrupts until none pending */ while ((irq = plic_claim()) != 0) { - /* Platform-provided dispatch function */ plic_dispatch_irq(irq); - - /* Signal completion to PLIC */ plic_complete(irq); } } @@ -175,6 +142,15 @@ unsigned long WEAKFUNCTION handle_trap(unsigned long cause, unsigned long epc, last_epc = epc; last_tval = tval; +#ifdef DEBUG_BOOT + /* Debug: print trap info for synchronous exceptions (not interrupts) */ + if (!(cause & MCAUSE_INT)) { + wolfBoot_printf("TRAP: cause=%lx epc=%lx tval=%lx\n", cause, epc, + tval); + while (1) ; /* halt to prevent infinite trap-mret loop */ + } +#endif + #ifdef PLIC_BASE /* Check if this is an interrupt (MSB set) */ if (cause & MCAUSE_INT) { @@ -198,11 +174,13 @@ unsigned long WEAKFUNCTION handle_trap(unsigned long cause, unsigned long epc, uint64_t hal_get_timer(void) { -#if __riscv_xlen == 64 - /* For RV64, CSR time contains full 64-bit value */ +#ifdef WOLFBOOT_RISCV_MMODE + /* M-mode: rdtime not available without HSS; use mcycle (CPU clock) */ + return csr_read(mcycle); +#elif __riscv_xlen == 64 return csr_read(time); #else - /* For RV32, read both timeh and time with wrap-around protection */ + /* RV32: read timeh+time with wrap-around protection */ uint32_t hi, lo; do { @@ -214,23 +192,13 @@ uint64_t hal_get_timer(void) #endif } -/* Get timer value in microseconds - * Formula: time_us = (ticks * 1000) / (rate / 1000) - * = (ticks * 1000000) / rate - */ uint64_t hal_get_timer_us(void) { uint64_t ticks = hal_get_timer(); uint32_t rate = RISCV_SMODE_TIMER_FREQ; - - /* Avoid overflow: (ticks * 1000) / (rate / 1000) */ return (ticks * 1000) / 
(rate / 1000); } -/* ============================================================================ - * Boot Functions - * ============================================================================ */ - #ifdef MMU int WEAKFUNCTION hal_dts_fixup(void* dts_addr) { @@ -239,8 +207,51 @@ int WEAKFUNCTION hal_dts_fixup(void* dts_addr) } #endif +#ifdef WOLFBOOT_RISCV_MMODE +/* Configure PMP entry 0: NAPOT full address space, RWX, for S-mode access */ +static void setup_pmp_for_smode(void) +{ + csr_write(pmpaddr0, -1UL); /* all-ones = cover entire address space (NAPOT) */ + csr_write(pmpcfg0, 0x1F); /* A=NAPOT(3), R=1, W=1, X=1 */ + __asm__ volatile("sfence.vma" ::: "memory"); +} + +/* Delegate common exceptions and S-mode interrupts to S-mode */ +static void delegate_traps_to_smode(void) +{ + /* Delegate exceptions 0-8, 12, 13, 15 (all except S-mode ecall, reserved) */ + csr_write(medeleg, (1 << 0)|(1 << 1)|(1 << 2)|(1 << 3)| + (1 << 4)|(1 << 5)|(1 << 6)|(1 << 7)| + (1 << 8)|(1 << 12)|(1 << 13)|(1 << 15)); + /* Delegate S-mode software, timer, and external interrupts */ + csr_write(mideleg, (1 << IRQ_S_SOFT)|(1 << IRQ_S_TIMER)|(1 << IRQ_S_EXT)); +} + +/* Switch to S-mode and jump to entry (never returns). 
a0=hartid, a1=dtb */ +static void __attribute__((noreturn)) enter_smode(unsigned long entry, + unsigned long hartid, + unsigned long dtb) +{ + unsigned long mstatus_val; + csr_write(mepc, entry); + mstatus_val = csr_read(mstatus); + mstatus_val &= ~MSTATUS_MPP_MASK; + mstatus_val |= MSTATUS_MPP_S | MSTATUS_MPIE; + mstatus_val &= ~MSTATUS_MIE; + csr_write(mstatus, mstatus_val); + csr_write(satp, 0); + __asm__ volatile( + "mv a0, %0\n" + "mv a1, %1\n" + "mret\n" + : : "r"(hartid), "r"(dtb) : "a0", "a1" + ); + __builtin_unreachable(); +} +#endif /* WOLFBOOT_RISCV_MMODE */ + #if __riscv_xlen == 64 -/* Get the hartid saved by boot_riscv_start.S in the tp register */ +/* Return the hartid saved in tp by boot_riscv_start.S */ unsigned long get_boot_hartid(void) { unsigned long hartid; @@ -260,17 +271,13 @@ void do_boot(const uint32_t *app_offset) #endif #ifdef MMU unsigned long dts_addr; -#endif - -#ifdef MMU hal_dts_fixup((uint32_t*)dts_offset); dts_addr = (unsigned long)dts_offset; +#elif defined(WOLFBOOT_RISCV_MMODE) || __riscv_xlen == 64 + unsigned long dts_addr = 0; #endif #if __riscv_xlen == 64 - /* Get the hartid that was saved by boot_riscv_start.S in tp register. - * This is the hartid passed to wolfBoot by the prior boot stage (e.g., HSS). - * For MPFS, this should be 1-4 (U54 cores), never 0 (E51 monitor core). */ hartid = get_boot_hartid(); #endif @@ -288,20 +295,46 @@ void do_boot(const uint32_t *app_offset) /* Relocate trap vector table to application */ reloc_trap_vector(app_offset); - /* - * RISC-V Linux kernel boot requirements (Documentation/arch/riscv/boot.rst): - * a0 = hartid of the current core - * a1 = physical address of the device tree blob (DTB) - * satp = 0 (MMU disabled) - * - * For SMP systems using ordered booting (preferred), only the boot hart - * enters the kernel. Secondary harts are started via SBI HSM extension. 
- */ +#ifdef WOLFBOOT_RISCV_MMODE +#ifdef WOLFBOOT_MMODE_SMODE_BOOT + /* M-mode -> S-mode transition for Linux boot */ + wolfBoot_printf("M->S transition: entry=0x%lx\n", (unsigned long)app_offset); + setup_pmp_for_smode(); + delegate_traps_to_smode(); + /* This never returns */ + enter_smode((unsigned long)app_offset, hartid, dts_addr); +#else + /* Direct M-mode jump for bare-metal payloads. + * Define WOLFBOOT_MMODE_SMODE_BOOT to boot Linux via S-mode transition. */ + wolfBoot_printf("M-mode direct jump to 0x%lx\n", (unsigned long)app_offset); +#ifdef DEBUG_BOOT + { + volatile uint8_t lsr = MMUART_LSR(DEBUG_UART_BASE); + uint32_t *p = (uint32_t*)app_offset; + wolfBoot_printf("Pre-jump: LSR=0x%x THRE=%d\n", + (unsigned)lsr, (lsr & MSS_UART_THRE) ? 1 : 0); + wolfBoot_printf("App[0]=0x%lx [1]=0x%lx\n", + (unsigned long)p[0], (unsigned long)p[1]); + } + /* Drain UART TX before jumping (~10 ms at 40 MHz) */ + { volatile int i; for (i = 0; i < 400000; i++) {} } +#endif /* DEBUG_BOOT */ + (void)hartid; + (void)dts_addr; + /* fence + fence.i: ensure stores from ELF loading are visible to I-fetch */ + asm volatile("fence" ::: "memory"); + asm volatile("fence.i" ::: "memory"); + asm volatile("jr %0" : : "r"(app_offset)); + __builtin_unreachable(); +#endif /* WOLFBOOT_MMODE_SMODE_BOOT */ -#if __riscv_xlen == 64 -#ifdef MMU +#elif __riscv_xlen == 64 + /* S-mode / RV64 boot */ + asm volatile("fence" ::: "memory"); + riscv_icache_sync(); asm volatile( - #ifndef WOLFBOOT_RISCV_MMODE + #if defined(MMU) && !defined(WOLFBOOT_RISCV_MMODE) + /* S-mode boot (e.g., when running under HSS/OpenSBI) */ "csrw satp, zero\n" "sfence.vma\n" #endif @@ -310,14 +343,7 @@ void do_boot(const uint32_t *app_offset) "jr %2\n" : : "r"(hartid), "r"(dts_addr), "r"(app_offset) : "a0", "a1" ); -#else - asm volatile( - "mv a0, %0\n" - "mv a1, zero\n" - "jr %1\n" - : : "r"(hartid), "r"(app_offset) : "a0", "a1" - ); -#endif + #else /* RV32 */ /* RV32: typically bare-metal without Linux, simpler boot */ asm 
volatile("jr %0" : : "r"(app_offset)); @@ -329,12 +355,9 @@ void do_boot(const uint32_t *app_offset) void isr_empty(void) { - /* Empty interrupt handler */ } -/* ============================================================================ - * Reboot Functions - * ============================================================================ */ +/* Reboot functions */ #if __riscv_xlen == 32 && defined(RAM_CODE) /* RV32 HiFive1 watchdog-based reboot */ @@ -360,8 +383,6 @@ void RAMFUNCTION arch_reboot(void) AON_WDOGKEY = AON_WDOGKEY_VALUE; AON_WDOGFEED = 1; - while(1) - ; wolfBoot_panic(); } @@ -373,8 +394,6 @@ void WEAKFUNCTION arch_reboot(void) SYSREG_MSS_RESET_CR = 0xDEAD; #endif - while(1) - ; wolfBoot_panic(); } diff --git a/src/boot_riscv_start.S b/src/boot_riscv_start.S index 78ba50d51e..7ecb0d9bd0 100644 --- a/src/boot_riscv_start.S +++ b/src/boot_riscv_start.S @@ -25,130 +25,279 @@ #include "hal/mpfs250.h" #endif -/* MODE_PREFIX is now defined in hal/riscv.h */ -/* ============================================================================ - * RISC-V Boot Entry Point - * ============================================================================ - * - * For RV64 (typically running under SBI): - * Entry conditions (passed by prior boot stage / SBI): - * a0 = hart ID (hardware thread identifier) - * a1 = pointer to device tree blob (DTB) in memory - * - * For RV32 (typically bare metal): - * Starts fresh, reads hart ID from CSR - * - * ============================================================================ */ +/* RV64 M-mode: direct boot from eNVM; a0=hartid from CSR + * RV64 S-mode: entered from SBI with a0=hartid, a1=DTB pointer + * RV32: bare metal; stack/gp from linker symbols */ .section .init .globl _reset _reset: #if __riscv_xlen == 64 - /* ---------- RV64 Boot Sequence ---------- */ #ifdef WOLFBOOT_RISCV_MMODE - /* - * Machine Mode: Read hart ID from CSR since we're the first code - * running on this core. 
In Supervisor mode, the SBI passes it in a0. - */ - csrr a0, CSR_MHARTID + /* RV64 M-mode: first code from eNVM at 0x20220100 */ + call .L_clear_ras /* flush stale RAS prediction state */ + + csrr a0, mhartid + mv tp, a0 + + /* Disable interrupts, clear M-mode CSRs */ + li t0, MSTATUS_MIE + csrc mstatus, t0 + csrw mie, zero + csrw mip, zero + csrw mscratch, zero + csrw mcause, zero + csrw mepc, zero + csrw pmpcfg0, zero + csrw pmpcfg2, zero + + /* Hart 0 (E51) continues; others park in .init until woken */ + mv a0, tp + bnez a0, .L_secondary_hart_wait_envm + +#ifdef TARGET_mpfs250 + /* Enable L2 ways (mask 0x0B: ways 0, 1, 3) and clear shutdown + * before copying text to L2 scratchpad. */ + li t1, 0x02010000 + li t2, 0x0B + sd t2, 8(t1) /* L2_WAY_ENABLE */ + fence + li t1, 0x20002000 + sw zero, 0x174(t1) /* SYSREG_L2_SHUTDOWN_CR = 0 */ + fence #endif - /* - * Preserve boot parameters in callee-saved registers: - * tp (x4) = hart ID - Used for multi-hart coordination. The RISC-V ABI - * reserves tp as thread-local storage pointer, which C code - * won't clobber, making it safe for inter-hart communication. - * s1 (x9) = DTB pointer - Saved for later use (e.g., passing to kernel) - */ + /* Copy .text from eNVM to L2 SRAM. + * Use li/ld for dest/size (la would give wrong address across the + * eNVM-to-L2 gap which exceeds the 32-bit PC-relative range). 
*/ + la t0, _stored_text + la t3, _copy_params + ld t1, 0(t3) /* dest (L2 SRAM base) */ + ld t2, 8(t3) /* byte count */ + add t2, t1, t2 /* end address */ + +.L_copy_text: + bgeu t1, t2, .L_copy_text_done /* if dest >= end, done */ + ld t3, 0(t0) + sd t3, 0(t1) + addi t0, t0, 8 + addi t1, t1, 8 + j .L_copy_text +.L_copy_text_done: + fence.i /* flush icache before jumping to SRAM */ + + lui t0, %hi(.L_sram_entry) + addi t0, t0, %lo(.L_sram_entry) + jr t0 + +/* Two-level nested call to flush the Return Address Stack predictor */ +.L_clear_ras: + mv t0, ra + nop + call .L_clear_ras_inner + nop + mv ra, t0 + ret + +.L_clear_ras_inner: + nop + nop + ret + +/* linker-provided copy params (loaded via la, within eNVM PC range) */ +.align 3 +_copy_params: + .dword _start_text_sram /* destination (L2 SRAM) */ + .dword _text_size /* byte count */ + +/* Secondary harts (U54 cores) park here until woken by E51 via CLINT IPI. + * HLS protocol: signal IN_WFI -> wfi -> clear MSIP -> signal PASSED_WFI -> jump. 
*/ +.L_secondary_hart_wait_envm: + li t0, MSTATUS_MIE + csrc mstatus, t0 + csrw mie, zero + csrw mip, zero + li t0, MIE_MSIE /* wake only on IPI */ + csrw mie, t0 + + /* Set up per-hart stack: base + hartid*STACK_SIZE_PER_HART */ + csrr a0, mhartid + la t0, _secondary_hart_stack_base + li t1, STACK_SIZE_PER_HART + mul t2, a0, t1 + add sp, t0, t2 + li t0, -16 + and sp, sp, t0 + + addi sp, sp, -64 /* allocate 64-byte HLS at top of stack */ + mv s11, sp + sd zero, 0(s11) + sd zero, 8(s11) + sd zero, 16(s11) + sd zero, 24(s11) + sd zero, 32(s11) + sd zero, 40(s11) + sd zero, 48(s11) + sd zero, 56(s11) + + /* Wait for E51 to signal HLS_MAIN_HART_STARTED */ + li t3, 0x12344321 + la t1, _main_hart_hls +.L_wait_main_hart: + lwu t2, 0(t1) + bne t3, t2, .L_wait_main_hart + + li t0, 0x12345678 /* HLS_OTHER_HART_IN_WFI */ + sw t0, 0(s11) + fence iorw, iorw + +.L_secondary_wfi_loop: + wfi + csrr t0, mip + andi t0, t0, MIP_MSIP + beqz t0, .L_secondary_wfi_loop + + csrr a0, mhartid + li t0, 0x02000000 /* CLINT_BASE */ + slli t1, a0, 2 + add t0, t0, t1 + sw zero, 0(t0) /* clear MSIP */ + + li t0, 0x87654321 /* HLS_OTHER_HART_PASSED_WFI */ + sw t0, 0(s11) + fence iorw, iorw + fence.i + + csrr a0, mhartid + mv a1, s11 + la t0, secondary_hart_entry + jr t0 + +/* .text section follows — runs from L2 SRAM after copy from eNVM */ +.section .text +.L_sram_entry: + la t0, trap_vector_table + csrw mtvec, t0 +1: csrr t1, mtvec + bne t0, t1, 1b + + /* Zero all GPRs (tp = hart ID preserved) */ + li x1, 0 + li x2, 0 + li x3, 0 + /* x4 (tp) = hart ID, don't clear */ + li x5, 0 + li x6, 0 + li x7, 0 + li x8, 0 + li x9, 0 + li x10, 0 + li x11, 0 + li x12, 0 + li x13, 0 + li x14, 0 + li x15, 0 + li x16, 0 + li x17, 0 + li x18, 0 + li x19, 0 + li x20, 0 + li x21, 0 + li x22, 0 + li x23, 0 + li x24, 0 + li x25, 0 + li x26, 0 + li x27, 0 + li x28, 0 + li x29, 0 + li x30, 0 + li x31, 0 + + /* Spin until misa confirms RV64 (MSB set = negative when sign-extended) */ +.L_xlen_check: + csrr t0, misa + 
bltz t0, .L_xlen_ok + j .L_xlen_check +.L_xlen_ok: + + .option push + .option norelax + la gp, __global_pointer$ + .option pop + + la sp, _end_stack + li t0, -16 + and sp, sp, t0 + mv s0, sp + + /* Copy .data from flash to RAM */ + la t0, _stored_data + la t1, _start_data + la t2, _end_data + beq t0, t1, .L_data_copy_done +.L_data_copy: + beq t1, t2, .L_data_copy_done + ld t3, 0(t0) + sd t3, 0(t1) + addi t0, t0, 8 + addi t1, t1, 8 + j .L_data_copy +.L_data_copy_done: + + /* Clear .bss */ + la t0, _start_bss + la t1, _end_bss +.L_bss_clear: + beq t0, t1, .L_bss_clear_done + sd zero, 0(t0) + addi t0, t0, 8 + j .L_bss_clear +.L_bss_clear_done: + +#ifndef TARGET_mpfs250 + /* Clear SiFive bus error unit accrued registers (not present on MPFS) */ + la a4,0x01700020UL + sb x0, 0(a4) + la a4,0x01701020UL + sb x0, 0(a4) + la a4,0x01702020UL + sb x0, 0(a4) + la a4,0x01703020UL + sb x0, 0(a4) + la a4,0x01704020UL + sb x0, 0(a4) +#endif + + mv a0, tp + j main + +#else + /* RV64 S-mode: a0=hartid, a1=DTB pointer (passed by SBI) */ mv tp, a0 mv s1, a1 - - /* - * Initialize global pointer to zero as a safety measure. - * If an exception occurs before the C runtime is initialized, - * gp=0 ensures predictable behavior rather than random memory access. - */ mv gp, zero - /* - * Configure trap/exception handler: - * Load address of trap_vector_table into the trap-vector base-address - * register (mtvec in M-mode, stvec in S-mode). All synchronous exceptions - * and interrupts will vector through this table. - */ la t0, trap_vector_table - csrw MODE_PREFIX(tvec), t0 - - /* - * Disable all interrupt sources initially by clearing the - * interrupt-enable register (mie/sie). This prevents spurious - * interrupts during early initialization. 
- */ - csrw MODE_PREFIX(ie), zero - - /* - * Enable interrupt sources: - * - Software Interrupts (IPIs) for multi-hart boot coordination - * - External Interrupts for PLIC-routed peripheral interrupts (e.g., MMC) - * - * M-mode: MSIE (Software) + MEIE (External) - * S-mode: SSIE (Software) + SEIE (External) - */ -#ifndef WOLFBOOT_RISCV_MMODE + csrw stvec, t0 + csrw sie, zero li t0, (SIE_SSIE | SIE_SEIE) -#else - li t0, (MIE_MSIE | MIE_MEIE) -#endif - csrs MODE_PREFIX(ie), t0 - - /* - * Enable global interrupts by setting the SIE/MIE bit in sstatus/mstatus. - * Without this, the CPU will never take interrupts regardless of the - * per-source enables in sie/mie. - * - * M-mode: mstatus.MIE (bit 3) - * S-mode: sstatus.SIE (bit 1) - */ -#ifndef WOLFBOOT_RISCV_MMODE + csrs sie, t0 li t0, SSTATUS_SIE -#else - li t0, MSTATUS_MIE -#endif - csrs MODE_PREFIX(status), t0 - - /* - * Initialize stack pointer: - * WOLFBOOT_STACK_TOP = 0x80000000 (M-mode) or 0x80200000 (S-mode) - * - * The stack grows downward from this address. RISC-V calling convention - * requires 16-byte stack alignment, enforced by AND with -16 (0xFFFF...FFF0). - * - * s0 (frame pointer) is set to match sp for the initial stack frame. - */ + csrs sstatus, t0 + li t0, -16 li t1, WOLFBOOT_STACK_TOP and sp, t1, t0 mv s0, sp - - /* - * Set global pointer (gp) to the stack base. - * Note: This is platform-specific. Typically gp points to a .sdata - * section for efficient global variable access, but here it's used - * as a known reference point for early boot. - */ mv gp, s0 - /* - * Jump to C entry point: - * a0 = hart ID (restored from tp for main's first argument) - * - * Using 'j' (jump) instead of 'call' since main() should never return. - * The DTB pointer in s1 can be retrieved later if needed. 
- */ mv a0, tp j main +#endif /* WOLFBOOT_RISCV_MMODE */ + #else /* __riscv_xlen == 32 */ /* ---------- RV32 Boot Sequence ---------- */ @@ -191,15 +340,7 @@ _reset: #endif /* __riscv_xlen */ -/* ============================================================================ - * reloc_trap_vector - Relocate trap vector table - * - * Parameters: - * a0 = pointer to new trap vector table base (uint32_t*) - * - * Sets the trap-vector base-address register to (address + 4), - * accounting for the header offset in the application image. - * ============================================================================ */ +/* reloc_trap_vector: update tvec to app image trap table (a0 = base uint32_t*) */ .globl reloc_trap_vector reloc_trap_vector: addi a0, a0, 4 /* address + 1 (uint32_t* = +4 bytes) */ diff --git a/src/sdhci.c b/src/sdhci.c index 58d47527c2..5bb218e23c 100644 --- a/src/sdhci.c +++ b/src/sdhci.c @@ -63,6 +63,13 @@ static uint32_t g_rca = 0; /* SD Card Relative Address */ static volatile uint32_t g_mmc_irq_status = 0; static volatile int g_mmc_irq_pending = 0; +/* Microsecond delay using hardware timer */ +static void udelay(uint32_t us) +{ + uint64_t end = hal_get_timer_us() + us; + while (hal_get_timer_us() < end); +} + /* ============================================================================ * Register Access Helpers * ============================================================================ */ @@ -176,7 +183,7 @@ void sdhci_irq_handler(void) /* Signal that interrupt was handled */ g_mmc_irq_pending = 1; -#ifdef DEBUG_SDHCI +#ifdef DEBUG_SDHCI_IRQ wolfBoot_printf("sdhci_irq_handler: status=0x%08X, flags=0x%02X\n", status, g_mmc_irq_status); #endif @@ -529,7 +536,7 @@ static int sdhci_wait_busy(int check_dat0) } /* Reset data and command lines to recover from errors */ -static void sdhci_reset_lines(void) +static inline void sdhci_reset_lines(void) { sdhci_reg_or(SDHCI_SRS11, SDHCI_SRS11_RESET_DAT_CMD); while (SDHCI_REG(SDHCI_SRS11) & 
SDHCI_SRS11_RESET_DAT_CMD); @@ -953,7 +960,7 @@ static int emmc_send_op_cond(uint32_t ocr_arg, uint32_t *ocr_reg) } /* Small delay between retries */ - for (volatile int i = 0; i < 1000; i++); + udelay(10); } while (--timeout > 0); @@ -1016,6 +1023,13 @@ static int emmc_card_full_init(void) int status; uint32_t ocr_reg; + /* Set power to 3.3v */ + status = sdhci_set_power(SDHCI_SRS10_BVS_3_3V); + if (status != 0) { + wolfBoot_printf("eMMC: Failed to set power\n"); + return status; + } + /* Send CMD0 (GO_IDLE) to reset eMMC */ status = sdhci_cmd(MMC_CMD0_GO_IDLE, 0, SDHCI_RESP_NONE); if (status != 0) { @@ -1024,7 +1038,7 @@ static int emmc_card_full_init(void) } /* Small delay after reset */ - for (volatile int i = 0; i < 10000; i++); + udelay(100); /* Send CMD1 with operating conditions (3.3V, sector mode) */ status = emmc_send_op_cond(MMC_DEVICE_3_3V_VOLT_SET, &ocr_reg); @@ -1387,10 +1401,14 @@ int sdhci_init(void) reg &= ~SDHCI_HRS06_EMM_MASK; #ifdef DISK_EMMC reg |= SDHCI_HRS06_MODE_LEGACY; /* eMMC Legacy mode */ +#ifdef DEBUG_SDHCI wolfBoot_printf("SDHCI: eMMC mode\n"); +#endif #else reg |= SDHCI_HRS06_MODE_SD; /* SD card mode */ +#ifdef DEBUG_SDHCI wolfBoot_printf("SDHCI: SDCard mode\n"); +#endif #endif SDHCI_REG_SET(SDHCI_HRS06, reg); @@ -1465,33 +1483,18 @@ int sdhci_init(void) sdhci_set_clock(SDHCI_CLK_400KHZ); #ifdef DISK_EMMC - /* ========================================================================= - * eMMC Initialization Path - * ========================================================================= */ - - /* Set power to 3.3v */ - status = sdhci_set_power(SDHCI_SRS10_BVS_3_3V); - if (status != 0) { - wolfBoot_printf("eMMC: Failed to set power\n"); - return status; - } - /* Run full eMMC card initialization */ status = emmc_card_full_init(); if (status != 0) { - wolfBoot_printf("eMMC: Card initialization failed\n"); + wolfBoot_printf("eMMC: Card init failed (%d)\n", status); return status; } #else /* DISK_SDCARD */ - /* 
========================================================================= - * SD Card Initialization Path - * ========================================================================= */ - /* Run full SD card initialization */ status = sdcard_card_full_init(); if (status != 0) { - wolfBoot_printf("SD Card: Card initialization failed\n"); + wolfBoot_printf("SD Card: Card init failed (%d)\n", status); return status; } @@ -1502,6 +1505,19 @@ int sdhci_init(void) /* Set data timeout to 3000ms */ status = sdhci_set_timeout(SDHCI_DATA_TIMEOUT_US); } + +#ifdef DEBUG_SDHCI + { + const char *card_type; +#ifdef DISK_EMMC + card_type = "eMMC"; +#else + card_type = "SD"; +#endif + wolfBoot_printf("sdhci_init: %s status: %d\n", card_type, status); + } +#endif + return status; } diff --git a/src/string.c b/src/string.c index f03cd6a001..87a82726e9 100644 --- a/src/string.c +++ b/src/string.c @@ -30,7 +30,7 @@ #if defined(_RENESAS_RA_) #include #endif -#ifndef TARGET_library +#if !defined(TARGET_library) && defined(__STDC_HOSTED__) && __STDC_HOSTED__ #include #else size_t strlen(const char *s); /* forward declaration */ @@ -289,7 +289,7 @@ void RAMFUNCTION *memcpy(void *dst, const void *src, size_t n) } #endif /* IAR */ -#ifndef __IAR_SYSTEMS_ICC__ +#if !defined(__IAR_SYSTEMS_ICC__) && !defined(TARGET_X86_64_EFI) void *memmove(void *dst, const void *src, size_t n) { int i; @@ -306,7 +306,7 @@ void *memmove(void *dst, const void *src, size_t n) return memcpy(dst, src, n); } } -#endif +#endif /* !IAR && !X86_64_EFI */ #endif /* __CCRX__ Renesas CCRX */ #endif /* WOLFBOOT_USE_STDLIBC */ diff --git a/src/update_disk.c b/src/update_disk.c index 7ea38e8e07..1fc0a55ea1 100644 --- a/src/update_disk.c +++ b/src/update_disk.c @@ -244,7 +244,9 @@ void RAMFUNCTION wolfBoot_start(void) const uint8_t *hdr_ptr = NULL; #ifdef MMU uint8_t *dts_addr = NULL; + #ifdef WOLFBOOT_FDT uint32_t dts_size = 0; + #endif #endif char part_name[4] = {'P', ':', 'X', '\0'}; BENCHMARK_DECLARE(); diff --git 
a/src/vector_riscv.S b/src/vector_riscv.S index 70c83e930d..88b80c4beb 100644 --- a/src/vector_riscv.S +++ b/src/vector_riscv.S @@ -65,22 +65,15 @@ STORE x29, 29 * REGBYTES(sp) STORE x30, 30 * REGBYTES(sp) STORE x31, 31 * REGBYTES(sp) -#ifndef WOLFBOOT_RISCV_MMODE - csrr a0, scause - csrr a1, sepc - csrr a2, stval -#else - csrr a0, mcause - csrr a1, mepc - csrr a2, mtval -#endif + + csrr a0, MODE_PREFIX(cause) + csrr a1, MODE_PREFIX(epc) + csrr a2, MODE_PREFIX(tval) + mv a3, sp jal handle_trap -#ifndef WOLFBOOT_RISCV_MMODE - csrw sepc, a0 -#else - csrw mepc, a0 -#endif + csrw MODE_PREFIX(epc), a0 + .endm .macro trap_exit @@ -116,11 +109,9 @@ LOAD x31, 31 * REGBYTES(sp) LOAD x2, 2 * REGBYTES(sp) addi sp, sp, 32 * REGBYTES -#ifndef WOLFBOOT_RISCV_MMODE - sret -#else - mret -#endif + + MODE_PREFIX(ret) + .endm #else /* __riscv_xlen == 32 */ diff --git a/test-app/RISCV64-mpfs250.ld b/test-app/RISCV64-mpfs250.ld index e7b367cab9..9ec66687be 100644 --- a/test-app/RISCV64-mpfs250.ld +++ b/test-app/RISCV64-mpfs250.ld @@ -10,6 +10,9 @@ OUTPUT_ARCH( "riscv" ) ENTRY( _reset ) +/* Provide M-mode symbols as dummy values for test-app (not used at runtime) */ +PROVIDE(_main_hart_hls = 0); + /* Memory areas */ MEMORY { @@ -23,12 +26,23 @@ MEMORY /* Define output sections */ SECTIONS { - /* The startup code goes first into IRAM */ + /* Entry point must be first for raw binary (.bin) boot. + * _reset() initializes GP, SP, BSS, then calls main(). + * For ELF boot the ENTRY(_reset) directive handles this, + * but for .bin wolfBoot jumps to the load address directly. */ + .init : + { + . = ALIGN(8); + KEEP(*(.init)) + . = ALIGN(8); + } >IRAM + + /* Interrupt/trap vector table */ .isr_vector : { . = ALIGN(8); - KEEP(*(.isr_vector)) /* Startup code */ _start_vector = .; + KEEP(*(.isr_vector)) . = ALIGN(8); } >IRAM @@ -42,7 +56,6 @@ SECTIONS *(.rodata) /* .rodata sections (constants, strings, etc.) */ *(.rodata*) /* .rodata* sections (constants, strings, etc.) 
*/ - KEEP (*(.init)) KEEP (*(.fini)) . = ALIGN(8); diff --git a/test-app/app_mpfs250.c b/test-app/app_mpfs250.c index 57d219b817..cf8ce3fd41 100644 --- a/test-app/app_mpfs250.c +++ b/test-app/app_mpfs250.c @@ -36,8 +36,11 @@ void main(void) { - uart_init(); - hal_init(); + /* wolfBoot fully configured UART0 before jumping here. + * Calling uart_init() again clears the TX FIFO (FCR write) while wolfBoot's + * last output may still be draining, which can leave THRE stuck at 0. + * Calling hal_init() writes to _main_hart_hls=0 (NULL ptr crash). + * So use wolfBoot_printf directly — UART0 is already ready. */ wolfBoot_printf("========================\r\n"); wolfBoot_printf("PolarFire SoC MPFS250 wolfBoot demo Application\r\n"); diff --git a/test-app/startup_riscv.c b/test-app/startup_riscv.c index 27b4a13f89..4cfc2e813c 100644 --- a/test-app/startup_riscv.c +++ b/test-app/startup_riscv.c @@ -42,11 +42,11 @@ void __attribute__((naked,section(".init"))) _reset(void) { asm volatile("la gp, _global_pointer"); asm volatile("la sp, _end_stack"); - /* Set up vectored interrupt, with IV starting at offset 0x100 */ -#ifndef WOLFBOOT_RISCV_MMODE - asm volatile("csrw stvec, %0":: "r"((uint8_t *)(&_start_vector) + 1)); -#else + /* Set up vectored interrupt table. The +1 sets MODE=1 (vectored). 
*/ +#ifdef WOLFBOOT_RISCV_MMODE asm volatile("csrw mtvec, %0":: "r"((uint8_t *)(&_start_vector) + 1)); +#else + asm volatile("csrw stvec, %0":: "r"((uint8_t *)(&_start_vector) + 1)); #endif src = (uint32_t *) &_stored_data; @@ -79,10 +79,12 @@ void do_boot(const uint32_t *app_offset) static uint32_t synctrap_cause = 0; void __attribute__((naked)) isr_synctrap(void) { -#ifndef WOLFBOOT_RISCV_MMODE - asm volatile("csrr %0, scause" : "=r"(synctrap_cause)); -#else +#ifdef WOLFBOOT_RISCV_MMODE asm volatile("csrr %0, mcause" : "=r"(synctrap_cause)); + asm volatile("mret"); +#else + asm volatile("csrr %0, scause" : "=r"(synctrap_cause)); + asm volatile("sret"); #endif } diff --git a/test-app/vector_riscv.S b/test-app/vector_riscv.S index 6215b224b2..9edba30e25 100644 --- a/test-app/vector_riscv.S +++ b/test-app/vector_riscv.S @@ -62,11 +62,7 @@ ld x30, 112(sp) ld x31, 120(sp) addi sp, sp, 128 -#ifndef WOLFBOOT_RISCV_MMODE - sret -#else - mret -#endif + MODE_PREFIX(ret) .endm #else /* __riscv_xlen == 32 */ @@ -110,11 +106,7 @@ lw x30, 56(sp) lw x31, 60(sp) addi sp, sp, 64 -#ifndef WOLFBOOT_RISCV_MMODE - sret -#else - mret -#endif + MODE_PREFIX(ret) .endm #endif /* __riscv_xlen */ diff --git a/tools/scripts/mpfs_qspi_prog.py b/tools/scripts/mpfs_qspi_prog.py new file mode 100755 index 0000000000..df3dfe29d4 --- /dev/null +++ b/tools/scripts/mpfs_qspi_prog.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +""" +mpfs_qspi_prog.py - Program QSPI flash on PolarFire SoC MPFS250 via wolfBoot UART + +Requires wolfBoot built with UART_QSPI_PROGRAM=1 and EXT_FLASH=1. + +Usage: + python3 mpfs_qspi_prog.py [qspi_offset] + +Arguments: + serial_port Serial device, e.g. /dev/ttyUSB0 + binary_file Signed firmware image, e.g. 
test-app/image_v1_signed.bin + qspi_offset Hex or decimal QSPI destination address (default: 0x20000) + Use 0x20000 for the boot partition (WOLFBOOT_PARTITION_BOOT_ADDRESS) + Use 0x2000000 for the update partition (WOLFBOOT_PARTITION_UPDATE_ADDRESS) + +Example: + python3 tools/scripts/mpfs_qspi_prog.py /dev/ttyUSB0 \\ + test-app/image_v1_signed.bin 0x20000 + +Protocol (implemented in hal/mpfs250.c qspi_uart_program()): + 1. wolfBoot prints "QSPI-PROG: Press 'P' ..." prompt at startup + 2. This script sends 'P' to enter programming mode + 3. wolfBoot sends "READY\\r\\n" + 4. Script sends 8 bytes: [4B LE address][4B LE size] + 5. wolfBoot erases sectors and sends "ERASED\\r\\n" + 6. For each 256-byte chunk: + wolfBoot sends ACK (0x06) -> script sends chunk + 7. wolfBoot sends "DONE\\r\\n" and continues booting + +Copyright (C) 2025 wolfSSL Inc. +GPL v3 +""" + +import sys +import os +import time +import struct + +try: + import serial +except ImportError: + print("Error: 'pyserial' not installed. 
Run: pip install pyserial") + sys.exit(1) + +BAUD_RATE = 115200 +CHUNK_SIZE = 256 +ACK_BYTE = 0x06 +PROMPT_TIMEOUT = 60 # seconds to wait for wolfBoot prompt after reset +ERASE_TIMEOUT = 120 # seconds for sector erase (64KB sectors, ~800ms each) +DONE_TIMEOUT = 30 # seconds to wait for DONE after last chunk + + +def wait_for(port, keyword, timeout_sec, label=""): + """Read lines until one contains keyword, printing each line received.""" + deadline = time.monotonic() + timeout_sec + while time.monotonic() < deadline: + remaining = deadline - time.monotonic() + port.timeout = min(1.0, remaining) + line = port.readline() + if not line: + continue + text = line.decode("ascii", errors="replace").rstrip() + if text: + tag = f"[{label}] " if label else "" + print(f" {tag}< {text}") + if keyword in text: + return True + return False + + +def main(): + if len(sys.argv) < 3: + print(__doc__) + sys.exit(1) + + port_name = sys.argv[1] + bin_path = sys.argv[2] + offset = int(sys.argv[3], 0) if len(sys.argv) > 3 else 0x20000 + + if not os.path.exists(bin_path): + print(f"Error: file not found: {bin_path}") + sys.exit(1) + + with open(bin_path, "rb") as f: + data = f.read() + + # Device-side rejects images > 2 MiB; fail fast here to avoid + # waiting through prompt/erase only to have the target abort. 
+ MAX_IMAGE_SIZE = 0x200000 # 2 MiB, matches device-side check + if len(data) > MAX_IMAGE_SIZE: + print(f"Error: image too large ({len(data):,} bytes, max {MAX_IMAGE_SIZE:,})") + sys.exit(1) + + print(f"wolfBoot QSPI programmer for PolarFire SoC MPFS250") + print(f" Port : {port_name} @ {BAUD_RATE} baud") + print(f" Binary : {bin_path} ({len(data):,} bytes)") + print(f" QSPI addr : 0x{offset:08x}") + n_sectors = (len(data) + 0x10000 - 1) // 0x10000 + print(f" Sectors : {n_sectors} x 64 KB") + print() + + try: + port = serial.Serial(port_name, BAUD_RATE, timeout=1) + except serial.SerialException as e: + print(f"Error: cannot open {port_name}: {e}") + sys.exit(1) + + # Drain stale RX data + port.reset_input_buffer() + time.sleep(0.1) + + # ------------------------------------------------------------------ + # Step 1: Wait for the QSPI-PROG prompt from wolfBoot + # ------------------------------------------------------------------ + print(f"Waiting for wolfBoot 'QSPI-PROG' prompt (up to {PROMPT_TIMEOUT}s)...") + print(" Power-cycle or reset the board now if it has already booted.") + print() + + if not wait_for(port, "QSPI-PROG", PROMPT_TIMEOUT, "wolfBoot"): + print("Error: timed out waiting for 'QSPI-PROG' prompt.") + print(" Is wolfBoot built with UART_QSPI_PROGRAM=1 and EXT_FLASH=1?") + port.close() + sys.exit(1) + + # ------------------------------------------------------------------ + # Step 2: Send 'P' to enter programming mode + # ------------------------------------------------------------------ + print(" Sending 'P' to enter programming mode...") + port.write(b"P") + port.flush() + + if not wait_for(port, "READY", 5, "wolfBoot"): + print("Error: did not receive READY acknowledgement.") + port.close() + sys.exit(1) + + # ------------------------------------------------------------------ + # Step 3: Send destination address and data size (8 bytes, little-endian) + # ------------------------------------------------------------------ + header = struct.pack("