diff --git a/build-scripts/build_llvm.py b/build-scripts/build_llvm.py index e2221b8e54..7f67a1b793 100755 --- a/build-scripts/build_llvm.py +++ b/build-scripts/build_llvm.py @@ -299,7 +299,7 @@ def main(): "arc": { "repo": "https://github.com/llvm/llvm-project.git", "repo_ssh": "git@github.com:llvm/llvm-project.git", - "branch": "release/18.x", + "branch": "release/22.x", }, "xtensa": { "repo": "https://github.com/espressif/llvm-project.git", @@ -309,7 +309,7 @@ def main(): "default": { "repo": "https://github.com/llvm/llvm-project.git", "repo_ssh": "git@github.com:llvm/llvm-project.git", - "branch": "llvmorg-18.1.8", + "branch": "llvmorg-22.1.7", }, } diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index d1ec873c58..e38f9365ca 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -5069,8 +5069,13 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf, } prof_header.magic = 0xFF6C70726F667281LL; +#if LLVM_VERSION_MAJOR >= 19 + /* Version 10 */ + prof_header.version = 0x000000000000000ALL; +#else /* Version 9 */ prof_header.version = 0x0000000000000009LL; +#endif /* with VARIANT_MASK_IR_PROF (IR Instrumentation) */ prof_header.version |= 0x1ULL << 56; /* with VARIANT_MASK_MEMPROF (Memory Profile) */ @@ -5082,6 +5087,11 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf, /* __llvm_prf_bits won't be used in PGO, set dummy value here */ prof_header.num_prof_bitmaps = 0; prof_header.bitmap_delta = 0; +#if LLVM_VERSION_MAJOR >= 19 + /* vtable value profiling isn't used in PGO, set dummy values here */ + prof_header.num_vtables = 0; + prof_header.vnames_size = 0; +#endif if (!is_little_endian()) { aot_exchange_uint64((uint8 *)&prof_header.magic); @@ -5092,6 +5102,10 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf, aot_exchange_uint64((uint8 *)&prof_header.names_size); aot_exchange_uint64((uint8 *)&prof_header.counters_delta); aot_exchange_uint64((uint8 *)&prof_header.bitmap_delta); +#if LLVM_VERSION_MAJOR >= 19 + aot_exchange_uint64((uint8 *)&prof_header.num_vtables); + aot_exchange_uint64((uint8 *)&prof_header.vnames_size); +#endif aot_exchange_uint64((uint8 *)&prof_header.value_kind_last); } @@ -5117,6 +5131,10 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf, prof_data_64->num_bitmaps = 0; prof_data_64->num_value_sites[0] = prof_data->num_value_sites[0]; prof_data_64->num_value_sites[1] = prof_data->num_value_sites[1]; +#if LLVM_VERSION_MAJOR >= 19 + /* vtable value profiling isn't used in PGO, set dummy value */ + prof_data_64->num_value_sites[2] = 0; +#endif if (!is_little_endian()) { aot_exchange_uint64((uint8 *)&prof_data_64->func_hash); @@ -5128,6 +5146,9 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf, aot_exchange_uint32((uint8 *)&prof_data_64->num_bitmaps); aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[0]); aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[1]); +#if LLVM_VERSION_MAJOR >= 19 + aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[2]); +#endif } buf += sizeof(LLVMProfileData_64); } diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index 687c75e142..bb6ce6491d 100644 --- a/core/iwasm/aot/aot_runtime.h +++ b/core/iwasm/aot/aot_runtime.h @@ -436,6 +436,7 @@ typedef struct AOTFrame { } AOTFrame; #if WASM_ENABLE_STATIC_PGO != 0 +#define LLVM_VERSION_MAJOR 22 /* The bitmaps fields in LLVMProfileRawHeader, LLVMProfileData, * LLVMProfileData_64 all dummy fields, it's used in MC/DC code coverage * instead of PGO. See https://llvm.org/docs/InstrProfileFormat.html#bitmap */ @@ -453,6 +454,10 @@ typedef struct LLVMProfileRawHeader { uint64 counters_delta; uint64 bitmap_delta; uint64 names_delta; +#if LLVM_VERSION_MAJOR >= 19 + uint64 num_vtables; + uint64 vnames_size; +#endif uint64 value_kind_last; } LLVMProfileRawHeader; @@ -473,7 +478,14 @@ typedef struct LLVMProfileData { uintptr_t func_ptr; ValueProfNode **values; uint32 num_counters; +#if LLVM_VERSION_MAJOR >= 19 + /* The array size is IPVK_Last + 1. Since raw profile format version 10 + * (LLVM 19+) added the virtual table value profiling kind, IPVK_Last is 2 + * and the array has 3 elements. */ + uint16 num_value_sites[3]; +#else uint16 num_value_sites[2]; +#endif uint32 num_bitmaps; } LLVMProfileData; @@ -488,7 +500,14 @@ typedef struct LLVMProfileData_64 { uint64 func_ptr; uint64 values; uint32 num_counters; +#if LLVM_VERSION_MAJOR >= 19 + /* The array size is IPVK_Last + 1. Since raw profile format version 10 + * (LLVM 19+) added the virtual table value profiling kind, IPVK_Last is 2 + * and the array has 3 elements. */ + uint16 num_value_sites[3]; +#else uint16 num_value_sites[2]; +#endif uint32 num_bitmaps; } LLVMProfileData_64; #endif /* end of WASM_ENABLE_STATIC_PGO != 0 */ diff --git a/core/iwasm/aot/arch/aot_reloc_riscv.c b/core/iwasm/aot/arch/aot_reloc_riscv.c index 8df9f9f8ed..6339e125cc 100644 --- a/core/iwasm/aot/arch/aot_reloc_riscv.c +++ b/core/iwasm/aot/arch/aot_reloc_riscv.c @@ -366,6 +366,40 @@ check_reloc_offset(uint32 target_section_size, uint64 reloc_offset, return true; } +/** + * Cache used to pair an R_RISCV_PCREL_HI20 (AUIPC) relocation with its + * R_RISCV_PCREL_LO12_I/S relocation(s). + * + * The %pcrel_lo relocation references the label of the AUIPC instruction + * instead of the final target symbol, so its low 12 bits can only be computed + * from the PC-relative value that was resolved for the corresponding AUIPC. + * Older compilers always emitted the paired load/store/ADDI instruction + * immediately after the AUIPC, but newer LLVM versions may schedule unrelated + * instructions in between (and may emit several %pcrel_lo accesses for a single + * AUIPC). We therefore record the value resolved for each AUIPC here so that + * the LO12 relocation can recover the low 12 bits regardless of placement. + * + * Relocations of a section are applied sequentially and an AUIPC is always + * relocated before the LO12 relocation(s) that reference it (lower offset), so + * a small most-recently-used cache is sufficient. + */ +#define PCREL_HI20_CACHE_SIZE 8 + +typedef struct PcrelHi20Entry { + uint8 *auipc_addr; + int32 val; +} PcrelHi20Entry; + +#ifdef os_thread_local_attribute +#define RELOC_THREAD_LOCAL os_thread_local_attribute +#else +#define RELOC_THREAD_LOCAL +#endif + +static RELOC_THREAD_LOCAL PcrelHi20Entry + pcrel_hi20_cache[PCREL_HI20_CACHE_SIZE]; +static RELOC_THREAD_LOCAL uint32 pcrel_hi20_cache_pos; + bool apply_relocation(AOTModule *module, uint8 *target_section_addr, uint32 target_section_size, uint64 reloc_offset, @@ -414,8 +448,7 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr, #endif case R_RISCV_CALL: - case R_RISCV_CALL_PLT: - case R_RISCV_PCREL_HI20: /* S + A - P */ + case R_RISCV_CALL_PLT: /* S + A - P */ { val = (int32)(intptr_t)((uint8 *)symbol_addr + reloc_addend - addr); @@ -437,17 +470,46 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr, rv_calc_imm(val, &imm_hi, &imm_lo); rv_add_val((uint16 *)addr, (imm_hi << 12)); - if ((rv_get_val((uint16 *)(addr + 4)) & 0x7f) == RV_OPCODE_SW) { - /* Adjust imm for SW : S-type */ - val = (((int32)imm_lo >> 5) << 25) - + (((int32)imm_lo & 0x1f) << 7); + /* The JALR paired with the AUIPC of a call always immediately + * follows it and uses an I-type immediate. */ + rv_add_val((uint16 *)(addr + 4), ((int32)imm_lo << 20)); + break; + } + + case R_RISCV_PCREL_HI20: /* S + A - P */ + { + uint32 cache_idx; - rv_add_val((uint16 *)(addr + 4), val); + val = (int32)(intptr_t)((uint8 *)symbol_addr + reloc_addend - addr); + + CHECK_RELOC_OFFSET(sizeof(uint32)); + if (val != (intptr_t)((uint8 *)symbol_addr + reloc_addend - addr)) { + if (symbol_index >= 0) { + /* Reach the target by plt code */ + symbol_addr = (uint8 *)module->code + module->code_size + - get_plt_table_size() + + get_plt_item_size() * symbol_index; + val = (int32)(intptr_t)((uint8 *)symbol_addr - addr); + } } - else { - /* Adjust imm for MV(ADDI)/JALR : I-type */ - rv_add_val((uint16 *)(addr + 4), ((int32)imm_lo << 20)); + + if (val != (intptr_t)((uint8 *)symbol_addr + reloc_addend - addr)) { + goto fail_addr_out_of_range; } + + /* Patch the high 20 bits of the AUIPC only. The low 12 bits are + * patched separately by the R_RISCV_PCREL_LO12_I/S relocation, + * whose instruction is not guaranteed to immediately follow this + * AUIPC. */ + rv_calc_imm(val, &imm_hi, &imm_lo); + rv_add_val((uint16 *)addr, (imm_hi << 12)); + + /* Record the resolved PC-relative value so the matching LO12 + * relocation(s) can recover the low 12 bits. */ + cache_idx = pcrel_hi20_cache_pos % PCREL_HI20_CACHE_SIZE; + pcrel_hi20_cache[cache_idx].auipc_addr = addr; + pcrel_hi20_cache[cache_idx].val = val; + pcrel_hi20_cache_pos++; break; } @@ -470,29 +532,42 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr, case R_RISCV_PCREL_LO12_I: /* S - P */ case R_RISCV_PCREL_LO12_S: /* S - P */ { - /* Already handled in R_RISCV_PCREL_HI20, it should be skipped for - * most cases. But it is still needed for some special cases, e.g. - * ``` - * label: - * auipc t0, %pcrel_hi(symbol) # R_RISCV_PCREL_HI20 (symbol) - * lui t1, 1 - * lw t2, t0, %pcrel_lo(label) # R_RISCV_PCREL_LO12_I (label) - * add t2, t2, t1 - * sw t2, t0, %pcrel_lo(label) # R_RISCV_PCREL_LO12_S (label) - * ``` - * In this case, the R_RISCV_PCREL_LO12_I/S relocation should be - * handled after R_RISCV_PCREL_HI20 relocation. - * - * So, if the R_RISCV_PCREL_LO12_I/S relocation is not followed by - * R_RISCV_PCREL_HI20 relocation, it should be handled here but - * not implemented yet. - */ - - if ((uintptr_t)addr - (uintptr_t)symbol_addr - - (uintptr_t)reloc_addend - != 4) { + /* A %pcrel_lo relocation references the label of its AUIPC + * (%pcrel_hi) instruction, transformed by the AOT compiler into + * the section symbol plus the AUIPC offset as the addend. The low + * 12 bits must be derived from the PC-relative value that was + * resolved when the AUIPC's R_RISCV_PCREL_HI20 relocation was + * applied. Look that value up by the AUIPC address; this works + * whether or not the LO12 instruction immediately follows the + * AUIPC and when several LO12 accesses share one AUIPC. */ + uint8 *auipc_addr = (uint8 *)symbol_addr + (intptr_t)reloc_addend; + uint32 k; + bool found = false; + + CHECK_RELOC_OFFSET(sizeof(uint32)); + + for (k = 0; k < PCREL_HI20_CACHE_SIZE; k++) { + if (pcrel_hi20_cache[k].auipc_addr == auipc_addr) { + val = pcrel_hi20_cache[k].val; + found = true; + break; + } + } + if (!found) { goto fail_addr_out_of_range; } + + rv_calc_imm(val, &imm_hi, &imm_lo); + if (reloc_type == R_RISCV_PCREL_LO12_S) { + /* Adjust imm for store : S-type */ + val = (((int32)imm_lo >> 5) << 25) + + (((int32)imm_lo & 0x1f) << 7); + rv_add_val((uint16 *)addr, val); + } + else { + /* Adjust imm for load/MV(ADDI) : I-type */ + rv_add_val((uint16 *)addr, ((int32)imm_lo << 20)); + } break; } diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index e9f86817f0..b9d9aa56f2 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -2705,8 +2705,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) #if WASM_ENABLE_LINUX_PERF != 0 if (wasm_runtime_get_linux_perf()) { /* FramePointerKind.All */ - LLVMMetadataRef val = - LLVMValueAsMetadata(LLVMConstInt(LLVMInt32Type(), 2, false)); + LLVMMetadataRef val = LLVMValueAsMetadata( + LLVMConstInt(LLVMInt32TypeInContext(comp_ctx->context), 2, false)); const char *key = "frame-pointer"; LLVMAddModuleFlag(comp_ctx->module, LLVMModuleFlagBehaviorWarning, key, strlen(key), val); @@ -2728,7 +2728,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) LLVMAddModuleFlag( comp_ctx->module, LLVMModuleFlagBehaviorWarning, "Debug Info Version", strlen("Debug Info Version"), - LLVMValueAsMetadata(LLVMConstInt(LLVMInt32Type(), 3, false))); + LLVMValueAsMetadata( + LLVMConstInt(LLVMInt32TypeInContext(comp_ctx->context), 3, false))); comp_ctx->debug_file = dwarf_gen_file_info(comp_ctx); if (!comp_ctx->debug_file) {