Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions build-scripts/build_llvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def main():
"arc": {
"repo": "https://github.com/llvm/llvm-project.git",
"repo_ssh": "git@github.com:llvm/llvm-project.git",
"branch": "release/18.x",
"branch": "release/22.x",
},
"xtensa": {
"repo": "https://github.com/espressif/llvm-project.git",
Expand All @@ -309,7 +309,7 @@ def main():
"default": {
"repo": "https://github.com/llvm/llvm-project.git",
"repo_ssh": "git@github.com:llvm/llvm-project.git",
"branch": "llvmorg-18.1.8",
"branch": "llvmorg-22.1.7",
},
}

Expand Down
21 changes: 21 additions & 0 deletions core/iwasm/aot/aot_runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -5069,8 +5069,13 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf,
}

prof_header.magic = 0xFF6C70726F667281LL;
#if LLVM_VERSION_MAJOR >= 19
/* Version 10 */
prof_header.version = 0x000000000000000ALL;
#else
/* Version 9 */
prof_header.version = 0x0000000000000009LL;
#endif
/* with VARIANT_MASK_IR_PROF (IR Instrumentation) */
prof_header.version |= 0x1ULL << 56;
/* with VARIANT_MASK_MEMPROF (Memory Profile) */
Expand All @@ -5082,6 +5087,11 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf,
/* __llvm_prf_bits won't be used in PGO, set dummy value here */
prof_header.num_prof_bitmaps = 0;
prof_header.bitmap_delta = 0;
#if LLVM_VERSION_MAJOR >= 19
/* vtable value profiling isn't used in PGO, set dummy values here */
prof_header.num_vtables = 0;
prof_header.vnames_size = 0;
#endif

if (!is_little_endian()) {
aot_exchange_uint64((uint8 *)&prof_header.magic);
Expand All @@ -5092,6 +5102,10 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf,
aot_exchange_uint64((uint8 *)&prof_header.names_size);
aot_exchange_uint64((uint8 *)&prof_header.counters_delta);
aot_exchange_uint64((uint8 *)&prof_header.bitmap_delta);
#if LLVM_VERSION_MAJOR >= 19
aot_exchange_uint64((uint8 *)&prof_header.num_vtables);
aot_exchange_uint64((uint8 *)&prof_header.vnames_size);
#endif
aot_exchange_uint64((uint8 *)&prof_header.value_kind_last);
}

Expand All @@ -5117,6 +5131,10 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf,
prof_data_64->num_bitmaps = 0;
prof_data_64->num_value_sites[0] = prof_data->num_value_sites[0];
prof_data_64->num_value_sites[1] = prof_data->num_value_sites[1];
#if LLVM_VERSION_MAJOR >= 19
/* vtable value profiling isn't used in PGO, set dummy value */
prof_data_64->num_value_sites[2] = 0;
#endif

if (!is_little_endian()) {
aot_exchange_uint64((uint8 *)&prof_data_64->func_hash);
Expand All @@ -5128,6 +5146,9 @@ aot_dump_pgo_prof_data_to_buf(AOTModuleInstance *module_inst, char *buf,
aot_exchange_uint32((uint8 *)&prof_data_64->num_bitmaps);
aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[0]);
aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[1]);
#if LLVM_VERSION_MAJOR >= 19
aot_exchange_uint16((uint8 *)&prof_data_64->num_value_sites[2]);
#endif
}
buf += sizeof(LLVMProfileData_64);
}
Expand Down
19 changes: 19 additions & 0 deletions core/iwasm/aot/aot_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ typedef struct AOTFrame {
} AOTFrame;

#if WASM_ENABLE_STATIC_PGO != 0
/* Major version of the LLVM toolchain whose raw profile format is emitted by
 * the static PGO dump code. Only supply the default when nothing has defined
 * the macro yet, so that a build can select a different toolchain with
 * -DLLVM_VERSION_MAJOR=N and so that a translation unit which already sees a
 * definition does not hit a macro redefinition. */
#ifndef LLVM_VERSION_MAJOR
#define LLVM_VERSION_MAJOR 22
#endif
/* The bitmap fields in LLVMProfileRawHeader, LLVMProfileData and
* LLVMProfileData_64 are all dummy fields: bitmaps are used for MC/DC code
* coverage rather than PGO. See https://llvm.org/docs/InstrProfileFormat.html#bitmap */
Expand All @@ -453,6 +454,10 @@ typedef struct LLVMProfileRawHeader {
uint64 counters_delta;
uint64 bitmap_delta;
uint64 names_delta;
#if LLVM_VERSION_MAJOR >= 19
uint64 num_vtables;
uint64 vnames_size;
#endif
uint64 value_kind_last;
} LLVMProfileRawHeader;

Expand All @@ -473,7 +478,14 @@ typedef struct LLVMProfileData {
uintptr_t func_ptr;
ValueProfNode **values;
uint32 num_counters;
#if LLVM_VERSION_MAJOR >= 19
/* The array size is IPVK_Last + 1. Since raw profile format version 10
* (LLVM 19+) added the virtual table value profiling kind, IPVK_Last is 2
* and the array has 3 elements. */
uint16 num_value_sites[3];
#else
uint16 num_value_sites[2];
#endif
uint32 num_bitmaps;
} LLVMProfileData;

Expand All @@ -488,7 +500,14 @@ typedef struct LLVMProfileData_64 {
uint64 func_ptr;
uint64 values;
uint32 num_counters;
#if LLVM_VERSION_MAJOR >= 19
/* The array size is IPVK_Last + 1. Since raw profile format version 10
* (LLVM 19+) added the virtual table value profiling kind, IPVK_Last is 2
* and the array has 3 elements. */
uint16 num_value_sites[3];
#else
uint16 num_value_sites[2];
#endif
uint32 num_bitmaps;
} LLVMProfileData_64;
#endif /* end of WASM_ENABLE_STATIC_PGO != 0 */
Expand Down
137 changes: 106 additions & 31 deletions core/iwasm/aot/arch/aot_reloc_riscv.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,40 @@ check_reloc_offset(uint32 target_section_size, uint64 reloc_offset,
return true;
}

/**
* Cache used to pair an R_RISCV_PCREL_HI20 (AUIPC) relocation with its
* R_RISCV_PCREL_LO12_I/S relocation(s).
*
* The %pcrel_lo relocation references the label of the AUIPC instruction
* instead of the final target symbol, so its low 12 bits can only be computed
* from the PC-relative value that was resolved for the corresponding AUIPC.
* Older compilers always emitted the paired load/store/ADDI instruction
* immediately after the AUIPC, but newer LLVM versions may schedule unrelated
* instructions in between (and may emit several %pcrel_lo accesses for a single
* AUIPC). We therefore record the value resolved for each AUIPC here so that
* the LO12 relocation can recover the low 12 bits regardless of placement.
*
* Relocations of a section are applied sequentially and an AUIPC is always
* relocated before the LO12 relocation(s) that reference it (lower offset), so
* a small most-recently-used cache is sufficient.
*/
#define PCREL_HI20_CACHE_SIZE 8

/* One cache slot: pairs an AUIPC instruction with the value resolved for it */
typedef struct PcrelHi20Entry {
/* Address of the AUIPC instruction patched by an R_RISCV_PCREL_HI20
 * relocation; used as the lookup key by R_RISCV_PCREL_LO12_I/S */
uint8 *auipc_addr;
/* PC-relative value (S + A - P) that was resolved for that AUIPC */
int32 val;
} PcrelHi20Entry;

/* Storage-class prefix for the cache below: expands to
 * os_thread_local_attribute when that macro is defined, otherwise to nothing
 * (in which case the cache is a plain static). */
#ifdef os_thread_local_attribute
#define RELOC_THREAD_LOCAL os_thread_local_attribute
#else
#define RELOC_THREAD_LOCAL
#endif

/* Slots holding the most recently resolved AUIPC values. Written by the
 * R_RISCV_PCREL_HI20 case and searched linearly by the
 * R_RISCV_PCREL_LO12_I/S case.
 * NOTE(review): no code visible here clears the slots, so a lookup could in
 * principle match an entry recorded for an earlier load at the same address;
 * confirm whether a reset is needed. */
static RELOC_THREAD_LOCAL PcrelHi20Entry
pcrel_hi20_cache[PCREL_HI20_CACHE_SIZE];
/* Number of entries recorded so far; taken modulo PCREL_HI20_CACHE_SIZE to
 * choose the slot that the next R_RISCV_PCREL_HI20 relocation overwrites. */
static RELOC_THREAD_LOCAL uint32 pcrel_hi20_cache_pos;

bool
apply_relocation(AOTModule *module, uint8 *target_section_addr,
uint32 target_section_size, uint64 reloc_offset,
Expand Down Expand Up @@ -414,8 +448,7 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
#endif

case R_RISCV_CALL:
case R_RISCV_CALL_PLT:
case R_RISCV_PCREL_HI20: /* S + A - P */
case R_RISCV_CALL_PLT: /* S + A - P */
{
val = (int32)(intptr_t)((uint8 *)symbol_addr + reloc_addend - addr);

Expand All @@ -437,17 +470,46 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
rv_calc_imm(val, &imm_hi, &imm_lo);

rv_add_val((uint16 *)addr, (imm_hi << 12));
if ((rv_get_val((uint16 *)(addr + 4)) & 0x7f) == RV_OPCODE_SW) {
/* Adjust imm for SW : S-type */
val = (((int32)imm_lo >> 5) << 25)
+ (((int32)imm_lo & 0x1f) << 7);
/* The JALR paired with the AUIPC of a call always immediately
* follows it and uses an I-type immediate. */
rv_add_val((uint16 *)(addr + 4), ((int32)imm_lo << 20));
break;
}

case R_RISCV_PCREL_HI20: /* S + A - P */
{
uint32 cache_idx;

rv_add_val((uint16 *)(addr + 4), val);
val = (int32)(intptr_t)((uint8 *)symbol_addr + reloc_addend - addr);

CHECK_RELOC_OFFSET(sizeof(uint32));
if (val != (intptr_t)((uint8 *)symbol_addr + reloc_addend - addr)) {
if (symbol_index >= 0) {
/* Reach the target by plt code */
symbol_addr = (uint8 *)module->code + module->code_size
- get_plt_table_size()
+ get_plt_item_size() * symbol_index;
val = (int32)(intptr_t)((uint8 *)symbol_addr - addr);
}
}
else {
/* Adjust imm for MV(ADDI)/JALR : I-type */
rv_add_val((uint16 *)(addr + 4), ((int32)imm_lo << 20));

if (val != (intptr_t)((uint8 *)symbol_addr + reloc_addend - addr)) {
goto fail_addr_out_of_range;
}

/* Patch the high 20 bits of the AUIPC only. The low 12 bits are
* patched separately by the R_RISCV_PCREL_LO12_I/S relocation,
* whose instruction is not guaranteed to immediately follow this
* AUIPC. */
rv_calc_imm(val, &imm_hi, &imm_lo);
rv_add_val((uint16 *)addr, (imm_hi << 12));

/* Record the resolved PC-relative value so the matching LO12
* relocation(s) can recover the low 12 bits. */
cache_idx = pcrel_hi20_cache_pos % PCREL_HI20_CACHE_SIZE;
pcrel_hi20_cache[cache_idx].auipc_addr = addr;
pcrel_hi20_cache[cache_idx].val = val;
pcrel_hi20_cache_pos++;
break;
}

Expand All @@ -470,29 +532,42 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
case R_RISCV_PCREL_LO12_I: /* S - P */
case R_RISCV_PCREL_LO12_S: /* S - P */
{
/* Already handled in R_RISCV_PCREL_HI20, it should be skipped for
* most cases. But it is still needed for some special cases, e.g.
* ```
* label:
* auipc t0, %pcrel_hi(symbol) # R_RISCV_PCREL_HI20 (symbol)
* lui t1, 1
* lw t2, t0, %pcrel_lo(label) # R_RISCV_PCREL_LO12_I (label)
* add t2, t2, t1
* sw t2, t0, %pcrel_lo(label) # R_RISCV_PCREL_LO12_S (label)
* ```
* In this case, the R_RISCV_PCREL_LO12_I/S relocation should be
* handled after R_RISCV_PCREL_HI20 relocation.
*
* So, if the R_RISCV_PCREL_LO12_I/S relocation is not followed by
* R_RISCV_PCREL_HI20 relocation, it should be handled here but
* not implemented yet.
*/

if ((uintptr_t)addr - (uintptr_t)symbol_addr
- (uintptr_t)reloc_addend
!= 4) {
/* A %pcrel_lo relocation references the label of its AUIPC
* (%pcrel_hi) instruction, transformed by the AOT compiler into
* the section symbol plus the AUIPC offset as the addend. The low
* 12 bits must be derived from the PC-relative value that was
* resolved when the AUIPC's R_RISCV_PCREL_HI20 relocation was
* applied. Look that value up by the AUIPC address; this works
* whether or not the LO12 instruction immediately follows the
* AUIPC and when several LO12 accesses share one AUIPC. */
uint8 *auipc_addr = (uint8 *)symbol_addr + (intptr_t)reloc_addend;
uint32 k;
bool found = false;

CHECK_RELOC_OFFSET(sizeof(uint32));

for (k = 0; k < PCREL_HI20_CACHE_SIZE; k++) {
if (pcrel_hi20_cache[k].auipc_addr == auipc_addr) {
val = pcrel_hi20_cache[k].val;
found = true;
break;
}
}
if (!found) {
goto fail_addr_out_of_range;
}

rv_calc_imm(val, &imm_hi, &imm_lo);
if (reloc_type == R_RISCV_PCREL_LO12_S) {
/* Adjust imm for store : S-type */
val = (((int32)imm_lo >> 5) << 25)
+ (((int32)imm_lo & 0x1f) << 7);
rv_add_val((uint16 *)addr, val);
}
else {
/* Adjust imm for load/MV(ADDI) : I-type */
rv_add_val((uint16 *)addr, ((int32)imm_lo << 20));
}
break;
}

Expand Down
7 changes: 4 additions & 3 deletions core/iwasm/compilation/aot_llvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2705,8 +2705,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
#if WASM_ENABLE_LINUX_PERF != 0
if (wasm_runtime_get_linux_perf()) {
/* FramePointerKind.All */
LLVMMetadataRef val =
LLVMValueAsMetadata(LLVMConstInt(LLVMInt32Type(), 2, false));
LLVMMetadataRef val = LLVMValueAsMetadata(
LLVMConstInt(LLVMInt32TypeInContext(comp_ctx->context), 2, false));
const char *key = "frame-pointer";
LLVMAddModuleFlag(comp_ctx->module, LLVMModuleFlagBehaviorWarning, key,
strlen(key), val);
Expand All @@ -2728,7 +2728,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
LLVMAddModuleFlag(
comp_ctx->module, LLVMModuleFlagBehaviorWarning, "Debug Info Version",
strlen("Debug Info Version"),
LLVMValueAsMetadata(LLVMConstInt(LLVMInt32Type(), 3, false)));
LLVMValueAsMetadata(
LLVMConstInt(LLVMInt32TypeInContext(comp_ctx->context), 3, false)));

comp_ctx->debug_file = dwarf_gen_file_info(comp_ctx);
if (!comp_ctx->debug_file) {
Expand Down
Loading