diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index aabe44a1c5..7315c7e27a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -281,10 +281,19 @@ jobs: - armv7-unknown-linux-gnueabihf - x86_64-unknown-linux-gnu profile: [dev, release] + cc: [clang, gcc] include: - target: aarch64_be-unknown-linux-gnu build_std: true - + - target: x86_64-unknown-linux-gnu + cc: icx + profile: dev + - target: x86_64-unknown-linux-gnu + cc: icx + profile: release + exclude: + - target: armv7-unknown-linux-gnueabihf + cc: gcc steps: - uses: actions/checkout@v6 - name: Install Rust @@ -301,7 +310,7 @@ jobs: # Configure some env vars based on matrix configuration - run: echo "PROFILE=${{ matrix.profile }}" >> $GITHUB_ENV - - run: ./ci/intrinsic-test-docker.sh ${{ matrix.target }} + - run: ./ci/intrinsic-test-docker.sh ${{ matrix.target }} ${{ matrix.cc }} if: ${{ !startsWith(matrix.target, 'thumb') }} env: TARGET: ${{ matrix.target }} diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index e2b3d95585..1b61dd0c1b 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -15,7 +15,8 @@ RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64 RUN mkdir llvm RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm -ENV PATH="/llvm/bin:$PATH" +ENV CLANG_PATH="/llvm/bin/clang" +ENV GCC_PATH=aarch64-linux-gnu-gcc ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \ diff --git a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile index d7c12493ad..70acc2d22a 100644 --- a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile @@ -23,11 +23,12 @@ RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64 RUN mkdir llvm RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm -ENV PATH="/llvm/bin:$PATH" - ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}" ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc" +ENV CLANG_PATH="/llvm/bin/clang" +ENV GCC_PATH="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc" + ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc" ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}" ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump" diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 02744917af..3c8a1b5add 100644 --- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -14,7 +14,8 @@ RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64 RUN mkdir llvm RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm -ENV PATH="/llvm/bin:$PATH" +ENV CLANG_PATH="/llvm/bin/clang" +ENV GCC_PATH=arm-linux-gnueabihf-gcc ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \ diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 17d1ac67e7..efbb2b0853 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -6,7 +6,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ make \ ca-certificates \ wget \ - xz-utils + xz-utils \ + gpg RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz RUN mkdir intel-sde @@ -16,7 +17,18 @@ RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64 RUN mkdir llvm RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm -ENV PATH="/llvm/bin:$PATH" +RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB |\ + gpg --dearmor |\ + tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null + +RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" |\ + tee /etc/apt/sources.list.d/oneAPI.list + +RUN apt-get update && apt-get install -y --no-install-recommends intel-oneapi-compiler-dpcpp-cpp + +ENV CLANG_PATH="/llvm/bin/clang" +ENV GCC_PATH="gcc" +ENV ICX_PATH="/opt/intel/oneapi/compiler/2026.0/bin/icx" ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \ diff --git a/ci/intrinsic-test-docker.sh b/ci/intrinsic-test-docker.sh index 948b53dc67..c1d44dca91 100755 --- a/ci/intrinsic-test-docker.sh +++ b/ci/intrinsic-test-docker.sh @@ -5,8 +5,8 @@ set -ex -if [ $# -lt 1 ]; then - >&2 echo "Usage: $0 " +if [ $# -lt 2 ]; then + >&2 echo "Usage: $0 " exit 1 fi @@ -29,7 +29,6 @@ run() { --user "$(id -u)":"$(id -g)" \ --env CARGO_HOME=/cargo \ --env CARGO_TARGET_DIR=/checkout/target \ - --env TARGET="${1}" \ --env PROFILE \ --env "${HOST_LINKER}"="cc" \ --env STDARCH_DISABLE_ASSERT_INSTR \ @@ -48,12 +47,12 @@ run() { --workdir /checkout \ --privileged \ stdarch \ - sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh" + sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh ${1} ${2}" } if [ -z "$1" ]; then >&2 echo "No target specified!" exit 1 else - run "${1}" + run "${1}" "${2}" fi diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index 1f3a2caf50..ee07295c5d 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -2,7 +2,29 @@ set -ex -: "${TARGET?The TARGET environment variable must be set.}" +if [ $# -lt 2 ]; then + >&2 echo "Usage: $0 " + exit 1 +fi + +case ${2} in + clang) + export CC="${CLANG_PATH}" + CC_KIND=clang + ;; + gcc) + export CC="${GCC_PATH}" + CC_KIND=gcc + ;; + icx) + export CC="${ICX_PATH}" + CC_KIND=clang + ;; + *) + >&2 echo "Unknown compiler: ${2}" + exit 1 + ;; +esac export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir" export PROFILE="${PROFILE:="release"}" @@ -12,49 +34,49 @@ echo "PROFILE=${PROFILE}" INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml" -export CC="clang" - -case ${TARGET} in +case ${1} in aarch64_be*) export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt + ARCH=aarch64_be ;; aarch64*) export CFLAGS="-I/usr/aarch64-linux-gnu/include/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt + ARCH=aarch64 ;; armv7*) export CFLAGS="-I/usr/arm-linux-gnueabihf/include/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt + ARCH=arm ;; x86_64*) export CFLAGS="-I/usr/include/x86_64-linux-gnu/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt + ARCH=x86 ;; *) ;; esac -case "${TARGET}" in +case "${1}" in x86_64-unknown-linux-gnu*) env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" - - echo "${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" + --skip "crates/intrinsic-test/missing_${ARCH}_common.txt" \ + --skip "crates/intrinsic-test/missing_${ARCH}_${2}.txt" \ + --target "${1}" \ + --cc-kind "${CC_KIND}" ;; *) cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" + --skip "crates/intrinsic-test/missing_${ARCH}_common.txt" \ + --skip "crates/intrinsic-test/missing_${ARCH}_${2}.txt" \ + --target "${1}" \ + --cc-kind "${CC_KIND}" ;; esac -cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}" +cargo test --manifest-path=rust_programs/Cargo.toml --target "${1}" --profile "${PROFILE}" diff --git a/crates/intrinsic-test/missing_aarch64_be_clang.txt b/crates/intrinsic-test/missing_aarch64_be_clang.txt new file mode 100644 index 0000000000..001538b3ea --- /dev/null +++ b/crates/intrinsic-test/missing_aarch64_be_clang.txt @@ -0,0 +1,31 @@ +# Bad LLVM codegen for BE in O2 in clang, and release in rust (https://github.com/llvm/llvm-project/issues/166190) +vcmla_laneq_f16 +vcmla_rot180_laneq_f16 +vcmla_rot270_laneq_f16 +vcmla_rot90_laneq_f16 +vcmlaq_lane_f16 +vcmlaq_laneq_f16 +vcmlaq_rot180_lane_f16 +vcmlaq_rot180_laneq_f16 +vcmlaq_rot270_lane_f16 +vcmlaq_rot270_laneq_f16 +vcmlaq_rot90_lane_f16 +vcmlaq_rot90_laneq_f16 + +# Bad codegen for BE in O2 in clang, correct in rust. Same cause as above issue. +vdot_lane_s32 +vdot_lane_u32 +vdot_laneq_s32 +vdot_laneq_u32 +vdotq_lane_s32 +vdotq_lane_u32 +vdotq_laneq_s32 +vdotq_laneq_u32 +vsudot_lane_s32 +vsudot_laneq_s32 +vsudotq_lane_s32 +vsudotq_laneq_s32 +vusdot_lane_s32 +vusdot_laneq_s32 +vusdotq_lane_s32 +vusdotq_laneq_s32 diff --git a/crates/intrinsic-test/missing_aarch64.txt b/crates/intrinsic-test/missing_aarch64_be_common.txt similarity index 93% rename from crates/intrinsic-test/missing_aarch64.txt rename to crates/intrinsic-test/missing_aarch64_be_common.txt index f0c9eeb6ce..327c8207a0 100644 --- a/crates/intrinsic-test/missing_aarch64.txt +++ b/crates/intrinsic-test/missing_aarch64_be_common.txt @@ -79,6 +79,3 @@ vcvtns_s64_f32 vcvtns_u64_f32 vcvtps_s64_f32 vcvtps_u64_f32 - -# Broken in Clang (fixed in https://github.com/llvm/llvm-project/pull/156029) -vcvth_s16_f16 diff --git a/crates/intrinsic-test/missing_aarch64_be_gcc.txt b/crates/intrinsic-test/missing_aarch64_be_gcc.txt new file mode 100644 index 0000000000..eccd8a8a14 --- /dev/null +++ b/crates/intrinsic-test/missing_aarch64_be_gcc.txt @@ -0,0 +1,20 @@ +# Broken in LLVM llvm/llvm-project#196999 +vmull_p64 +vmull_high_p64 + +# Broken in LLVM llvm/llvm-project#197083 +vcvth_n_s32_f16 +vcvth_n_u32_f16 +vcvth_n_s64_f16 +vcvth_n_u64_f16 +vcvth_n_f16_s32 +vcvth_n_f16_u32 +vcvth_n_f16_s64 +vcvth_n_f16_u64 + +# Broken in GCC https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125279 +vmaxh_f16 +vminh_f16 + +# Rounding errors +vfms_n_f64 diff --git a/crates/intrinsic-test/missing_aarch64_clang.txt b/crates/intrinsic-test/missing_aarch64_clang.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/crates/intrinsic-test/missing_aarch64_be.txt b/crates/intrinsic-test/missing_aarch64_common.txt similarity index 57% rename from crates/intrinsic-test/missing_aarch64_be.txt rename to crates/intrinsic-test/missing_aarch64_common.txt index 9163aaa1c8..327c8207a0 100644 --- a/crates/intrinsic-test/missing_aarch64_be.txt +++ b/crates/intrinsic-test/missing_aarch64_common.txt @@ -1,43 +1,3 @@ -# Bad LLVM codegen for BE in O2 in clang, and release in rust (https://github.com/llvm/llvm-project/issues/166190) -vcmla_lane_f16 -vcmla_laneq_f16 -vcmla_rot180_lane_f16 -vcmla_rot180_laneq_f16 -vcmla_rot270_lane_f16 -vcmla_rot270_laneq_f16 -vcmla_rot90_lane_f16 -vcmla_rot90_laneq_f16 -vcmlaq_lane_f16 -vcmlaq_laneq_f16 -vcmlaq_laneq_f32 -vcmlaq_rot180_lane_f16 -vcmlaq_rot180_laneq_f16 -vcmlaq_rot180_laneq_f32 -vcmlaq_rot270_lane_f16 -vcmlaq_rot270_laneq_f16 -vcmlaq_rot270_laneq_f32 -vcmlaq_rot90_lane_f16 -vcmlaq_rot90_laneq_f16 -vcmlaq_rot90_laneq_f32 -# Bad codegen for BE in O2 in clang, correct in rust. Same cause as above issue. -vdot_lane_s32 -vdot_lane_u32 -vdot_laneq_s32 -vdot_laneq_u32 -vdotq_lane_s32 -vdotq_lane_u32 -vdotq_laneq_s32 -vdotq_laneq_u32 -vsudot_lane_s32 -vsudot_laneq_s32 -vsudotq_lane_s32 -vsudotq_laneq_s32 -vusdot_lane_s32 -vusdot_laneq_s32 -vusdotq_lane_s32 -vusdotq_laneq_s32 - -# Below are in common to missing_aarch64.txt # Not supported by qemu (will throw illegal instruction) vamin_f16 vaminq_f16 @@ -119,6 +79,3 @@ vcvtns_s64_f32 vcvtns_u64_f32 vcvtps_s64_f32 vcvtps_u64_f32 - -# Broken in Clang -vcvth_s16_f16 diff --git a/crates/intrinsic-test/missing_aarch64_gcc.txt b/crates/intrinsic-test/missing_aarch64_gcc.txt new file mode 100644 index 0000000000..a7e90142e4 --- /dev/null +++ b/crates/intrinsic-test/missing_aarch64_gcc.txt @@ -0,0 +1,19 @@ +# Broken in GCC https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123584, fixed in GCC 16 +vxarq_u64 + +# Broken in LLVM llvm/llvm-project#197083 +vcvth_n_s32_f16 +vcvth_n_u32_f16 +vcvth_n_s64_f16 +vcvth_n_u64_f16 +vcvth_n_f16_s32 +vcvth_n_f16_u32 +vcvth_n_f16_s64 +vcvth_n_f16_u64 + +# Broken in GCC https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125279 +vmaxh_f16 +vminh_f16 + +# Rounding errors +vfms_n_f64 diff --git a/crates/intrinsic-test/missing_arm_clang.txt b/crates/intrinsic-test/missing_arm_clang.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/crates/intrinsic-test/missing_arm.txt b/crates/intrinsic-test/missing_arm_common.txt similarity index 100% rename from crates/intrinsic-test/missing_arm.txt rename to crates/intrinsic-test/missing_arm_common.txt diff --git a/crates/intrinsic-test/missing_x86_clang.txt b/crates/intrinsic-test/missing_x86_clang.txt new file mode 100644 index 0000000000..b1531830c1 --- /dev/null +++ b/crates/intrinsic-test/missing_x86_clang.txt @@ -0,0 +1,24 @@ +# not present in Clang +_bswap +_bswap64 +_mm_cvtsd_si64x +_mm_cvtsi128_si64x +_mm_cvtsi64x_sd +_mm_cvtsi64x_si128 +_mm_cvttsd_si64x +_popcnt32 +_popcnt64 + +# Clang bug +_mm512_mask_reduce_max_pd +_mm512_mask_reduce_max_ps +_mm512_mask_reduce_min_pd +_mm512_mask_reduce_min_ps + +# Rounding errors in release mode +_mm_maskz_fmadd_sd +_mm_maskz_fmadd_ss +_mm_maskz_fmsub_sd +_mm_maskz_fmsub_ss +_mm_maskz_fnmadd_sd +_mm_maskz_fnmadd_ss diff --git a/crates/intrinsic-test/missing_x86.txt b/crates/intrinsic-test/missing_x86_common.txt similarity index 59% rename from crates/intrinsic-test/missing_x86.txt rename to crates/intrinsic-test/missing_x86_common.txt index c7aabb95a8..f9b71bbe8c 100644 --- a/crates/intrinsic-test/missing_x86.txt +++ b/crates/intrinsic-test/missing_x86_common.txt @@ -1,19 +1,22 @@ -# Are defined under a similar name - -#__bswap_64 -_bswap64 - -# not present in Clang and Rust +# not present in Rust _bit_scan_forward _bit_scan_reverse _castf32_u32 _castf64_u64 _castu32_f32 _castu64_f64 +_cvtsh_ss +_cvtss_sh _lrotl _lrotr _may_i_use_cpu_feature _may_i_use_cpu_feature_ext +_mm256_set1_pch +_mm512_set1_pch +_mm_malloc +_mm_popcnt_u32 +_mm_popcnt_u64 +_mm_set1_pch _rdpmc _rotl _rotl64 @@ -21,29 +24,9 @@ _rotr _rotr64 _rotwl _rotwr -_urdmsr - -# not present in Clang -_bswap -_mm_cvtsd_si64x -_mm_cvtsi128_si64x -_mm_cvtsi64x_sd -_mm_cvtsi64x_si128 -_mm_cvttsd_si64x -_popcnt32 -_popcnt64 - -# not present in Rust -_cvtsh_ss -_cvtss_sh -_mm256_set1_pch -_mm512_set1_pch -_mm_malloc -_mm_popcnt_u32 -_mm_popcnt_u64 -_mm_set1_pch _tpause _umwait +_urdmsr # SDE ERROR: Cannot execute XGETBV with ECX != 0 _xgetbv @@ -63,17 +46,3 @@ _mm512_castph256_ph512 _mm512_castps256_ps512 _mm512_castpd256_pd512 _mm512_castsi256_si512 - -# Clang bug -_mm512_mask_reduce_max_pd -_mm512_mask_reduce_max_ps -_mm512_mask_reduce_min_pd -_mm512_mask_reduce_min_ps - -# Rounding errors in release mode -_mm_maskz_fmadd_sd -_mm_maskz_fmadd_ss -_mm_maskz_fmsub_sd -_mm_maskz_fmsub_ss -_mm_maskz_fnmadd_sd -_mm_maskz_fnmadd_ss diff --git a/crates/intrinsic-test/missing_x86_gcc.txt b/crates/intrinsic-test/missing_x86_gcc.txt new file mode 100644 index 0000000000..5b71b0698e --- /dev/null +++ b/crates/intrinsic-test/missing_x86_gcc.txt @@ -0,0 +1,33 @@ +# not present in GCC +_bextr2_u32 +_bextr2_u64 +_mm512_cvtepi32lo_pd +_mm512_mask_cvtepi32lo_pd +_mm512_cvtepu32lo_pd +_mm512_mask_cvtepu32lo_pd +_mm512_cvtpd_pslo +_mm512_mask_cvtpd_pslo +_mm512_cvtpslo_pd +_mm512_mask_cvtpslo_pd +_mm512_permutevar_epi32 +_mm512_mask_permutevar_epi32 +_mm_tzcnt_32 +_mm_tzcnt_64 + +# GCC bug +_mm512_reduce_max_pd +_mm512_reduce_max_ps +_mm512_reduce_min_pd +_mm512_reduce_min_ps +_mm512_mask_reduce_max_pd +_mm512_mask_reduce_max_ps +_mm512_mask_reduce_min_pd +_mm512_mask_reduce_min_ps + +# Rounding errors in release mode +_mm_maskz_fmadd_sd +_mm_maskz_fmadd_ss +_mm_maskz_fmsub_sd +_mm_maskz_fmsub_ss +_mm_maskz_fnmadd_sd +_mm_maskz_fnmadd_ss diff --git a/crates/intrinsic-test/missing_x86_icx.txt b/crates/intrinsic-test/missing_x86_icx.txt new file mode 100644 index 0000000000..6d3133c85d --- /dev/null +++ b/crates/intrinsic-test/missing_x86_icx.txt @@ -0,0 +1,20 @@ +# not present in ICX +_mm_cvtsd_si64x +_mm_cvtsi128_si64x +_mm_cvtsi64x_sd +_mm_cvtsi64x_si128 +_mm_cvttsd_si64x + +# ICX bug +_mm512_mask_reduce_max_pd +_mm512_mask_reduce_max_ps +_mm512_mask_reduce_min_pd +_mm512_mask_reduce_min_ps + +# Rounding errors in release mode +_mm_maskz_fmadd_sd +_mm_maskz_fmadd_ss +_mm_maskz_fmsub_sd +_mm_maskz_fmsub_ss +_mm_maskz_fnmadd_sd +_mm_maskz_fnmadd_ss diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 8935b3ca66..cc2cf8ccea 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -5,7 +5,7 @@ mod json_parser; mod types; use crate::common::SupportedArchitectureTest; -use crate::common::cli::ProcessedCli; +use crate::common::cli::{CompilerKind, ProcessedCli}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::ArmIntrinsicType; @@ -29,11 +29,14 @@ impl SupportedArchitectureTest for ArmArchitectureTest { const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - fn arch_flags(&self) -> Vec<&str> { - vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"] + fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { + match cli_options.cc_kind { + CompilerKind::Clang => vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"], + CompilerKind::Gcc => vec!["-march=armv8.6-a+crypto+crc+dotprod+fp16+sha3+sm4"], + } } - fn create(cli_options: ProcessedCli) -> Self { + fn create(cli_options: &ProcessedCli) -> Self { let a32 = cli_options.target.starts_with("armv7"); let mut intrinsics = get_neon_intrinsics(&cli_options.filename).expect("Error parsing input file"); diff --git a/crates/intrinsic-test/src/common/cli.rs b/crates/intrinsic-test/src/common/cli.rs index 07f94eba18..fd000739ae 100644 --- a/crates/intrinsic-test/src/common/cli.rs +++ b/crates/intrinsic-test/src/common/cli.rs @@ -13,7 +13,7 @@ pub struct Cli { /// Filename for a list of intrinsics to skip (one per line) #[arg(long)] - pub skip: Option, + pub skip: Vec, /// Pass a target the test suite #[arg(long)] @@ -22,6 +22,16 @@ pub struct Cli { /// Percentage of intrinsics to test (used to limit testing to keep CI times manageable) #[arg(long, default_value_t = 100u8)] pub sample_percentage: u8, + + /// Argument style of the C compiler + #[arg(long)] + pub cc_kind: CompilerKind, +} + +#[derive(Copy, Clone, clap::ValueEnum)] +pub enum CompilerKind { + Gcc, + Clang, } pub struct ProcessedCli { @@ -29,6 +39,7 @@ pub struct ProcessedCli { pub target: String, pub skip: Vec, pub sample_percentage: u8, + pub cc_kind: CompilerKind, } impl ProcessedCli { @@ -37,22 +48,25 @@ impl ProcessedCli { let target = cli_options.target; let sample_percentage = cli_options.sample_percentage; - let skip = if let Some(filename) = cli_options.skip { - let data = std::fs::read_to_string(&filename).expect("Failed to open file"); - data.lines() - .map(str::trim) - .filter(|s| !s.contains('#')) - .map(String::from) - .collect_vec() - } else { - Default::default() - }; + let skip = cli_options + .skip + .iter() + .flat_map(|filename| { + std::fs::read_to_string(&filename) + .expect("Failed to open file") + .lines() + .map(|line| line.trim().to_owned()) + .filter(|line| !line.contains('#')) + .collect_vec() + }) + .collect_vec(); Self { target, skip, filename, sample_percentage, + cc_kind: cli_options.cc_kind, } } } diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 039e78f577..20122d9a29 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -4,6 +4,7 @@ use itertools::Itertools; use super::intrinsic_helpers::IntrinsicTypeDefinition; use crate::common::argument::ArgumentList; +use crate::common::cli::{CompilerKind, ProcessedCli}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; @@ -314,8 +315,18 @@ pub fn write_build_rs( w: &mut impl std::io::Write, i: usize, arch_flags: &[&str], + cli_options: &ProcessedCli, ) -> std::io::Result<()> { - const COMMON_FLAGS: &[&str] = &["-ffp-contract=off", "-ffp-model=strict", "-Wno-narrowing"]; + const COMMON_FLAGS: &[&str] = &["-ffp-contract=off", "-Wno-narrowing"]; + const CLANG_FLAGS: &[&str] = &["-ffp-model=strict"]; + const GCC_FLAGS: &[&str] = &[ + "-flax-vector-conversions", + "-fno-fast-math", + "-frounding-math", + "-fexcess-precision=standard", + "-ftrapping-math", + "-fsignaling-nans", + ]; write!( w, @@ -329,7 +340,16 @@ pub fn write_build_rs( i = i )?; - for flag in COMMON_FLAGS.iter().chain(arch_flags) { + let compiler_specific_flags = match cli_options.cc_kind { + CompilerKind::Gcc => GCC_FLAGS, + CompilerKind::Clang => CLANG_FLAGS, + }; + + for flag in COMMON_FLAGS + .iter() + .chain(compiler_specific_flags) + .chain(arch_flags) + { writeln!(w, "\"{flag}\",")?; } diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 86269bab33..3d91e55801 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -30,7 +30,7 @@ pub trait SupportedArchitectureTest { fn intrinsics(&self) -> &[Intrinsic]; - fn create(cli_options: ProcessedCli) -> Self; + fn create(cli_options: &ProcessedCli) -> Self; const NOTICE: &str; @@ -39,7 +39,7 @@ pub trait SupportedArchitectureTest { const PLATFORM_RUST_CFGS: &str; const PLATFORM_RUST_DEFINITIONS: &str; - fn arch_flags(&self) -> Vec<&str>; + fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; fn generate_c_file(&self) { let (chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); @@ -57,8 +57,8 @@ pub trait SupportedArchitectureTest { .unwrap(); } - fn generate_rust_file(&self) { - let arch_flags = self.arch_flags(); + fn generate_rust_file(&self, cli_options: &ProcessedCli) { + let arch_flags = self.arch_flags(cli_options); std::fs::create_dir_all("rust_programs").unwrap(); @@ -97,7 +97,7 @@ pub trait SupportedArchitectureTest { trace!("generating `{build_rs_filename}`"); let mut file = File::create(&build_rs_filename).unwrap(); - write_build_rs(&mut file, i, &arch_flags).unwrap(); + write_build_rs(&mut file, i, &arch_flags, &cli_options).unwrap(); run_rustfmt(&build_rs_filename); Ok(()) diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index 9f57c99f12..4c0136041f 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -18,18 +18,24 @@ fn main() { if processed_cli_options.target.starts_with("arm") | processed_cli_options.target.starts_with("aarch64") { - run(ArmArchitectureTest::create(processed_cli_options)) + run( + ArmArchitectureTest::create(&processed_cli_options), + processed_cli_options, + ) } else if processed_cli_options.target.starts_with("x86") { - run(X86ArchitectureTest::create(processed_cli_options)) + run( + X86ArchitectureTest::create(&processed_cli_options), + processed_cli_options, + ) } else { unimplemented!("Unsupported target {}", processed_cli_options.target) } } -fn run(test_environment: impl SupportedArchitectureTest) { +fn run(test_environment: impl SupportedArchitectureTest, processed_cli_options: ProcessedCli) { info!("building C binaries"); test_environment.generate_c_file(); info!("building Rust binaries"); - test_environment.generate_rust_file(); + test_environment.generate_rust_file(&processed_cli_options); } diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index 5d4798482a..288bd8bdf8 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -29,8 +29,11 @@ impl SupportedArchitectureTest for X86ArchitectureTest { const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - fn arch_flags(&self) -> Vec<&str> { + fn arch_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { vec![ + "-maes", + "-mf16c", + "-mfma", "-mavx", "-mavx2", "-mavx512f", @@ -66,7 +69,7 @@ impl SupportedArchitectureTest for X86ArchitectureTest { ] } - fn create(cli_options: ProcessedCli) -> Self { + fn create(cli_options: &ProcessedCli) -> Self { let mut intrinsics = get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file");