From 10d08e362850037f5126123bee8af6780f20a38f Mon Sep 17 00:00:00 2001 From: John Gibson Date: Thu, 23 Apr 2026 15:45:49 -0400 Subject: [PATCH 1/2] tools/cmake/common: quote DESCRIPTION in define_overridable_option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The set(... CACHE TYPE DOCSTRING [FORCE]) form treats the docstring as a single argument, but `${DESCRIPTION}` was being expanded unquoted. CMake splits an unquoted argument on `;` (its list separator), not on spaces, so a description containing a semicolon was spilling its trailing text into subsequent set() arguments. This is latent for the common case because most existing STRING options are driven by the else-branch (not overridden via -D), but any STRING option overridden on the cmake command line whose description contains a `;` hits it — for example: cmake -DEXECUTORCH_VULKAN_FP16_PRECISION=mediump ... fails with the description text being fed to the downstream shader generator as CLI args. Fix: wrap the expansion in double quotes ("${DESCRIPTION}") in both set() calls, so the docstring is always passed as exactly one argument. Correct for any description, with or without spaces or semicolons. 
--- tools/cmake/common/preset.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/cmake/common/preset.cmake b/tools/cmake/common/preset.cmake index 4ac45e28562..7d016db2347 100644 --- a/tools/cmake/common/preset.cmake +++ b/tools/cmake/common/preset.cmake @@ -82,12 +82,12 @@ macro(define_overridable_option NAME DESCRIPTION VALUE_TYPE DEFAULT_VALUE) if(DEFINED ${NAME} AND NOT DEFINED CACHE{${NAME}}) set(${NAME} ${${NAME}} - CACHE ${VALUE_TYPE} ${DESCRIPTION} FORCE + CACHE ${VALUE_TYPE} "${DESCRIPTION}" FORCE ) else() set(${NAME} ${DEFAULT_VALUE} - CACHE ${VALUE_TYPE} ${DESCRIPTION} + CACHE ${VALUE_TYPE} "${DESCRIPTION}" ) endif() From 2ca72a0b6e24f399acfc0f4320d22a7c40d8923f Mon Sep 17 00:00:00 2001 From: John Gibson Date: Thu, 23 Apr 2026 15:07:26 -0400 Subject: [PATCH 2/2] Vulkan: make half-variant GLSL PRECISION configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add EXECUTORCH_VULKAN_FP16_PRECISION cmake option (default "highp" — upstream-identical behavior). When set to "mediump" (or "lowp"), gen_vulkan_spv.py overrides the PRECISION define in shader variants where DTYPE=half. GLSL's mediump is a driver hint that fp16 ALUs may be used for arithmetic. On Mali GPUs this is typically honored and gives measurable conv speedups (~15% on Pixel 9 Mali G715 for this repo's conv2d_half / conv2d_pw_tiled_half workloads); on Adreno it is typically ignored (harmless). Accuracy tradeoff: mediump relaxes minimum precision guarantees for fp16 shader math. Kept off by default so this cannot silently affect any existing build. Exposed via scripts/build_android_library.sh env var for Android users. Wiring: * backends/vulkan/runtime/gen_vulkan_spv.py: --fp16-precision CLI flag, forwarded into SPVGenerator.create_shader_params(). * backends/vulkan/cmake/ShaderLibrary.cmake: forward cmake var into the python invocation when set. * tools/cmake/preset/default.cmake: declare overridable STRING option. 
* scripts/build_android_library.sh: read EXECUTORCH_VULKAN_FP16_PRECISION from env, default "highp". --- backends/vulkan/cmake/ShaderLibrary.cmake | 8 ++++++++ backends/vulkan/runtime/gen_vulkan_spv.py | 24 +++++++++++++++++++++++ scripts/build_android_library.sh | 1 + tools/cmake/preset/default.cmake | 6 ++++++ 4 files changed, 39 insertions(+) diff --git a/backends/vulkan/cmake/ShaderLibrary.cmake b/backends/vulkan/cmake/ShaderLibrary.cmake index a026660dd41..1a08dede41c 100644 --- a/backends/vulkan/cmake/ShaderLibrary.cmake +++ b/backends/vulkan/cmake/ShaderLibrary.cmake @@ -60,6 +60,14 @@ function(gen_vulkan_shader_lib_cpp shaders_path) ) endif() + # Allow overriding GLSL PRECISION for fp16 shader variants. Empty / unset + # keeps upstream default (`highp`). Accepted values: highp, mediump, lowp. + if(EXECUTORCH_VULKAN_FP16_PRECISION) + list(APPEND GEN_SPV_ARGS "--fp16-precision" + "${EXECUTORCH_VULKAN_FP16_PRECISION}" + ) + endif() + # Ninja cannot expand wildcards (*) in DEPENDS lists. 
file(GLOB VULKAN_SHADERS "${shaders_path}/*.glsl" "${shaders_path}/*.glslh" "${shaders_path}/*.yaml" "${shaders_path}/*.h" diff --git a/backends/vulkan/runtime/gen_vulkan_spv.py b/backends/vulkan/runtime/gen_vulkan_spv.py index dab33fb3097..4d156e05433 100644 --- a/backends/vulkan/runtime/gen_vulkan_spv.py +++ b/backends/vulkan/runtime/gen_vulkan_spv.py @@ -663,6 +663,7 @@ def __init__( glslc_path: Optional[str], glslc_flags: str = "", replace_u16vecn: bool = False, + fp16_precision: str = "highp", ) -> None: if isinstance(src_dir_paths, str): self.src_dir_paths = [src_dir_paths] @@ -678,6 +679,7 @@ def __init__( if "-Os" in self.glslc_flags_no_opt: self.glslc_flags_no_opt.remove("-Os") self.replace_u16vecn = replace_u16vecn + self.fp16_precision = fp16_precision self.src_files: Dict[str, str] = {} self.template_yaml_files: List[str] = [] @@ -857,6 +859,17 @@ def create_shader_params( for key, value in variant_params.items(): shader_params[key] = value + # Optionally override PRECISION for half-precision variants. GLSL + # `mediump` is a hint the driver may use fp16 ALUs for arithmetic. + # On Mali GPUs it's typically honored; on Adreno it's typically + # ignored (harmless). Default is `highp` to match upstream behavior. + if ( + self.fp16_precision != "highp" + and shader_params.get("DTYPE") == "half" + and shader_params.get("PRECISION") == "highp" + ): + shader_params["PRECISION"] = self.fp16_precision + return shader_params def constructOutputMap(self) -> None: @@ -1488,6 +1501,16 @@ def main(argv: List[str]) -> int: default=-1, help="Number of threads for shader compilation. -1 (default) uses all available CPU cores, 1 uses sequential compilation.", ) + parser.add_argument( + "--fp16-precision", + choices=["highp", "mediump", "lowp"], + default="highp", + help=( + "GLSL PRECISION qualifier for DTYPE=half shader variants. " + "`mediump` lets drivers (notably Mali) use fp16 ALUs for arithmetic. " + "Default `highp` matches upstream behavior. 
Ignored on fp32 variants." + ), + ) options = parser.parse_args() env = DEFAULT_ENV @@ -1520,6 +1543,7 @@ def main(argv: List[str]) -> int: options.glslc_path, glslc_flags=glslc_flags_str, replace_u16vecn=options.replace_u16vecn, + fp16_precision=options.fp16_precision, ) output_spv_files = shader_generator.generateSPV( options.output_path, diff --git a/scripts/build_android_library.sh b/scripts/build_android_library.sh index 0fb9e909884..1207b03827e 100755 --- a/scripts/build_android_library.sh +++ b/scripts/build_android_library.sh @@ -50,6 +50,7 @@ build_android_native_library() { -DEXECUTORCH_BUILD_QNN="${EXECUTORCH_BUILD_QNN}" \ -DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \ -DEXECUTORCH_BUILD_VULKAN="${EXECUTORCH_BUILD_VULKAN}" \ + -DEXECUTORCH_VULKAN_FP16_PRECISION="${EXECUTORCH_VULKAN_FP16_PRECISION:-highp}" \ -DXNNPACK_ENABLE_ARM_SME2="${XNNPACK_ENABLE_ARM_SME2}" \ -DSUPPORT_REGEX_LOOKAHEAD=ON \ -DCMAKE_BUILD_TYPE="${EXECUTORCH_CMAKE_BUILD_TYPE}" \ diff --git a/tools/cmake/preset/default.cmake b/tools/cmake/preset/default.cmake index 2c1be2dc9da..e48310b72e6 100644 --- a/tools/cmake/preset/default.cmake +++ b/tools/cmake/preset/default.cmake @@ -168,6 +168,12 @@ define_overridable_option( define_overridable_option( EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" BOOL OFF ) +define_overridable_option( + EXECUTORCH_VULKAN_FP16_PRECISION + "GLSL PRECISION for Vulkan half-precision shader variants. Accepted values: highp, mediump, lowp. `mediump` lets Mali drivers use fp16 ALUs; ignored on Adreno. Default `highp` matches upstream." + STRING + highp +) define_overridable_option( EXECUTORCH_BUILD_WEBGPU "Build the WebGPU backend" BOOL OFF )