diff --git a/backends/vulkan/cmake/ShaderLibrary.cmake b/backends/vulkan/cmake/ShaderLibrary.cmake index a026660dd41..1a08dede41c 100644 --- a/backends/vulkan/cmake/ShaderLibrary.cmake +++ b/backends/vulkan/cmake/ShaderLibrary.cmake @@ -60,6 +60,14 @@ function(gen_vulkan_shader_lib_cpp shaders_path) ) endif() + # Allow overriding GLSL PRECISION for fp16 shader variants. Empty / unset + # keeps upstream default (`highp`). Accepted values: highp, mediump, lowp. + if(EXECUTORCH_VULKAN_FP16_PRECISION) + list(APPEND GEN_SPV_ARGS "--fp16-precision" + "${EXECUTORCH_VULKAN_FP16_PRECISION}" + ) + endif() + # Ninja cannot expand wildcards (*) in DEPENDS lists. file(GLOB VULKAN_SHADERS "${shaders_path}/*.glsl" "${shaders_path}/*.glslh" "${shaders_path}/*.yaml" "${shaders_path}/*.h" diff --git a/backends/vulkan/runtime/gen_vulkan_spv.py b/backends/vulkan/runtime/gen_vulkan_spv.py index dab33fb3097..4d156e05433 100644 --- a/backends/vulkan/runtime/gen_vulkan_spv.py +++ b/backends/vulkan/runtime/gen_vulkan_spv.py @@ -663,6 +663,7 @@ def __init__( glslc_path: Optional[str], glslc_flags: str = "", replace_u16vecn: bool = False, + fp16_precision: str = "highp", ) -> None: if isinstance(src_dir_paths, str): self.src_dir_paths = [src_dir_paths] @@ -678,6 +679,7 @@ def __init__( if "-Os" in self.glslc_flags_no_opt: self.glslc_flags_no_opt.remove("-Os") self.replace_u16vecn = replace_u16vecn + self.fp16_precision = fp16_precision self.src_files: Dict[str, str] = {} self.template_yaml_files: List[str] = [] @@ -857,6 +859,17 @@ def create_shader_params( for key, value in variant_params.items(): shader_params[key] = value + # Optionally override PRECISION for half-precision variants. GLSL + # `mediump` is a hint the driver may use fp16 ALUs for arithmetic. + # On Mali GPUs it's typically honored; on Adreno it's typically + # ignored (harmless). Default is `highp` to match upstream behavior. + if ( + self.fp16_precision != "highp" + and shader_params.get("DTYPE") == "half" + and shader_params.get("PRECISION") == "highp" + ): + shader_params["PRECISION"] = self.fp16_precision + return shader_params def constructOutputMap(self) -> None: @@ -1488,6 +1501,16 @@ def main(argv: List[str]) -> int: default=-1, help="Number of threads for shader compilation. -1 (default) uses all available CPU cores, 1 uses sequential compilation.", ) + parser.add_argument( + "--fp16-precision", + choices=["highp", "mediump", "lowp"], + default="highp", + help=( + "GLSL PRECISION qualifier for DTYPE=half shader variants. " + "`mediump` lets drivers (notably Mali) use fp16 ALUs for arithmetic. " + "Default `highp` matches upstream behavior. Ignored on fp32 variants." + ), + ) options = parser.parse_args() env = DEFAULT_ENV @@ -1520,6 +1543,7 @@ def main(argv: List[str]) -> int: options.glslc_path, glslc_flags=glslc_flags_str, replace_u16vecn=options.replace_u16vecn, + fp16_precision=options.fp16_precision, ) output_spv_files = shader_generator.generateSPV( options.output_path, diff --git a/scripts/build_android_library.sh b/scripts/build_android_library.sh index 0fb9e909884..1207b03827e 100755 --- a/scripts/build_android_library.sh +++ b/scripts/build_android_library.sh @@ -50,6 +50,7 @@ build_android_native_library() { -DEXECUTORCH_BUILD_QNN="${EXECUTORCH_BUILD_QNN}" \ -DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \ -DEXECUTORCH_BUILD_VULKAN="${EXECUTORCH_BUILD_VULKAN}" \ + -DEXECUTORCH_VULKAN_FP16_PRECISION="${EXECUTORCH_VULKAN_FP16_PRECISION:-highp}" \ -DXNNPACK_ENABLE_ARM_SME2="${XNNPACK_ENABLE_ARM_SME2}" \ -DSUPPORT_REGEX_LOOKAHEAD=ON \ -DCMAKE_BUILD_TYPE="${EXECUTORCH_CMAKE_BUILD_TYPE}" \ diff --git a/tools/cmake/common/preset.cmake b/tools/cmake/common/preset.cmake index 4ac45e28562..7d016db2347 100644 --- a/tools/cmake/common/preset.cmake +++ b/tools/cmake/common/preset.cmake @@ -82,12 +82,12 @@ macro(define_overridable_option NAME DESCRIPTION VALUE_TYPE DEFAULT_VALUE) if(DEFINED ${NAME} AND NOT DEFINED CACHE{${NAME}}) set(${NAME} ${${NAME}} - CACHE ${VALUE_TYPE} ${DESCRIPTION} FORCE + CACHE ${VALUE_TYPE} "${DESCRIPTION}" FORCE ) else() set(${NAME} ${DEFAULT_VALUE} - CACHE ${VALUE_TYPE} ${DESCRIPTION} + CACHE ${VALUE_TYPE} "${DESCRIPTION}" ) endif() diff --git a/tools/cmake/preset/default.cmake b/tools/cmake/preset/default.cmake index 2c1be2dc9da..e48310b72e6 100644 --- a/tools/cmake/preset/default.cmake +++ b/tools/cmake/preset/default.cmake @@ -168,6 +168,12 @@ define_overridable_option( define_overridable_option( EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" BOOL OFF ) +define_overridable_option( + EXECUTORCH_VULKAN_FP16_PRECISION + "GLSL PRECISION for Vulkan half-precision shader variants. Accepted values: highp, mediump, lowp. `mediump` lets Mali drivers use fp16 ALUs; ignored on Adreno. Default `highp` matches upstream." + STRING + highp +) define_overridable_option( EXECUTORCH_BUILD_WEBGPU "Build the WebGPU backend" BOOL OFF )