From 35458446e33d068ffdb19a17a2c747bbd270149f Mon Sep 17 00:00:00 2001 From: Min Guo Date: Wed, 24 Jun 2026 17:18:34 -0700 Subject: [PATCH] Enable buck-native x86 simulator test for QNN op tests (#20494) Summary: The QNN operator tests in `backends/qualcomm/tests/test_qnn_delegate.py` already support host x86_64 simulator execution via `--enable_x86_64`, but were only runnable as a standalone argparse script after a CMake build. There was no `buck test` path: `test_qnn_delegate` is a `python_library` (not a test target), and the host `qnn_executor_runner` binary was Android-only. This change wires up a buck-native, internal-only test target `//executorch/backends/qualcomm/tests:test_qnn_delegate_x86` that runs the FP16 and quantized operator suites (`TestQNNFloatingPointOperator`, `TestQNNQuantizedOperator`) on the x86 QNN simulator with no device and no CMake build tree. Changes: - `backends/qualcomm/runtime/targets.bzl`: add `CXX` to the `pal` library platforms (gated `if is_fbcode()`). It already ships `pal/src/linux/*.cpp`, but was gated to `[ANDROID]`, which blocked any host build of `:runtime` (an exported dep). - `examples/qualcomm/executor_runner/targets.bzl`: add `CXX` to `qnn_executor_runner` (gated `if is_fbcode()`) so the host runner binary builds, and add `//executorch/kernels/portable:generated_lib` to its deps. The CMake runner links `full_portable_ops_lib` + `quantized_ops_lib`; the Buck runner had only the quantized lib, so ops that leave a CPU-fallback node (e.g. `acos` -> `aten::asin.out`, `cast`, `index_copy`, `index_put`, `logical_and`, `avg_pool1d`) aborted the runner with a "Missing operator" error. The `CXX` (host) surface is gated to `is_fbcode()` because it is only used by the internal x86 simulator test; in OSS, `CXX` includes macOS (no QNN host libs), so the host runner/pal stay Android-only there, restoring the original OSS build surface. 
- `backends/qualcomm/tests/test_qnn_delegate_x86.py` (new): under `buck test` the file's argparse `__main__`/`setup_environment()` never runs, so this wrapper sets the equivalent `TestQNN` class attributes (`enable_x86_64`, `backend`, `soc_model`) at import and subclasses the operator TestCases so the runner discovers them. - `backends/qualcomm/tests/BUCK`: add the `test_qnn_delegate_x86` `python_test`, gated behind `runtime.is_oss` via a top-level conditional expression (the BUCK dialect forbids top-level `if`/`def`). It stays out of the OSS graph to preserve the `test-qnn-buck-build-linux` CI signal. The QNN x86 SDK libs and host runner are supplied via `env` (`QNN_SDK_ROOT`, `LD_LIBRARY_PATH`, `QNN_EXECUTOR_RUNNER`). `CUDA_VISIBLE_DEVICES` is forced empty because these tests are CPU-only (calibration, AOT compile, x86 sim); without it, parallel test shards each grabbed CUDA and exhausted GPU memory. The fbcode and xplat copies of all four files are kept byte-identical per the existing twin convention. 
Differential Revision: D109606746 --- backends/qualcomm/runtime/targets.bzl | 5 +- backends/qualcomm/tests/BUCK | 26 ++++++++++ .../qualcomm/tests/test_qnn_delegate_x86.py | 47 +++++++++++++++++++ examples/qualcomm/executor_runner/targets.bzl | 8 +++- 4 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 backends/qualcomm/tests/test_qnn_delegate_x86.py diff --git a/backends/qualcomm/runtime/targets.bzl b/backends/qualcomm/runtime/targets.bzl index 5ad312020be..03323422238 100644 --- a/backends/qualcomm/runtime/targets.bzl +++ b/backends/qualcomm/runtime/targets.bzl @@ -3,6 +3,7 @@ load( "ANDROID", "CXX", ) +load("@fbsource//tools/build_defs:fbsource_utils.bzl", "is_fbcode") load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") load("@fbsource//xplat/executorch/backends/qualcomm/third-party:third_party_libs.bzl", "qnn_third_party_dep") @@ -55,7 +56,9 @@ def define_common_targets(): }, header_namespace = "", define_static_target = True, - platforms = [ANDROID], + # CXX (host) is only needed for the internal fbcode x86 QNN simulator + # test runner; OSS host builds (incl. macOS) never build this. + platforms = [ANDROID, CXX] if is_fbcode() else [ANDROID], visibility = ["PUBLIC"], ) diff --git a/backends/qualcomm/tests/BUCK b/backends/qualcomm/tests/BUCK index c73a8f89536..e572a88b9c4 100644 --- a/backends/qualcomm/tests/BUCK +++ b/backends/qualcomm/tests/BUCK @@ -87,3 +87,29 @@ fbcode_target(_kind = runtime.python_test, "//executorch/examples/qualcomm/oss_scripts/llama:static_llama", ]), ) + +# Runs the QNN operator tests on the x86_64 host simulator under buck. Internal +# only: the underlying :test_qnn_delegate library pulls in caffe2/torchvision/etc +# that are unavailable in OSS Buck (see the comment on that target), and keeping +# it out of the OSS graph preserves the `test-qnn-buck-build-linux` CI signal. 
+# The host qnn_executor_runner binary and the QNN x86 SDK libs are supplied via +# env; the test reads QNN_EXECUTOR_RUNNER/QNN_SDK_ROOT and runs the simulator. +None if runtime.is_oss else fbcode_target( + _kind = runtime.python_test, + name = "test_qnn_delegate_x86", + srcs = [ + "test_qnn_delegate_x86.py", + ], + env = { + # Force CPU: quantization calibration, AOT compile, and the x86 HTP sim + # all run on CPU. Without this, parallel test shards each grab CUDA and + # exhaust GPU memory ("CUDA error: out of memory"). + "CUDA_VISIBLE_DEVICES": "", + "LD_LIBRARY_PATH": "$(location fbsource//third-party/qualcomm/qnn/qnn-{0}:qnn_offline_compile_libs)".format(get_qnn_library_version()), + "QNN_EXECUTOR_RUNNER": "$(location //executorch/examples/qualcomm/executor_runner:qnn_executor_runner)", + "QNN_SDK_ROOT": "$(location fbsource//third-party/qualcomm/qnn/qnn-{0}:__dir__)".format(get_qnn_library_version()), + }, + deps = [ + ":test_qnn_delegate", + ], +) diff --git a/backends/qualcomm/tests/test_qnn_delegate_x86.py b/backends/qualcomm/tests/test_qnn_delegate_x86.py new file mode 100644 index 00000000000..a687f1a334f --- /dev/null +++ b/backends/qualcomm/tests/test_qnn_delegate_x86.py @@ -0,0 +1,47 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Buck-native entry point that runs the QNN operator tests on the x86_64 host +simulator (no connected device, no CMake build tree). + +The full ``test_qnn_delegate.py`` suite is normally driven by its argparse +``__main__`` block (``setup_environment``). That block never executes under a +Buck ``python_test`` runner -- Buck imports the module and runs the discovered +``TestCase`` subclasses directly -- so the flags that select host simulation +(``--enable_x86_64``, ``--soc_model``, ``--backend``) are never parsed. 
This +module subclasses the operator test classes and sets the equivalent ``TestQNN`` +class attributes at import time so the same tests run unmodified under +``buck test``. + +The QNN x86 SDK libraries and the host ``qnn_executor_runner`` binary are +provided by the Buck target's ``env`` (``QNN_SDK_ROOT``, ``LD_LIBRARY_PATH``, +``QNN_EXECUTOR_RUNNER``); see the ``test_qnn_delegate_x86`` target in BUCK. +""" + +import os + +from executorch.backends.qualcomm.tests import test_qnn_delegate as _ops +from executorch.backends.qualcomm.tests.utils import TestQNN + +# Compile ahead-of-time and execute through the x86 simulator (qnn_executor_runner) +# instead of pushing to a device over adb. +TestQNN.enable_x86_64 = True +TestQNN.backend = os.environ.get("QNN_BACKEND", "htp") +# Only selects the HTP architecture baked into the offline-compiled context +# binary; the x86 simulator runs the same graph regardless of the physical SoC. +TestQNN.soc_model = os.environ.get("QNN_SOC_MODEL", "SM8650") + + +# Subclass (rather than re-import) so the test runner discovers these classes as +# defined in this module. The base classes stay behind the `_ops` module handle +# so they are not collected (and double-run) from this module's namespace. 
+class TestQNNFloatingPointOperator(_ops.TestQNNFloatingPointOperator): + pass + + +class TestQNNQuantizedOperator(_ops.TestQNNQuantizedOperator): + pass diff --git a/examples/qualcomm/executor_runner/targets.bzl b/examples/qualcomm/executor_runner/targets.bzl index 5d60249b2f6..89a723bb35b 100644 --- a/examples/qualcomm/executor_runner/targets.bzl +++ b/examples/qualcomm/executor_runner/targets.bzl @@ -1,6 +1,7 @@ load( "@fbsource//tools/build_defs:default_platform_defs.bzl", "ANDROID", + "CXX", ) load("@fbsource//tools/build_defs:fbsource_utils.bzl", "is_fbcode") load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") @@ -23,6 +24,7 @@ def define_common_targets(): "//executorch/runtime/executor:program", "//executorch/devtools/etdump:etdump_flatcc", "//executorch/devtools/bundled_program:runtime", + "//executorch/kernels/portable:generated_lib", "//executorch/kernels/quantized:generated_lib", "//executorch/extension/data_loader:buffer_data_loader", "//executorch/extension/data_loader:file_data_loader", @@ -30,7 +32,11 @@ def define_common_targets(): "//executorch/extension/runner_util:inputs", "//executorch/backends/qualcomm/runtime:runtime", ], - platforms = [ANDROID], + # The host (CXX) build of this runner is only used by the internal + # fbcode buck-native x86 QNN simulator test. Keep it out of OSS builds + # (CXX there includes macOS, which has no QNN host libs) -- restore the + # original Android-only surface in OSS. + platforms = [ANDROID, CXX] if is_fbcode() else [ANDROID], external_deps = [ "gflags", ],