diff --git a/examples/qualcomm/oss_scripts/mobilevit_v2.py b/examples/qualcomm/oss_scripts/mobilevit_v2.py index c5e6d43b1aa..15b25edb93b 100644 --- a/examples/qualcomm/oss_scripts/mobilevit_v2.py +++ b/examples/qualcomm/oss_scripts/mobilevit_v2.py @@ -22,6 +22,7 @@ build_executorch_binary, get_backend_type, make_output_dir, + make_quantizer, parse_skip_delegation_node, setup_common_args_and_variables, SimpleADB, @@ -90,9 +91,12 @@ def main(args): pte_filename = "mobilevit_v2_qnn" backend = get_backend_type(args.backend) - quant_dtype = { + quantizer = { QnnExecuTorchBackendType.kGpuBackend: None, - QnnExecuTorchBackendType.kHtpBackend: QuantDtype.use_16a8w, + QnnExecuTorchBackendType.kHtpBackend: make_quantizer( + quant_dtype=QuantDtype.use_16a8w, + eps=2**-10, + ), }[backend] build_executorch_binary( module, @@ -102,7 +106,7 @@ def main(args): inputs, skip_node_id_set=skip_node_id_set, skip_node_op_set=skip_node_op_set, - quant_dtype=quant_dtype, + custom_quantizer=quantizer, backend=backend, shared_buffer=args.shared_buffer, online_prepare=args.online_prepare, diff --git a/examples/qualcomm/util_scripts/cli.py b/examples/qualcomm/util_scripts/cli.py index 21a778e3063..79b9d7fb1f2 100644 --- a/examples/qualcomm/util_scripts/cli.py +++ b/examples/qualcomm/util_scripts/cli.py @@ -135,7 +135,6 @@ def __iter__(self): def quantize(args): logger = get_logger() - # get corresponding QnnQuantizer try: quant_dtype = getattr(QuantDtype, args.config) @@ -147,6 +146,7 @@ def quantize(args): act_observer=act_observer, backend=get_backend_type(args.backend), soc_model=args.model, + eps=args.eps, ) except Exception: logger.error( @@ -450,6 +450,12 @@ def main(): default="htp", help="Backend to be deployed ('htp'/'gpu' are currently supported).", ) + sub_quantize.add_argument( + "--eps", + help="EPS value for quantizer. Accepts floating‑point literal. E.g., 0.0009765625.", + type=float, + default=None, + ) sub_quantize.set_defaults(callback=quantize) sub_compile = subparsers.add_parser( diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index 9f2485f0457..0b1da7a8ccf 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -371,9 +371,9 @@ def make_quantizer( act_symmetric=False, is_qat=False, submodule_qconfig_list: Optional[List[Tuple[Callable, ModuleQConfig]]] = None, - eps=None, backend=QnnExecuTorchBackendType.kHtpBackend, soc_model="SM8750", + eps=None, ): quantizer = QnnQuantizer(backend=backend, soc_model=getattr(QcomChipset, soc_model)) quantizer.add_custom_quant_annotations(custom_annotations)