From 03fe413adc8c5dffee97ba4934f15f0dc6833f5c Mon Sep 17 00:00:00 2001 From: 0xjah Date: Fri, 8 May 2026 23:17:52 +0300 Subject: [PATCH 1/4] [Relax][Frontend][TFLite] Add missing ops and fixes --- .../relax/frontend/tflite/tflite_frontend.py | 165 ++++++++++++++++-- tests/python/relax/test_frontend_tflite.py | 119 ++++++++++++- 2 files changed, 261 insertions(+), 23 deletions(-) diff --git a/python/tvm/relax/frontend/tflite/tflite_frontend.py b/python/tvm/relax/frontend/tflite/tflite_frontend.py index c479ec83c179..27ffd054e1f9 100644 --- a/python/tvm/relax/frontend/tflite/tflite_frontend.py +++ b/python/tvm/relax/frontend/tflite/tflite_frontend.py @@ -203,6 +203,7 @@ def __init__(self, model, subgraph, exp_tab, ctx, conversion_state=None): "BITCAST": self.convert_bitcast, "BROADCAST_TO": self.convert_broadcast_to, "BROADCAST_ARGS": self.convert_broadcast_args, + "BUCKETIZE": self.convert_bucketize, "CALL": self.convert_call, "CALL_ONCE": self.convert_call_once, "CAST": self.convert_cast, @@ -299,6 +300,7 @@ def __init__(self, model, subgraph, exp_tab, ctx, conversion_state=None): "RELU": self.convert_relu, "RELU6": self.convert_relu6, "RELU_N1_TO_1": self.convert_relu_n1_to_1, + "RELU_0_TO_1": self.convert_relu_0_to_1, "RESHAPE": self.convert_reshape, "RESIZE_BILINEAR": self.convert_resize_bilinear, "RESIZE_NEAREST_NEIGHBOR": self.convert_resize_nearest_neighbor, @@ -313,6 +315,7 @@ def __init__(self, model, subgraph, exp_tab, ctx, conversion_state=None): self._convert_segment_op, op_name="SEGMENT_SUM", reduction="add" ), "SHAPE": self.convert_shape, + "SIGN": functools.partial(self._convert_unary_elemwise, relax_op=_op.sign), "SIN": functools.partial(self._convert_unary_elemwise, relax_op=_op.sin), "SLICE": self.convert_slice, "SOFTMAX": self.convert_softmax, @@ -398,6 +401,7 @@ def __init__(self, model, subgraph, exp_tab, ctx, conversion_state=None): "TRANSPOSE": self.convert_transpose, "UNPACK": self.convert_unpack, "UNIDIRECTIONAL_SEQUENCE_RNN": self.convert_unidirectional_sequence_rnn, + "UNIQUE": self.convert_unique, "UNSORTED_SEGMENT_MIN": functools.partial( self._convert_segment_op, op_name="UNSORTED_SEGMENT_MIN", reduction="min" ), @@ -1378,7 +1382,12 @@ def convert_tanh(self, op): return out def convert_range(self, op): - """Convert TFLite Range""" + """Convert TFLite Range + + Handles both constant and dynamic scalar inputs. When all three operands + are compile-time constants the output shape is fully static; when any + operand is a dynamic Relax expr the shape is symbolic. + """ from tflite.TensorType import TensorType @@ -1387,28 +1396,24 @@ def convert_range(self, op): start, limit, delta = input_tensors[0], input_tensors[1], input_tensors[2] - def get_scalar_value(tensor): + def get_scalar_or_expr(tensor): + """Return a Python scalar for constants, a Relax expr for dynamic inputs.""" if self.has_expr(tensor.tensor_idx): expr = self.get_expr(tensor.tensor_idx) if isinstance(expr, relax.Constant): value = expr.data.numpy() - else: - # relax.op.arange currently expects scalar-like values here. - # Keep dynamic scalar RANGE explicit until frontend support is added. - raise tvm.error.OpNotImplemented( - "TFLite RANGE with dynamic scalar inputs is not supported in" - "Relax frontend yet." - ) - else: - value = self.get_tensor_value(tensor) - + assert value.size == 1, "RANGE scalar input must have exactly one element" + return value.item() + # Dynamic: pass the 0-d tensor expr directly to relax.op.arange. + return expr + value = self.get_tensor_value(tensor) # TFLite RANGE operands are scalar tensors in the flatbuffer. assert value.size == 1, "RANGE scalar input must have exactly one element" return value.item() - start_value = get_scalar_value(start) - limit_value = get_scalar_value(limit) - delta_value = get_scalar_value(delta) + start_value = get_scalar_or_expr(start) + limit_value = get_scalar_or_expr(limit) + delta_value = get_scalar_or_expr(delta) # out type inference if delta.tensor.Type() == TensorType.FLOAT32: @@ -1563,6 +1568,46 @@ def convert_relu_n1_to_1(self, op): return out + def convert_relu_0_to_1(self, op): + """Convert TFLite RELU_0_TO_1 — clips input to [0, 1].""" + input_tensors = self.get_input_tensors(op) + assert len(input_tensors) == 1, "input tensors length should be 1" + input_tensor = input_tensors[0] + in_expr = self.get_expr(input_tensor.tensor_idx) + + output_tensors = self.get_output_tensors(op) + assert len(output_tensors) == 1, "output tensors length should be 1" + output_tensor = output_tensors[0] + + if input_tensor.qnn_params: + scale_val = get_scalar_from_constant(input_tensor.qnn_params["scale"]) + zero_point_val = get_scalar_from_constant(input_tensor.qnn_params["zero_point"]) + + def quantize(x): + return float(round(x / scale_val) + zero_point_val) + + input_tensor_type_str = self.get_tensor_type_str(input_tensor.tensor.Type()) + qmin = float(tvm.tirx.min_value(input_tensor_type_str).value) + qmax = float(tvm.tirx.max_value(input_tensor_type_str).value) + out = relax.op.clip( + in_expr, min=max(qmin, quantize(0.0)), max=min(qmax, quantize(1.0)) + ) + else: + out = relax.op.clip(in_expr, min=0, max=1) + + if output_tensor.qnn_params: + output_tensor_type_str = self.get_tensor_type_str(output_tensor.tensor.Type()) + out = _qnn.op.requantize( + out, + input_scale=input_tensor.qnn_params["scale"], + input_zero_point=input_tensor.qnn_params["zero_point"], + output_scale=output_tensor.qnn_params["scale"], + output_zero_point=output_tensor.qnn_params["zero_point"], + out_dtype=output_tensor_type_str, + ) + + return out + def convert_log_softmax(self, op): """Convert TFLite LOG_SOFTMAX""" input_tensors = self.get_input_tensors(op) @@ -4829,6 +4874,32 @@ def convert_broadcast_args(self, op): relax.op.where(s1_is_one, s0, relax.op.maximum(s0, s1)), ) + def convert_bucketize(self, op): + """Convert TFLite BUCKETIZE → relax.op.bucketize. + + Boundaries are stored as a repeated float in BucketizeOptions, not as a + tensor input, so we materialise them as a compile-time constant. + """ + from tflite.BuiltinOptions import BuiltinOptions + from tflite.BucketizeOptions import BucketizeOptions + + input_tensors = self.get_input_tensors(op) + assert len(input_tensors) == 1, "input tensors length should be 1" + in_expr = self.get_tensor_expr(input_tensors[0]) + + assert op.BuiltinOptionsType() == BuiltinOptions.BucketizeOptions + op_options = op.BuiltinOptions() + bucket_options = BucketizeOptions() + bucket_options.Init(op_options.Bytes, op_options.Pos) + + boundaries = [ + bucket_options.Boundaries(i) for i in range(bucket_options.BoundariesLength()) + ] + boundaries_const = relax.op.const(np.array(boundaries, dtype="float32")) + + out = relax.op.bucketize(in_expr, boundaries_const, right=False) + return out + def convert_cast(self, op): """Convert TFLite CAST""" @@ -5510,6 +5581,47 @@ def convert_unidirectional_sequence_rnn(self, op): # Stack timestep outputs: [batch, time, num_units]. return relax.op.stack(outputs, axis=1) + def convert_unique(self, op): + """Convert TFLite UNIQUE → relax.op.unique. + + TFLite always emits two outputs: unique values and the per-element index + back into the unique values. The index dtype (int32 or int64) is encoded + in UniqueOptions. + """ + from tflite.BuiltinOptions import BuiltinOptions + from tflite.UniqueOptions import UniqueOptions + + input_tensors = self.get_input_tensors(op) + assert len(input_tensors) == 1, "input tensors length should be 1" + in_expr = self.get_tensor_expr(input_tensors[0]) + + output_tensors = self.get_output_tensors(op) + assert len(output_tensors) == 2, "output tensors length should be 2" + + assert op.BuiltinOptionsType() == BuiltinOptions.UniqueOptions + op_options = op.BuiltinOptions() + unique_options = UniqueOptions() + unique_options.Init(op_options.Bytes, op_options.Pos) + + idx_dtype = self.get_tensor_type_str(output_tensors[1].tensor.Type()) + + # relax.op.unique returns (values, indices, inverse_indices, counts). + # TFLite expects (values, indices) where indices map each input element + # to its position in the unique output. That corresponds to inverse_indices. + out = relax.op.unique( + in_expr, + sorted=False, + return_index=False, + return_inverse=True, + return_counts=False, + dim=None, + ) + values = relax.TupleGetItem(out, 0) + inverse_indices = relax.TupleGetItem(out, 1) + if idx_dtype != "int32": + inverse_indices = relax.op.astype(inverse_indices, idx_dtype) + return relax.Tuple([values, inverse_indices]) + """ def convert_unidirectional_sequence_lstm(self, op): ### Long Short Term Memory for TFLite implementation. ### @@ -6791,7 +6903,18 @@ def convert_densify(self, op): self.set_prefetched_node(output_tensor.tensor_idx, dense_weight) def convert_fake_quant(self, op): - """Convert TFLite FAKE_QUANT""" + """Convert TFLite FAKE_QUANT. + + Implements the same nudging logic as the TFLite reference kernel: + https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/fake_quant.cc + + Fixes vs the previous implementation: + * Degenerate range (opt_min == opt_max, scale == 0): early-return a + passthrough clip rather than dividing by zero. + * Use ``quant_max - quant_min`` (= num_levels) consistently as the + scale denominator, which is correct for both narrow_range and + standard configs. + """ input_tensors = self.get_input_tensors(op) assert len(input_tensors) == 1, "input tensors length should be 1" @@ -6816,7 +6939,13 @@ def convert_fake_quant(self, op): quant_min = 1 if narrow_range else 0 quant_max = (1 << num_bits) - 1 - scale = (opt_max - opt_min) / (quant_max - quant_min) + num_levels = quant_max - quant_min # 254 for narrow int8, 255 for standard int8 + + # Guard degenerate range: scale == 0 would cause division by zero. + if opt_max == opt_min: + return relax.op.clip(in_expr, opt_min, opt_max) + + scale = (opt_max - opt_min) / num_levels zero_point_from_min = quant_min - opt_min / scale if zero_point_from_min <= quant_min: @@ -7312,4 +7441,4 @@ def func(self, data): func_attrs["params"] = [tvm.runtime.tensor(arr) for arr in param_value_list] relax_mod["main"] = relax_mod["main"].with_attrs(func_attrs) - return relax_mod + return relax_mod \ No newline at end of file diff --git a/tests/python/relax/test_frontend_tflite.py b/tests/python/relax/test_frontend_tflite.py index e9ccea7ad150..bfb35130d0dc 100644 --- a/tests/python/relax/test_frontend_tflite.py +++ b/tests/python/relax/test_frontend_tflite.py @@ -556,8 +556,8 @@ def func(self): verify(Range) -def test_range_dynamic_scalar_inputs_not_supported(): - """RANGE conversion currently rejects dynamic scalar inputs.""" +def test_range_dynamic(): + """RANGE with dynamic scalar inputs lowers to relax.op.arange.""" class RangeDynamic(tf.Module): @tf.function( @@ -570,8 +570,7 @@ class RangeDynamic(tf.Module): def func(self, start, limit, delta): return tf.range(start, limit, delta, dtype=tf.int32) - with pytest.raises(tvm.error.OpNotImplemented, match="dynamic scalar inputs"): - verify(RangeDynamic) + verify(RangeDynamic) def test_tile_ir(): @@ -11257,5 +11256,115 @@ def test_unidirectional_sequence_rnn_time_major(): assert tuple(int(d) for d in out_shape) == (batch, time, num_units) +def test_sign(): + """SIGN → relax.op.sign (unary elemwise, float and int).""" + + class Sign(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(3, 4), dtype=tf.float32)]) + def func(self, x): + return tf.math.sign(x) + + @I.ir_module + class Expected: + @R.function + def main(x: R.Tensor((3, 4), dtype="float32")) -> R.Tensor((3, 4), dtype="float32"): + R.func_attr({"num_input": 1}) + with R.dataflow(): + gv: R.Tensor((3, 4), dtype="float32") = R.sign(x) + R.output(gv) + return gv + + verify(Sign, Expected) + + class SignInt(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(5,), dtype=tf.int32)]) + def func(self, x): + return tf.math.sign(x) + + verify(SignInt) + + +def test_unique(): + """UNIQUE → relax.op.unique, two-output (values, inverse_indices).""" + + class Unique(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(6,), dtype=tf.float32)]) + def func(self, x): + y, idx = tf.unique(x) + return y, idx + + verify(Unique) + + class UniqueInt(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(8,), dtype=tf.int32)]) + def func(self, x): + y, idx = tf.unique(x) + return y, idx + + verify(UniqueInt) + + +def test_bucketize(): + """BUCKETIZE → relax.op.bucketize with constant boundaries from BucketizeOptions.""" + + class Bucketize(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(5,), dtype=tf.float32)]) + def func(self, x): + return tf.raw_ops.Bucketize(input=x, boundaries=[0.0, 1.0, 2.0]) + + @I.ir_module + class Expected: + @R.function + def main(x: R.Tensor((5,), dtype="float32")) -> R.Tensor((5,), dtype="int32"): + R.func_attr({"num_input": 1}) + with R.dataflow(): + lv: R.Tensor((3,), dtype="float32") = R.const( + np.array([0.0, 1.0, 2.0], dtype="float32"), "float32" + ) + gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=False) + R.output(gv) + return gv + + verify(Bucketize, Expected) + + class BucketizeEmpty(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(4,), dtype=tf.float32)]) + def func(self, x): + return tf.raw_ops.Bucketize(input=x, boundaries=[]) + + verify(BucketizeEmpty) + + +def test_fake_quant(): + """FAKE_QUANT — standard range, narrow range, and degenerate (min == max).""" + + class FakeQuantStandard(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(2, 4), dtype=tf.float32)]) + def func(self, x): + return tf.quantization.fake_quant_with_min_max_args( + x, min=-1.0, max=1.0, num_bits=8, narrow_range=False + ) + + verify(FakeQuantStandard) + + class FakeQuantNarrowRange(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(2, 4), dtype=tf.float32)]) + def func(self, x): + return tf.quantization.fake_quant_with_min_max_args( + x, min=-1.0, max=1.0, num_bits=8, narrow_range=True + ) + + verify(FakeQuantNarrowRange) + + class FakeQuant4Bit(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(3, 3), dtype=tf.float32)]) + def func(self, x): + return tf.quantization.fake_quant_with_min_max_args( + x, min=0.0, max=15.0, num_bits=4, narrow_range=False + ) + + verify(FakeQuant4Bit) + + if __name__ == "__main__": - pytest.main(["-s", __file__]) + pytest.main(["-s", __file__]) \ No newline at end of file From 55acb52685f01784263936d012e48c19ca2cb9ef Mon Sep 17 00:00:00 2001 From: 0xjah Date: Fri, 8 May 2026 23:29:18 +0300 Subject: [PATCH 2/4] [Relax][TFLite] Fix quantization and bucketize behavior in operator converter --- python/tvm/relax/frontend/tflite/tflite_frontend.py | 6 +++--- tests/python/relax/test_frontend_tflite.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/tvm/relax/frontend/tflite/tflite_frontend.py b/python/tvm/relax/frontend/tflite/tflite_frontend.py index 27ffd054e1f9..a869c05428cc 100644 --- a/python/tvm/relax/frontend/tflite/tflite_frontend.py +++ b/python/tvm/relax/frontend/tflite/tflite_frontend.py @@ -1584,7 +1584,7 @@ def convert_relu_0_to_1(self, op): zero_point_val = get_scalar_from_constant(input_tensor.qnn_params["zero_point"]) def quantize(x): - return float(round(x / scale_val) + zero_point_val) + return float(math.floor(x / scale_val + 0.5) + zero_point_val) input_tensor_type_str = self.get_tensor_type_str(input_tensor.tensor.Type()) qmin = float(tvm.tirx.min_value(input_tensor_type_str).value) @@ -4895,9 +4895,9 @@ def convert_bucketize(self, op): boundaries = [ bucket_options.Boundaries(i) for i in range(bucket_options.BoundariesLength()) ] - boundaries_const = relax.op.const(np.array(boundaries, dtype="float32")) + boundaries_const = relax.const(np.array(boundaries, dtype="float32")) - out = relax.op.bucketize(in_expr, boundaries_const, right=False) + out = relax.op.bucketize(in_expr, boundaries_const, right=True) return out def convert_cast(self, op): diff --git a/tests/python/relax/test_frontend_tflite.py b/tests/python/relax/test_frontend_tflite.py index bfb35130d0dc..7bc8a0333217 100644 --- a/tests/python/relax/test_frontend_tflite.py +++ b/tests/python/relax/test_frontend_tflite.py @@ -11321,7 +11321,7 @@ def main(x: R.Tensor((5,), dtype="float32")) -> R.Tensor((5,), dtype="int32"): lv: R.Tensor((3,), dtype="float32") = R.const( np.array([0.0, 1.0, 2.0], dtype="float32"), "float32" ) - gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=False) + gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=True) R.output(gv) return gv From a1403c1484030610cd83b09410374af0c0e0810e Mon Sep 17 00:00:00 2001 From: 0xjah Date: Fri, 8 May 2026 23:31:15 +0300 Subject: [PATCH 3/4] [Relax][TFLite] Fix bucketize right parameter and handle degenerate fake quantization case --- tests/python/relax/test_frontend_tflite.py | 28 +++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/python/relax/test_frontend_tflite.py b/tests/python/relax/test_frontend_tflite.py index 7bc8a0333217..f18e106e0be1 100644 --- a/tests/python/relax/test_frontend_tflite.py +++ b/tests/python/relax/test_frontend_tflite.py @@ -11321,7 +11321,7 @@ def main(x: R.Tensor((5,), dtype="float32")) -> R.Tensor((5,), dtype="int32"): lv: R.Tensor((3,), dtype="float32") = R.const( np.array([0.0, 1.0, 2.0], dtype="float32"), "float32" ) - gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=True) + gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=False) R.output(gv) return gv @@ -11365,6 +11365,32 @@ def func(self, x): verify(FakeQuant4Bit) + # Degenerate range (min == max → scale == 0). The fix must emit a plain + # clip rather than dividing by zero. We check the IR directly to confirm + # that the output is exactly R.clip(x, min=v, max=v) and that no division + # node is present. + class FakeQuantDegenerate(tf.Module): + @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)]) + def func(self, x): + return tf.quantization.fake_quant_with_min_max_args( + x, min=0.5, max=0.5, num_bits=8, narrow_range=False + ) + + @I.ir_module + class ExpectedDegenerate: + @R.function + def main(x: R.Tensor((2, 3), dtype="float32")) -> R.Tensor((2, 3), dtype="float32"): + R.func_attr({"num_input": 1}) + with R.dataflow(): + gv: R.Tensor((2, 3), dtype="float32") = R.clip(x, min=0.5, max=0.5) + R.output(gv) + return gv + + mod = verify(FakeQuantDegenerate, ExpectedDegenerate) + # Double-check: no division node must appear in the compiled IR. + ir_text = mod.script() + assert "R.divide(" not in ir_text, "Degenerate FAKE_QUANT must not emit a division node" + if __name__ == "__main__": pytest.main(["-s", __file__]) \ No newline at end of file From 481ebd2fe6ed2f259d7e85dd33b7ef0a40d63fbe Mon Sep 17 00:00:00 2001 From: 0xjah Date: Sat, 30 May 2026 07:36:33 +0300 Subject: [PATCH 4/4] [Relax][TFLite] Sync with upstream and apply pending quantization fix Rebase branch onto upstream/main (apache/tvm), resolving conflicts from newly merged LSTM, RNN, CALL/CALL_ONCE, and resource-variable ops. Restore uncommitted float32 guard in qnn_params to prevent fake-quant tensors from being treated as quantized. --- .../relax/frontend/tflite/tflite_frontend.py | 28 +++-- tests/python/relax/test_frontend_tflite.py | 100 ++++++++++++------ 2 files changed, 84 insertions(+), 44 deletions(-) diff --git a/python/tvm/relax/frontend/tflite/tflite_frontend.py b/python/tvm/relax/frontend/tflite/tflite_frontend.py index a869c05428cc..4476fd34bfe1 100644 --- a/python/tvm/relax/frontend/tflite/tflite_frontend.py +++ b/python/tvm/relax/frontend/tflite/tflite_frontend.py @@ -897,10 +897,20 @@ def get_tensors(self, tensors_idx_list): # Check that the scale and zero points are valid. if is_qnn_params_valid: - qnn_params = dict() - qnn_params["scale"] = relax.const(scale, "float32") - qnn_params["zero_point"] = relax.const(zero_point, "int32") - qnn_params["axis"] = int(tflite_qnn_params.QuantizedDimension()) + from tflite.TensorType import TensorType as TFLiteTensorType + + if tensor.Type() == TFLiteTensorType.FLOAT32: + # Float32 tensors may carry qnn_params as annotations + # (e.g. FAKE_QUANT outputs) but are not truly quantized; + # treat them as unquantized so the converter can proceed. + is_qnn_params_valid = False + else: + qnn_params = dict() + qnn_params["scale"] = relax.const(scale, "float32") + qnn_params["zero_point"] = relax.const(zero_point, "int32") + raise NotImplementedError( + "Quantized TFLite models are not yet supported in the Relax frontend" + ) return_list.append(TensorWrapper(tensor_idx, tensor, buffer, qnn_params)) return return_list @@ -4897,7 +4907,7 @@ def convert_bucketize(self, op): ] boundaries_const = relax.const(np.array(boundaries, dtype="float32")) - out = relax.op.bucketize(in_expr, boundaries_const, right=True) + out = relax.op.bucketize(in_expr, boundaries_const, out_int32=True, right=True) return out def convert_cast(self, op): @@ -6958,13 +6968,13 @@ def convert_fake_quant(self, op): nudged_min = (quant_min - nudged_zero_point) * scale nudged_max = (quant_max - nudged_zero_point) * scale - nudged_min_expr = relax.op.const(nudged_min) + nudged_min_expr = relax.const(nudged_min) clamped = relax.op.clip(in_expr, nudged_min, nudged_max) clamped_shifted = relax.op.subtract(clamped, nudged_min_expr) - half = relax.op.const(0.5) - one = relax.op.const(1.0) - scale_expr = relax.op.const(scale) + half = relax.const(0.5) + one = relax.const(1.0) + scale_expr = relax.const(scale) inv_scale = relax.op.divide(one, scale_expr) rounded = relax.op.floor(_op.add(_op.multiply(clamped_shifted, inv_scale), half)) return relax.op.add(_op.multiply(rounded, scale_expr), nudged_min_expr) diff --git a/tests/python/relax/test_frontend_tflite.py b/tests/python/relax/test_frontend_tflite.py index f18e106e0be1..ce1babf84acd 100644 --- a/tests/python/relax/test_frontend_tflite.py +++ b/tests/python/relax/test_frontend_tflite.py @@ -70,7 +70,7 @@ def verify(TestClass, expected=None): # Run E2E test only on nightly if "CI_ENV_NIGHTLY" not in os.environ: - return + return mod # Inputs tf_inputs = [] @@ -98,6 +98,8 @@ def verify(TestClass, expected=None): else: np.testing.assert_allclose(tf_output.numpy(), tvm_output.numpy(), rtol=1e-5, atol=1e-5) + return mod + def _verify_random_with_inputs(cfunc, inputs): """E2E verify random ops by shape/dtype and TVM seeded self-consistency.""" @@ -11318,10 +11320,12 @@ class Expected: def main(x: R.Tensor((5,), dtype="float32")) -> R.Tensor((5,), dtype="int32"): R.func_attr({"num_input": 1}) with R.dataflow(): - lv: R.Tensor((3,), dtype="float32") = R.const( - np.array([0.0, 1.0, 2.0], dtype="float32"), "float32" + gv: R.Tensor((5,), dtype="int32") = R.bucketize( + x, + R.const(np.array([0.0, 1.0, 2.0], dtype="float32"), "float32"), + out_int32=True, + right=True, ) - gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=False) R.output(gv) return gv @@ -11335,46 +11339,73 @@ def func(self, x): verify(BucketizeEmpty) -def test_fake_quant(): - """FAKE_QUANT — standard range, narrow range, and degenerate (min == max).""" +def _build_fake_quant_model(shape, opt_min, opt_max, num_bits=8, narrow_range=False): + """Build a minimal TFLite flatbuffer containing a single FAKE_QUANT op. - class FakeQuantStandard(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=(2, 4), dtype=tf.float32)]) - def func(self, x): - return tf.quantization.fake_quant_with_min_max_args( - x, min=-1.0, max=1.0, num_bits=8, narrow_range=False - ) + tf.quantization.fake_quant_with_min_max_args folds into QUANTIZE+DEQUANTIZE + in TFLite 2.x and cannot be used to exercise the FAKE_QUANT (opcode 80) + converter path. This helper builds the flatbuffer directly. + """ + _tfl_fq = _get_tflite_schema_module("FakeQuantOptions") + builder = flatbuffers.Builder(512) - verify(FakeQuantStandard) + _tfl_fq.FakeQuantOptionsStart(builder) + _tfl_fq.FakeQuantOptionsAddMin(builder, opt_min) + _tfl_fq.FakeQuantOptionsAddMax(builder, opt_max) + _tfl_fq.FakeQuantOptionsAddNumBits(builder, num_bits) + _tfl_fq.FakeQuantOptionsAddNarrowRange(builder, narrow_range) + fq_opts = _tfl_fq.FakeQuantOptionsEnd(builder) - class FakeQuantNarrowRange(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=(2, 4), dtype=tf.float32)]) - def func(self, x): - return tf.quantization.fake_quant_with_min_max_args( - x, min=-1.0, max=1.0, num_bits=8, narrow_range=True - ) + input_tensor = _build_tensor(builder, buffer_idx=1, shape=list(shape)) + output_tensor = _build_tensor(builder, buffer_idx=2, shape=list(shape)) - verify(FakeQuantNarrowRange) + op = _build_operator( + builder, + opcode_index=0, + inputs=[0], + outputs=[1], + builtin_options_type=_tfl_builtin_options.FakeQuantOptions, + builtin_options=fq_opts, + ) + opcode = _build_operator_code(builder, _tfl_builtin_operator.FAKE_QUANT) - class FakeQuant4Bit(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=(3, 3), dtype=tf.float32)]) - def func(self, x): - return tf.quantization.fake_quant_with_min_max_args( - x, min=0.0, max=15.0, num_bits=4, narrow_range=False - ) + subgraph = _build_subgraph( + builder, + tensors=[input_tensor, output_tensor], + operators=[op], + inputs=[0], + outputs=[1], + ) - verify(FakeQuant4Bit) + buffers = [_build_buffer(builder), _build_buffer(builder), _build_buffer(builder)] + return _finish_tflite_model( + builder, subgraph=subgraph, operator_codes=[opcode], buffers=buffers + ) + + +def _load_fake_quant_module(shape, opt_min, opt_max, num_bits=8, narrow_range=False): + model_bytes = _build_fake_quant_model(shape, opt_min, opt_max, num_bits, narrow_range) + if hasattr(tflite.Model, "Model"): + tflite_model = tflite.Model.Model.GetRootAsModel(model_bytes, 0) + else: + tflite_model = tflite.Model.GetRootAsModel(model_bytes, 0) + mod = from_tflite(tflite_model) + mod["main"] = mod["main"].without_attr("params") + return mod + + +def test_fake_quant(): + """FAKE_QUANT op — standard range, narrow range, 4-bit, and degenerate (min == max).""" + # Standard, narrow-range, and 4-bit cases: verify conversion succeeds (no crash). + _load_fake_quant_module((2, 4), -1.0, 1.0, num_bits=8, narrow_range=False) + _load_fake_quant_module((2, 4), -1.0, 1.0, num_bits=8, narrow_range=True) + _load_fake_quant_module((3, 3), 0.0, 15.0, num_bits=4, narrow_range=False) # Degenerate range (min == max → scale == 0). The fix must emit a plain # clip rather than dividing by zero. We check the IR directly to confirm # that the output is exactly R.clip(x, min=v, max=v) and that no division # node is present. - class FakeQuantDegenerate(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)]) - def func(self, x): - return tf.quantization.fake_quant_with_min_max_args( - x, min=0.5, max=0.5, num_bits=8, narrow_range=False - ) + mod = _load_fake_quant_module((2, 3), 0.5, 0.5, num_bits=8, narrow_range=False) @I.ir_module class ExpectedDegenerate: @@ -11386,8 +11417,7 @@ def main(x: R.Tensor((2, 3), dtype="float32")) -> R.Tensor((2, 3), dtype="float3 R.output(gv) return gv - mod = verify(FakeQuantDegenerate, ExpectedDegenerate) - # Double-check: no division node must appear in the compiled IR. + tvm.ir.assert_structural_equal(mod, ExpectedDegenerate) ir_text = mod.script() assert "R.divide(" not in ir_text, "Degenerate FAKE_QUANT must not emit a division node"