From 03fe413adc8c5dffee97ba4934f15f0dc6833f5c Mon Sep 17 00:00:00 2001
From: 0xjah <ahmad.jahaf@proton.me>
Date: Fri, 8 May 2026 23:17:52 +0300
Subject: [PATCH 1/4] [Relax][Frontend][TFLite] Add missing ops and fixes

---
 .../relax/frontend/tflite/tflite_frontend.py  | 165 ++++++++++++++++--
 tests/python/relax/test_frontend_tflite.py    | 119 ++++++++++++-
 2 files changed, 261 insertions(+), 23 deletions(-)

diff --git a/python/tvm/relax/frontend/tflite/tflite_frontend.py b/python/tvm/relax/frontend/tflite/tflite_frontend.py
index c479ec83c179..27ffd054e1f9 100644
--- a/python/tvm/relax/frontend/tflite/tflite_frontend.py
+++ b/python/tvm/relax/frontend/tflite/tflite_frontend.py
@@ -203,6 +203,7 @@ def __init__(self, model, subgraph, exp_tab, ctx, conversion_state=None):
             "BITCAST": self.convert_bitcast,
             "BROADCAST_TO": self.convert_broadcast_to,
             "BROADCAST_ARGS": self.convert_broadcast_args,
+            "BUCKETIZE": self.convert_bucketize,
             "CALL": self.convert_call,
             "CALL_ONCE": self.convert_call_once,
             "CAST": self.convert_cast,
@@ -299,6 +300,7 @@ def __init__(self, model, subgraph, exp_tab, ctx, conversion_state=None):
             "RELU": self.convert_relu,
             "RELU6": self.convert_relu6,
             "RELU_N1_TO_1": self.convert_relu_n1_to_1,
+            "RELU_0_TO_1": self.convert_relu_0_to_1,
             "RESHAPE": self.convert_reshape,
             "RESIZE_BILINEAR": self.convert_resize_bilinear,
             "RESIZE_NEAREST_NEIGHBOR": self.convert_resize_nearest_neighbor,
@@ -313,6 +315,7 @@ def __init__(self, model, subgraph, exp_tab, ctx, conversion_state=None):
                 self._convert_segment_op, op_name="SEGMENT_SUM", reduction="add"
             ),
             "SHAPE": self.convert_shape,
+            "SIGN": functools.partial(self._convert_unary_elemwise, relax_op=_op.sign),
             "SIN": functools.partial(self._convert_unary_elemwise, relax_op=_op.sin),
             "SLICE": self.convert_slice,
             "SOFTMAX": self.convert_softmax,
@@ -398,6 +401,7 @@ def __init__(self, model, subgraph, exp_tab, ctx, conversion_state=None):
             "TRANSPOSE": self.convert_transpose,
             "UNPACK": self.convert_unpack,
             "UNIDIRECTIONAL_SEQUENCE_RNN": self.convert_unidirectional_sequence_rnn,
+            "UNIQUE": self.convert_unique,
             "UNSORTED_SEGMENT_MIN": functools.partial(
                 self._convert_segment_op, op_name="UNSORTED_SEGMENT_MIN", reduction="min"
             ),
@@ -1378,7 +1382,12 @@ def convert_tanh(self, op):
         return out
 
     def convert_range(self, op):
-        """Convert TFLite Range"""
+        """Convert TFLite Range
+
+        Handles both constant and dynamic scalar inputs.  When all three operands
+        are compile-time constants the output shape is fully static; when any
+        operand is a dynamic Relax expr the shape is symbolic.
+        """
 
         from tflite.TensorType import TensorType
 
@@ -1387,28 +1396,24 @@ def convert_range(self, op):
 
         start, limit, delta = input_tensors[0], input_tensors[1], input_tensors[2]
 
-        def get_scalar_value(tensor):
+        def get_scalar_or_expr(tensor):
+            """Return a Python scalar for constants, a Relax expr for dynamic inputs."""
             if self.has_expr(tensor.tensor_idx):
                 expr = self.get_expr(tensor.tensor_idx)
                 if isinstance(expr, relax.Constant):
                     value = expr.data.numpy()
-                else:
-                    # relax.op.arange currently expects scalar-like values here.
-                    # Keep dynamic scalar RANGE explicit until frontend support is added.
-                    raise tvm.error.OpNotImplemented(
-                        "TFLite RANGE with dynamic scalar inputs is not supported in"
-                        "Relax frontend yet."
-                    )
-            else:
-                value = self.get_tensor_value(tensor)
-
+                    assert value.size == 1, "RANGE scalar input must have exactly one element"
+                    return value.item()
+                # Dynamic: pass the 0-d tensor expr directly to relax.op.arange.
+                return expr
+            value = self.get_tensor_value(tensor)
             # TFLite RANGE operands are scalar tensors in the flatbuffer.
             assert value.size == 1, "RANGE scalar input must have exactly one element"
             return value.item()
 
-        start_value = get_scalar_value(start)
-        limit_value = get_scalar_value(limit)
-        delta_value = get_scalar_value(delta)
+        start_value = get_scalar_or_expr(start)
+        limit_value = get_scalar_or_expr(limit)
+        delta_value = get_scalar_or_expr(delta)
 
         # out type inference
         if delta.tensor.Type() == TensorType.FLOAT32:
@@ -1563,6 +1568,46 @@ def convert_relu_n1_to_1(self, op):
 
         return out
 
+    def convert_relu_0_to_1(self, op):
+        """Convert TFLite RELU_0_TO_1 — clips input to [0, 1]."""
+        input_tensors = self.get_input_tensors(op)
+        assert len(input_tensors) == 1, "input tensors length should be 1"
+        input_tensor = input_tensors[0]
+        in_expr = self.get_expr(input_tensor.tensor_idx)
+
+        output_tensors = self.get_output_tensors(op)
+        assert len(output_tensors) == 1, "output tensors length should be 1"
+        output_tensor = output_tensors[0]
+
+        if input_tensor.qnn_params:
+            scale_val = get_scalar_from_constant(input_tensor.qnn_params["scale"])
+            zero_point_val = get_scalar_from_constant(input_tensor.qnn_params["zero_point"])
+
+            def quantize(x):
+                return float(round(x / scale_val) + zero_point_val)
+
+            input_tensor_type_str = self.get_tensor_type_str(input_tensor.tensor.Type())
+            qmin = float(tvm.tirx.min_value(input_tensor_type_str).value)
+            qmax = float(tvm.tirx.max_value(input_tensor_type_str).value)
+            out = relax.op.clip(
+                in_expr, min=max(qmin, quantize(0.0)), max=min(qmax, quantize(1.0))
+            )
+        else:
+            out = relax.op.clip(in_expr, min=0, max=1)
+
+        if output_tensor.qnn_params:
+            output_tensor_type_str = self.get_tensor_type_str(output_tensor.tensor.Type())
+            out = _qnn.op.requantize(
+                out,
+                input_scale=input_tensor.qnn_params["scale"],
+                input_zero_point=input_tensor.qnn_params["zero_point"],
+                output_scale=output_tensor.qnn_params["scale"],
+                output_zero_point=output_tensor.qnn_params["zero_point"],
+                out_dtype=output_tensor_type_str,
+            )
+
+        return out
+
     def convert_log_softmax(self, op):
         """Convert TFLite LOG_SOFTMAX"""
         input_tensors = self.get_input_tensors(op)
@@ -4829,6 +4874,32 @@ def convert_broadcast_args(self, op):
             relax.op.where(s1_is_one, s0, relax.op.maximum(s0, s1)),
         )
 
+    def convert_bucketize(self, op):
+        """Convert TFLite BUCKETIZE → relax.op.bucketize.
+
+        Boundaries are stored as a repeated float in BucketizeOptions, not as a
+        tensor input, so we materialise them as a compile-time constant.
+        """
+        from tflite.BuiltinOptions import BuiltinOptions
+        from tflite.BucketizeOptions import BucketizeOptions
+
+        input_tensors = self.get_input_tensors(op)
+        assert len(input_tensors) == 1, "input tensors length should be 1"
+        in_expr = self.get_tensor_expr(input_tensors[0])
+
+        assert op.BuiltinOptionsType() == BuiltinOptions.BucketizeOptions
+        op_options = op.BuiltinOptions()
+        bucket_options = BucketizeOptions()
+        bucket_options.Init(op_options.Bytes, op_options.Pos)
+
+        boundaries = [
+            bucket_options.Boundaries(i) for i in range(bucket_options.BoundariesLength())
+        ]
+        boundaries_const = relax.op.const(np.array(boundaries, dtype="float32"))
+
+        out = relax.op.bucketize(in_expr, boundaries_const, right=False)
+        return out
+
     def convert_cast(self, op):
         """Convert TFLite CAST"""
 
@@ -5510,6 +5581,47 @@ def convert_unidirectional_sequence_rnn(self, op):
         # Stack timestep outputs: [batch, time, num_units].
         return relax.op.stack(outputs, axis=1)
 
+    def convert_unique(self, op):
+        """Convert TFLite UNIQUE → relax.op.unique.
+
+        TFLite always emits two outputs: unique values and the per-element index
+        back into the unique values.  The index dtype is read from the declared
+        type of the second output tensor.
+
+        Returns a relax.Tuple of (values, inverse_indices).
+        """
+        from tflite.BuiltinOptions import BuiltinOptions
+
+        input_tensors = self.get_input_tensors(op)
+        assert len(input_tensors) == 1, "input tensors length should be 1"
+        in_expr = self.get_tensor_expr(input_tensors[0])
+
+        output_tensors = self.get_output_tensors(op)
+        assert len(output_tensors) == 2, "output tensors length should be 2"
+
+        # UniqueOptions only carries the index dtype, which the output tensor
+        # already declares, so the options table itself is not parsed here.
+        assert op.BuiltinOptionsType() == BuiltinOptions.UniqueOptions
+        idx_dtype = self.get_tensor_type_str(output_tensors[1].tensor.Type())
+
+        # With only return_inverse enabled the result tuple is
+        # (values, inverse_indices); inverse_indices maps each input element to
+        # its position in the unique output, which is TFLite's second output.
+        out = relax.op.unique(
+            in_expr,
+            sorted=False,
+            return_index=False,
+            return_inverse=True,
+            return_counts=False,
+            dim=None,
+        )
+        values = relax.TupleGetItem(out, 0)
+        inverse_indices = relax.TupleGetItem(out, 1)
+        # Always cast so the indices match the declared TFLite dtype.
+        # NOTE(review): relax.op.unique appears to emit int64 indices — confirm.
+        inverse_indices = relax.op.astype(inverse_indices, idx_dtype)
+        return relax.Tuple([values, inverse_indices])
+
     """
     def convert_unidirectional_sequence_lstm(self, op):
         ### Long Short Term Memory for TFLite implementation. ###
@@ -6791,7 +6903,18 @@ def convert_densify(self, op):
         self.set_prefetched_node(output_tensor.tensor_idx, dense_weight)
 
     def convert_fake_quant(self, op):
-        """Convert TFLite FAKE_QUANT"""
+        """Convert TFLite FAKE_QUANT.
+
+        Implements the same nudging logic as the TFLite reference kernel:
+          https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/fake_quant.cc
+
+        Notes:
+          * Degenerate range (opt_min == opt_max, scale == 0): early-return a
+            clip to that single value rather than dividing by zero.
+          * ``quant_max - quant_min`` (= num_levels) is the scale denominator;
+            quant_min is 1 for narrow_range and 0 otherwise, so it holds for
+            both configs.
+        """
         input_tensors = self.get_input_tensors(op)
         assert len(input_tensors) == 1, "input tensors length should be 1"
 
@@ -6816,7 +6939,13 @@ def convert_fake_quant(self, op):
 
         quant_min = 1 if narrow_range else 0
         quant_max = (1 << num_bits) - 1
-        scale = (opt_max - opt_min) / (quant_max - quant_min)
+        num_levels = quant_max - quant_min  # 254 for narrow int8, 255 for standard int8
+
+        # Guard degenerate range: scale == 0 would cause division by zero.
+        if opt_max == opt_min:
+            return relax.op.clip(in_expr, opt_min, opt_max)
+
+        scale = (opt_max - opt_min) / num_levels
 
         zero_point_from_min = quant_min - opt_min / scale
         if zero_point_from_min <= quant_min:
@@ -7312,4 +7441,4 @@ def func(self, data):
         func_attrs["params"] = [tvm.runtime.tensor(arr) for arr in param_value_list]
         relax_mod["main"] = relax_mod["main"].with_attrs(func_attrs)
 
-        return relax_mod
+        return relax_mod
\ No newline at end of file
diff --git a/tests/python/relax/test_frontend_tflite.py b/tests/python/relax/test_frontend_tflite.py
index e9ccea7ad150..bfb35130d0dc 100644
--- a/tests/python/relax/test_frontend_tflite.py
+++ b/tests/python/relax/test_frontend_tflite.py
@@ -556,8 +556,8 @@ def func(self):
     verify(Range)
 
 
-def test_range_dynamic_scalar_inputs_not_supported():
-    """RANGE conversion currently rejects dynamic scalar inputs."""
+def test_range_dynamic():
+    """RANGE with dynamic scalar inputs lowers to relax.op.arange."""
 
     class RangeDynamic(tf.Module):
         @tf.function(
@@ -570,8 +570,7 @@ class RangeDynamic(tf.Module):
         def func(self, start, limit, delta):
             return tf.range(start, limit, delta, dtype=tf.int32)
 
-    with pytest.raises(tvm.error.OpNotImplemented, match="dynamic scalar inputs"):
-        verify(RangeDynamic)
+    verify(RangeDynamic)
 
 
 def test_tile_ir():
@@ -11257,5 +11256,115 @@ def test_unidirectional_sequence_rnn_time_major():
     assert tuple(int(d) for d in out_shape) == (batch, time, num_units)
 
 
+def test_sign():
+    """SIGN → relax.op.sign (unary elemwise, float and int)."""
+
+    class Sign(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(3, 4), dtype=tf.float32)])
+        def func(self, x):
+            return tf.math.sign(x)
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def main(x: R.Tensor((3, 4), dtype="float32")) -> R.Tensor((3, 4), dtype="float32"):
+            R.func_attr({"num_input": 1})
+            with R.dataflow():
+                gv: R.Tensor((3, 4), dtype="float32") = R.sign(x)
+                R.output(gv)
+            return gv
+
+    verify(Sign, Expected)
+
+    class SignInt(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(5,), dtype=tf.int32)])
+        def func(self, x):
+            return tf.math.sign(x)
+
+    verify(SignInt)
+
+
+def test_unique():
+    """UNIQUE → relax.op.unique, two-output (values, inverse_indices)."""
+
+    class Unique(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(6,), dtype=tf.float32)])
+        def func(self, x):
+            y, idx = tf.unique(x)
+            return y, idx
+
+    verify(Unique)
+
+    class UniqueInt(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(8,), dtype=tf.int32)])
+        def func(self, x):
+            y, idx = tf.unique(x)
+            return y, idx
+
+    verify(UniqueInt)
+
+
+def test_bucketize():
+    """BUCKETIZE → relax.op.bucketize with constant boundaries from BucketizeOptions."""
+
+    class Bucketize(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(5,), dtype=tf.float32)])
+        def func(self, x):
+            return tf.raw_ops.Bucketize(input=x, boundaries=[0.0, 1.0, 2.0])
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def main(x: R.Tensor((5,), dtype="float32")) -> R.Tensor((5,), dtype="int32"):
+            R.func_attr({"num_input": 1})
+            with R.dataflow():
+                lv: R.Tensor((3,), dtype="float32") = R.const(
+                    np.array([0.0, 1.0, 2.0], dtype="float32"), "float32"
+                )
+                gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=False)
+                R.output(gv)
+            return gv
+
+    verify(Bucketize, Expected)
+
+    class BucketizeEmpty(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(4,), dtype=tf.float32)])
+        def func(self, x):
+            return tf.raw_ops.Bucketize(input=x, boundaries=[])
+
+    verify(BucketizeEmpty)
+
+
+def test_fake_quant():
+    """FAKE_QUANT — standard range, narrow range, and degenerate (min == max)."""
+
+    class FakeQuantStandard(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 4), dtype=tf.float32)])
+        def func(self, x):
+            return tf.quantization.fake_quant_with_min_max_args(
+                x, min=-1.0, max=1.0, num_bits=8, narrow_range=False
+            )
+
+    verify(FakeQuantStandard)
+
+    class FakeQuantNarrowRange(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 4), dtype=tf.float32)])
+        def func(self, x):
+            return tf.quantization.fake_quant_with_min_max_args(
+                x, min=-1.0, max=1.0, num_bits=8, narrow_range=True
+            )
+
+    verify(FakeQuantNarrowRange)
+
+    class FakeQuant4Bit(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(3, 3), dtype=tf.float32)])
+        def func(self, x):
+            return tf.quantization.fake_quant_with_min_max_args(
+                x, min=0.0, max=15.0, num_bits=4, narrow_range=False
+            )
+
+    verify(FakeQuant4Bit)
+
+
 if __name__ == "__main__":
-    pytest.main(["-s", __file__])
+    pytest.main(["-s", __file__])
\ No newline at end of file

From 55acb52685f01784263936d012e48c19ca2cb9ef Mon Sep 17 00:00:00 2001
From: 0xjah <ahmad.jahaf@proton.me>
Date: Fri, 8 May 2026 23:29:18 +0300
Subject: [PATCH 2/4] [Relax][TFLite] Fix quantization and bucketize behavior
 in operator converter

---
 python/tvm/relax/frontend/tflite/tflite_frontend.py | 6 +++---
 tests/python/relax/test_frontend_tflite.py          | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/tvm/relax/frontend/tflite/tflite_frontend.py b/python/tvm/relax/frontend/tflite/tflite_frontend.py
index 27ffd054e1f9..a869c05428cc 100644
--- a/python/tvm/relax/frontend/tflite/tflite_frontend.py
+++ b/python/tvm/relax/frontend/tflite/tflite_frontend.py
@@ -1584,7 +1584,7 @@ def convert_relu_0_to_1(self, op):
             zero_point_val = get_scalar_from_constant(input_tensor.qnn_params["zero_point"])
 
             def quantize(x):
-                return float(round(x / scale_val) + zero_point_val)
+                return float(math.floor(x / scale_val + 0.5) + zero_point_val)
 
             input_tensor_type_str = self.get_tensor_type_str(input_tensor.tensor.Type())
             qmin = float(tvm.tirx.min_value(input_tensor_type_str).value)
@@ -4895,9 +4895,9 @@ def convert_bucketize(self, op):
         boundaries = [
             bucket_options.Boundaries(i) for i in range(bucket_options.BoundariesLength())
         ]
-        boundaries_const = relax.op.const(np.array(boundaries, dtype="float32"))
+        boundaries_const = relax.const(np.array(boundaries, dtype="float32"))
 
-        out = relax.op.bucketize(in_expr, boundaries_const, right=False)
+        out = relax.op.bucketize(in_expr, boundaries_const, right=True)
         return out
 
     def convert_cast(self, op):
diff --git a/tests/python/relax/test_frontend_tflite.py b/tests/python/relax/test_frontend_tflite.py
index bfb35130d0dc..7bc8a0333217 100644
--- a/tests/python/relax/test_frontend_tflite.py
+++ b/tests/python/relax/test_frontend_tflite.py
@@ -11321,7 +11321,7 @@ def main(x: R.Tensor((5,), dtype="float32")) -> R.Tensor((5,), dtype="int32"):
                 lv: R.Tensor((3,), dtype="float32") = R.const(
                     np.array([0.0, 1.0, 2.0], dtype="float32"), "float32"
                 )
-                gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=False)
+                gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=True)
                 R.output(gv)
             return gv
 

From a1403c1484030610cd83b09410374af0c0e0810e Mon Sep 17 00:00:00 2001
From: 0xjah <ahmad.jahaf@proton.me>
Date: Fri, 8 May 2026 23:31:15 +0300
Subject: [PATCH 3/4] [Relax][TFLite] Update bucketize test expectation and
 add degenerate FAKE_QUANT test

---
 tests/python/relax/test_frontend_tflite.py | 28 +++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/tests/python/relax/test_frontend_tflite.py b/tests/python/relax/test_frontend_tflite.py
index 7bc8a0333217..f18e106e0be1 100644
--- a/tests/python/relax/test_frontend_tflite.py
+++ b/tests/python/relax/test_frontend_tflite.py
@@ -11321,7 +11321,7 @@ def main(x: R.Tensor((5,), dtype="float32")) -> R.Tensor((5,), dtype="int32"):
                 lv: R.Tensor((3,), dtype="float32") = R.const(
                     np.array([0.0, 1.0, 2.0], dtype="float32"), "float32"
                 )
-                gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=True)
+                gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=False)
                 R.output(gv)
             return gv
 
@@ -11365,6 +11365,32 @@ def func(self, x):
 
     verify(FakeQuant4Bit)
 
+    # Degenerate range (min == max → scale == 0).  The fix must emit a plain
+    # clip rather than dividing by zero.  We check the IR directly to confirm
+    # that the output is exactly R.clip(x, min=v, max=v) and that no division
+    # node is present.
+    class FakeQuantDegenerate(tf.Module):
+        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)])
+        def func(self, x):
+            return tf.quantization.fake_quant_with_min_max_args(
+                x, min=0.5, max=0.5, num_bits=8, narrow_range=False
+            )
+
+    @I.ir_module
+    class ExpectedDegenerate:
+        @R.function
+        def main(x: R.Tensor((2, 3), dtype="float32")) -> R.Tensor((2, 3), dtype="float32"):
+            R.func_attr({"num_input": 1})
+            with R.dataflow():
+                gv: R.Tensor((2, 3), dtype="float32") = R.clip(x, min=0.5, max=0.5)
+                R.output(gv)
+            return gv
+
+    mod = verify(FakeQuantDegenerate, ExpectedDegenerate)
+    # Double-check: no division node must appear in the compiled IR.
+    ir_text = mod.script()
+    assert "R.divide(" not in ir_text, "Degenerate FAKE_QUANT must not emit a division node"
+
 
 if __name__ == "__main__":
     pytest.main(["-s", __file__])
\ No newline at end of file

From 481ebd2fe6ed2f259d7e85dd33b7ef0a40d63fbe Mon Sep 17 00:00:00 2001
From: 0xjah <ahmad.jahaf@proton.me>
Date: Sat, 30 May 2026 07:36:33 +0300
Subject: [PATCH 4/4] [Relax][TFLite] Sync with upstream and apply pending
 quantization fix

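Rebase branch onto upstream/main (apache/tvm), resolving conflicts from
newly merged LSTM, RNN, CALL/CALL_ONCE, and resource-variable ops. Restore
the previously uncommitted float32 guard in get_tensors so that float32
tensors carrying qnn_params annotations (e.g. FAKE_QUANT outputs) are not
treated as quantized; non-float32 quantized tensors now raise
NotImplementedError. Also pass out_int32=True to bucketize and rebuild the
FAKE_QUANT tests from a hand-written flatbuffer.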
---
 .../relax/frontend/tflite/tflite_frontend.py  |  28 +++--
 tests/python/relax/test_frontend_tflite.py    | 100 ++++++++++++------
 2 files changed, 84 insertions(+), 44 deletions(-)

diff --git a/python/tvm/relax/frontend/tflite/tflite_frontend.py b/python/tvm/relax/frontend/tflite/tflite_frontend.py
index a869c05428cc..4476fd34bfe1 100644
--- a/python/tvm/relax/frontend/tflite/tflite_frontend.py
+++ b/python/tvm/relax/frontend/tflite/tflite_frontend.py
@@ -897,10 +897,20 @@ def get_tensors(self, tensors_idx_list):
 
                 # Check that the scale and zero points are valid.
                 if is_qnn_params_valid:
-                    qnn_params = dict()
-                    qnn_params["scale"] = relax.const(scale, "float32")
-                    qnn_params["zero_point"] = relax.const(zero_point, "int32")
-                    qnn_params["axis"] = int(tflite_qnn_params.QuantizedDimension())
+                    from tflite.TensorType import TensorType as TFLiteTensorType
+
+                    if tensor.Type() == TFLiteTensorType.FLOAT32:
+                        # Float32 tensors may carry qnn_params as annotations
+                        # (e.g. FAKE_QUANT outputs) but are not truly quantized;
+                        # treat them as unquantized so the converter can proceed.
+                        is_qnn_params_valid = False
+                    else:
+                        qnn_params = dict()
+                        qnn_params["scale"] = relax.const(scale, "float32")
+                        qnn_params["zero_point"] = relax.const(zero_point, "int32")
+                        raise NotImplementedError(
+                            "Quantized TFLite models are not yet supported in the Relax frontend"
+                        )
             return_list.append(TensorWrapper(tensor_idx, tensor, buffer, qnn_params))
         return return_list
 
@@ -4897,7 +4907,7 @@ def convert_bucketize(self, op):
         ]
         boundaries_const = relax.const(np.array(boundaries, dtype="float32"))
 
-        out = relax.op.bucketize(in_expr, boundaries_const, right=True)
+        out = relax.op.bucketize(in_expr, boundaries_const, out_int32=True, right=True)
         return out
 
     def convert_cast(self, op):
@@ -6958,13 +6968,13 @@ def convert_fake_quant(self, op):
         nudged_min = (quant_min - nudged_zero_point) * scale
         nudged_max = (quant_max - nudged_zero_point) * scale
 
-        nudged_min_expr = relax.op.const(nudged_min)
+        nudged_min_expr = relax.const(nudged_min)
         clamped = relax.op.clip(in_expr, nudged_min, nudged_max)
         clamped_shifted = relax.op.subtract(clamped, nudged_min_expr)
 
-        half = relax.op.const(0.5)
-        one = relax.op.const(1.0)
-        scale_expr = relax.op.const(scale)
+        half = relax.const(0.5)
+        one = relax.const(1.0)
+        scale_expr = relax.const(scale)
         inv_scale = relax.op.divide(one, scale_expr)
         rounded = relax.op.floor(_op.add(_op.multiply(clamped_shifted, inv_scale), half))
         return relax.op.add(_op.multiply(rounded, scale_expr), nudged_min_expr)
diff --git a/tests/python/relax/test_frontend_tflite.py b/tests/python/relax/test_frontend_tflite.py
index f18e106e0be1..ce1babf84acd 100644
--- a/tests/python/relax/test_frontend_tflite.py
+++ b/tests/python/relax/test_frontend_tflite.py
@@ -70,7 +70,7 @@ def verify(TestClass, expected=None):
 
     # Run E2E test only on nightly
     if "CI_ENV_NIGHTLY" not in os.environ:
-        return
+        return mod
 
     # Inputs
     tf_inputs = []
@@ -98,6 +98,8 @@ def verify(TestClass, expected=None):
     else:
         np.testing.assert_allclose(tf_output.numpy(), tvm_output.numpy(), rtol=1e-5, atol=1e-5)
 
+    return mod
+
 
 def _verify_random_with_inputs(cfunc, inputs):
     """E2E verify random ops by shape/dtype and TVM seeded self-consistency."""
@@ -11318,10 +11320,12 @@ class Expected:
         def main(x: R.Tensor((5,), dtype="float32")) -> R.Tensor((5,), dtype="int32"):
             R.func_attr({"num_input": 1})
             with R.dataflow():
-                lv: R.Tensor((3,), dtype="float32") = R.const(
-                    np.array([0.0, 1.0, 2.0], dtype="float32"), "float32"
+                gv: R.Tensor((5,), dtype="int32") = R.bucketize(
+                    x,
+                    R.const(np.array([0.0, 1.0, 2.0], dtype="float32"), "float32"),
+                    out_int32=True,
+                    right=True,
                 )
-                gv: R.Tensor((5,), dtype="int32") = R.bucketize(x, lv, right=False)
                 R.output(gv)
             return gv
 
@@ -11335,46 +11339,73 @@ def func(self, x):
     verify(BucketizeEmpty)
 
 
-def test_fake_quant():
-    """FAKE_QUANT — standard range, narrow range, and degenerate (min == max)."""
+def _build_fake_quant_model(shape, opt_min, opt_max, num_bits=8, narrow_range=False):
+    """Build a minimal TFLite flatbuffer containing a single FAKE_QUANT op.
 
-    class FakeQuantStandard(tf.Module):
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 4), dtype=tf.float32)])
-        def func(self, x):
-            return tf.quantization.fake_quant_with_min_max_args(
-                x, min=-1.0, max=1.0, num_bits=8, narrow_range=False
-            )
+    tf.quantization.fake_quant_with_min_max_args folds into QUANTIZE+DEQUANTIZE
+    in TFLite 2.x and cannot be used to exercise the FAKE_QUANT (opcode 80)
+    converter path.  This helper builds the flatbuffer directly.
+    """
+    _tfl_fq = _get_tflite_schema_module("FakeQuantOptions")
+    builder = flatbuffers.Builder(512)
 
-    verify(FakeQuantStandard)
+    _tfl_fq.FakeQuantOptionsStart(builder)
+    _tfl_fq.FakeQuantOptionsAddMin(builder, opt_min)
+    _tfl_fq.FakeQuantOptionsAddMax(builder, opt_max)
+    _tfl_fq.FakeQuantOptionsAddNumBits(builder, num_bits)
+    _tfl_fq.FakeQuantOptionsAddNarrowRange(builder, narrow_range)
+    fq_opts = _tfl_fq.FakeQuantOptionsEnd(builder)
 
-    class FakeQuantNarrowRange(tf.Module):
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 4), dtype=tf.float32)])
-        def func(self, x):
-            return tf.quantization.fake_quant_with_min_max_args(
-                x, min=-1.0, max=1.0, num_bits=8, narrow_range=True
-            )
+    input_tensor = _build_tensor(builder, buffer_idx=1, shape=list(shape))
+    output_tensor = _build_tensor(builder, buffer_idx=2, shape=list(shape))
 
-    verify(FakeQuantNarrowRange)
+    op = _build_operator(
+        builder,
+        opcode_index=0,
+        inputs=[0],
+        outputs=[1],
+        builtin_options_type=_tfl_builtin_options.FakeQuantOptions,
+        builtin_options=fq_opts,
+    )
+    opcode = _build_operator_code(builder, _tfl_builtin_operator.FAKE_QUANT)
 
-    class FakeQuant4Bit(tf.Module):
-        @tf.function(input_signature=[tf.TensorSpec(shape=(3, 3), dtype=tf.float32)])
-        def func(self, x):
-            return tf.quantization.fake_quant_with_min_max_args(
-                x, min=0.0, max=15.0, num_bits=4, narrow_range=False
-            )
+    subgraph = _build_subgraph(
+        builder,
+        tensors=[input_tensor, output_tensor],
+        operators=[op],
+        inputs=[0],
+        outputs=[1],
+    )
 
-    verify(FakeQuant4Bit)
+    buffers = [_build_buffer(builder), _build_buffer(builder), _build_buffer(builder)]
+    return _finish_tflite_model(
+        builder, subgraph=subgraph, operator_codes=[opcode], buffers=buffers
+    )
+
+
+def _load_fake_quant_module(shape, opt_min, opt_max, num_bits=8, narrow_range=False):
+    model_bytes = _build_fake_quant_model(shape, opt_min, opt_max, num_bits, narrow_range)
+    if hasattr(tflite.Model, "Model"):
+        tflite_model = tflite.Model.Model.GetRootAsModel(model_bytes, 0)
+    else:
+        tflite_model = tflite.Model.GetRootAsModel(model_bytes, 0)
+    mod = from_tflite(tflite_model)
+    mod["main"] = mod["main"].without_attr("params")
+    return mod
+
+
+def test_fake_quant():
+    """FAKE_QUANT op — standard range, narrow range, 4-bit, and degenerate (min == max)."""
+    # Standard, narrow-range, and 4-bit cases: verify conversion succeeds (no crash).
+    _load_fake_quant_module((2, 4), -1.0, 1.0, num_bits=8, narrow_range=False)
+    _load_fake_quant_module((2, 4), -1.0, 1.0, num_bits=8, narrow_range=True)
+    _load_fake_quant_module((3, 3), 0.0, 15.0, num_bits=4, narrow_range=False)
 
     # Degenerate range (min == max → scale == 0).  The fix must emit a plain
     # clip rather than dividing by zero.  We check the IR directly to confirm
     # that the output is exactly R.clip(x, min=v, max=v) and that no division
     # node is present.
-    class FakeQuantDegenerate(tf.Module):
-        @tf.function(input_signature=[tf.TensorSpec(shape=(2, 3), dtype=tf.float32)])
-        def func(self, x):
-            return tf.quantization.fake_quant_with_min_max_args(
-                x, min=0.5, max=0.5, num_bits=8, narrow_range=False
-            )
+    mod = _load_fake_quant_module((2, 3), 0.5, 0.5, num_bits=8, narrow_range=False)
 
     @I.ir_module
     class ExpectedDegenerate:
@@ -11386,8 +11417,7 @@ def main(x: R.Tensor((2, 3), dtype="float32")) -> R.Tensor((2, 3), dtype="float3
                 R.output(gv)
             return gv
 
-    mod = verify(FakeQuantDegenerate, ExpectedDegenerate)
-    # Double-check: no division node must appear in the compiled IR.
+    tvm.ir.assert_structural_equal(mod, ExpectedDegenerate)
     ir_text = mod.script()
     assert "R.divide(" not in ir_text, "Degenerate FAKE_QUANT must not emit a division node"