From 4433d1fc1bc1a5bd5c25275c44ca3564cd2b940f Mon Sep 17 00:00:00 2001
From: Devin Lai <markauto75@gmail.com>
Date: Wed, 24 Jun 2026 02:39:17 +0800
Subject: [PATCH 1/7] [MLX] Add leaky_relu op handler (#20305)

Summary:
- Add MLX lowering for aten.leaky_relu.default using existing
GreaterEqual, Multiply, and Where nodes.
- Add focused MLX op tests for custom negative_slope values, including a
slope above 1.

Test Plan:
- python -m py_compile backends/mlx/ops.py backends/mlx/test/test_ops.py
- git diff --check HEAD^..HEAD
- PATH="$PWD/.venv-mlx/bin:$PATH" .venv-mlx/bin/lintrunner
backends/mlx/ops.py backends/mlx/test/test_ops.py
- .venv-mlx/bin/python -m executorch.backends.mlx.test.run_all_tests
leaky_relu --timeout 180

cc @metascroy
---
 backends/mlx/ops.py           | 59 +++++++++++++++++++++++++++++++++++
 backends/mlx/test/test_ops.py | 54 ++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)

diff --git a/backends/mlx/ops.py b/backends/mlx/ops.py
index 44536e675da..e3a636466c1 100644
--- a/backends/mlx/ops.py
+++ b/backends/mlx/ops.py
@@ -163,6 +163,8 @@
 from executorch.exir.dialects._ops import ops as exir_ops
 from torch.fx.node import Node
 
+_LEAKY_RELU_DEFAULT_NEGATIVE_SLOPE = 0.01
+
 
 def require_static_int(value: Any, param_name: str, op_name: str) -> None:
     """
@@ -2786,6 +2788,63 @@ def _relu_handler(P: MLXProgramBuilder, n: Node) -> Slot:
     return out
 
 
+@REGISTRY.register(target=[torch.ops.aten.leaky_relu.default])
+def _leaky_relu_handler(P: MLXProgramBuilder, n: Node) -> Slot:
+    """Handle aten.leaky_relu.default - leaky rectified linear unit.
+
+    leaky_relu(x) = x          if x >= 0
+                  = slope * x  otherwise
+
+    Implemented as where(x >= 0, x, slope * x) so it stays correct for any
+    negative_slope (including values > 1), matching eager PyTorch.
+    """
+    args = P.args(n)
+    require_args(args, 1, 2, "aten.leaky_relu")
+    require_kwargs(P.kwargs(n), set(), "aten.leaky_relu")
+
+    x = args[0]
+    negative_slope = _LEAKY_RELU_DEFAULT_NEGATIVE_SLOPE
+    if len(args) > 1 and args[1] is not None:
+        negative_slope = float(args[1])
+
+    x_meta = n.args[0].meta.get("val")
+    if x_meta is None:
+        raise ValueError("Input tensor metadata not found for leaky_relu")
+    dtype = x_meta.dtype
+
+    zero_slot = emit_lifted_constant(P, 0.0, dtype)
+    slope_slot = emit_lifted_constant(P, negative_slope, dtype)
+
+    _, cond_slot = P.make_tmp_slot()
+    P.emit(
+        GreaterEqualNode(
+            a=P.slot_to_tid(x),
+            b=P.slot_to_tid(zero_slot),
+            out=P.slot_to_tid(cond_slot),
+        )
+    )
+
+    _, scaled_slot = P.make_tmp_slot()
+    P.emit(
+        MultiplyNode(
+            a=P.slot_to_tid(slope_slot),
+            b=P.slot_to_tid(x),
+            out=P.slot_to_tid(scaled_slot),
+        )
+    )
+
+    out = P.make_or_get_slot(n)
+    P.emit(
+        WhereNode(
+            condition=P.slot_to_tid(cond_slot),
+            x=P.slot_to_tid(x),
+            y=P.slot_to_tid(scaled_slot),
+            out=P.slot_to_tid(out),
+        )
+    )
+    return out
+
+
 @REGISTRY.register(target=[torch.ops.aten._log_softmax.default])
 def _log_softmax_handler(P: MLXProgramBuilder, n: Node) -> Slot:
     """Handle aten._log_softmax.default - log of softmax.
diff --git a/backends/mlx/test/test_ops.py b/backends/mlx/test/test_ops.py
index 8f52116f6b8..e96c8075903 100644
--- a/backends/mlx/test/test_ops.py
+++ b/backends/mlx/test/test_ops.py
@@ -405,6 +405,60 @@ def create_inputs(self) -> Tuple[torch.Tensor, ...]:
         return (x,)
 
 
+class LeakyReLUModel(nn.Module):
+    """Model that applies leaky_relu with an optional negative slope."""
+
+    def __init__(self, negative_slope: Optional[float] = 0.01):
+        super().__init__()
+        self.negative_slope = negative_slope
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if self.negative_slope is None:
+            return torch.nn.functional.leaky_relu(x)
+        return torch.nn.functional.leaky_relu(x, negative_slope=self.negative_slope)
+
+
+@register_test
+class LeakyReLUTest(OpTestCase):
+    """Test case for leaky_relu activation with various negative slopes."""
+
+    name = "leaky_relu"
+    rtol = 1e-5
+    atol = 1e-5
+
+    def __init__(
+        self,
+        shape: Tuple[int, ...] = (2, 3, 4),
+        negative_slope: Optional[float] = 0.01,
+    ):
+        self.shape = shape
+        self.negative_slope = negative_slope
+        shape_str = "x".join(str(s) for s in shape)
+        slope_str = "default" if negative_slope is None else f"slope{negative_slope}"
+        self.name = f"leaky_relu_{slope_str}_{shape_str}"
+
+    @classmethod
+    def get_test_configs(cls) -> List["LeakyReLUTest"]:
+        return [
+            cls(shape=(2, 3, 4), negative_slope=0.01),
+            cls(shape=(2, 3, 4), negative_slope=None),
+            cls(shape=(4, 8), negative_slope=0.1),
+            cls(shape=(10,), negative_slope=0.2),
+            cls(shape=(10,), negative_slope=1.5),
+            cls(shape=(2, 8, 16), negative_slope=0.01),
+        ]
+
+    def create_model(self) -> nn.Module:
+        return LeakyReLUModel(self.negative_slope)
+
+    def create_inputs(self) -> Tuple[torch.Tensor, ...]:
+        numel = 1
+        for size in self.shape:
+            numel *= size
+        x = torch.linspace(-4.0, 4.0, steps=numel).reshape(self.shape)
+        return (x,)
+
+
 class GELUModel(nn.Module):
     """Simple model using GELU activation."""
 

From 58447b268273be4e4b333527656805308fa15e3e Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Tue, 23 Jun 2026 12:37:26 -0700
Subject: [PATCH 2/7] [MLX] Support multiple KV cache sessions, with shared
 constant data (#20408)

The MLX backend already keeps mutable state in an execution context
separate from its constant data. This PR exposes a way for external
callers to configure that state, and uses it to support serve.py on MLX,
like the CUDA backend.
---
 .github/workflows/mlx.yml                     |   6 +-
 backends/mlx/CMakeLists.txt                   |   6 +-
 .../mlx/custom_kernel_ops/gated_delta_rule.py |  37 ++
 .../test/test_gated_delta_rule.py             |   5 +-
 backends/mlx/runtime/MLXBackend.cpp           |  16 +
 backends/mlx/runtime/mlx_mutable_state.cpp    | 339 ++++++++++++++++++
 backends/mlx/runtime/mlx_mutable_state.h      | 204 +++++++++++
 backends/mlx/test/CMakeLists.txt              |  19 +
 backends/mlx/test/mlx_mutable_state_test.cpp  | 132 +++++++
 examples/models/qwen3_5_moe/CMakeLists.txt    |   1 +
 examples/models/qwen3_5_moe/CMakePresets.json |   4 +-
 examples/models/qwen3_5_moe/README.md         |  60 ++++
 .../qwen3_5_moe/mlx_source_transformations.py |  26 +-
 .../models/qwen3_5_moe/qwen35_moe_engine.cpp  |  60 ++--
 .../models/qwen3_5_moe/qwen35_moe_engine.h    |  34 +-
 15 files changed, 888 insertions(+), 61 deletions(-)
 create mode 100644 backends/mlx/runtime/mlx_mutable_state.cpp
 create mode 100644 backends/mlx/runtime/mlx_mutable_state.h
 create mode 100644 backends/mlx/test/mlx_mutable_state_test.cpp

diff --git a/.github/workflows/mlx.yml b/.github/workflows/mlx.yml
index 5a4ccbb4952..167ceb7da83 100644
--- a/.github/workflows/mlx.yml
+++ b/.github/workflows/mlx.yml
@@ -66,7 +66,11 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Build test runners"
-        ${CONDA_RUN} cmake --build cmake-out --target op_test_runner multi_thread_test_runner -j$(( $(sysctl -n hw.ncpu) - 1 ))
+        ${CONDA_RUN} cmake --build cmake-out --target op_test_runner multi_thread_test_runner mlx_mutable_state_test -j$(( $(sysctl -n hw.ncpu) - 1 ))
+        echo "::endgroup::"
+
+        echo "::group::Run mutable-state (multi-session) unit test"
+        ./cmake-out/backends/mlx/test/mlx_mutable_state_test
         echo "::endgroup::"
 
         echo "::group::Run op unit tests"
diff --git a/backends/mlx/CMakeLists.txt b/backends/mlx/CMakeLists.txt
index 43968d09b5d..acb96fb1ed9 100644
--- a/backends/mlx/CMakeLists.txt
+++ b/backends/mlx/CMakeLists.txt
@@ -255,8 +255,10 @@ option(ET_MLX_ALLOW_CUSTOM_KERNEL_EXECUTION
        ON
 )
 
-set(_mlx_backend__srcs ${CMAKE_CURRENT_SOURCE_DIR}/runtime/MLXLoader.cpp
-                       ${CMAKE_CURRENT_SOURCE_DIR}/runtime/MLXBackend.cpp
+set(_mlx_backend__srcs
+    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/MLXLoader.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/MLXBackend.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/mlx_mutable_state.cpp
 )
 
 add_library(mlxdelegate ${_mlx_backend__srcs})
diff --git a/backends/mlx/custom_kernel_ops/gated_delta_rule.py b/backends/mlx/custom_kernel_ops/gated_delta_rule.py
index 423ffd0b034..41eb8ce7b98 100644
--- a/backends/mlx/custom_kernel_ops/gated_delta_rule.py
+++ b/backends/mlx/custom_kernel_ops/gated_delta_rule.py
@@ -53,6 +53,15 @@ def gated_delta_rule(
     B, T_len, Hk, Dk = q.shape
     Hv, Dv = v.shape[-2:]
 
+    # The Metal kernel maps each v-head to its k-head group
+    # (hk_idx = hv_idx / (Hv / Hk)); mirror that here with repeat_interleave on
+    # the head dim so the eager reference also supports GQA (Hk != Hv) rather
+    # than relying on broadcasting (needs Hk == Hv). Assumes Hv % Hk == 0.
+    if Hk != Hv:
+        q = q.repeat_interleave(Hv // Hk, dim=2)
+        k = k.repeat_interleave(Hv // Hk, dim=2)
+        Hk = Hv
+
     s = state.clone()
 
     ys = []
@@ -101,6 +110,7 @@ def gated_delta_rule_fake(
     IntOrVid,
     MetalKernelNode,
     MultiplyNode,
+    RepeatNode,
     ScanNode,
     SubtractNode,
     SumNode,
@@ -450,6 +460,33 @@ def _emit_scan(self, P: MLXProgramBuilder, n: Node) -> Slot:
             ]
         )
 
+        # GQA: q/k carry Hk heads but the recurrence state/v have Hv heads. Expand
+        # q/k to Hv (repeat_interleave on the head axis) so the per-step broadcasts
+        # match, mirroring the Metal kernel's hk_idx = hv_idx / (Hv / Hk).
+        Hk = int(self.q_node.meta["val"].shape[-2])
+        Hv = int(self.v_node.meta["val"].shape[-2])
+        if Hk != Hv:
+            rep = IntOrVid.from_literal(Hv // Hk)
+            _, q_exp = P.make_tmp_slot()
+            P.emit(
+                RepeatNode(
+                    x=P.slot_to_tid(q_slot),
+                    out=P.slot_to_tid(q_exp),
+                    repeats=rep,
+                    axis=2,
+                )
+            )
+            _, k_exp = P.make_tmp_slot()
+            P.emit(
+                RepeatNode(
+                    x=P.slot_to_tid(k_slot),
+                    out=P.slot_to_tid(k_exp),
+                    repeats=rep,
+                    axis=2,
+                )
+            )
+            q_slot, k_slot = q_exp, k_exp
+
         # Carry needs a writable slot. This is node n's persistent output (the
         # mutated state), so it must be a node-owned slot — not a temp slot, whose
         # id is reclaimed on tmp_scope exit and would be read as dead by a later
diff --git a/backends/mlx/custom_kernel_ops/test/test_gated_delta_rule.py b/backends/mlx/custom_kernel_ops/test/test_gated_delta_rule.py
index 0a7e6a687f9..dfee111e74b 100644
--- a/backends/mlx/custom_kernel_ops/test/test_gated_delta_rule.py
+++ b/backends/mlx/custom_kernel_ops/test/test_gated_delta_rule.py
@@ -96,9 +96,8 @@ def forward(
         g: torch.Tensor,  # [B, T, Hv]
         beta: torch.Tensor,  # [B, T, Hv]
     ) -> torch.Tensor:
-        if self.head_repeat > 1:
-            q = q.repeat_interleave(self.head_repeat, dim=2)
-            k = k.repeat_interleave(self.head_repeat, dim=2)
+        # Pass native Hk (no repeat_interleave): the op itself must handle
+        # GQA head expansion (kernel via hk_idx mapping, scan/eager internally).
         return torch.ops.mlx.gated_delta_rule(
             q, k, v, g, beta, self.state, use_custom_kernel=self.use_custom_kernel
         )
diff --git a/backends/mlx/runtime/MLXBackend.cpp b/backends/mlx/runtime/MLXBackend.cpp
index 5bd3bf263d1..0dbdec22436 100644
--- a/backends/mlx/runtime/MLXBackend.cpp
+++ b/backends/mlx/runtime/MLXBackend.cpp
@@ -9,6 +9,7 @@
 #include "MLXExecutor.h"
 #include "MLXInterpreter.h"
 #include "MLXLoader.h"
+#include "mlx_mutable_state.h"
 
 #include <executorch/runtime/backend/interface.h>
 #include <executorch/runtime/core/error.h>
@@ -277,6 +278,12 @@ class MLXBackend final : public ::executorch::runtime::BackendInterface {
         eval(handle->constants.tensors);
       }
 
+      // Register the handle with the per-session mutable-state manager. This is
+      // a no-op unless a multi-session owner is active for this load (see
+      // mlx_mutable_state.h); single-session execution is unaffected.
+      mutable_state_note_handle(
+          handle, &handle->program, &handle->mutable_buffers);
+
     } catch (const std::exception& e) {
       ET_LOG(Error, "Failed to load MLX program: %s", e.what());
       handle->~MLXHandle();
@@ -366,6 +373,14 @@ class MLXBackend final : public ::executorch::runtime::BackendInterface {
           }
         }
 
+        // Select the active session's mutable buffers (KV cache, recurrent/conv
+        // state) before running. No-op for single-session handles; weights stay
+        // shared via ExecutionState::constants.
+        if (Error rebind_err = mutable_state_rebind_for_execute(h, h->state);
+            rebind_err != Error::Ok) {
+          return rebind_err;
+        }
+
         // Run the MLX program (builds lazy computation graph)
         h->interpreter.run(program, h->state, h->stream);
 
@@ -431,6 +446,7 @@ class MLXBackend final : public ::executorch::runtime::BackendInterface {
   void destroy(DelegateHandle* handle) const override {
     std::lock_guard<std::mutex> lock(mlx_global_mutex());
     if (handle != nullptr) {
+      mutable_state_forget_handle(handle);
       auto* mlx_handle = static_cast<MLXHandle*>(handle);
       mlx_handle->~MLXHandle();
     }
diff --git a/backends/mlx/runtime/mlx_mutable_state.cpp b/backends/mlx/runtime/mlx_mutable_state.cpp
new file mode 100644
index 00000000000..2f00d917136
--- /dev/null
+++ b/backends/mlx/runtime/mlx_mutable_state.cpp
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "mlx_mutable_state.h"
+
+#include "MLXExecutor.h"
+#include "MLXLoader.h"
+
+#include <executorch/runtime/platform/log.h>
+
+#include <mutex>
+#include <unordered_map>
+
+namespace executorch {
+namespace backends {
+namespace mlx {
+
+using ::executorch::runtime::Error;
+using ::executorch::runtime::Result;
+
+namespace {
+
+struct HandleInfo {
+  const MLXProgram* program{nullptr};
+  MutableBufferData* default_buffers{nullptr};
+};
+
+struct Context {
+  // Delegate handles associated with this loaded program (one per loaded
+  // method). Keyed by opaque MLXHandle pointer.
+  std::unordered_map<const void*, HandleInfo> handles;
+  // Per-session mutable buffers: token -> (handle -> buffers). Allocated lazily
+  // on first execute for a given (session, handle).
+  std::unordered_map<int, std::unordered_map<const void*, MutableBufferData>>
+      sessions;
+  int next_token{0};
+  // Sticky setup failure. Once set (e.g. by nested load scopes), available(),
+  // validate_coverage(), create_session(), and rebind fail consistently.
+  Error build_error{Error::Ok};
+};
+
+// Process-global registry. MLX serializes execution via its own global mutex
+// and the engine serializes per session, but the registry itself is guarded
+// here so context/session lifecycle calls from other threads are safe.
+std::mutex& registry_mutex() {
+  static std::mutex m;
+  return m;
+}
+
+std::unordered_map<MutableStateContext, Context>& contexts() {
+  static std::unordered_map<MutableStateContext, Context> c;
+  return c;
+}
+
+std::unordered_map<const void*, MutableStateContext>& handle_ctx() {
+  static std::unordered_map<const void*, MutableStateContext> m;
+  return m;
+}
+
+MutableStateContext g_next_ctx = 1; // 0 is reserved as invalid.
+
+// Thread-local load scope and active (ctx, session) selection.
+thread_local MutableStateContext tl_loading_ctx = kInvalidMutableContext;
+thread_local MutableStateContext tl_active_ctx = kInvalidMutableContext;
+thread_local int tl_active_token = kNoMutableSession;
+
+} // namespace
+
+namespace detail {
+
+MutableStateContext mutable_state_create_context() {
+  std::lock_guard<std::mutex> g(registry_mutex());
+  MutableStateContext ctx = g_next_ctx++;
+  if (ctx == kInvalidMutableContext) {
+    ctx = g_next_ctx++;
+  }
+  contexts()[ctx];
+  return ctx;
+}
+
+void mutable_state_destroy_context(MutableStateContext ctx) {
+  std::lock_guard<std::mutex> g(registry_mutex());
+  auto it = contexts().find(ctx);
+  if (it == contexts().end()) {
+    return;
+  }
+  for (const auto& kv : it->second.handles) {
+    handle_ctx().erase(kv.first);
+  }
+  contexts().erase(it);
+}
+
+void mutable_state_begin_load(MutableStateContext ctx) {
+  if (tl_loading_ctx != kInvalidMutableContext) {
+    // Nested load scopes would silently overwrite the thread-local association.
+    // Mark both the already-active and the new context invalid instead.
+    std::lock_guard<std::mutex> g(registry_mutex());
+    auto active = contexts().find(tl_loading_ctx);
+    if (active != contexts().end()) {
+      active->second.build_error = Error::InvalidState;
+    }
+    auto nested = contexts().find(ctx);
+    if (nested != contexts().end()) {
+      nested->second.build_error = Error::InvalidState;
+    }
+    ET_LOG(Error, "mutable_state: nested load scopes are not supported");
+    tl_loading_ctx = kInvalidMutableContext;
+    return;
+  }
+  tl_loading_ctx = ctx;
+}
+
+void mutable_state_end_load() {
+  tl_loading_ctx = kInvalidMutableContext;
+}
+
+bool mutable_state_available(MutableStateContext ctx) {
+  if (ctx == kInvalidMutableContext) {
+    return false;
+  }
+  std::lock_guard<std::mutex> g(registry_mutex());
+  auto it = contexts().find(ctx);
+  return it != contexts().end() && it->second.build_error == Error::Ok &&
+      !it->second.handles.empty();
+}
+
+int64_t mutable_state_bytes_per_session(MutableStateContext ctx) {
+  std::lock_guard<std::mutex> g(registry_mutex());
+  auto it = contexts().find(ctx);
+  if (it == contexts().end()) {
+    return 0;
+  }
+  int64_t total = 0;
+  for (const auto& kv : it->second.handles) {
+    const MutableBufferData* bufs = kv.second.default_buffers;
+    if (bufs == nullptr) {
+      continue;
+    }
+    for (const auto& t : bufs->tensors) {
+      if (t.has_value()) {
+        total += static_cast<int64_t>(t->nbytes());
+      }
+    }
+  }
+  return total;
+}
+
+Error mutable_state_validate_coverage(MutableStateContext ctx) {
+  std::lock_guard<std::mutex> g(registry_mutex());
+  auto it = contexts().find(ctx);
+  if (it == contexts().end()) {
+    return Error::InvalidArgument;
+  }
+  if (it->second.build_error != Error::Ok) {
+    return it->second.build_error;
+  }
+  // MLX clones all mutable buffers by tid; there is no FQN coverage to verify.
+  return Error::Ok;
+}
+
+Result<int> mutable_state_create_session(MutableStateContext ctx) {
+  std::lock_guard<std::mutex> g(registry_mutex());
+  auto it = contexts().find(ctx);
+  if (it == contexts().end()) {
+    ET_LOG(Error, "mutable_state_create_session: unknown context %d", ctx);
+    return Error::InvalidState;
+  }
+  Context& c = it->second;
+  if (c.build_error != Error::Ok) {
+    return c.build_error;
+  }
+  if (c.handles.empty()) {
+    ET_LOG(
+        Error, "mutable_state_create_session: no backend handles registered");
+    return Error::NotSupported;
+  }
+  int token = c.next_token++;
+  // Per-handle buffers are allocated lazily on first execute.
+  c.sessions[token];
+  return token;
+}
+
+void mutable_state_destroy_session(MutableStateContext ctx, int token) {
+  std::lock_guard<std::mutex> g(registry_mutex());
+  auto it = contexts().find(ctx);
+  if (it == contexts().end()) {
+    return;
+  }
+  it->second.sessions.erase(token);
+}
+
+void mutable_state_set_active(MutableStateContext ctx, int token) {
+  tl_active_ctx = ctx;
+  tl_active_token = token;
+}
+
+} // namespace detail
+
+void mutable_state_note_handle(
+    const void* handle,
+    const MLXProgram* program,
+    MutableBufferData* default_buffers) {
+  if (tl_loading_ctx == kInvalidMutableContext) {
+    return; // No multi-session owner active during this load: single-session.
+  }
+  std::lock_guard<std::mutex> g(registry_mutex());
+  auto it = contexts().find(tl_loading_ctx);
+  if (it == contexts().end()) {
+    return;
+  }
+  it->second.handles[handle] = HandleInfo{program, default_buffers};
+  handle_ctx()[handle] = tl_loading_ctx;
+}
+
+void mutable_state_forget_handle(const void* handle) {
+  std::lock_guard<std::mutex> g(registry_mutex());
+  auto hit = handle_ctx().find(handle);
+  if (hit == handle_ctx().end()) {
+    return;
+  }
+  auto cit = contexts().find(hit->second);
+  if (cit != contexts().end()) {
+    cit->second.handles.erase(handle);
+    for (auto& session : cit->second.sessions) {
+      session.second.erase(handle);
+    }
+  }
+  handle_ctx().erase(hit);
+}
+
+Error mutable_state_rebind_for_execute(
+    const void* handle,
+    ExecutionState& state) {
+  std::lock_guard<std::mutex> g(registry_mutex());
+  auto hit = handle_ctx().find(handle);
+  if (hit == handle_ctx().end()) {
+    if (tl_active_token != kNoMutableSession) {
+      ET_LOG(
+          Error,
+          "mutable_state_rebind_for_execute: active session set but handle has "
+          "no mutable-state context");
+      return Error::Internal;
+    }
+    // Handle was not loaded under a multi-session owner: keep default buffers.
+    return Error::Ok;
+  }
+  auto cit = contexts().find(hit->second);
+  if (cit == contexts().end()) {
+    return Error::Ok;
+  }
+  Context& ctx = cit->second;
+  if (ctx.build_error != Error::Ok) {
+    return ctx.build_error;
+  }
+  // Invariant: a handle present in handle_ctx() is present in ctx.handles. Look
+  // it up explicitly (not operator[]) so a broken invariant fails loudly
+  // instead of inserting a {nullptr, nullptr} entry that later null-derefs in
+  // load_mutable_buffers(*info.program, ...).
+  auto info_it = ctx.handles.find(handle);
+  if (info_it == ctx.handles.end()) {
+    ET_LOG(
+        Error,
+        "mutable_state_rebind_for_execute: handle has a context but no "
+        "registered HandleInfo (invariant broken)");
+    return Error::Internal;
+  }
+  HandleInfo& info = info_it->second;
+
+  const bool has_active_session = tl_active_token != kNoMutableSession;
+  const bool active_for_this_ctx =
+      has_active_session && tl_active_ctx == hit->second;
+
+  // A session is active, but for a different context than the one this handle
+  // belongs to. Falling back to default buffers would silently execute with the
+  // wrong model/session state, so refuse instead.
+  if (has_active_session && !active_for_this_ctx) {
+    ET_LOG(
+        Error,
+        "mutable_state_rebind_for_execute: active context mismatch (a session "
+        "is active for a different loaded program than the one executing)");
+    return Error::Internal;
+  }
+
+  if (!active_for_this_ctx) {
+    // No session selected. Refuse if sessions exist (running against the
+    // default buffers here would not isolate state from created sessions).
+    if (!ctx.sessions.empty()) {
+      ET_LOG(
+          Error,
+          "mutable_state_rebind_for_execute: no active session selected but "
+          "sessions exist for this program");
+      return Error::InvalidState;
+    }
+    state.mutable_buffers = info.default_buffers;
+    return Error::Ok;
+  }
+
+  auto sit = ctx.sessions.find(tl_active_token);
+  if (sit == ctx.sessions.end()) {
+    ET_LOG(
+        Error,
+        "mutable_state_rebind_for_execute: unknown session token %d",
+        tl_active_token);
+    return Error::InvalidState;
+  }
+
+  auto& per_handle = sit->second;
+  auto bit = per_handle.find(handle);
+  if (bit == per_handle.end()) {
+    // First execute for this (session, handle): allocate fresh zeroed buffers.
+    // Constants/weights stay shared (ExecutionState::constants is untouched);
+    // only the mutable buffers are per-session.
+    MutableBufferData buffers;
+    try {
+      load_mutable_buffers(*info.program, buffers);
+    } catch (const std::exception& e) {
+      ET_LOG(
+          Error,
+          "mutable_state_rebind_for_execute: failed to allocate session "
+          "buffers: %s",
+          e.what());
+      return Error::MemoryAllocationFailed;
+    }
+    bit = per_handle.emplace(handle, std::move(buffers)).first;
+  }
+  // unordered_map keeps element pointers stable across rehash, so this remains
+  // valid for the duration of the execute.
+  state.mutable_buffers = &bit->second;
+  return Error::Ok;
+}
+
+} // namespace mlx
+} // namespace backends
+} // namespace executorch
diff --git a/backends/mlx/runtime/mlx_mutable_state.h b/backends/mlx/runtime/mlx_mutable_state.h
new file mode 100644
index 00000000000..84420812360
--- /dev/null
+++ b/backends/mlx/runtime/mlx_mutable_state.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <utility>
+
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/result.h>
+#include <executorch/runtime/platform/compiler.h>
+
+// MLX-private support for running one loaded MLX program with multiple isolated
+// instances of its mutable buffers (KV cache, conv/recurrent state). Callers
+// create sessions and execute with one active session selected.
+//
+// Unlike the CUDA backend, the MLX runtime owns mutable buffers directly in a
+// swappable container (ExecutionState::mutable_buffers is a
+// MutableBufferData*), so per-session isolation is a pointer swap to a fresh
+// MutableBufferData — no FQN registration / constant-repoint hook is needed.
+
+namespace executorch {
+namespace backends {
+namespace mlx {
+
+// Forward declarations (defined in MLXLoader.h / MLXExecutor.h).
+struct MLXProgram;
+struct MutableBufferData;
+struct ExecutionState;
+
+// Opaque per-loaded-program context id (0 = invalid).
+using MutableStateContext = int;
+constexpr MutableStateContext kInvalidMutableContext = 0;
+
+// Sentinel for execution without per-session rebinding.
+constexpr int kNoMutableSession = -1;
+
+// Implementation entry points. Callers should use MutableStateContextOwner.
+namespace detail {
+
+MutableStateContext mutable_state_create_context();
+void mutable_state_destroy_context(MutableStateContext ctx);
+void mutable_state_begin_load(MutableStateContext ctx);
+void mutable_state_end_load();
+bool mutable_state_available(MutableStateContext ctx);
+int64_t mutable_state_bytes_per_session(MutableStateContext ctx);
+::executorch::runtime::Error mutable_state_validate_coverage(
+    MutableStateContext ctx);
+::executorch::runtime::Result<int> mutable_state_create_session(
+    MutableStateContext ctx);
+void mutable_state_destroy_session(MutableStateContext ctx, int token);
+void mutable_state_set_active(MutableStateContext ctx, int token);
+
+} // namespace detail
+
+// Caller-facing owner for one mutable-state context. Mirrors the CUDA backend's
+// MutableStateContextOwner so the example engine can use a symmetric API.
+class ET_EXPERIMENTAL MutableStateContextOwner final {
+  class LoadScope final {
+   public:
+    explicit LoadScope(MutableStateContext ctx) {
+      detail::mutable_state_begin_load(ctx);
+    }
+
+    ~LoadScope() {
+      detail::mutable_state_end_load();
+    }
+
+    LoadScope(const LoadScope&) = delete;
+    LoadScope& operator=(const LoadScope&) = delete;
+  };
+
+  class ActiveSessionScope final {
+   public:
+    ActiveSessionScope(MutableStateContext ctx, int token) {
+      detail::mutable_state_set_active(ctx, token);
+    }
+
+    ~ActiveSessionScope() {
+      detail::mutable_state_set_active(
+          kInvalidMutableContext, kNoMutableSession);
+    }
+
+    ActiveSessionScope(const ActiveSessionScope&) = delete;
+    ActiveSessionScope& operator=(const ActiveSessionScope&) = delete;
+  };
+
+ public:
+  MutableStateContextOwner() : ctx_(detail::mutable_state_create_context()) {}
+
+  ~MutableStateContextOwner() {
+    destroy();
+  }
+
+  MutableStateContextOwner(const MutableStateContextOwner&) = delete;
+  MutableStateContextOwner& operator=(const MutableStateContextOwner&) = delete;
+
+  MutableStateContextOwner(MutableStateContextOwner&& other) noexcept
+      : ctx_(std::exchange(other.ctx_, kInvalidMutableContext)) {}
+
+  MutableStateContextOwner& operator=(
+      MutableStateContextOwner&& other) noexcept {
+    if (this != &other) {
+      destroy();
+      ctx_ = std::exchange(other.ctx_, kInvalidMutableContext);
+    }
+    return *this;
+  }
+
+  MutableStateContext get() const {
+    return ctx_;
+  }
+
+  explicit operator bool() const {
+    return ctx_ != kInvalidMutableContext;
+  }
+
+  // Associates delegate handles created by `fn` with this context.
+  template <typename Fn>
+  auto with_load_scope(Fn&& fn) const -> decltype(std::forward<Fn>(fn)()) {
+    LoadScope scope(ctx_);
+    return std::forward<Fn>(fn)();
+  }
+
+  // Selects this context/session while `fn` executes. The caller is responsible
+  // for serializing execution that touches the same loaded program.
+  //
+  // Thread-safety contract: destroy_session()/forget_handle() only take the
+  // registry mutex, while rebind (under with_active_session) hands execute a
+  // raw pointer into Context::sessions that is dereferenced after the lock is
+  // released. The caller must therefore guarantee a session is never destroyed
+  // while it is the active session mid-execute (the engine upholds this: a
+  // session's buffers are freed only when its owning LLMSession drops, never
+  // concurrently with its own execute). Destroying *other* sessions
+  // concurrently is safe — erasing one unordered_map element leaves pointers
+  // to the remaining elements valid (as does a rehash on insert).
+  template <typename Fn>
+  auto with_active_session(int token, Fn&& fn) const
+      -> decltype(std::forward<Fn>(fn)()) {
+    ActiveSessionScope scope(ctx_, token);
+    return std::forward<Fn>(fn)();
+  }
+
+  // True only after this context has been associated with at least one loaded
+  // MLX backend handle, so it can create isolated mutable-buffer sessions.
+  bool available() const {
+    return detail::mutable_state_available(ctx_);
+  }
+
+  int64_t bytes_per_session() const {
+    return detail::mutable_state_bytes_per_session(ctx_);
+  }
+
+  ::executorch::runtime::Error validate_coverage() const {
+    return detail::mutable_state_validate_coverage(ctx_);
+  }
+
+  // Creates an isolated mutable-buffer session for this context.
+  // Fails if no loaded MLX backend handle has been associated with the context.
+  ET_NODISCARD ::executorch::runtime::Result<int> create_session() const {
+    return detail::mutable_state_create_session(ctx_);
+  }
+
+  void destroy_session(int token) const {
+    detail::mutable_state_destroy_session(ctx_, token);
+  }
+
+ private:
+  void destroy() {
+    if (ctx_ != kInvalidMutableContext) {
+      detail::mutable_state_destroy_context(ctx_);
+      ctx_ = kInvalidMutableContext;
+    }
+  }
+
+  MutableStateContext ctx_ = kInvalidMutableContext;
+};
+
+// --- MLXBackend hooks --------------------------------------------------------
+//
+// Called from MLXBackend init/execute/destroy. `handle` is an opaque key (the
+// MLXHandle pointer). `program` and `default_buffers` are the handle's own
+// program and (init-time) mutable buffers; the manager swaps in per-session
+// buffers (or restores the default) by re-pointing `state.mutable_buffers`.
+
+void mutable_state_note_handle(
+    const void* handle,
+    const MLXProgram* program,
+    MutableBufferData* default_buffers);
+
+void mutable_state_forget_handle(const void* handle);
+
+::executorch::runtime::Error mutable_state_rebind_for_execute(
+    const void* handle,
+    ExecutionState& state);
+
+} // namespace mlx
+} // namespace backends
+} // namespace executorch
diff --git a/backends/mlx/test/CMakeLists.txt b/backends/mlx/test/CMakeLists.txt
index 39024639d1d..2d494652138 100644
--- a/backends/mlx/test/CMakeLists.txt
+++ b/backends/mlx/test/CMakeLists.txt
@@ -69,3 +69,22 @@ if(EXECUTORCH_MLX_ENABLE_SANITIZERS)
     multi_thread_test_runner PRIVATE ${_mlx_sanitizer_link_options}
   )
 endif()
+
+# Per-session mutable-state manager unit test (no model/tokenizer needed).
+add_executable(mlx_mutable_state_test mlx_mutable_state_test.cpp)
+target_include_directories(
+  mlx_mutable_state_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../runtime
+)
+target_link_libraries(
+  mlx_mutable_state_test PRIVATE mlxdelegate mlx_schema mlx executorch_core
+)
+if(EXECUTORCH_MLX_ENABLE_SANITIZERS)
+  target_compile_options(
+    mlx_mutable_state_test PRIVATE -fsanitize=address,undefined
+                                   -fno-omit-frame-pointer
+  )
+  target_link_options(
+    mlx_mutable_state_test PRIVATE ${_mlx_sanitizer_link_options}
+  )
+endif()
+add_test(NAME mlx_mutable_state COMMAND mlx_mutable_state_test)
diff --git a/backends/mlx/test/mlx_mutable_state_test.cpp b/backends/mlx/test/mlx_mutable_state_test.cpp
new file mode 100644
index 00000000000..99a646701ef
--- /dev/null
+++ b/backends/mlx/test/mlx_mutable_state_test.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Unit test for the MLX per-session mutable-state manager
+// (backends/mlx/runtime/mlx_mutable_state.{h,cpp}).
+//
+// Verifies that two sessions created on one loaded program get independent
+// mutable buffers: writing into session A's buffer does not leak into session
+// B's, and A's value persists across a rebind to B and back. This is the MLX
+// analogue of the CUDA "no-bleed" guarantee, exercised directly on the manager
+// (no model or tokenizer needed).
+
+#include "MLXExecutor.h"
+#include "MLXLoader.h"
+#include "mlx_mutable_state.h"
+
+#include <mlx/mlx.h>
+
+#include <cstdio>
+
+using namespace ::executorch::backends::mlx;
+
+namespace {
+
+int g_failures = 0;
+
+#define CHECK(cond)                                         \
+  do {                                                      \
+    if (!(cond)) {                                          \
+      std::printf("FAIL: %s (line %d)\n", #cond, __LINE__); \
+      ++g_failures;                                         \
+    }                                                       \
+  } while (0)
+
+// Build a minimal program with a single 1-element float mutable buffer at tid
+// 0.
+MLXProgram make_program() {
+  MLXProgram program;
+  program.num_mutable_buffer_tensors = 1;
+  program.mutable_buffer_map.push_back(SlotVariant{0, SlotType::TensorSlot});
+  TensorMeta meta;
+  meta.shape.push_back(ShapeDim{/*value=*/1});
+  meta.scalar_type = ScalarType::Float;
+  program.tensor_meta.resize(1);
+  program.tensor_meta[0] = meta;
+  return program;
+}
+
+float read0(const MutableBufferData& bufs) {
+  auto arr = bufs.get(Tid{0});
+  ::mlx::core::eval(arr);
+  return arr.item<float>();
+}
+
+} // namespace
+
+int main() {
+  MLXProgram program = make_program();
+
+  // Handle's default (init-time) mutable buffers.
+  MutableBufferData default_bufs;
+  load_mutable_buffers(program, default_bufs);
+
+  int dummy = 0;
+  const void* handle = &dummy;
+
+  MutableStateContextOwner owner;
+  CHECK(static_cast<bool>(owner));
+
+  // Associate the handle with the context (as MLXBackend::init would).
+  owner.with_load_scope(
+      [&]() { mutable_state_note_handle(handle, &program, &default_bufs); });
+
+  CHECK(owner.available());
+  CHECK(owner.bytes_per_session() == static_cast<int64_t>(sizeof(float)));
+
+  auto tokA = owner.create_session();
+  auto tokB = owner.create_session();
+  CHECK(tokA.ok());
+  CHECK(tokB.ok());
+  CHECK(tokA.get() != tokB.get());
+
+  ExecutionState state;
+
+  // Session A: rebind, then write a marker (7.0) into its buffer.
+  owner.with_active_session(tokA.get(), [&]() {
+    auto err = mutable_state_rebind_for_execute(handle, state);
+    CHECK(err == ::executorch::runtime::Error::Ok);
+    state.mutable_buffers->set(
+        Tid{0}, ::mlx::core::full({1}, 7.0f, ::mlx::core::float32));
+    return err;
+  });
+
+  // Session B: a fresh rebind must see zeros, not A's marker.
+  owner.with_active_session(tokB.get(), [&]() {
+    auto err = mutable_state_rebind_for_execute(handle, state);
+    CHECK(err == ::executorch::runtime::Error::Ok);
+    CHECK(read0(*state.mutable_buffers) == 0.0f);
+    return err;
+  });
+
+  // Back to session A: the marker must persist (isolation, no bleed).
+  owner.with_active_session(tokA.get(), [&]() {
+    auto err = mutable_state_rebind_for_execute(handle, state);
+    CHECK(err == ::executorch::runtime::Error::Ok);
+    CHECK(read0(*state.mutable_buffers) == 7.0f);
+    return err;
+  });
+
+  // With sessions present, executing without an active session is refused
+  // (prevents running against unmanaged/shared state).
+  {
+    auto err = mutable_state_rebind_for_execute(handle, state);
+    CHECK(err == ::executorch::runtime::Error::InvalidState);
+  }
+
+  owner.destroy_session(tokA.get());
+  owner.destroy_session(tokB.get());
+  mutable_state_forget_handle(handle);
+
+  if (g_failures == 0) {
+    std::printf("OK: mlx_mutable_state isolation test passed\n");
+    return 0;
+  }
+  std::printf("FAILED: %d checks\n", g_failures);
+  return 1;
+}
diff --git a/examples/models/qwen3_5_moe/CMakeLists.txt b/examples/models/qwen3_5_moe/CMakeLists.txt
index 726657a3779..aeb97f76ab7 100644
--- a/examples/models/qwen3_5_moe/CMakeLists.txt
+++ b/examples/models/qwen3_5_moe/CMakeLists.txt
@@ -89,6 +89,7 @@ endif()
 
 if(TARGET mlxdelegate)
   executorch_target_copy_mlx_metallib(qwen3_5_moe_runner)
+  executorch_target_copy_mlx_metallib(qwen3_5_moe_worker)
 endif()
 
 if(EXECUTORCH_BUILD_CUDA)
diff --git a/examples/models/qwen3_5_moe/CMakePresets.json b/examples/models/qwen3_5_moe/CMakePresets.json
index 276c2116148..6adcb8aa9cb 100644
--- a/examples/models/qwen3_5_moe/CMakePresets.json
+++ b/examples/models/qwen3_5_moe/CMakePresets.json
@@ -70,9 +70,9 @@
         },
         {
             "name": "qwen3-5-moe-mlx",
-            "displayName": "Build Qwen3.5 MoE runner (MLX)",
+            "displayName": "Build Qwen3.5 MoE runner and worker (MLX)",
             "configurePreset": "qwen3-5-moe-mlx",
-            "targets": ["qwen3_5_moe_runner"]
+            "targets": ["qwen3_5_moe_runner", "qwen3_5_moe_worker"]
         }
     ],
     "workflowPresets": [
diff --git a/examples/models/qwen3_5_moe/README.md b/examples/models/qwen3_5_moe/README.md
index c275641bfd7..77f53aefcc6 100644
--- a/examples/models/qwen3_5_moe/README.md
+++ b/examples/models/qwen3_5_moe/README.md
@@ -302,6 +302,66 @@ python -m executorch.examples.models.qwen3_5_moe.run \
     --max-new-tokens 50
 ```
 
+### Serving (MLX, multi-session)
+
+The MLX worker hosts multiple isolated sessions on **one** weight load, so an
+OpenAI-compatible server can serve concurrent conversations without duplicating
+the weights. `make qwen3_5_moe-mlx` builds both `qwen3_5_moe_runner` and
+`qwen3_5_moe_worker` (each with `mlx.metallib` copied alongside).
+
+Start the server (it auto-locates the worker binary):
+
+```bash
+# tokenizer.json the C++ worker opens (resolve from the HF cache)
+TOKENIZER_JSON=$(ls "${HF_HOME:-$HOME/.cache/huggingface}"/hub/models--Qwen--Qwen3.5-35B-A3B/snapshots/*/tokenizer.json | head -n1)
+
+python -m executorch.examples.models.qwen3_5_moe.serve \
+    --model-path ./qwen35_moe_mlx/model.pte \
+    --tokenizer-path "$TOKENIZER_JSON" \
+    --hf-tokenizer Qwen/Qwen3.5-35B-A3B \
+    --max-sessions 4 \
+    --host 127.0.0.1 \
+    --port 8000
+```
+
+- `--tokenizer-path` is the raw `tokenizer.json` **file** the worker loads;
+  `--hf-tokenizer` (HF id or local dir) supplies the chat template on the Python
+  side. No `--data-path` (the MLX `.pte` is self-contained).
+- `--max-sessions N` caps physical sessions on the single weight load. One slot
+  is reserved for anonymous requests (requests sent without a session id), so
+  `N` allows `N-1` concurrent named sessions.
+
+Query it (OpenAI-compatible) from another terminal. Route each conversation to a
+session with the `session_id` header:
+
+```bash
+curl http://127.0.0.1:8000/v1/chat/completions \
+  -H "Content-Type: application/json" -H "session_id: alice" \
+  -d '{"model":"qwen3.5-moe",
+       "messages":[{"role":"user","content":"What is the capital of France?"}],
+       "max_tokens":50,"chat_template_kwargs":{"enable_thinking":false}}'
+```
+
+Endpoints: `GET /health`, `GET /v1/models`, `POST /v1/chat/completions`,
+`DELETE /v1/sessions/{id}` (free a session + its slot), `POST /v1/sessions/{id}/reset`.
+
+Session/memory semantics on MLX:
+- This server uses the standard **stateless** OpenAI contract — send the full
+  `messages` history each request. `session_id` + warm-resume is a KV-cache reuse
+  optimization for the shared prefix, not server-side memory.
+- Each session adds **one** set of mutable buffers (KV + recurrent/conv state) on
+  top of the shared weights; per-session cost scales with `max_seq_len`. Weights
+  are never duplicated.
+- KV persists across requests for a live session and is **released on close**
+  (`DELETE`/reset). Named sessions are not auto-closed — close them to free slots.
+  MLX's Metal allocator pools freed buffers (so RSS may not shrink immediately),
+  but they are reused by later sessions, keeping memory bounded.
+- Requests are processed **one at a time** (a single in-flight request per
+  worker). A request runs to completion and head-of-line-blocks every other
+  session until it finishes; there is no token-level interleaving or parallel
+  execution. This holds on both MLX and CUDA; multi-session provides memory
+  isolation and warm resume, not added throughput.
+
 ### Tiny Model Test
 
 For CI or quick pipeline validation (no model download needed):
diff --git a/examples/models/qwen3_5_moe/mlx_source_transformations.py b/examples/models/qwen3_5_moe/mlx_source_transformations.py
index 9a49f8a84f6..3c460fc9c54 100644
--- a/examples/models/qwen3_5_moe/mlx_source_transformations.py
+++ b/examples/models/qwen3_5_moe/mlx_source_transformations.py
@@ -113,12 +113,14 @@ def _full_attention_forward(self, x, input_pos):
 
     k, v = self.kv_cache.update(input_pos, k, v)
 
-    if self.n_kv_groups > 1:
-        k = k.repeat_interleave(self.n_kv_groups, dim=1)
-        v = v.repeat_interleave(self.n_kv_groups, dim=1)
-
-    attn_mask = self.mask[input_pos].unsqueeze(0).unsqueeze(0)
-    y = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask)
+    y = torch.ops.mlx.custom_sdpa(
+        q,
+        k,
+        v,
+        start_pos=pos,
+        dropout_p=0.0,
+        is_causal=True,
+    )
 
     y = y.transpose(1, 2).contiguous().view(B, T, -1)
 
@@ -184,10 +186,8 @@ def _exportable_gated_delta_net_forward(self, x, input_pos):
         k, (self.head_k_dim,), self._qk_rms_weight, eps=1e-6
     )
 
-    # head_repeat for k_heads != v_heads
-    if self.head_repeat > 1:
-        q = q.repeat_interleave(self.head_repeat, dim=2)
-        k = k.repeat_interleave(self.head_repeat, dim=2)
+    # GQA head expansion (k_heads != v_heads) is handled inside
+    # mlx::gated_delta_rule
 
     # Mamba-style gating
     beta = b.sigmoid()
@@ -278,17 +278,13 @@ def _swap_gated_delta_net(model, model_dtype):
 
 
 def _swap_full_attention(model, config):
-    """FullAttention → mlx::rope custom op + causal mask."""
+    """FullAttention → mlx::rope custom op"""
     rope_theta = config.rope_theta if config else 10000.0
-    max_seq_len = config.max_seq_len if config else 4096
     count = 0
     for _name, module in model.named_modules():
         if isinstance(module, FullAttention):
             module._rope_dims = module.rotary_emb.rotary_dim
             module._rope_base = rope_theta
-            mask = torch.full((max_seq_len, max_seq_len), float("-inf"))
-            mask = torch.triu(mask, diagonal=1)
-            module.register_buffer("mask", mask)
             module.forward = types.MethodType(_full_attention_forward, module)
             count += 1
     return count
diff --git a/examples/models/qwen3_5_moe/qwen35_moe_engine.cpp b/examples/models/qwen3_5_moe/qwen35_moe_engine.cpp
index 713f6211330..fd81f60c23a 100644
--- a/examples/models/qwen3_5_moe/qwen35_moe_engine.cpp
+++ b/examples/models/qwen3_5_moe/qwen35_moe_engine.cpp
@@ -183,9 +183,9 @@ class Qwen35MoESession : public LLMSession {
       ::tokenizers::Tokenizer* tokenizer,
       std::unordered_map<std::string, int64_t> metadata,
       std::unordered_set<uint64_t> eos_ids
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
       ,
-      ::executorch::backends::cuda::MutableStateContextOwner* mutable_state,
+      MutableStateContextOwner* mutable_state,
       int session_token
 #endif
       )
@@ -195,7 +195,7 @@ class Qwen35MoESession : public LLMSession {
         tokenizer_(tokenizer),
         metadata_(std::move(metadata)),
         eos_ids_(std::move(eos_ids))
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
         ,
         mutable_state_(mutable_state),
         session_token_(session_token)
@@ -212,9 +212,8 @@ class Qwen35MoESession : public LLMSession {
   }
 
   ~Qwen35MoESession() override {
-#ifdef EXECUTORCH_BUILD_CUDA
-    if (mutable_state_ != nullptr &&
-        session_token_ != ::executorch::backends::cuda::kNoMutableSession) {
+#ifdef QWEN_HAS_MUTABLE_STATE
+    if (mutable_state_ != nullptr && session_token_ != kNoMutableSession) {
       mutable_state_->destroy_session(session_token_);
     }
 #endif
@@ -425,8 +424,8 @@ class Qwen35MoESession : public LLMSession {
       float temperature,
       bool sync_after) {
     std::lock_guard<std::mutex> guard(*exec_mutex_);
-#ifdef EXECUTORCH_BUILD_CUDA
-    Result<std::vector<EValue>> res = mutable_state_ != nullptr
+#ifdef QWEN_HAS_MUTABLE_STATE
+    auto res = mutable_state_ != nullptr
         ? mutable_state_->with_active_session(
               session_token_,
               [&]() { return module_->execute(method, inputs); })
@@ -465,10 +464,11 @@ class Qwen35MoESession : public LLMSession {
   int64_t decode_pos_data_[1] = {0};
   TensorPtr decode_tokens_;
   TensorPtr decode_pos_;
+#ifdef QWEN_HAS_MUTABLE_STATE
+  MutableStateContextOwner* mutable_state_ = nullptr;
+  int session_token_ = kNoMutableSession;
+#endif
 #ifdef EXECUTORCH_BUILD_CUDA
-  ::executorch::backends::cuda::MutableStateContextOwner* mutable_state_ =
-      nullptr;
-  int session_token_ = ::executorch::backends::cuda::kNoMutableSession;
   float temp_val_ = 1e-6f;
   TensorPtr temp_tensor_;
 #endif
@@ -529,17 +529,17 @@ Result<std::unique_ptr<Qwen35MoEEngine>> Qwen35MoEEngine::create(
         "not stop at end of turn");
   }
 
+#ifdef QWEN_HAS_MUTABLE_STATE
+  std::unique_ptr<MutableStateContextOwner> mutable_state;
+#endif
 #ifdef EXECUTORCH_BUILD_CUDA
-  std::unique_ptr<::executorch::backends::cuda::MutableStateContextOwner>
-      mutable_state;
   if (config.enable_cuda_graph) {
     ET_LOG(
         Info,
         "Qwen35MoEEngine: CUDA graph requested; per-session rebinding disabled "
         "and serving capacity clamped to 1 session.");
   } else {
-    auto candidate = std::make_unique<
-        ::executorch::backends::cuda::MutableStateContextOwner>();
+    auto candidate = std::make_unique<MutableStateContextOwner>();
     if (Error e = register_mutable_fqns(meta_module.get(), *candidate);
         e == Error::Ok) {
       mutable_state = std::move(candidate);
@@ -550,9 +550,13 @@ Result<std::unique_ptr<Qwen35MoEEngine>> Qwen35MoEEngine::create(
           "serving capacity clamped to 1 session.");
     }
   }
+#elif defined(EXECUTORCH_BUILD_MLX)
+  // MLX owns mutable buffers directly and selects per-session storage at
+  // execute time; no FQN registration or coverage check is required.
+  mutable_state = std::make_unique<MutableStateContextOwner>();
 #endif
 
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
   auto module_res = mutable_state != nullptr
       ? mutable_state->with_load_scope(
             [&]() { return build_qwen_module(config); })
@@ -566,16 +570,14 @@ Result<std::unique_ptr<Qwen35MoEEngine>> Qwen35MoEEngine::create(
   std::unique_ptr<Module> shared_module = std::move(module_res.get());
 
   bool rebind_available = false;
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
   rebind_available = mutable_state != nullptr && mutable_state->available();
-  if (rebind_available) {
-    if (mutable_state->validate_coverage() != Error::Ok) {
-      ET_LOG(
-          Error,
-          "Qwen35MoEEngine: mutable-buffer coverage check failed; disabling "
-          "multi-session (capacity clamped to 1).");
-      rebind_available = false;
-    }
+  if (rebind_available && mutable_state->validate_coverage() != Error::Ok) {
+    ET_LOG(
+        Error,
+        "Qwen35MoEEngine: mutable-buffer coverage check failed; disabling "
+        "multi-session (capacity clamped to 1).");
+    rebind_available = false;
   }
   if (!rebind_available) {
     ET_LOG(
@@ -592,7 +594,7 @@ Result<std::unique_ptr<Qwen35MoEEngine>> Qwen35MoEEngine::create(
       std::move(eos_ids),
       std::move(shared_module),
       rebind_available
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
       ,
       std::move(mutable_state)
 #endif
@@ -621,7 +623,7 @@ Result<std::unique_ptr<LLMSession>> Qwen35MoEEngine::create_session() {
   }
 
   int token = -1; // kNoMutableSession: single-session / no rebind
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
   if (rebind_available_) {
     auto t = mutable_state_->create_session();
     if (t.error() != Error::Ok) {
@@ -638,7 +640,7 @@ Result<std::unique_ptr<LLMSession>> Qwen35MoEEngine::create_session() {
       tokenizer_.get(),
       metadata_,
       eos_ids_
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
       ,
       mutable_state_.get(),
       token
@@ -648,7 +650,7 @@ Result<std::unique_ptr<LLMSession>> Qwen35MoEEngine::create_session() {
 
 LLMServingCapacity Qwen35MoEEngine::serving_capacity() const {
   LLMServingCapacity cap; // default: 1 session, 0 bytes (unknown)
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
   if (rebind_available_) {
     cap.max_physical_sessions_without_weight_duplication =
         config_.max_sessions > 1 ? config_.max_sessions : 1;
diff --git a/examples/models/qwen3_5_moe/qwen35_moe_engine.h b/examples/models/qwen3_5_moe/qwen35_moe_engine.h
index c7ea53115b8..5a5e286c9c5 100644
--- a/examples/models/qwen3_5_moe/qwen35_moe_engine.h
+++ b/examples/models/qwen3_5_moe/qwen35_moe_engine.h
@@ -7,8 +7,8 @@
  */
 
 // Engine/Session adapter for the Qwen3.5 MoE exported prefill/decode methods.
-// CUDA builds can host multiple sessions on one loaded model by rebinding the
-// model's mutable buffers before each execute.
+// CUDA and MLX builds can host multiple sessions on one loaded model by
+// rebinding the model's mutable buffers before each execute.
 
 #pragma once
 
@@ -28,10 +28,28 @@
 
 #ifdef EXECUTORCH_BUILD_CUDA
 #include <executorch/backends/cuda/runtime/cuda_mutable_state.h>
+#elif defined(EXECUTORCH_BUILD_MLX)
+#include <executorch/backends/mlx/runtime/mlx_mutable_state.h>
+#endif
+
+#if defined(EXECUTORCH_BUILD_CUDA) || defined(EXECUTORCH_BUILD_MLX)
+#define QWEN_HAS_MUTABLE_STATE 1
 #endif
 
 namespace executorch::extension::llm {
 
+#if defined(EXECUTORCH_BUILD_CUDA)
+using MutableStateContextOwner =
+    ::executorch::backends::cuda::MutableStateContextOwner;
+constexpr int kNoMutableSession =
+    ::executorch::backends::cuda::kNoMutableSession;
+#elif defined(EXECUTORCH_BUILD_MLX)
+using MutableStateContextOwner =
+    ::executorch::backends::mlx::MutableStateContextOwner;
+constexpr int kNoMutableSession =
+    ::executorch::backends::mlx::kNoMutableSession;
+#endif
+
 /// Immutable configuration for a Qwen3.5 MoE engine.
 struct Qwen35MoEConfig {
   std::string model_path; // .pte
@@ -77,10 +95,9 @@ class ET_EXPERIMENTAL Qwen35MoEEngine : public LLMEngine {
       std::unordered_set<uint64_t> eos_ids,
       std::unique_ptr<Module> shared_module,
       bool rebind_available
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
       ,
-      std::unique_ptr<::executorch::backends::cuda::MutableStateContextOwner>
-          mutable_state
+      std::unique_ptr<MutableStateContextOwner> mutable_state
 #endif
       )
       : config_(std::move(config)),
@@ -89,7 +106,7 @@ class ET_EXPERIMENTAL Qwen35MoEEngine : public LLMEngine {
         eos_ids_(std::move(eos_ids)),
         shared_module_(std::move(shared_module)),
         rebind_available_(rebind_available)
-#ifdef EXECUTORCH_BUILD_CUDA
+#ifdef QWEN_HAS_MUTABLE_STATE
         ,
         mutable_state_(std::move(mutable_state))
 #endif
@@ -104,9 +121,8 @@ class ET_EXPERIMENTAL Qwen35MoEEngine : public LLMEngine {
   std::unique_ptr<Module> shared_module_;
   std::mutex exec_mutex_;
   bool rebind_available_ = false;
-#ifdef EXECUTORCH_BUILD_CUDA
-  std::unique_ptr<::executorch::backends::cuda::MutableStateContextOwner>
-      mutable_state_;
+#ifdef QWEN_HAS_MUTABLE_STATE
+  std::unique_ptr<MutableStateContextOwner> mutable_state_;
 #endif
   std::atomic<int> live_sessions_{0};
 };

From 3169302acdf08e690296c2b61392de5f388c5f46 Mon Sep 17 00:00:00 2001
From: Irina Korchakova <irina.trukhina@nxp.com>
Date: Tue, 23 Jun 2026 21:59:47 +0200
Subject: [PATCH 3/7] NXP backend: Add nxp backend profiling support (#19225)

### Summary
Add profiling support for the NXP backend.

### Test plan
All CI tests passed including new test for the profiling feature.

---------

Signed-off-by: Irina Korchakova <irina.trukhina@nxp.com>
---
 .../nxp/backend/edge_program_converter.py     |  46 +-
 .../ir/converter/builder/model_builder.py     |  31 ++
 .../ir/tflite_generator/tflite_model.py       |   5 +
 .../nxp/backend/neutron_converter_manager.py  |  12 +
 backends/nxp/backend/neutron_map.py           | 457 ++++++++++++++++++
 backends/nxp/nxp_backend.py                   |  72 ++-
 backends/nxp/runtime/NeutronBackend.cpp       |  77 ++-
 backends/nxp/tests/executorch_pipeline.py     |   5 +
 backends/nxp/tests/executors.py               |   2 +-
 .../tests/generic_tests/test_aot_example.py   |  87 +++-
 ...st_move_activation_before_concatenation.py |  16 +-
 .../test_neutron_backend_executor.py          |   6 +-
 .../test_neutron_converter_manager.py         |   2 +-
 .../test_per_channel_conversion.py            |   2 +-
 .../nxp/tests/generic_tests/test_profiling.py | 158 ++++++
 .../nxp/tests/generic_tests/test_quantizer.py |   8 +-
 .../node_converter/test_addmm_converter.py    |   4 +-
 .../node_converter/test_bmm_converter.py      |   2 +-
 .../node_converter/test_clone_converter.py    |   6 +-
 .../node_converter/test_conv_converter.py     |   4 +-
 .../node_converter/test_mm_converter.py       |   4 +-
 .../node_converter/test_neg_converter.py      |   4 +-
 .../node_converter/test_prelu_converter.py    |   2 +-
 .../node_converter/test_softmax_converter.py  |   4 +-
 .../test_view_copy_converter.py               |   4 +-
 .../ir/edge_passes/test_linear_bn_fusing.py   |   2 +-
 backends/nxp/tests/nsys_testing.py            |  12 +
 .../_static/img/nxp/nxp-mcuxpresso-etdump.png | Bin 0 -> 44262 bytes
 docs/source/backends/nxp/nxp-overview.md      |   3 +
 docs/source/backends/nxp/nxp-profiling.md     | 205 ++++++++
 examples/nxp/analyzing_with_inspector.py      |  58 +++
 examples/nxp/aot_neutron_compile.py           |  31 +-
 32 files changed, 1268 insertions(+), 63 deletions(-)
 create mode 100644 backends/nxp/backend/neutron_map.py
 create mode 100644 backends/nxp/tests/generic_tests/test_profiling.py
 create mode 100644 docs/source/_static/img/nxp/nxp-mcuxpresso-etdump.png
 create mode 100644 docs/source/backends/nxp/nxp-profiling.md
 create mode 100644 examples/nxp/analyzing_with_inspector.py

diff --git a/backends/nxp/backend/edge_program_converter.py b/backends/nxp/backend/edge_program_converter.py
index ff8cbb660cb..9df8290e85d 100644
--- a/backends/nxp/backend/edge_program_converter.py
+++ b/backends/nxp/backend/edge_program_converter.py
@@ -74,13 +74,16 @@ class EdgeProgramToIRConverter:
     _default_target_spec = NeutronTargetSpec("imxrt700")
     _default_delegation_options = CustomDelegationOptions()
 
+    def __init__(self):
+        self.edge_to_tflite_map = {}
+
     def convert_program(
         self,
         edge_program: ExportedProgram,
         conversion_config: ConversionConfig = _default_conversion_config,
         neutron_target_spec: NeutronTargetSpec = _default_target_spec,
         custom_delegation_options: CustomDelegationOptions = _default_delegation_options,
-    ) -> tuple[bytes, dict[str, dict[str, DataFormat]]]:
+    ) -> tuple[bytes, dict[str, dict[str, DataFormat]], dict[int, tuple[int, ...]]]:
         """
         Convert ExportedProgram in Edge dialect to IR (TFLite flatbuffers) as bytes.
 
@@ -88,8 +91,11 @@ def convert_program(
         :param conversion_config: ConversionConfig instance.
         :param neutron_target_spec: Object for querying the target platform to retrieve its properties.
         :param custom_delegation_options: Custom user options which affect node delegation.
-        :return: TFLite flatbuffers as bytes.
+        :return: TFLite flatbuffers as bytes, I/O formats, and edge-to-tflite mapping.
         """
+        # Reset the edge to tflite map for each conversion
+        self.edge_to_tflite_map = {}
+
         parameters_mapping = self.map_inputs_to_parameters(edge_program)
         dim_order_map = self.map_nodes_to_dim_order(edge_program)
 
@@ -113,6 +119,9 @@ def convert_program(
         # Apply optimizations and finalize the model.
         internal_tflite_model = cc.tflite_builder.finish()
 
+        # Get the final edge to tflite mapping after optimization
+        self.edge_to_tflite_map = cc.tflite_builder.edge_to_tflite_map
+
         # Extract the formats of the model's inputs and outputs.
         io_formats = cc.tflite_builder.get_io_formats(edge_program.graph_signature)
 
@@ -120,7 +129,7 @@ def convert_program(
         flatbuffers_builder = flatbuffers.Builder()
         internal_tflite_model.gen_tflite(flatbuffers_builder)
 
-        return bytes(flatbuffers_builder.Output()), io_formats
+        return bytes(flatbuffers_builder.Output()), io_formats, self.edge_to_tflite_map
 
     @staticmethod
     def append_placeholders_and_tensors(nodes: list[Node], context: ConversionContext):
@@ -162,7 +171,6 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
             exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
             exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
         ]
-
         for node in nodes:
             if node.op == "call_function":
                 if node.target in qdq_related_functions and "cluster" in node.meta:
@@ -174,7 +182,37 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
                     # The node was already processed alongside the Q/DQ ops.
                     pass
                 elif node.target in functions_converters:
+                    # Get TFLite op count BEFORE conversion
+                    tflite_op_count_before = len(
+                        conversion_context.tflite_builder.get_operators().vector
+                    )
+                    # Convert the node
                     functions_converters[node.target](conversion_context).convert(node)
+                    # Get TFLite op count AFTER conversion
+                    tflite_op_count_after = len(
+                        conversion_context.tflite_builder.get_operators().vector
+                    )
+
+                    # Track the mapping - store edge debug handle in operators.
+                    # Get the edge debug handle so it can be associated with newly created operators.
+                    edge_debug_handle = node.meta.get("debug_handle", None)
+                    if (
+                        edge_debug_handle is not None
+                        and tflite_op_count_after > tflite_op_count_before
+                    ):
+                        operators = (
+                            conversion_context.tflite_builder.get_operators().vector
+                        )
+                        # Node converters append new operators to the TFLite builder.
+                        # Only operators added during this conversion step (from "before" to "after")
+                        # are tagged with the current edge_debug_handle.
+                        for i in range(tflite_op_count_before, tflite_op_count_after):
+                            # Store edge debug handle in operator's temporary attribute
+                            operators[i].tmp_edge_debug_handle = edge_debug_handle
+                        logger.d(
+                            f"Tagged TFLite ops {list(range(tflite_op_count_before, tflite_op_count_after))} with edge debug_handle={edge_debug_handle} for node '{node.name}'"
+                        )
+
                 else:
                     logger.e(
                         logger.Code.NOT_IMPLEMENTED,
diff --git a/backends/nxp/backend/ir/converter/builder/model_builder.py b/backends/nxp/backend/ir/converter/builder/model_builder.py
index f97a194ce87..41820c3ab61 100755
--- a/backends/nxp/backend/ir/converter/builder/model_builder.py
+++ b/backends/nxp/backend/ir/converter/builder/model_builder.py
@@ -85,6 +85,10 @@ class ModelBuilder:
 
     conversion_config: ConversionConfig
 
+    edge_to_tflite_map: dict[
+        int, tuple[int, ...]
+    ]  # Mapping edge debug handles to tuple of TFLite operator indices
+
     _default_conversion_config = ConversionConfig()
 
     def __init__(
@@ -105,6 +109,7 @@ def __init__(
         self._nchw_tensor_version = {}
         self._skipped_output_map = {}
         self._zeros_tensor_map = {}
+        self.edge_to_tflite_map = {}
 
     def create_zeros_tensor(
         self, dims: List[int], name: str, dtype: np.dtype, can_reuse: bool = False
@@ -503,6 +508,9 @@ def finish(self) -> tflite_model.Model:
             self.conversion_config.optimization_blacklist,
         )
 
+        # Create the final edge-to-tflite mapping after model optimization
+        self._create_edge_to_tflite_mapping()
+
         self._keep_one_empty_buffer()
 
         # Remove outputs, which are not produced by any node. Otherwise, there would be errors after inference.
@@ -524,6 +532,29 @@ def finish(self) -> tflite_model.Model:
 
         return self._tfl_model
 
+    def _create_edge_to_tflite_mapping(self):
+        """Build the edge-to-TFLite mapping and store it in the `self.edge_to_tflite_map` instance attribute.
+
+        This function should be called after all model optimizations have been applied, so that the operator indices match the output TFLite model.
+        """
+
+        edge_to_tflite_dict = {}
+        for idx, op in enumerate(self.get_operators().vector):
+            if (
+                hasattr(op, "tmp_edge_debug_handle")
+                and op.tmp_edge_debug_handle is not None
+            ):
+                debug_handle = op.tmp_edge_debug_handle
+                if debug_handle not in edge_to_tflite_dict:
+                    edge_to_tflite_dict[debug_handle] = []
+                edge_to_tflite_dict[debug_handle].append(idx)
+
+        # Freeze each index list into a tuple (debug handle -> tuple of operator indices)
+        self.edge_to_tflite_map = {k: tuple(v) for k, v in edge_to_tflite_dict.items()}
+        logger.i(
+            f"\nFinal edge_to_tflite_map after optimization: {self.edge_to_tflite_map}"
+        )
+
     def _assign_io_tensor_indices(self, inputs, outputs, allow_inputs_stripping: bool):
         for tensor in outputs.tmp_outputs:
             try:
diff --git a/backends/nxp/backend/ir/tflite_generator/tflite_model.py b/backends/nxp/backend/ir/tflite_generator/tflite_model.py
index 6e8e7b6c33b..d8d0bada57d 100755
--- a/backends/nxp/backend/ir/tflite_generator/tflite_model.py
+++ b/backends/nxp/backend/ir/tflite_generator/tflite_model.py
@@ -514,6 +514,9 @@ class Operator(meta.TFLiteObject):
     # If `True`, this is an extra operator added during conversion. It was not present in the original input model.
     tmp_added_extra: bool
 
+    # Edge program debug handle for mapping edge nodes to TFLite operators
+    tmp_edge_debug_handle: Optional[int]
+
     def __init__(
         self,
         inputs: OperatorInputs = None,
@@ -541,6 +544,8 @@ def __init__(
         self.tmp_version = 1
         self.tmp_added_extra = False
 
+        self.tmp_edge_debug_handle = None
+
     def uses_per_channel_quantization(self) -> bool:
         """Determine if this operator uses per-channel quantization."""
         for tensor in itertools.chain(self.tmp_inputs, self.tmp_outputs):
diff --git a/backends/nxp/backend/neutron_converter_manager.py b/backends/nxp/backend/neutron_converter_manager.py
index 0abee0cdc86..92b4e25a5de 100644
--- a/backends/nxp/backend/neutron_converter_manager.py
+++ b/backends/nxp/backend/neutron_converter_manager.py
@@ -25,6 +25,15 @@ def _build_compilation_context(compilation_opts):
     cctx.compilationOpts.dumpKernelSelectionCode = compilation_opts[
         "dumpKernelSelectionCode"
     ]
+    if (
+        hasattr(cctx.compilationOpts, "useProfiling")
+        and compilation_opts["useProfiling"]
+    ):
+        cctx.compilationOpts.useProfiling = compilation_opts["useProfiling"]
+        cctx.compilationOpts.dumpAfterImport = "console"
+        cctx.compilationOpts.dumpAfterGenerate = "console"
+        cctx.compilationOpts.verbose = compilation_opts["useProfiling"]
+
     return cctx
 
 
@@ -81,6 +90,7 @@ def convert(
         target: str,
         delegation_tag: str,
         fetch_constants_to_sram: bool = False,
+        use_profiling: bool = False,
     ) -> bytes:
         """
         Call Neutron Converter.
@@ -89,6 +99,7 @@ def convert(
         :param target: The target platform.
         :param delegation_tag: The delegation tag of model partition.
         :param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
         This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
+        :param use_profiling: Use profiling for neutron delegated model.
 
         :return: TFLite model with Neutron microcode as bytes.
@@ -102,6 +113,7 @@ def convert(
             "excludeGraphPasses": "HoistSliceAboveTranspose,MergeTranspose",
             "fetchConstantsToSRAM": fetch_constants_to_sram,
             "dumpKernelSelectionCode": self.dump_kernel_selection_code,
+            "useProfiling": use_profiling,
         }
 
         # Try to use multiprocessing for isolation, but fall back to direct execution
diff --git a/backends/nxp/backend/neutron_map.py b/backends/nxp/backend/neutron_map.py
new file mode 100644
index 00000000000..e2da653daa3
--- /dev/null
+++ b/backends/nxp/backend/neutron_map.py
@@ -0,0 +1,457 @@
+# Copyright 2026 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import logging
+import re
+from dataclasses import dataclass
+
+# example:  Type: CONV_2D
+#               Inputs:
+#                 [0]: quantized_decomposed_quantize_per_tensor_default_4
+#                 [1]: quantized_decomposed_dequantize_per_channel_default_2
+#               Outputs:
+#                 [0]: quantized_decomposed_quantize_per_tensor_default_5
+#               Location: 4
+PATTERN_NODE = (
+    r"Type:\s+(?P<type>\w+)\s+"
+    r"Inputs:(?P<inputs>[\s\S]*?)"
+    r"Outputs:(?P<outputs>[\s\S]*?)"
+    r"Location:\s+(?P<location>\d+)"
+)
+# The pattern is very similar to operator pattern
+PATTERN_SUBGRAPH = (
+    r"^(?P<num>\d+)\s*"
+    r"Inputs:(?P<inputs>[\s\S]*?)"
+    r"Outputs:(?P<outputs>[\s\S]*?)"
+    r"Tensors:"
+)
+# example:  [0]: quantized_decomposed_quantize_per_tensor_default_4
+PATTERN_IO_TENSOR_NAME = r"\[\d+\]:\s+(?P<name>[\S]+)"
+# example: Statistics for NeutronGraph "subgraph_195":
+PATTERN_GRAPH = r"Statistics for NeutronGraph \"subgraph_(?P<num>\d+)\":"
+# example:      NeutronOperator "subgraph_001":
+#                       Operators:
+#                           PAD
+#                           CONV_2D
+#                       Kernels:
+#                           Pad
+#                           Conv2DStandardV2
+#               NeutronOperator "subgraph_002":
+PATTERN_VERBOSE_KERNELS = (
+    r"\"subgraph_(?P<subgraph>\d+)\"\:\s*"
+    r"Operators:[\s\S]*?"
+    r"Kernels:\s*(?P<kernels>[\s\S]*?)"
+    r"\s*(NeutronOperator|^$|=)"  # NOTE(review): without re.MULTILINE "^$" cannot match here
+)
+# example:  NeutronGraph "subgraph_074":
+PATTERN_VERBOSE_GRAPH = (
+    r"NeutronGraph\s*\"subgraph_(?P<subgraph>\d+)\":(?P<operators>[\s\S]*?)\s*(^$|=)"
+)
+# Two graphs are expected in the input log: original and converted.
+EXPECTED_GRAPHS = 2
+# List of single-input nodes that shouldn't be mapped on the same TFLite node.
+SINGLE_INPUT_NODES = [
+    "ABS",
+    "AVERAGE_POOL_2D",
+    "CAST",
+    "EXP",
+    "HARD_SWISH",
+    "LEAKY_RELU",
+    "LOG",
+    "LOGISTIC",
+    "MAX_POOL_2D",
+    "QUANTIZE",
+    "RSQRT",
+    "TANH",
+]
+
+
+@dataclass
+class Node:
+    name: str  # Operator type parsed from the dump's "Type:" field (e.g. CONV_2D).
+    inputs: list[str]  # Names of the node's input tensors.
+    outputs: list[str]  # Names of the node's output tensors.
+    location: int  # Value of the dump's "Location:" field (position in graph/subgraph).
+
+
+@dataclass
+class SubgraphInfo:
+    num: int  # Subgraph number.
+    location: int  # Location in neutron graph; NeutronMap creates it as -1 until resolved.
+    inputs: list[str]  # Names of the subgraph's input tensors.
+    outputs: list[str]  # Names of the subgraph's output tensors.
+    kernels: int  # Number of neutron kernels; NeutronMap creates it as 0 until resolved.
+    nodes: list[Node]  # List of tflite nodes in neutron subgraph.
+
+
+def get_tensors_name(tensors: str) -> list[str]:
+    """Return the tensor names listed in a dumped "[idx]: name" block, in dump order."""
+    return re.findall(PATTERN_IO_TENSOR_NAME, tensors)
+
+
+class NeutronMap:
+    """Mapping between Neutron, TFLite, and Edge operators based on the Neutron converter log.
+
+    Parses the Neutron converter log to extract information about TFLite nodes and Neutron subgraphs.
+    Maps TFLite operators to corresponding Neutron operators.
+    Maps Edge operators to Neutron operators via the Edge-to-TFLite mapping.
+
+    Attributes:
+        tflite_nodes (list[Node]): TFLite node information extracted from the converter log.
+        neutron_subgraphs (list[SubgraphInfo]): Neutron subgraph information extracted from the converter log.
+        neutron_graphs (list[int]): Indices of final Neutron graphs derived from neutron_subgraphs.
+        edge_to_tflite_map (dict[int, tuple[int, ...]]): Mapping from Edge operators to TFLite operators.
+        edge_to_neutron_map (dict[int, tuple[int, ...]]): Mapping from Edge operators to Neutron operators.
+        tflite_to_neutron_map (dict[int, tuple[int, ...]]): Mapping from TFLite operators to Neutron operators.
+
+    Example:
+        >>> neutron_map = NeutronMap(log_output, edge_to_tflite_map)
+        >>> neutron_to_edge_map = neutron_map.get_neutron_to_edge_map()
+    """
+
+    tflite_nodes: list[Node]
+    neutron_subgraphs: list[SubgraphInfo]
+    neutron_graphs: list[int]
+    edge_to_tflite_map: dict[int, tuple[int, ...]]
+    edge_to_neutron_map: dict[int, tuple[int, ...]]
+    tflite_to_neutron_map: dict[int, tuple[int, ...]]
+
+    def __init__(
+        self, neutron_converter_log: str, edge_to_tflite_map: dict[int, tuple[int, ...]]
+    ) -> None:
+        """Initialize neutron map from neutron converter log.
+
+        :param neutron_converter_log: Neutron converter log obtained during model conversion. It should contain the
+        original tflite graph and neutron graph dumps (dumpAfterImport and dumpAfterGenerate set to "console").
+        :param edge_to_tflite_map: Mapping from Edge operators to tuples of TFLite operators.
+        """
+        super().__init__()
+        self.tflite_nodes = []
+        self.neutron_subgraphs = []
+        self.neutron_graphs = []
+        self.edge_to_tflite_map = edge_to_tflite_map
+        self.tflite_to_neutron_map = {}
+        self.edge_to_neutron_map = {}
+        self.neutron_kernels_num = 0
+        self._split_profiling_log(neutron_converter_log)
+
+    def _split_profiling_log(self, log: str) -> None:
+        """Process profiling log to split it into original TFLite and converted Neutron nodes.
+
+        :param log: Neutron converter log obtained during model conversion, containing the original
+            TFLite graph and Neutron graph dump.
+        :return: None. Sets instance attributes tflite_nodes and neutron_subgraphs with node information.
+        """
+        graphs = log.split("Graphs:")
+        # Exactly two "Graphs:" sections are expected; otherwise nothing is parsed.
+        if len(graphs) != EXPECTED_GRAPHS + 1:
+            return
+        optimization_dump, neutron_graph_dump = graphs[1:]
+
+        # The tflite model dump is everything before the "= Optimize Graph =" marker.
+        tflite_graph_dump = optimization_dump.partition("= Optimize Graph =")[0]
+
+        # Get verbose Neutron graphs located in the Extract Graphs section.
+        extracted_graph_dump = optimization_dump.partition("= Extract Graphs =")[
+            2
+        ].partition("Generate code for NeutronGraph")[0]
+
+        # Get list of original operators from first dumped graph.
+        self.tflite_nodes = [
+            Node(
+                matched_operator.group("type"),
+                get_tensors_name(matched_operator.group("inputs")),
+                get_tensors_name(matched_operator.group("outputs")),
+                int(matched_operator.group("location")),
+            )
+            for matched_operator in re.finditer(PATTERN_NODE, tflite_graph_dump)
+        ]
+        # Get list of neutron subgraphs; verbose info is only added when some were found.
+        self.neutron_subgraphs = self._get_neutron_subgraphs(neutron_graph_dump)
+        if self.neutron_subgraphs:
+            self._update_neutron_subgraphs_info(extracted_graph_dump)
+
+    def _get_neutron_subgraphs(self, graph_dump: str) -> list[SubgraphInfo]:
+        """Parse Neutron graph dump and extract subgraph information.
+
+        :param graph_dump: String containing the Neutron graph dump from the converter log.
+        :return: List of SubgraphInfo objects containing subgraph metadata and operator nodes.
+        """
+
+        def get_subgraph_nodes(subgraph_dump: str) -> list[Node]:
+            """Parse subgraph dump and extract operator nodes.
+
+            :param subgraph_dump: String containing a single Neutron subgraph definition.
+            :return: List of Node objects representing operators in the subgraph.
+            """
+            return [
+                Node(
+                    matched_operator.group("type"),
+                    get_tensors_name(matched_operator.group("inputs")),
+                    get_tensors_name(matched_operator.group("outputs")),
+                    int(matched_operator.group("location")),
+                )
+                for matched_operator in re.finditer(PATTERN_NODE, subgraph_dump)
+            ]
+
+        subgraphs = graph_dump.split(r"Name: subgraph_")
+        if len(subgraphs) < 3:
+            return []
+
+        # Store numbers of final neutron graphs (read from the last chunk) in self.neutron_graphs.
+        self.neutron_graphs = [
+            int(matched_graphs.group("num"))
+            for matched_graphs in re.finditer(PATTERN_GRAPH, subgraphs[-1])
+        ]
+        if not self.neutron_graphs:
+            return []
+
+        # Build one SubgraphInfo per chunk; location (-1) and kernels (0) are placeholders.
+        neutron_subgraphs: list[SubgraphInfo] = []
+        for subgraph in subgraphs[1:]:
+            subgraph_match = re.search(PATTERN_SUBGRAPH, subgraph)
+            if not subgraph_match:
+                continue
+            neutron_subgraph = SubgraphInfo(
+                int(subgraph_match.group("num")),
+                -1,
+                get_tensors_name(subgraph_match.group("inputs")),
+                get_tensors_name(subgraph_match.group("outputs")),
+                0,
+                get_subgraph_nodes(subgraph),
+            )
+            neutron_subgraphs.append(neutron_subgraph)
+        return neutron_subgraphs
+
+    def _update_neutron_subgraphs_info(self, extracted_graph: str) -> None:
+        """Update Neutron subgraphs with verbose info.
+
+        - Set numbers of Neutron kernels in each Neutron subgraph. 99% of subgraphs contain only one Neutron kernel,
+        but there are some exceptions and some subgraphs can have more kernels. This number can be taken from
+        final Neutron graph info. The per-graph totals are accumulated in self.neutron_kernels_num.
+        - Set Neutron subgraphs location in the final Neutron Graph. The function updates the location parameter
+        for each Neutron subgraph according to its position in the final Neutron graph. Location is calculated
+        continuously across all Neutron graphs in the model. Non-Neutron operators are skipped.
+
+        :param extracted_graph: verbose Neutron graph dump.
+        """
+        # Split the dump into one chunk per "NeutronGraph" occurrence.
+        neutron_graphs = extracted_graph.split("NeutronGraph")
+        location_shift = 0
+        for neutron_graph in neutron_graphs:
+
+            subgraph_nodes = {
+                int(matched_subgraph.group("subgraph")): {
+                    "location": i + location_shift,
+                    "kernels": [
+                        kernel.replace(" ", "")
+                        for kernel in matched_subgraph.group("kernels").split("\n")
+                        if kernel.strip()
+                    ],
+                }
+                for i, matched_subgraph in enumerate(
+                    re.finditer(PATTERN_VERBOSE_KERNELS, neutron_graph)
+                )
+            }
+            if not subgraph_nodes:
+                continue
+            # Shift locations in the following chunks by the number of subgraphs matched in this one.
+            location_shift += len(subgraph_nodes)
+
+            # Number of the first "subgraph_<num>" in the chunk - presumably the graph's own name.
+            graph_num = -1
+            matched_graph = re.search(r"subgraph_(?P<subgraph>\d+)", neutron_graph)
+            if matched_graph:
+                graph_num = int(matched_graph.group("subgraph"))
+
+            # Update number of kernels for all subgraphs.
+            for subgraph in self.neutron_subgraphs:
+                if subgraph.num in subgraph_nodes:
+                    subgraph.kernels = len(subgraph_nodes[subgraph.num]["kernels"])
+                    subgraph.location = subgraph_nodes[subgraph.num]["location"]
+                elif subgraph.num == graph_num:
+                    subgraph.kernels = sum(
+                        len(s["kernels"]) for s in subgraph_nodes.values()
+                    )
+                    self.neutron_kernels_num += subgraph.kernels
+
+    def _nodes_match_by_io(self, tf_node: Node, neutron_node: Node) -> bool:
+        """
+        Determine whether a TFLite node can be mapped to a Neutron node
+        based on their input and output compatibility.
+
+        :param tf_node: Source TFLite node.
+        :param neutron_node: Target Neutron node.
+        :return: True if the nodes can be considered mapped, False otherwise.
+        """
+
+        def get_name_matches(tf_names: list[str], neutron_names: list[str]) -> int:
+            # Count how many names from tf_names have a corresponding match in
+            # neutron_names. A match is defined as:
+            #   - exact equality, or
+            #   - one name followed by "/" occurring anywhere inside the other
+            #     (a substring test, e.g. "tflite_input/additional_name").
+            result = 0
+            for tf_name in tf_names:
+                # Determine if the tensor name corresponds to a special operation input.
+                # Matches names like "perm0", "perm1", etc. used by Transpose ops,
+                # and names like "padding0", "padding1", etc. used by Pad ops.
+                special_op = (
+                    "permutation"
+                    if re.fullmatch(r"perm(\d+)?", tf_name)
+                    else (
+                        "padding"
+                        if re.fullmatch(r"padding(s)?(\d+)?", tf_name)
+                        else None
+                    )
+                )
+                for neutron_name in neutron_names:
+                    if (
+                        neutron_name == tf_name
+                        or neutron_name + "/" in tf_name
+                        or tf_name + "/" in neutron_name
+                    ):
+                        result += 1
+                        break
+
+                    # A special TFLite name also matches any Neutron name containing "permutation"/"padding".
+                    if special_op and special_op in neutron_name:
+                        result += 1
+                        break
+            return result
+
+        name_matches = get_name_matches(tf_node.inputs, neutron_node.inputs)
+        # Map the node if all TFLite inputs match Neutron inputs.
+        # Note: the Neutron node may still have additional extra inputs.
+        if name_matches == len(tf_node.inputs):
+            return True
+        elif name_matches == len(tf_node.inputs) - 1:
+            # If there is only one unmatched input, check matching of outputs.
+            name_matches = get_name_matches(tf_node.outputs, neutron_node.outputs)
+            if name_matches == len(tf_node.outputs):
+                # Map the node if all TFLite outputs match Neutron outputs.
+                return True
+        return False
+
+    def get_tflite_to_neutron_map(self) -> dict[int, tuple[int, ...]]:
+        """Map TFLite nodes from the original model to Neutron nodes in the converted model.
+
+        Nodes are paired by input/output tensor names: a Neutron tensor either carries exactly
+        the TFLite name or uses the format "tflite_input/additional_name".
+
+        :return: Dictionary mapping TFLite node indices to tuple of Neutron subgraph indices.
+        """
+        mapping = {}
+        for tf_idx, tf_node in enumerate(self.tflite_nodes):
+            locations = []
+            for subgraph in self.neutron_subgraphs:
+                # Skip final Neutron graphs and locations that are already recorded.
+                if subgraph.num in self.neutron_graphs:
+                    continue
+                if subgraph.location in locations:
+                    continue
+                if any(
+                    self._nodes_match_by_io(tf_node, neutron_node)
+                    for neutron_node in subgraph.nodes
+                ):
+                    locations.append(subgraph.location)
+            # Drop parallel single-input nodes wrongly mapped to this TFLite node.
+            locations = self._filter_single_input_nodes(tf_node.name, locations)
+            if locations:
+                mapping[tf_idx] = tuple(locations)
+
+        self.tflite_to_neutron_map = mapping
+        return self.tflite_to_neutron_map
+
+    def _filter_single_input_nodes(
+        self, node_name: str, subgraph_loc: list[int]
+    ) -> list[int]:
+        """
+        Resolve ambiguous mappings of a single-input TFLite node.
+
+        Parallel single-input Neutron nodes that consume the same input tensor can all match
+        one TFLite node by tensor names. The filter only applies when the TFLite operation is
+        listed in SINGLE_INPUT_NODES and more than one Neutron subgraph was matched.
+
+        In that case the nodes of the matched subgraphs are searched for operations listed in
+        SINGLE_INPUT_NODES. If any exist, only the subgraph of the first one whose operation
+        name equals node_name is kept; without such a node the mapping is dropped entirely.
+        If none exist, the mapping is returned unchanged.
+
+        :param node_name: Operation name of the current TFLite node.
+        :param subgraph_loc: List of Neutron subgraph indices whose inputs correspond to the
+                            input of the current TFLite node.
+        :return: Filtered list of Neutron subgraph indices to be mapped to the current TFLite node.
+        """
+        # Nothing to disambiguate for other operations or for a single match.
+        if node_name not in SINGLE_INPUT_NODES or len(subgraph_loc) <= 1:
+            return subgraph_loc
+
+        # Collect (location, operation name) of every single-input node in the matched subgraphs.
+        candidates = [
+            (subgraph.location, neutron_node.name)
+            for subgraph in self.neutron_subgraphs
+            if subgraph.location in subgraph_loc
+            for neutron_node in subgraph.nodes
+            if neutron_node.name in SINGLE_INPUT_NODES
+        ]
+        if not candidates:
+            return subgraph_loc
+
+        # Keep only the first candidate whose operation matches the TFLite node.
+        for location, operation in candidates:
+            if operation == node_name:
+                return [location]
+        return []
+
+    def get_edge_to_neutron_map(self) -> dict[int, tuple[int, ...]]:
+        """Map Edge nodes to Neutron nodes.
+
+        :return: Dictionary mapping Edge node handles to sorted tuple of Neutron subgraph indices.
+        """
+        self.get_tflite_to_neutron_map()
+        edge_to_neutron_dict = {}
+
+        for edge_handle, tflite_indices in self.edge_to_tflite_map.items():
+            neutron_nodes = set()
+            for tf_node in tflite_indices:
+                neutron_nodes.update(self.tflite_to_neutron_map.get(tf_node, ()))
+            if neutron_nodes:
+                # Sort so the stored tuple does not depend on set iteration order.
+                edge_to_neutron_dict[edge_handle] = tuple(sorted(neutron_nodes))
+
+        self.edge_to_neutron_map = edge_to_neutron_dict
+        return self.edge_to_neutron_map
+
+    def get_neutron_to_edge_map(self) -> dict[int, tuple[int, ...]]:
+        """
+        Invert the edge-to-neutron map into a neutron-to-edge map.
+
+        The edge-to-neutron map is computed first if it is still empty.
+
+        A non-empty result has one entry per index in 0..neutron_kernels_num; indices without
+        Edge handles map to an empty tuple.
+
+        :return: Dictionary mapping neutron_index to tuple of edge_handles
+        """
+        if not self.edge_to_neutron_map:
+            self.get_edge_to_neutron_map()
+
+        handles_by_neutron_idx = {}
+        for edge_handle, neutron_indices in self.edge_to_neutron_map.items():
+            for neutron_idx in neutron_indices:
+                handles_by_neutron_idx.setdefault(neutron_idx, []).append(edge_handle)
+
+        # Nothing was mapped, so there is nothing to pad either.
+        if not handles_by_neutron_idx:
+            return {}
+
+        # Convert lists to tuples and fill gaps with empty tuples. The range is one longer than
+        # the kernel count: the extra, non-mapped entry stands for the Neutron Dump event.
+        result = {
+            idx: tuple(handles_by_neutron_idx.get(idx, ()))
+            for idx in range(self.neutron_kernels_num + 1)
+        }
+        logging.info(f"Neutron to Edge map was created: {result}")
+        return result
diff --git a/backends/nxp/nxp_backend.py b/backends/nxp/nxp_backend.py
index 1a84a418e92..ee711c34369 100644
--- a/backends/nxp/nxp_backend.py
+++ b/backends/nxp/nxp_backend.py
@@ -11,6 +11,8 @@
 import logging
 import os
 import struct
+import tempfile
+from contextlib import contextmanager
 from typing import final
 
 import numpy as np
@@ -26,6 +28,8 @@
 from executorch.backends.nxp.backend.neutron_converter_manager import (
     NeutronConverterManager,
 )
+
+from executorch.backends.nxp.backend.neutron_map import NeutronMap
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from executorch.backends.nxp.neutron_node_extraction import (
     extract_artifacts_from_neutron_node,
@@ -54,6 +58,7 @@ def __init__(self):
         self.use_neutron_for_format_conversion = True
         self.fetch_constants_to_sram = False
         self.dump_kernel_selection_code = False
+        self.use_profiling = False
 
     def _replace_colons(self, operator: str) -> str:
         """
@@ -70,6 +75,7 @@ def neutron_compile_spec(
         use_neutron_for_format_conversion: bool = True,
         fetch_constants_to_sram: bool = False,
         dump_kernel_selection_code: bool = False,
+        use_profiling: bool = False,
     ) -> "NeutronCompileSpecBuilder":
         """Generate compile spec for Neutron NPU
 
@@ -83,6 +89,7 @@ def neutron_compile_spec(
         :param fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
                                      from FLASH to SRAM. This should be used when the whole model does not fit into SRAM.
         :param dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
+        :param use_profiling: If true Neutron Converter will enable profiling for neutron delegated model
         :return: self for method chaining
         """
 
@@ -106,6 +113,7 @@ def neutron_compile_spec(
         self.use_neutron_for_format_conversion = use_neutron_for_format_conversion
         self.fetch_constants_to_sram = fetch_constants_to_sram
         self.dump_kernel_selection_code = dump_kernel_selection_code
+        self.use_profiling = use_profiling
 
         return self
 
@@ -135,6 +143,10 @@ def build(self):
                     "dump_kernel_selection_code",
                     f"{self.dump_kernel_selection_code}".encode(),
                 ),
+                CompileSpec(
+                    "use_profiling",
+                    f"{self.use_profiling}".encode(),
+                ),
             ]
 
         return self.compile_spec
@@ -149,6 +161,7 @@ def generate_neutron_compile_spec(
     use_neutron_for_format_conversion: bool = True,
     fetch_constants_to_sram: bool = False,
     dump_kernel_selection_code: bool = False,
+    use_profiling: bool = False,
 ) -> list[CompileSpec]:
     return (
         NeutronCompileSpecBuilder()
@@ -160,11 +173,36 @@ def generate_neutron_compile_spec(
             use_neutron_for_format_conversion=use_neutron_for_format_conversion,
             fetch_constants_to_sram=fetch_constants_to_sram,
             dump_kernel_selection_code=dump_kernel_selection_code,
+            use_profiling=use_profiling,
         )
         .build()
     )
 
 
+@contextmanager
+def capture_fd_output():
+    """Redirect OS-level stdout/stderr (fd 1 and fd 2) into a temporary file.
+
+    Yields the binary temp file; it is closed on exit, so read it inside the ``with``.
+    """
+    with tempfile.TemporaryFile() as tmp:
+        # Save original stdout / stderr
+        original_stdout_fd = os.dup(1)
+        original_stderr_fd = os.dup(2)
+        try:
+            # Redirect fd=1 and fd=2 to temp file
+            os.dup2(tmp.fileno(), 1)
+            os.dup2(tmp.fileno(), 2)
+
+            yield tmp  # give access to the temp file
+        finally:
+            # Restore original fds
+            os.dup2(original_stdout_fd, 1)
+            os.dup2(original_stderr_fd, 2)
+            os.close(original_stdout_fd)
+            os.close(original_stderr_fd)
+
+
 @final
 class NeutronBackend(BackendDetails):
 
@@ -185,6 +223,7 @@ def preprocess(  # noqa C901
         use_neutron_for_format_conversion = None
         fetch_constants_to_sram = False
         dump_kernel_selection_code = None
+        use_profiling = False
         for spec in compile_spec:
             if spec.key == "output_format":
                 output_format = spec.value.decode()
@@ -200,6 +239,8 @@ def preprocess(  # noqa C901
                 fetch_constants_to_sram = spec.value.decode() == "True"
             if spec.key == "dump_kernel_selection_code":
                 dump_kernel_selection_code = spec.value.decode() == "True"
+            if spec.key == "use_profiling":
+                use_profiling = spec.value.decode() == "True"
 
         # Check that the output format is set in the compile spec
         if not output_format:
@@ -229,19 +270,32 @@ def preprocess(  # noqa C901
                 if use_neutron_for_format_conversion is not None
                 else {}
             )
-            tflite_model, io_formats = EdgeProgramToIRConverter().convert_program(
+            (
+                tflite_model,
+                io_formats,
+                edge_to_tflite_map,
+            ) = EdgeProgramToIRConverter().convert_program(
                 edge_program,
                 neutron_target_spec=NeutronTargetSpec(target),
                 conversion_config=conversion_config,
                 custom_delegation_options=CustomDelegationOptions(),
             )
 
-            neutron_model = NeutronConverterManager(dump_kernel_selection_code).convert(
-                tflite_model,
-                target,
-                delegation_tag,
-                fetch_constants_to_sram,
-            )
+            with capture_fd_output() as tmp:
+                neutron_model = NeutronConverterManager(
+                    dump_kernel_selection_code
+                ).convert(
+                    tflite_model,
+                    target,
+                    delegation_tag,
+                    fetch_constants_to_sram,
+                    use_profiling,
+                )
+                tmp.seek(0)
+                log_output = tmp.read().decode()
+            # Get mapping from tflite to neutron
+            map = NeutronMap(log_output, edge_to_tflite_map)
+            neutron_to_edge_map = map.get_neutron_to_edge_map()
 
             # Dump the tflite file if intermediates_dir is set
             if intermediates_dir != "None":
@@ -265,7 +319,9 @@ def preprocess(  # noqa C901
         else:
             raise RuntimeError(f"Unknown format {output_format}")
 
-        return PreprocessResult(processed_bytes=binary)
+        return PreprocessResult(
+            processed_bytes=binary, debug_handle_map=neutron_to_edge_map
+        )
 
 
 class PayloadComposer:
diff --git a/backends/nxp/runtime/NeutronBackend.cpp b/backends/nxp/runtime/NeutronBackend.cpp
index 3ea973b7c5b..6fe0482ed89 100644
--- a/backends/nxp/runtime/NeutronBackend.cpp
+++ b/backends/nxp/runtime/NeutronBackend.cpp
@@ -10,6 +10,7 @@
 #include <executorch/runtime/backend/interface.h>
 #include <executorch/runtime/core/error.h>
 #include <executorch/runtime/core/evalue.h>
+#include <executorch/runtime/core/event_tracer_hooks_delegate.h>
 #include <executorch/runtime/core/exec_aten/util/dim_order_util.h>
 
 #include "NeutronDriver.h"
@@ -25,6 +26,8 @@ namespace neutron {
 #define ALIGN_SIZE(size) \
   ((size + BUFFER_ALIGNMENT - 1) & (~(BUFFER_ALIGNMENT - 1)))
 
+#define KOPC_CALLARGS 6 // The operation for TileIR
+
 // clang-format off
 /* Header schema:
      +----------------------------+-----------------------------+------------------------+
@@ -84,6 +87,19 @@ typedef struct {
   const uint8_t* outputMap;
 } NeutronExecutorchConfig;
 
+typedef struct {
+  uint8_t eventCode;
+  uint8_t opCode; // Compared against KOPC_CALLARGS when events are logged.
+  uint8_t functionCode; // Its address is passed as metadata of the logged event.
+  uint8_t timestampCode;
+  uint32_t time; // Passed as the start/end time of the logged event.
+} NeutronSingleProfilingEvent;
+
+typedef struct {
+  NeutronSingleProfilingEvent startEvent;
+  NeutronSingleProfilingEvent stopEvent;
+} NeutronFullProfilingEvent; // The profiling output is read as an array of these.
+
 #ifdef EXTERNAL_MEM
 // Neutron compute has no access to FLASH.
 // Prefetch weights from FLASH to SRAM using memcpy.
@@ -508,12 +524,11 @@ class NeutronBackend final : public PyTorchBackendInterface {
       }
     }
 
-#ifdef NEUTRON_PROFILE
-    // TODO: Use trace from BackendExecutionContext.
-    NeutronTraceConfig trace_config{.traceConfig = 0};
-    neutronSetTrace(cfg->nmh, &trace_config);
+#ifdef ET_EVENT_TRACER_ENABLED
+    // Save ticks before neutron compute to measure how much time profiling dump
+    // takes
+    et_timestamp_t start_ticks = ::executorch::runtime::pal_current_ticks();
 #endif
-
     // Run neutron compute.
     NeutronError neutronRC = neutronRunBlocking(cfg->nmh, &cfg->dcfg);
     if (neutronRC != ENONE) {
@@ -523,6 +538,11 @@ class NeutronBackend final : public PyTorchBackendInterface {
           neutronRC);
       return Error::InvalidProgram;
     }
+#ifdef ET_EVENT_TRACER_ENABLED
+    // Save ticks after neutron compute to measure how much time profiling dump
+    // takes
+    et_timestamp_t stop_ticks = ::executorch::runtime::pal_current_ticks();
+#endif
 
     // Transpose outputs.
     for (int i = 0; i < cfg->numOutputs; i++) {
@@ -558,6 +578,53 @@ class NeutronBackend final : public PyTorchBackendInterface {
         }
       }
     }
+#ifdef ET_EVENT_TRACER_ENABLED
+    // Add traced events only if the model has profiling info.
+    auto profile_size = cfg->profileSize;
+    if (profile_size > 0) {
+      int events_num = static_cast<int>(profile_size / 16);
+      auto profiling_index = cfg->numOutputs + 1;
+      char* profile_info =
+          static_cast<char*>(cfg->dcfg.outputs[profiling_index]);
+      NeutronFullProfilingEvent* neutron_events =
+          (NeutronFullProfilingEvent*)profile_info;
+      executorch::runtime::EventTracer* tracer = context.event_tracer();
+      uint32_t start_time = 0;
+      int index = 0;
+      // Post log neutron events from profiling output.
+      for (int i = 0; i < events_num; i++) {
+        if (start_time == 0) {
+          start_time = neutron_events[i].startEvent.time;
+        }
+        if (neutron_events[i].stopEvent.opCode != KOPC_CALLARGS) {
+          // Only KOPC_CALLARGS events can be mapped to original .pte model.
+          continue;
+        } else {
+          event_tracer_log_profiling_delegate(
+              tracer,
+              nullptr,
+              index,
+              start_time,
+              neutron_events[i].stopEvent.time,
+              static_cast<const void*>(
+                  &neutron_events[i].startEvent.functionCode),
+              sizeof(uint8_t));
+          start_time = 0;
+          index++;
+        }
+      }
+      event_tracer_log_profiling_delegate(
+          tracer,
+          nullptr,
+          index,
+          neutron_events[events_num - 1].startEvent.time,
+          neutron_events[events_num - 1].stopEvent.time + stop_ticks -
+              start_ticks,
+          static_cast<const void*>(
+              &neutron_events[events_num - 1].startEvent.functionCode),
+          sizeof(uint8_t));
+    }
+#endif
 
     return Error::Ok;
   }
diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py
index 44a96010593..1309e019428 100644
--- a/backends/nxp/tests/executorch_pipeline.py
+++ b/backends/nxp/tests/executorch_pipeline.py
@@ -190,6 +190,7 @@ def to_quantized_edge_program(
     use_quant_state_dict: bool = True,
     fetch_constants_to_sram: bool = False,
     dump_kernel_selection_code: bool = False,
+    use_profiling: bool = False,
     delegate_to_npu=True,
 ) -> EdgeProgramManager:
     _neutron_target_spec = NeutronTargetSpec(target)
@@ -223,6 +224,7 @@ def to_quantized_edge_program(
         use_neutron_for_format_conversion=use_neutron_for_format_conversion,
         fetch_constants_to_sram=fetch_constants_to_sram,
         dump_kernel_selection_code=dump_kernel_selection_code,
+        use_profiling=use_profiling,
     )
     post_quant_state_dict = (
         exir_program_aten__module_quant.state_dict() if use_quant_state_dict else None
@@ -244,6 +246,7 @@ def to_quantized_edge_program(
         export(exir_program_aten__module_quant, example_input, strict=True),
         transform_passes=NeutronEdgePassManager(),
         partitioner=partitioners,
+        generate_etrecord=use_profiling,
         compile_config=EdgeCompileConfig(
             _check_ir_validity=False,
             _core_aten_ops_exception_list=core_aten_ops_exception_list,
@@ -274,6 +277,7 @@ def to_quantized_executorch_program(
     use_neutron_for_format_conversion: bool = True,
     dataset_dir: str | None = None,
     delegate_to_npu=True,
+    use_profiling: bool = False,
     operators_not_to_delegate: list[str] = None,
     remove_quant_io_ops: bool = False,
 ) -> ExecutorchProgramManager:
@@ -295,6 +299,7 @@ def to_quantized_executorch_program(
         train_fn=train_fn,
         use_neutron_for_format_conversion=use_neutron_for_format_conversion,
         delegate_to_npu=delegate_to_npu,
+        use_profiling=use_profiling,
         operators_not_to_delegate=operators_not_to_delegate,
         remove_quant_io_ops=remove_quant_io_ops,
         **get_calibration_inputs_fn,
diff --git a/backends/nxp/tests/executors.py b/backends/nxp/tests/executors.py
index 319f372b5fa..94e91a31b95 100644
--- a/backends/nxp/tests/executors.py
+++ b/backends/nxp/tests/executors.py
@@ -325,7 +325,7 @@ def convert_run_compare(
 
     if tfl_model is None:
         NodeFormatInference(edge_program).identify_node_formats()
-        tfl_model, _ = EdgeProgramToIRConverter().convert_program(
+        tfl_model, *_ = EdgeProgramToIRConverter().convert_program(
             edge_program, conversion_config
         )
 
diff --git a/backends/nxp/tests/generic_tests/test_aot_example.py b/backends/nxp/tests/generic_tests/test_aot_example.py
index 893041fe372..8a1e5e49555 100644
--- a/backends/nxp/tests/generic_tests/test_aot_example.py
+++ b/backends/nxp/tests/generic_tests/test_aot_example.py
@@ -2,11 +2,13 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-
+import os
 import subprocess
 import sys
 from pathlib import Path
 
+from executorch.backends.nxp.tests.config_importer import test_config
+
 # noinspection PyProtectedMember
 from executorch.exir._serialize import _deserialize_pte_binary
 from executorch.exir.schema import DelegateCall, KernelCall
@@ -15,9 +17,8 @@
 def test_aot_example__mobilenet_v2():
     """Test that mobilenet can be lowered to Neutron backend via `aot_neutron_compile.py` and all ops are delegated."""
 
-    # Find the executorch root directory (5 levels up from this test file)
-    executorch_root = Path(__file__).parent.parent.parent.parent.parent
-    assert executorch_root.exists(), f"Executorch root not found at {executorch_root}"
+    # Set the executorch root directory.
+    executorch_root = test_config.PROJECT_DIR
 
     # Run the compilation script as a module (like run_aot_example.sh does)
     cmd = [
@@ -34,14 +35,14 @@ def test_aot_example__mobilenet_v2():
     ]
 
     # Output file will be created in executorch_root
-    pte_file = executorch_root / "mobilenetv2_nxp_delegate.pte"
+    pte_file = Path(os.path.join(executorch_root, "mobilenetv2_nxp_delegate.pte"))
 
     try:
         result = subprocess.run(
             cmd,
             capture_output=True,
             text=True,
-            timeout=300,  # 5 minute timeout just in case. On my machine, the test usually runs ~1 minute.
+            timeout=300,  # 5 minute timeout just in case. On 8-core x86 the test usually runs ~1 minute.
             cwd=str(
                 executorch_root
             ),  # Run from executorch root (like run_aot_example.sh)
@@ -95,3 +96,77 @@ def test_aot_example__mobilenet_v2():
         # Clean up the generated file
         if pte_file.exists():
             pte_file.unlink()
+
+
+def test_aot_example__mobilenet_v2__profiling():
+    """Test that mobilenet_v2 can be lowered to Neutron backend via `aot_neutron_compile.py`, all ops are delegated,
+    the output model is profilable and ETRecord is generated properly."""
+
+    # Set the executorch root directory.
+    executorch_root = test_config.PROJECT_DIR
+
+    # Run the compilation script as a module (like run_aot_example.sh does)
+    cmd = [
+        sys.executable,
+        "-m",
+        "examples.nxp.aot_neutron_compile",
+        "--model_name",
+        "mobilenetv2",
+        "--delegate",
+        "--quantize",
+        "--target",
+        "imxrt700",
+        "--remove-quant-io-ops",
+        "--use_channels_last_dim_order",
+        "--use_profiling",  # Generate profilable model and create ETRecord
+        "--use_random_dataset",  # Avoid downloading the dataset.
+    ]
+
+    # Output files will be created in executorch_root.
+    pte_file = Path(
+        os.path.join(executorch_root, "mobilenetv2_nxp_delegate_profile.pte")
+    )
+    etrecord_file = Path(
+        os.path.join(executorch_root, "etrecord", "mobilenetv2_etrecord.bin")
+    )
+
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=300,  # 5 minute timeout just in case. On 8-core x86 the test usually runs ~1 minute.
+            cwd=str(
+                executorch_root
+            ),  # Run from executorch root (like run_aot_example.sh)
+        )
+
+        # Check script ran successfully.
+        assert result.returncode == 0, (
+            f"Script failed with return code {result.returncode}\n"
+            f"STDOUT:\n{result.stdout}\n"
+            f"STDERR:\n{result.stderr}"
+        )
+
+        # Check if delegated model was created and saved.
+        assert pte_file.exists(), f"PTE file not created at {pte_file}"
+
+        # Combine stdout and stderr to capture all subprocess output, including logs.
+        process_output = result.stdout + result.stderr
+
+        # Check if nonempty Neutron to Edge map was created.
+        assert "Neutron to Edge map was created:" in process_output
+
+        # Check if ETRecord was created and saved.
+        assert "The ETRecord for the model was saved to" in process_output
+        assert etrecord_file.exists(), f"ETRecord file not created at {etrecord_file}"
+
+    finally:
+        # Clean up the generated files.
+        if pte_file.exists():
+            pte_file.unlink()
+        if etrecord_file.exists():
+            etrecord_file.unlink()
+            parent = etrecord_file.parent
+            if not any(parent.iterdir()):
+                parent.rmdir()
diff --git a/backends/nxp/tests/generic_tests/test_move_activation_before_concatenation.py b/backends/nxp/tests/generic_tests/test_move_activation_before_concatenation.py
index 27bd675a487..6aa07dbba8d 100644
--- a/backends/nxp/tests/generic_tests/test_move_activation_before_concatenation.py
+++ b/backends/nxp/tests/generic_tests/test_move_activation_before_concatenation.py
@@ -629,7 +629,7 @@ def test_move_activation_before_concat_quantization__conv(
                 "lowered_module" in node.name for node in edge_program.graph.nodes
             )
 
-            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
             input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
                 np.int8
@@ -668,7 +668,7 @@ def test_move_activation_before_concat_quantization__linear(
                 "lowered_module" in node.name for node in edge_program.graph.nodes
             )
 
-            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
             input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
                 np.int8
@@ -706,7 +706,7 @@ def test_move_activation_before_concat_quantization__addmm(
                 "lowered_module" in node.name for node in edge_program.graph.nodes
             )
 
-            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
             input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
                 np.int8
@@ -744,7 +744,7 @@ def test_move_activation_before_concat_quantization__mm(
                 "lowered_module" in node.name for node in edge_program.graph.nodes
             )
 
-            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
             input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
                 np.int8
@@ -788,9 +788,7 @@ def test_concat_cluster_quantization__conv(
                     "lowered_module" in node.name for node in edge_program.graph.nodes
                 )
 
-                tflite_flatbuffers_model, io_formats = converter_spy.calls[
-                    -1
-                ].return_value
+                tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
                 exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
                 exir_program_aten_quant: GraphModule = quantizer_spy.calls[
                     -1
@@ -861,9 +859,7 @@ def test_concat_cluster_quantization__linear(
                     "lowered_module" in node.name for node in edge_program.graph.nodes
                 )
 
-                tflite_flatbuffers_model, io_formats = converter_spy.calls[
-                    -1
-                ].return_value
+                tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
                 exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
                 exir_program_aten_quant: GraphModule = quantizer_spy.calls[
                     -1
diff --git a/backends/nxp/tests/generic_tests/test_neutron_backend_executor.py b/backends/nxp/tests/generic_tests/test_neutron_backend_executor.py
index 8cf7dfe3dc2..52654a482b9 100644
--- a/backends/nxp/tests/generic_tests/test_neutron_backend_executor.py
+++ b/backends/nxp/tests/generic_tests/test_neutron_backend_executor.py
@@ -37,7 +37,7 @@ def test_lowered_program_and_tflite_output_match__conv2d__no_bias(mocker):
     )
 
     # Capture generated model
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
 
     tflite_model = Model.GetRootAs(tflite_flatbuffers_model)
     sub_graph = tflite_model.Subgraphs(0)
@@ -84,7 +84,7 @@ def test_conv_fc__lowered_program_and_tflite_output_match(mocker):
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
 
     # Capture generated model
-    tflite_flatbuffers_model, _ = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
 
     # No Transpose ops in produced TFLite model
     tflite_subgraph = Model.GetRootAs(tflite_flatbuffers_model).Subgraphs(0)
@@ -148,7 +148,7 @@ def test_delegating_format_related_transpose_operators__supported_case(mocker):
     )
 
     # Capture the converted IR model.
-    tflite_flatbuffers_model, _ = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
 
     # Make sure the `Transpose` ops ARE in the IR model.
     tflite_subgraph = Model.GetRootAs(tflite_flatbuffers_model).Subgraphs(0)
diff --git a/backends/nxp/tests/generic_tests/test_neutron_converter_manager.py b/backends/nxp/tests/generic_tests/test_neutron_converter_manager.py
index 0705203db06..359dfdb67e9 100644
--- a/backends/nxp/tests/generic_tests/test_neutron_converter_manager.py
+++ b/backends/nxp/tests/generic_tests/test_neutron_converter_manager.py
@@ -28,7 +28,7 @@ def test_conv2d_neutron_conversion():
 
     NodeFormatInference(edge_program_manager.exported_program()).identify_node_formats()
     edge_program_converter = EdgeProgramToIRConverter()
-    tflite_model, _ = edge_program_converter.convert_program(
+    tflite_model, *_ = edge_program_converter.convert_program(
         edge_program_manager.exported_program()
     )
 
diff --git a/backends/nxp/tests/generic_tests/test_per_channel_conversion.py b/backends/nxp/tests/generic_tests/test_per_channel_conversion.py
index 706d8ed3e14..af9ef08057b 100644
--- a/backends/nxp/tests/generic_tests/test_per_channel_conversion.py
+++ b/backends/nxp/tests/generic_tests/test_per_channel_conversion.py
@@ -153,7 +153,7 @@ def test_per_channel_convolution(self, _, use_qat: bool):
                 use_neutron_for_format_conversion=False,
             )
 
-            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
 
             input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
diff --git a/backends/nxp/tests/generic_tests/test_profiling.py b/backends/nxp/tests/generic_tests/test_profiling.py
new file mode 100644
index 00000000000..c922eb070c3
--- /dev/null
+++ b/backends/nxp/tests/generic_tests/test_profiling.py
@@ -0,0 +1,158 @@
+# Copyright 2026 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import ast
+import logging
+import re
+
+import numpy as np
+import pytest
+import torch
+from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier
+from executorch.backends.nxp.tests.model_output_comparator import (
+    NumericalStatsOutputComparator,
+)
+
+from executorch.backends.nxp.tests.models import AvgPool2dModule, SoftmaxModule
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+
+from executorch.examples.nxp.experimental.cifar_net.cifar_net import CifarNetModel
+
+
+@pytest.fixture(autouse=True)
+def reseed_model_per_test_run():
+    torch.manual_seed(23)
+    np.random.seed(23)
+
+
+PATTERN_NEUTRON_MAP = r"Neutron to Edge map was created: (\{.*\})"
+
+
+def extract_map_from_logs(caplog):
+    for record in caplog.records:
+        msg = record.getMessage()
+        neutron_map_match = re.search(PATTERN_NEUTRON_MAP, msg)
+        if neutron_map_match:
+            dict_str = neutron_map_match.group(1)
+            return ast.literal_eval(dict_str)
+    return None
+
+
+class ParallelPoolModel(torch.nn.Module):
+    def __init__(self, channels: int):
+        super().__init__()
+        self.conv_in = torch.nn.Conv2d(channels, channels, kernel_size=3, padding=1)
+        self.max_pool2d = torch.nn.MaxPool2d(kernel_size=2, stride=2)
+        self.avg_pool2d = torch.nn.AvgPool2d(kernel_size=2, stride=2)
+        self.conv_out = torch.nn.Conv2d(2 * channels, channels, kernel_size=1)
+
+    def forward(self, x):
+        x = self.conv_in(x)
+        x = torch.cat((self.max_pool2d(x), self.avg_pool2d(x)), dim=1)
+        x = self.conv_out(x)
+        return x
+
+
+class TestProfiling:
+    @pytest.mark.xfail(reason="SoftMax support PR is not merged so far.", strict=True)
+    def test__softmax(self, caplog, request):
+        caplog.set_level(logging.INFO)
+        model = SoftmaxModule(-1)
+        lower_run_compare(
+            model,
+            (10,),
+            dlg_model_verifier=BaseGraphVerifier(1, []),
+            request=request,
+            use_profiling=True,
+            output_comparator=NumericalStatsOutputComparator(),
+        )
+
+        # Neutron map for 1D Softmax with input size 10 should contain 4 nodes:
+        # 3 Neutron kernels (pad, softmax, and slice) and 1 unmapped node used for profiling dump
+        neutron_map = extract_map_from_logs(caplog)
+        assert neutron_map == {
+            0: (2,),  # Pad
+            1: (2,),  # Softmax
+            2: (2,),  # Slice
+            3: (),  # Neutron Dump
+        }
+
+    def test__parallel_pool(self, caplog, request):
+        caplog.set_level(logging.INFO)
+        input_shape = (1, 3, 32, 32)
+        model = ParallelPoolModel(input_shape[1])
+        lower_run_compare(
+            model,
+            input_shape,
+            dlg_model_verifier=BaseGraphVerifier(1, []),
+            request=request,
+            output_comparator=NumericalStatsOutputComparator(),
+            use_neutron_for_format_conversion=False,
+            use_profiling=True,
+        )
+        neutron_map = extract_map_from_logs(caplog)
+        assert neutron_map == {
+            0: (6,),  # Conv2DStandardV2
+            1: (),  # Conv2DDepthwiseV2 (AvgPool)
+            2: (7,),  # MaxPool
+            3: (),  # TransposeCHW
+            4: (),  # TransposeCHW
+            5: (),  # TransposeCHW
+            6: (),  # Slice
+            7: (),  # Pad
+            8: (),  # Conv2DPointwise
+            9: (),  # Slice
+            10: (),  # Neutron Dump
+        }
+
+    @pytest.mark.xfail(reason="SoftMax support PR is not merged so far.", strict=True)
+    def test__cifar(self, caplog, request):
+        caplog.set_level(logging.INFO)
+        input_shape = (1, 3, 32, 32)
+        model = CifarNetModel()
+        lower_run_compare(
+            model,
+            input_shape,
+            dlg_model_verifier=BaseGraphVerifier(1, []),
+            request=request,
+            output_comparator=NumericalStatsOutputComparator(),
+            use_neutron_for_format_conversion=False,
+            use_profiling=True,
+        )
+        neutron_map = extract_map_from_logs(caplog)
+        assert neutron_map == {
+            0: (10,),  # Pad
+            1: (10, 11),  # Conv2DStandardV1 (Pad + Conv2d)
+            2: (12,),  # MaxPool
+            3: (13, 14),  # Conv2DStandardV1 (Pad + Conv2d)
+            4: (15,),  # MaxPool
+            5: (16, 17),  # Conv2DStandardV1 (Pad + Conv2d)
+            6: (18,),  # MaxPool
+            7: (20,),  # FullyConnected
+            8: (21,),  # Pad
+            9: (21,),  # Softmax
+            10: (21,),  # Slice
+            11: (),  # Neutron Dump
+        }
+
+    def test__avg_pool(self, caplog, request):
+        caplog.set_level(logging.INFO)
+        input_shape = (2, 9, 6, 15)
+        model = AvgPool2dModule(False, 0)
+        lower_run_compare(
+            model,
+            input_shape,
+            dlg_model_verifier=BaseGraphVerifier(1, []),
+            request=request,
+            output_comparator=NumericalStatsOutputComparator(),
+            use_neutron_for_format_conversion=False,
+            use_profiling=True,
+        )
+        neutron_map = extract_map_from_logs(caplog)
+        assert neutron_map == {
+            0: (2,),  # Pad
+            1: (2,),  # Conv2DDepthwiseDense
+            2: (2,),  # Slice
+            3: (),  # Neutron Dump
+        }
diff --git a/backends/nxp/tests/generic_tests/test_quantizer.py b/backends/nxp/tests/generic_tests/test_quantizer.py
index 3c23241e01e..6180d2fd9ae 100644
--- a/backends/nxp/tests/generic_tests/test_quantizer.py
+++ b/backends/nxp/tests/generic_tests/test_quantizer.py
@@ -432,7 +432,7 @@ def test_quantizer__linear_w_activation(mocker, activation, inplace, use_qat):
     )
     assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
 
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
     exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
 
@@ -477,7 +477,7 @@ def test_quantizer__addmm_w_activation(mocker, activation, inplace, use_qat):
     )
     assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
 
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
     exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
 
@@ -522,7 +522,7 @@ def test_quantizer__mm_w_activation(mocker, activation, inplace, use_qat):
     )
     assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
 
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
     exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
 
@@ -567,7 +567,7 @@ def test_quantizer__conv_w_activation(mocker, activation, inplace, use_qat):
     )
     assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
 
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
     exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py
index a8cdee41830..668deb28c96 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py
@@ -51,7 +51,7 @@ def test_addmm_conversion(self, _, use_qat: bool):
                 "lowered_module" in node.name for node in edge_program.graph.nodes
             )
 
-            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
             input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
                 np.int8
@@ -84,7 +84,7 @@ def test_linear_conversion__with_bias(self, _, use_qat: bool):
                 "lowered_module" in node.name for node in edge_program.graph.nodes
             )
 
-            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
             input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
                 np.int8
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py
index dc442a4931c..466f596bf91 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_bmm_converter.py
@@ -59,7 +59,7 @@ def test_convert_bmm__supported(mocker, input_shape_x1, input_shape_x2, use_qat)
 
     # Verify correct behavior of the converted NeutronIR model.
     intermediate_ep = converter_spy.call_args.args[1]
-    neutron_ir_model, _ = converter_spy.spy_return
+    neutron_ir_model, *_ = converter_spy.spy_return
 
     input_data_1 = (
         np.random.random(input_shape_x1).astype(np.float32) * 256.0 - 128.0
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py
index b4b828cd4e6..5ee3db6752f 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py
@@ -182,7 +182,7 @@ def test_conv_dropout_quant(
                 use_neutron_for_format_conversion=False,
             ).exported_program()
 
-            tflite_flatbuffers_model, _ = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
 
             assert not graph_contains_any(
@@ -241,7 +241,7 @@ def test_clone_pool_view_copy_quant(
                 use_neutron_for_format_conversion=False,
             ).exported_program()
 
-            tflite_flatbuffers_model, _ = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
 
             assert not graph_contains_any(
@@ -311,7 +311,7 @@ def test_clone__to_contiguous_format(self):
         ).identify_node_formats()
 
         # Convert to the IR.
-        converted_model, _ = EdgeProgramToIRConverter().convert_program(
+        converted_model, *_ = EdgeProgramToIRConverter().convert_program(
             edge_program_manager.exported_program()
         )
 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py
index 828647d2113..7105514514a 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py
@@ -177,7 +177,7 @@ def test_conv2d_quant_conversion(mocker, model: torch.nn.Module, input_shape, us
     )
 
     # Capture generated model
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
 
     # Capture converted program
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
@@ -367,7 +367,7 @@ def test_conv_transpose2d_conversion__quantized(
     assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
 
     # Capture generated model
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
 
     # Capture converted program
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py
index 60dbfd1b215..79fffff3b78 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py
@@ -51,7 +51,7 @@ def test_mm_conversion(self, _, use_qat: bool):
                 "lowered_module" in node.name for node in edge_program.graph.nodes
             )
 
-            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
             input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
                 np.int8
@@ -85,7 +85,7 @@ def test_linear_conversion__without_bias(self, _, use_qat: bool):
                 "lowered_module" in node.name for node in edge_program.graph.nodes
             )
 
-            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+            tflite_flatbuffers_model, *_ = converter_spy.calls[-1].return_value
             exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
             input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
                 np.int8
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_neg_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_neg_converter.py
index e0fc0d85066..2e7f9035e8a 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_neg_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_neg_converter.py
@@ -74,7 +74,7 @@ def test_convert_neg(mocker, input_shape):
 
     # Verify correct behavior of the converted NeutronIR model.
     intermediate_ep = converter_spy.call_args.args[1]
-    neutron_ir_model, _ = converter_spy.spy_return
+    neutron_ir_model, *_ = converter_spy.spy_return
 
     input_data = (
         np.random.random(input_shape).astype(np.float32) * 256.0 - 128.0
@@ -105,7 +105,7 @@ def test_convert_neg__channels_last(mocker):
 
     # Verify correct behavior of the converted NeutronIR model.
     intermediate_ep = converter_spy.call_args.args[1]
-    neutron_ir_model, _ = converter_spy.spy_return
+    neutron_ir_model, *_ = converter_spy.spy_return
 
     input_data = (
         np.random.random(input_shape).astype(np.float32) * 256.0 - 128.0
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_prelu_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_prelu_converter.py
index fb25f02785a..c5c7aa55b03 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_prelu_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_prelu_converter.py
@@ -50,7 +50,7 @@ def test_prelu_with_linear_quant_conversion(mocker, input_shape):
     ).exported_program()
 
     # Capture generated entities
-    neutron_ir_model, _ = converter_spy.spy_return
+    neutron_ir_model, *_ = converter_spy.spy_return
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
 
     # Check `prelu` was not decomposed into simpler edge operators
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_softmax_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_softmax_converter.py
index 2621baf18ee..00c10bd257d 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_softmax_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_softmax_converter.py
@@ -85,7 +85,7 @@ def test_softmax_delegation(input_shape, dim: int, mocker):
 
     # Verify correct behavior of the converted NeutronIR model.
     intermediate_ep = converter_spy.call_args.args[1]
-    neutron_ir_model, _ = converter_spy.spy_return
+    neutron_ir_model, *_ = converter_spy.spy_return
     input_data = random_input_data(input_shape)
 
     # Make sure the tested program contains the `softmax`, and its input has the expected rank.
@@ -121,7 +121,7 @@ def test_softmax_delegation__channel_first(input_shape, dim: int, mocker):
 
     # Verify correct behavior of the converted NeutronIR model.
     intermediate_ep = converter_spy.call_args.args[1]
-    neutron_ir_model, _ = converter_spy.spy_return
+    neutron_ir_model, *_ = converter_spy.spy_return
     input_data = random_input_data(input_shape)
 
     # Make sure the tested program contains the `softmax`.
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py
index cb5f398fa21..276b29da142 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py
@@ -265,7 +265,7 @@ def test_view_copy_w_linear_quant_conversion(mocker, input_shape, new_shape, use
     )
 
     # Capture generated model
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
 
     # Capture converted program
     edge_program: ExportedProgram = converter_spy.call_args.args[1]
@@ -299,7 +299,7 @@ def test_view_w_conv_linear_quant_conversion(
     )
 
     # Capture generated model
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
 
     # Capture converted program
     edge_program: ExportedProgram = converter_spy.call_args.args[1]
diff --git a/backends/nxp/tests/ir/edge_passes/test_linear_bn_fusing.py b/backends/nxp/tests/ir/edge_passes/test_linear_bn_fusing.py
index 88ea567381f..aadef8c7731 100644
--- a/backends/nxp/tests/ir/edge_passes/test_linear_bn_fusing.py
+++ b/backends/nxp/tests/ir/edge_passes/test_linear_bn_fusing.py
@@ -251,7 +251,7 @@ def test_linear_bn_full_qat_pipeline_conversion(
     assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
 
     # Capture generated model
-    tflite_flatbuffers_model, _ = converter_spy.spy_return
+    tflite_flatbuffers_model, *_ = converter_spy.spy_return
 
     # Capture converted program
     exported_program: ExportedProgram = converter_spy.call_args.args[1]
diff --git a/backends/nxp/tests/nsys_testing.py b/backends/nxp/tests/nsys_testing.py
index d5ff3680f38..ef6fe9c864c 100644
--- a/backends/nxp/tests/nsys_testing.py
+++ b/backends/nxp/tests/nsys_testing.py
@@ -101,6 +101,8 @@ def _run_delegated_executorch_program(
     mocker,
     use_qat: bool = False,
     train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
+    use_profiling: bool = False,
+    use_neutron_for_format_conversion=True,
     operators_not_to_delegate: list[str] = None,
     remove_quant_io_ops: bool = False,
 ) -> tuple[ExportedProgram, str]:
@@ -129,6 +131,8 @@ def wrapper(*args, **kwargs):
             delegate_to_npu=True,
             use_qat=use_qat,
             train_fn=train_fn,
+            use_profiling=use_profiling,
+            use_neutron_for_format_conversion=use_neutron_for_format_conversion,
             operators_not_to_delegate=operators_not_to_delegate,
             remove_quant_io_ops=remove_quant_io_ops,
         )
@@ -405,6 +409,8 @@ def lower_run_compare(
     reference_model: ReferenceModel = ReferenceModel.QUANTIZED_EXECUTORCH_CPP,
     use_qat: bool = False,
     train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
+    use_profiling: bool = False,
+    use_neutron_for_format_conversion=True,
     operators_not_to_delegate: list[str] = None,
     remove_quant_io_ops: bool = False,
 ):
@@ -424,6 +430,10 @@ def lower_run_compare(
     :param reference_model: Version of the model which will be run to obtain reference output data.
     :param use_qat: If True, applies quantization-aware training before conversion (without the QAT training).
     :param train_fn: Train/finetune function for QAT training. Is used only when `use_qat=True`.
+    :param use_profiling: Enable profiling for neutron delegated model.
+    :param use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
+                                                ensure that the IO matches the executorch partition, which will be
+                                                delegated to Neutron.
     :param operators_not_to_delegate: list of operators not to delegate.
     :param remove_quant_io_ops: If true, IO q-ops are removed and verification is done on quantized
         version of dataset (quantized INT8 input samples).
@@ -468,6 +478,8 @@ def lower_run_compare(
         mocker,
         use_qat=use_qat,
         train_fn=train_fn,
+        use_profiling=use_profiling,
+        use_neutron_for_format_conversion=use_neutron_for_format_conversion,
         operators_not_to_delegate=operators_not_to_delegate,
         remove_quant_io_ops=remove_quant_io_ops,
     )
diff --git a/docs/source/_static/img/nxp/nxp-mcuxpresso-etdump.png b/docs/source/_static/img/nxp/nxp-mcuxpresso-etdump.png
new file mode 100644
index 0000000000000000000000000000000000000000..50ed49f57ec93f279668df064157b6063ec01627
GIT binary patch
literal 44262
zcmagF2Ut^0*EWg;3#bT46BVUOGZg71qM&qCdRKZ2ktQvGqDTkngaA<ll-?7-0E(dp
zA|N#oG=W6AA(T+_$LD#!^Zn;P=R0R#SCXCCW%iymvu54*x+ngj@q_a$msyyYn9l3z
zYCmRTIss;4I;L^v6r&`c+eV!6b1dNTgS$-CLsu3Ve@?n<8EG*w)upl?JY{D5Kl@DA
zI)I7k!nePlV?E$cE=)|7;d<IyW+4u%hoPDFU0G|1Zl%JI?r=Iv7SFq$5`g`@o*zEc
z7PRJP>Ixg07JvF|y>()2#(%w@{oRah=M$g)<li$RxyQwWS)83uD7t=LIjQ$V$hDL|
zjaP|R_{;MfuZpEllr|JxSIyy{-X$fuRmEA=<;}W~@MY^gxv$zRv@yBt-XL+!K`U>z
z)wvG%(E%Aa`u)vkNqUFCTPwyx|J{B*)QU$+L=PTD0lx$M{w-tra`y#>|3CZZn0isi
z{@wq=^j(JpVlz@y`u^s;lTluNfjs$qK*J9+*KyI780^f7utHX<sKU2x5nZXno?Fv$
zn~JTCyKhx4gvPCvN9AgM4XRw2kS?{Zc+}u}A#CXOSnL~*Pvf_HzR1~=L(5EHAC7*d
zl?FJD1w$LeT^GzBYO4AeREZ|1D0<RaNzJBc>>ru2?j2KP^*@huDmg_S++f7*6Q1-l
z4<_HU_wWW`P>sDdHU=g+No)QT7MJY&F1PS!7xm<<>jgEpR(#qEkMSj`f%4Ad7YNe#
zDy)|cC~hqk4pKw?{fWOzt7K+*OZPTe?c=L00y6KzF8cnhM)~&Qlsq_~VKpA9kzurv
z4svw@+;LhIOOWSh8#YofC0V=Qt+wVT=+vszHfsBvAAakCne^Hc6^U=L$nYYeybIZC
zB9aFlQ%X-JnmW3|Y0agNqZAD+l~%2<*EgFT^3~m>Bw*b}^OamzT`v^S(13k?=O--P
zU}DYK;gMR}6N2R8=##@9sY-sKmf)p{k{vxzwC`i1g7DaNNs)Im`(h7eL5axmpuol2
zZDdL4Rgm9ahveA^9*}I1y1{|+g2a9KYkc1#O?7^2ZJ#Hc|NBm{a&4g`qt})J*{I~y
zUx+@E@RWAJMK18l2Z>?jy%(Kb0VZ-_!R-3Z4}G2JjI)AnR$rRu-SOJAXV!#wUB{D|
z<1<1zUlQUq*gfK(ark{g`XMqujkKd<K|;rk&q{nueJ=OX3-;;!+k%@>DKZlL9UN9e
zydnsRP6=UAeAr8FSAkMk>6W66$ZxNSVWFj){*F`OWww;kT0$bDJxZ8GdWdl~`t4b@
zqkbYdsSMtaWwMxwI@YaIl)uCA#XabnJXoq#R3{hu%`O^q8EqST^Zg~3tPnS&_?xCW
zy<7&>7aQWR&(W}|WuL`B9J+9LfQAu@(@A4b?IHq_I?g*0jD}qI$1R#tF%g_`4fXy8
zA}_PVCkUX#VCic`u=|JZa+13#==qT=tB_@vim7*xxT8kxfy$@<ww_FGU{<Ip;L1}i
zlIx6r8c%*P!3R74X1g$r^%?QE7z<A`RRnKUWEQFk>PtYf{H>6!cSh-z1ilzjh=%=V
zWL<Do#h?a<SBmLx`u;$bLSO0yjQ2JA?VdtLyR77Ir^kEW`&?$Vm)6Pb3qZrpWL^It
z`Wy73OTU-vVSSq{*UK@4kxkub-Z->+8*R&mFq%fBfihf)j*kgdO#|wuCm79nFa^pO
zq1SeR9)n-qwpB4dCy<i(+0Hcvl=1g9m}b9QlZ@(J)~G(+5K0Mip|rR0DJ)txy~Uuv
zO(S#Rlky&m0~3x%<t8x(Ty(xu&Ur}QX!Uxh2&_&ndcb$exz3cRU#;#L|3t6fzOEbX
zU{$ol_lT5jfSZrGVPtP&7|lT`9YSV4Nyqxz02{bz9tLWz`#W8hM>NnCCojBVlG7@S
zGIZa8{`&4lCx<LMaDyWaqtUtZc3&>7G)~Rdx69JQ0%9)7=4h3U@8GUh%0smMU{TSE
z*Rai|a`H-TP&zsOzQ>E9#Um(E=dP2Ai(kF%tGhlp7bQbA=$-G<_fdmX>9IsO%fsUr
zJE;Hmoy_XOP+P-2k(92Y2`3NDM^h3Ri%X*33wxAEmSce_RMTVA^|rF%ciHOy8U9SM
zs{fn&W7y8hZr)W)QIe-=383(=TYMt^>i^wY{trI%e=oLo!Eb+e2{-|oygpvsH>DDQ
zmr*h>GIs-|+ZJO6SKn(iWc+Xu7_0>`k0^h6{Bf+ZOv&WJ(0aOwsXab(stY_VEiBjZ
z4bQKc>~{b_U0?V>*?50hVIdaxgU$W4Ym=&x^pDh5BP9>z-yIeLj+1kl0ZtEr*V}(Y
zVpDpQ<kV4tJ!IK?$!YAD9aN05z>eb6EJc{_tqTLRJ#djbJRG0DEav8+(W6u!M*(S)
zc)ivlveRpNUJeFgmPA)0wnICWPQBRuNW|cLGMI7Kgjz`?Or54#TnN>8zBZQm3L1Ck
z`#ke5!(FoTCQ9R{`GKs>$JoKq7;x2;KApg&MT%3hRkRl%Ex+?tc<`WXN=~|T`8I~<
zruB~sfd$g8%{%jUwDPpeqxY#+-sjYMSRZ(IqS+TioEqz%P-Pv>_#8V@o;h*#^i&Lb
zQR-gUcI^Ps{PN!dl=QbK;trMAIf!AU!-G+&`=pB4!Ih};`Y7&a6=J`4+b@BSvnr>o
zw<r>D5KLHJ*mlbrIoM63$_n|aFm!0=c0(j{6^?R_`QtGr>K$xhC=3=8vYe1DwLOc`
z%E)<lij4f8`QleheYGXAtD*Ywq{CO%>DHkq;>4KHwo+Uyv}&=H^0a__zzqYG+WZJS
zn3(LaUD?a`9d81foLlJmsIP-kC{QlCTV5jX(9&_<@}Yy@kv+A0y@}^NGyzH2T?iX_
z)AIor-~)-6-Sr$7CVmZgN*>IdYSH^eDO)*%Ogv)Fb1Y0yN^#F54(+l|a)GbC4{@%1
zHw~5x-V$$n#{E>JM?s{zwCskg3K5=1@YCl`#2VFoBYpZ&GfZn%0e1t}ipL?8?zj-2
zW~cQ1r@+0v-mUZU0T8<4$T{cab)A-2$0w%+t%NJG<ALcf6B1zKCzJCkv%H<V9l6Zl
zS6*$ZjQMy(HOVRfS>)S=k7W7aJSUBMX+F=N8FnkJC4+tR0X`r3h4dU}lT#0wSDoXq
z?3|V!O_Yemjy0ZPr@Abq$Q1CHu5kNRV@P@1wVm(Z^>4u*&z)cgSH+u4EPE_t{~YL|
zB+Y%)*Ch;;?mHi7T)lSf!&SAV+x>@qEmvpSTl5dA&30-q83}0v^G8*3x0B=aIN>x`
zM~;k?mx&^gwnImj8?+s0d;_KC3O{k(WSX8ru3+Ia@^*_R$I)k?{dgXZxmFp?!dF?a
zZBdHIf<(KK9%N}xiYOO9P*-;qYyNrr?vD<3#j3$ZRZ`Svm`eKgfMIz15ia;)L>4#r
zv|>%Et9o`*=@F)N0fLG6Gg%m3h%1wtw6U?x*msf!%{fdEe+A%L4$(^skVp~G^zH7Q
znD|!%imMUrdo<<tt%?=_&N&-SM?t)s;%Hb3P<*cWm9zX1+aH=p25(Z->hlL4Tup3~
z5DbeKTHD5RdzA^f&_YycJ@VyFqv$ieVA&)=mDs%otv7_NnnCp{dWw(8$0(&9<I81M
z7q!pTzR-Pmywe3fgMCxKEts@4YJ6UtN9&2PxZivNJNw0#syF-96|TN7xkyLRw$yJ9
z_a)w6`y|3__u$ZxE34<DcC0BdDb+hj3a?Lw_X%_)DI^>1J-YiNPU-!#$ic15Z``c6
z)r>X7S`*GFi`*>GtfvHu;nO>2vOHv&kFCZjSsd$AVYeK)B~Ge;bgfpE`@I5&Nbd4U
zjOqJRi}zQtH^IfdFpVC#+L19uMf01ayD(Qxa^xkSYMS{;+f%%}s2sFkrv1H1__;O8
zmM$f88A_8(;J-HkRf>-%mnnApt&yF{`|@dOni0l5Sy$nM#hd<+w4%@l-Rtq@?D9jj
zm;(E#+iInX+h`7*0MH}W~I_{_WRMxkxD%Z&+SEiOqvWqsm);m1SH`nE=6wQb5F
z?x*7YFUGaCyjlAb4oqtw3Jiau%gv?owil0fizK5`YLk<ruBOf<N4@R<s43r9k#F$j
za_mXyRDR;-bTzd^Z$G^bO|b2z@cj~9SRNmalq`LHUGqH*WBY@y%I!GRa)9BVupgXo
zB;Dt__K@1(6R19wee<~QY^IQ?`}puue1y%<IAxpeiKz7e$l*j?ZN=zwZnx283+mPc
zNU`AszpGZu#NvlA(TA`3dwPyCXyXm<0tp!br!V|zbIBj9bR3ERE}yL=U~DG~nWxR9
z61(dvtsbZq#_y)po8OK^3MFV<$20xrnsvup6&oeU$j7$KyArPBX@I;CH!&f(5UtYX
zn7d__ZQPG_0!8q}Q8jh<(XnJfg6SxXrdmokAg2Ji6KOsVP*bgt$X>HRoC{hugEGZT
zFd!_tl8oZd2UMG(#NQWVd!|20=M6bMdN#1oaA+BvwVS!}qPp}@a(KzXR<>Yj^dwZJ
zeSA$m{7?{I)E*$`c<-L`?X1e8XiUD#D#?>!$Gd2EODHlQlb7Tu=ZFZcw(^h3)qx~5
zD~K6%!Kwzx4=#s=zsKE=haDc>)ga2VQ9Am0RIMqw4z}Qf`&TC0Ie4TrM`Rm9;$2E-
zw)xE9Hnr^+ld%-Q0@p#X6n-*Fs)2hC`){OpohUnnu_y~^2--dHZ$H9rgdzt?V-;n6
zgn@OzKhPe`UI?UOls2zCh&eyppOxO?a}TA1BfCswD|vv;P<jF?)+$VQOco`miei7m
zLJOm?K2~G5LHE;fN~LzFGt$<R2BZX@4TtRV#<p94bX9ZoFeDM_OI*(PqDFbdA)QpG
z6vaPs-$jW;#IFxO%;lq-do)c7^n9Hto|6On%{_;3Mc^p$KV?ANrl_D)m!XgYk&2%p
zl6Z-v4l7jC15qk>GT>tD)6myL^5Aq8$tX?GVgi!SrUSy249+q9SnV8P$q5#9CN%-y
z;Pwe7$0(=nIDUUh0;RZ#9V!{V!J_(D4f7fn<2CHsk3o@kn~Gl!vYMr;$K)S+9RoNy
z_i~<dU0qO-L`E7QnCsa-(~yb5TcLSf=(moJ&zN8PtXG^bu{~em3p*EKQ;sH|&H7X`
zgfz+?gueww#*aDh-)INP1xHH8BX{Q%#VO4nw-DL^Pd-WEU2qOVi2HmUsDj5pYAK6k
z98wWslt=uvXU2gIyw|^LCE)fQCsnX5=#WooE-F!|lDE%-pOrV{Jtm=3b#@zQw}WiW
zN|VIrQx=*t+U1Evozf~Gm?&uv-F^do<8wkP^8FjhcL%g`5G>Pg;Jy032)0qg`l~*n
z>NHMtF#Eyoz3VfI)`~|CamXTfd_83sC+f;iD5qR)p`7Iue75cR=)2R)X0Kx7ef^VA
zSGQTOg*i^;WBPaMqRRZAFs7A4Y&intZ*I!^>4ZGYT(qvVFw}HNA`TfkqgeL(3af<a
zElM9Xe9Mp|9dU2|<)GA=oUx`eoLbzVIU*=yqbOrR=yv1w9%jIpiD`A%nj{S=9L%j5
zZ2Z#TI;cm7Hfv-|ZSy-pTdoLam*~(sFE|)d36I9h>uV9@F((IZ8cCT;W_MKqHF>@t
z=DR%<Ct}~7=(M$Be*KfSjL2$B%BcD#z)6^kdj*FGFiaM?0c`go^7sI+z*LB)W)6x`
zBE(>_31YR6)xb<csNbKAgbpR28%i?j79nO=-4T3}`OFNU<p&9&0-m)@Z+E&5ckmMz
z5CRJm@_i0a{VSHO(y})>DGOP3evmbUI$u=1^rQZegOQKGNN&zj0rm?^I${Ubh4$X#
zR=l4mYMPZ%><~oM7RLn@ae{B|n*wKc<v!)HEhG3b?-a#PYxZ;&3QXDYZH~n}$o5Bp
zWag9lt!PkUmk0fq!iHmrQ+E#L6;QJ;p^rAQ@h(bUb1XJ9d4N{=f>@sq3SC5CXteT_
zS_T!+pEMVr8j}@wi+}Edl4j`rTac&m`vJ#%TXs6%`fafZdii!+<r~(85H;s@1pVpL
zJO#HOSkD;t89clq02)j^W7rWTDXJhBdFaHU(4HmfDU#~wz#`Z=SZE122F)hdZ~AR?
zX51}Dfgaa;i%{yWZxD#P`VKaojoJ^V_YEOdvR3i_^wVg1{SW%Kwr*MH3adf33XtbX
z|Jb1jK=2nunz--0P-=9$n=Zf(_ePF3Kn`;nYBO@6E1*(<DxYIlr(e&7_BSc1ZN_w0
z2CC|$Ou<<%zHiR&DXm_dPFAN&Y37W15NAEz)?c{%S=oL(24t17MeOG0Z(H9OUt~3y
zS7}`k@?Koaue{3iPIVSYMd)rCTw#(rSqI99<zRd<onv8q&&qs|ds)!-%;m=K9U6Du
zB%<$S$p<)#@||VN%3C4OvrO-<wuX^FvT0UK%9p+axvtX*{TANW{OMk0Q4(KL=G;b@
zc+a^cei-^)8$|gwS1qt*$`rA!^R_Czj}<TjKdy{Ulg`k)-wckTeb)FRG|MD)>;I0V
z7|^Yjw|BsJCBWekKQ(gg=i66gu_Jk=@sLP~w#^SFX*tPKfXj0>(}nvu9|r2@a>4(F
zdyMS~(;d=3L<VF36x07l@jLA9#?9fZ{W2mCPay8v-)j%Gc%Vp)+fy@l_!Z+6JpG>!
z<tt$ZADd=b!{)hfiL4sDwaVrgw4r5}T^?Jlku#ZiRez&=3^sq{Mh%<=SX#ih`@Omy
zE3BzYAw7j9z;gx3*xW@SVd{qwyVb6HK0!s{usy2nF5Lm@49*HG4bgh$q)S+0SK6>b
zuV<}60^_z^SCeR)NVfgZ3W_*!eM09bIaxC~mAnr6=62Mr(tho9$ok3s9KYb-NZhtu
zdrmK{J8tbMa3fT+yTt=@+rvehUP$Pt;E$R&4V_=#e`7g9YhRm7P#(4FYF}B`tS+v*
zx)LC6_X--+h#8`nwy6L&)#;c@TjPrE7P<rdegVq-apY;x+qtB(OuzY6hfsrx*3VXD
zqrH8Ht}Ne+iBnpCZQ{Al;d8|x*0Y>#%r)7?<55E)%XM}u3y2={O#P;ES51!GV7r$>
zPgF%;4~oT9U{sMXu~hf)g~IH1G%YS(i5>PUbX$mU=d92zmYCQxs+PCobr+lB%J1^V
z4rD2$3_yH&JaJ7S87eM7>J2Z~_SG$A=gic)P7hZ0EV(E-ko(Z+K|x=18Gna+%Cmjr
zD1qE{4fc=vwG?F?6exFuXs_QWYBB5)5M_oks`UCE7+bRnw`xkr+y89WJb-1XEC(zK
zbod%omYNSnJfbMpg<Ul|UrS?uI+^PqIeD3hNwwG=iy9n6h~e+b{bO+M_aVfNUMn-}
z^O@|fDxGDzC<WM7W3mw5&W^(E)Me`+z}+|E3W@tu8=Y)~hDo}~73nQCywm+jl(%Zu
zRm+<FS^p4ix9Cz*Z1+0liF|}b{^W|eVX;mZEPi$mK}x{NWrh&<`gmz{r96sdJOI-y
z?kw<2_2;?1lxNnsM97*Nfcjh{kzFOK1e07Q6tN?i@!Ma7_4<OXlgtF5PvRLwf-2-H
zcpc-Jh<Pt<f&pRc5<b<t66l4!SK{xaZ%pZ0-?-xVh-4(^7)qI{mrvs5xN)=RJ-lcI
zihD!9IaXI26?`Rh6+{PLb<TJ$stCRia)kbvFl?778P<H>wBGdFw|i#%a}P`hPbKR=
zFH^j4Y9ZC+_DMGH=$Z}4Cx1Kqgy*ofv)S(Vg^g5m;-)&1M@zAj49eDb?+kSObyrma
z?;n|1RZyA30X8L?<;dzLN%na3L~1BroqHy#sdT~v+j7WO^@F&~EWez+sHp_q&7tTg
zVfpLO)DGXQ_WL6{{dGn!<K}lAbKICMQ>u!mJyhxG=KMHH{$-Qt34HIt1GMcBxXn6h
z%ww3O3&Ga$nqewGEDYph9DmZjkjC`fWa_eDzpZgcOjxX!#;pbCRncj0^3dbF2(>#f
zOMtt8x99#W*1R3bU_Y<@!+zTQ@9d}T8>@YeBENID(=f1%-Hnr`202~Y7K~mox=k;L
zM{WYDA(U*c_u29xk3Ml9_y``Kk;)d!YqmqDNTz7#hB_rL=YJ(jA=;_}0LU!nPdfb?
zbzN|I!DI^I%x^`iC}qa`qY)waz&yc(G{|R~l+wj7R5#mu*pJc&5_(v!us_q6!O{3s
zCMwQl6zqhr$o5b;Qf}CN(bW*SgQ@Epf~#U>+moF$+)+}3=hlPx{R-rF?S6MSQ@Sq=
z!p{`T3sI(gQT{K1J+CdEZYSfc*q^5+73}-f<h;#^Fg>2)oTm^V>Ljz`I9;AA61nD%
z7?pCkRv+Y-OFy^_D>{_&(UxhnQGF4A{~M3DAnCGEiV1wQC=oa1Oi`FNL?>q|m0qpu
zTBs1@LwpL656lU5rhIkshbI|4bZShC8`Y<jVQf`P3qK%kjay9`E-KINIwDiH8+i3D
z%Jxv#ht?$u=@ESg4S>QZ$><eY`u7%fO*%Cr)Vq#mGdA=7<i|$fbxPG`ldc>s?Yt%6
z8N;C(n4D8u%(NFbk+}bu(maJR8PP}2w4N;#m;l0UhgPj}V~FE-7+Hz(;RsT4%tY}S
z`G8yl2hnP;eWrL<&l^QzTo0C@$i=_~ks_%YOo<`NdU!mq1_PtHQnF3+5~bS5I%xdV
z*XPs{UE9%ivJ0*xTEn2v*(@}>vyOI>Z<lRunZI-SB{=59K9nu9IWyGx*9|_!;I)(N
zUOJSW<5Sg(_P_@a>tbwfh`~=@9jj(Wc+k*pzpP!{^>#P(>^|qOOR$cUVw%Y1a)QxP
z*N~+Dbn|>u%`_dAF7zqW<7~sV{(Q_8KoWmN{$Idj=w@KW=wGvni`{18?aUX!yHxlf
zr0-x|7j5<yYTHL$S0^GT2}WJWFeuO5g?PZTY%x5Ca5%yzvwKJNxM4x!AX{Rs`(cPq
zzQXe-%fd|W)LQ!)4$fFY%R<^&6mS!&09ROT+VLY=U7mNZJZUM@RJx2(j-Qa`0w?__
zQ&Qsr#{}NMS6Zc2CfcAFlfapV{%2%D4Fh}}O5vqAV&g8dE7t1_)n6n?f3@p)E(d1u
z&0+UFH6_Qlm&ayyYT%5{=ks!3;{oSNu2O`jkdxWiBE?(jTA#&w^R}8oOgk&}0A}_S
zgpJhkJ!z`q7lP5*2E{!E>YNEwDfPtZ-(Oz4oT_lgUy}V*dhw~U7WQ<z(_ia`X~ja3
zDtQGyVc@uPaO(`7x3}F#Im4SF_a47TMeuP9j1;woaLy}lm-a{yjJ8B<uaobgvXX?@
ztTR75>7*<SW@gv(<i;uc$GqlvVH`fuA(0`ahj!$RO_cm%rm_7Cv><iI6*0M@h<NzU
zA|Vx*9gp-y7_p2vG%d8_yQ%K8X48s&ev-cgSEpATA$?m7gDw}$=r0AJA3ud;ge1$<
zH%C`3*-Ct7fHz^>Wsz&awxyhKjfHNq8~Vp&i|@*m%Gavp8>>#_6Hcq)g7{Kb98u<c
zvJr*QSn%eJ9Ys_p%+{sQk^R9|*`Xe)9^~JcHXf{F?d^6~W7K>Xs`N_oi*|m}g$d6N
z%Q~I&S?|-`yC7*izA!V{r13UThA$-D6N9!C#fM;)S}N<V?m|BZ`gg%DnM3Kf9i|jp
z^htC1Zz=IN)9-1tvzMDkkOpPy!W0yCG~7vvhp9h2;bOTqjlNTI8uXXFI1GqitvD*e
z0%4>Nsas8ZM9F6NZGQr@?pf^~AQj*)fnfR8CIdOi-@i|<N=YOhFtVIMsspGPYy%&)
zD~H<yQK_=1K59`GAuEw0@|rFYY3U)mX2L8SZBQ6)kg5|{vYO1tMamZGG={Viq^SWX
z+nFSj0N3|A(*>KWrE;@W2xyjTi8~_~G;7eW#Z3v)H+K-#6!&%48|s0(KFsOrsX2{7
zn88NtM<xAt>CBVO3o#-ss|EzqfTuVcPQMR{oUL47!4!Xa@Rbf8XPHR8kB!N$?fnl4
z(#?_EyOLiM%D-3kJ_f9u8JLRwJifW~3_1gRdaA;fA}V+@JG23CNzX}MR+vJu69t&1
z)ipdb#nvpGX_ndNhQW)FTCGz`N^WCsY8o_#mVw=kVT-!S-8)UNg1o20oS`>*Gc)1|
zXY>3Vbmhz{9#s`;5=E`+!rDlg)Z_Rm#lD1>mp{09!!na6vcCT~?l~QIYgAFz@wK>?
z2Qkn=IbD$0n}D4Ec>H-18HuFm9Xxg;GytR@5wIzPmlJPW8j2h?9%h`BlF)ZN(@9-N
z{c-kK(6{NMf*oz->b$Ow^~~>0`d@p+AFD3p-t(lVC*VEfz}3~cxoq)U+O;aw9KMfN
zW9N(~QDIjVqG<>*oT!dj{O^E$ZlbCi?gL8Pe@ge^T6^v53pbDDpnhnRVr#GtWX*@i
zYk$Uw$S;?qHU;i9*t|Q=bm!VXKtfmQok{$m856HZW{4zv#p93q&shUae7CCvXA#(u
zqWIybmtE_I)!^?`88{`y<;e6a^IkmRA5@W3^AD<EDh}Mj;G9MPelL`v0*j}#;p*cM
zna#bB!i$fxT>_(hERH*RPc2P&cCWwvY1*@!F|p<c9L;-(J2T1?_kvlc&FnQ9C{}TT
z8=79pNKrSQkOMQIjfKbH1YIl2ImYTE{huV=leeKKp0uB39)}3LIug9}WcJz#M)3dh
zEi+W<FA896yiAN!{@s0XjOqWsxV0Cx`o52JYPwddQZJ(+fP{MLo1IsVnHEMqgMMHn
zYV-<7^HT6K<I6pdop?ZeVf^;fklrh?>e|6$KtuRX)$xsG?$O|mZ=%2zjoZbQ<HRqe
z%LJqnv7mY)UAfEsbB9i~)8aP)nW*B$8Otqul8?T1C|4$%XCjXn0vq`1O>FGejx&T_
z)CTcymA(66uxq)4--h%2<*QjLm1QcMk>{)<#XyaE`=1YET(-i!zpV%Wvl%GqN#t*3
zsv^$iE25{jszoKBl8s9ckK)t}XfW~`DqRTw;TN<kT6e|Y(BuyxG<r=g_Nvg`GB)Pd
z-@P_EB_6mccn2-rUMh?mV`Q_h+*9&(fgGytblPt@`B_*vpzV{9*qRXHvb-0wafF1V
zOn?9B7vpG;>X1BM(D%*j7s@mF@~jl}xBOb%uBL!XwJJjJ99msH$%{Ow(EO)Qr*p&F
zEB1>N>h$de&ipkDRfmym;glUu31L<F(PiuBM!eBl=GL#iY}l*deGi(HHe|e5IT6@k
ztk5Ub;#8(!6y*C+-ngCyIwe@DV<aYNPQ*6ZnC8Si`4xaLLDsze6v8>|a?m=Sa_TVw
z)Cx_}>UL`_<A7_Bp?r>N=x+D@RO@7(XjkDK<HOcX%`&6!M&<lu{I7lec)-rlS#Ie6
zoLPUJZ(8kio0uk>$p(DdDr*?Tt_T5~3VvHui8|w{CN<#<0U9arOC8_c`=fE&)pGCP
z&?9L{b*VKuc{I|5xzVFYR{z_Gan|yBY#ckgi+`j~Lf6R&vj~cx)HiM}Mh-+fvsF>_
zF4|07jC-y*!9;fUU0O5-e)ScOei0dkx9}Ov#F(g5LC<bK`baWE(B<4NSA1%jgR=F#
zRVW>$y}sr$$6$Jym@rEJ2D|c;+q}LoALDeRA0@ioL%y)eTbn22UBw?ahpg$5l658@
z8lN%%+0X0nSL9|_`i5$iA7O7VYjx|Pudpg7a*No_T}uQ!n9*$!Lo}o6|A_)Kx`bJW
zZr|bdFYEXx1l*4vEb)NMzSEhCT`cu%OSU-MIvunbr<yQhx$VI6w=I=oxE#x#j7Fyu
z{m_<-<a3tf(~rck;@z~)JwB@^H=jCRkk8D@d5^f&zvG-hOEn5&o7(p*8h)28cG3!?
z(5P%0s~_m6W?CZBZgsPHL4vb5g_pmA_&6>>>7Ei4iK40deVgBcqXA^=Psu4MBg$$G
zV=j^Eb)rr~Q26uQwAIuDcWflT4|5U0sGD3SmE!4=nYTa9-llnxA@*S+!T!n$M*fu*
zB*^*IuljS~L}NmEz!HgB4O@T*4;K}OIzEY!63VX2lU)$6yLiBi47;ykqBwp&A6PS_
zzHzsf#g*W51C7fT5BL-k=X-ez-d8D#ua`CdpflKg(@~&(Xjmu0I+WS}WY(FNRwQo^
z52-;6rC`~$(94d5uA3xONog89qO(9*x$hA7^XonZs8GL2eEbln11XCvKU?KRNG3lU
zJ2R#>qgqy2=IjR?i=q8>V*S7Wn|${FhkPcV5e>KGVpw=2=bvy(+n}OrcGH$r1w`W@
zbuMUItx)zV(oIJo_i;$_{m@7F%7%-6h29zdA}Rj;l4)Z2%s-w&iN);#=P#4O%N*sv
z*Ej@N(-@&&Lhduu-t4}idLLOv1i*}Kh!0^!8p+MZDINRAiBgSXp-Khf0dq2Znp9DM
z%vnd>`mbd_+y1;hT!qg3WZ0Kz6^8j7!g<4@79?k)+-J+}YU^@MFlEWO+z4URzip5x
z*>5wdnlaHHV!)gm_pR~_h%!0vxY<Bt)KktI4az8?NOMufr(FwQEmwt@9<3ns7NaKT
z6wBrmL$7KGM(CgWAmD2-!*zq;W5vBW^W3Izmasfuy*j{nFG+l($<*^)<Zn2ytDiX5
z3+g<Tt=vDyW;@5FBVDN0H*X$E8oVB%Yw;){ko(g$^^UQk?l_l;!0rK&%Z@^7g)>Jw
z&F4RN4)XZO_Z_@GUoko;59V-m76gewn^y+(YC#N1!X5hPf04Ouf5}`Z{J4y*$mm8n
zpL#O7zVRn>ZA98DIR~2P^bIL_lKSY63sXGVaqj|<@}AV?#>^K6{nol=?nJ{fm#!rT
z6;>h3{ZH<ge7Qah#a;R>;V@Rv@a$b%?mbH@V5Hh%Jv(>g{Y)JkPq#Z74)Pv^ztgv3
zpE8gGeD-q?OvejB*jfObo(atj3l@+Vw=T=3jXyCWQ#r}c+J+lfODuZN>aR@IBZo_f
zY<bb;zd>nUH~~jsP(d5U#yXUJ^hcaeVkL|x$%D3k<7m}D?Z=K##2dqUr;S71fmfjT
zjimfp#7$RXP_;L6{@R?cr(yE4%87^31KoVH+r5$gUAB)Ph=SD#o23Vlci%B8RtF0H
ztfz8z^L{uDhPh~eF9q1gqx$E3O3<tZjBdOs|6g?FiL;6CA|vfJcyA|#P!UGQWvn&{
zACG4;q1XS4(Q+UBhnl=YMi(AC@eVX21+QPRFl5>*yyh~?xY3K4|6xXdqUVCQI08r2
ztdPc=WO&b=diC{*jMc<8Obq^EPIK${xp7N`upIaz_&>*L9WJB!D+Q$cP#RQ>d?2-k
zu4@8341E7AKxfqHIOU(q8FzsJUWL5c`XB;Do;PJ1t-!XY_%?f2n|;|j@!xmam5Wod
z0_JgmJ3^PN_qK#OKGJvg+YRZU!wJvlSn%%;#JP8LN{Y$ISvXCw?e{C-_DMS+)Bjn8
zIAxo@*CUzPcChLFyMVvr0X>U(2CxSGPz9Va_~_I6CM9HNs_@0=%Ks3n#C?q{hR8YO
zokrV&R~s$Q4S#>@O)ofFt27AO<pQ7SfAhO!T|KSZZyF5w=vci{;2!P@P&leqS?&lA
zLOElP)Cv&t08Qu8)bGaOI@1<YTx`RMv2hLuL$-_44TX>4bAjW=rje2b!mi^Ee!<hX
zyh<Z|+v+#))8sjEKcJ34a1YMeV2>7wg1ld$uKU;EI&)_L06tOG#f^~Nkp^2&=~T<L
zIrvIy7F)J`_;v<q&)!nC5ZkfL97eTPl8X1k!YPNc8^_z`7lKDqce?titE}te)P17E
z(E8Bl-!&?Q^H^|Q4p2KMGPNeSGV%$>s6i~eD4^Z8>gdU|@ilVB<_Nn65DeLOXx+qo
z^pK7c3k#E0ZIToy&XFFS_>;`<`%a)K)G-J&LwCGIKALM!MC9)Z_Xzr7%9XF<pB#GV
zqd=%5a-sFHU~&!Hh&fQ@4tW2^#HS<ZK-+)mU?%@d2P4Vl(mK0RC=nEK9UlgAADuhj
z_EJ%_CuhWS-H`e187ay3V#AZxVflHEfTrY?<&v6b2aMKFH8F$FDiU_qtG@@`J@z~g
z=U@myqhWD_e-4H|FB%>2yt1M3-v~}$?5X(1I*#pVNVw?q(&8pV#u)A`5Y=^%pi~$0
z+gWp!SObaEocncG=cFAe(t4zub<^@fcF4Z=*DbDN$n6Aw%Bc!3NYs^~ou0dZdTJ8S
z93Wz%sx0@CR<JT_bHv!mDX|Mt>6#9YR?Zj2KijA6<VR&TKNX80#PR$t25iNL$Uk3I
zF&S}eXm)%?dH&6QGpar$JmU;vpy{m|r5;??^78MPV075NiePu2%_Q%|$$)kpHqE2c
zdp!p2_vrgLO?DSNgye?f0f&nk!SwH9PZO#pxD%A5%PZgZ$JJl?WhRU_R$D&r$^QVg
zyY-mDoyP?Zjj+m)vhR*%2YVf>M%!A%39|;?!ny=MPvC8!*Gq04{~MUPS<b8?fy?6|
zZC6Y?`?n}84!x?xQK-DM12Bp^6+2$1DX|Ief(_a&MXr^4m(1`3cc{hz4%2Oq_Yay^
zEt|q`=F$r`{kRA9!nc>D(YtxN+uB*o6Zc)a=MU5(=z8Jp+h9eIy|-9K=~{9F?OJ{0
zB)!BJA{VJ(43awn8MnBcGf#5Q+qHyB_nbyLE+e91=h}bJ_DWV&#^u3A++GufRr0am
zu-HY*C2hjawBs#e*)6=VbIIOMihV!PlnUCbJ*>c3KD0crOsMJguK`PpGZ0s6q^0T(
zLjj@PJ(xHfyr^tm5UgIZHe5~>S_RT?6&(9<IA<i*zq7b+ArnV$w1YU7bJvOvbWsX>
z=W24!`OP>IcC*eUBs~>y6hlHL#dB6T6M4qyS$jv{kKjd|ZTt6jmIpgLVR3vdzzY)-
zMoske7GU|UdL27xB^EaHM;<)Vk;%2~=*G|*adzlsV-p?srVVf_GqnsOiyX;+AOCd(
zbVuhWuWNIw%DJnW@}v+gE<4L+Hfm6BXp|NkYqg*;lw@WKD2QMh;eV*)e`tUWduTtG
z69;eOBE`k@Id<USuu94y*^T7To^wl~UFTt0``pmPgFS_bo^=?mqi1l1vO~-YU+l3H
zvl%*at|KQP`F-`sCcEU_zQ#E{XUXQF@833CK1*N{z3-#MzdnzNLw@|dvVI5kb#Edw
zWdD0k6k%%4k#=>@M9f}CY5h>zzB5X;{U`}AiUaPo957_A5C!)>oHE~k!sWAq+Z<m{
zQN~Ym_n}mv027(jAFi@#<~Q+^Ifqg11nJpZPyoJO(2O6TsD0;CyH2l4rw(7@X$L0H
zuXUzp0g{?#wzmwrE&|Zn9?uHA7R-Ib>|u=)P89`)*d85aJ<RxNz$r6BUkxFfIbque
zW9qr|ij9pUMqESq#t+D;d!|oLG}?1NmXS&Ad~{azBT$KRn_`*g?Yva6jZ#J_T3_+x
zWaq*umYvK;yC8xh3wfp=V-HzTI1J2&mrSG6K8GAsgHxhbKSy`9al8vVLdFZYA4xU=
z8h`H9-BHz(Ga^}?6)uDu9`pluL;^B=;<?`6*upKs2UFIsPEqve-n)_d)u$&S0!BLG
zQ=N5*88N{&4qg3n;HA9?69Mw>1nUuss(y>VCaw*EUd@ma1?w&kY+d~ojz4o!<Qm5f
z_x>rmVpFZVUw;txSL;OK;XA&bpl&gw_{|)UUn_RyFfALp`-ocOOpDGF;0N>wT1#<O
z(9-=|fYY;zW#nNV@b-a0Nz`bpOTzTD10Oqht;GbjuVqpxG6x7);}pzy_!|9U-=x0)
zQ-CJB`c5I^&hgKQrCg-Alf1=Pxwdym@{*ab5P8b%7pJvZxsD-~HLB}Cq0_JWMB=>j
z{$aiB0u&wgeA*IY2B`=A*n8|0zYY{?*EVZQs=<;CuLecr)0%!0nhjAC`jxSBMnJh$
z>OmrtX;1eC8YhPTMkOXFcmk-zruDn1rpdVkvBfLwK59C>6su&me<u7L4319wqO|xs
z6;wip#^6%d!g6oXzs_!^uvWkSHW#6I2-K9}$D-=~<Xw8|b|K*G$>~D92a|l>Z?J=0
zOq6kN&S9dufhZPbpT;80?i%V^ofWSnIb99?m(V6@rdsxe$ciU=g-a16*#VEcSzC*%
zbI;t7WXuN@yc0WemIo~w6z(_y#Vl$62sEM5;(Zi*oWFyH((imrX9c?8lI=@nX9GgU
z_Rx$i=P`C~PX2p7m0s*?KNc4edK17Nfq{cwTH8trh_m})Q$$)F8z4Yz0_)ORY==%m
z!!fb^22!`;%g*~1Nr53g=4jhTp<LeHIN`*a+$iZ=PKOD(a@MC6Iv^m9I$JlvliD}Y
z{gW=$)-JCj5@?U3ha6Zv!OUkTjJnM%Zbc3rrnx+K@LgjYbvMixrv8jXQ1sweePBPR
zu!PGykThip1vOu#YvF7A!K02aav8*+;Q=F;?Q0lADU;57`Sz_pyP@j>M@=TVD;UX{
zf$J`;aB^Ku$K(TwH!Te0Be~7-R6jnrmfOmL)VoS6`J<}01_vEtTVO0BSJ#&?weH|m
z5ZCxuCqJHAiiW*oAPSMg9?qH~*c#+5o<3Fa8*oGl3?ufzOXy_rIfK_Q$vl#qSl0Cl
z(Uy5;u?A8AlN>nWNhqHrsQO7ODM6|BPr_f8y>w`<V5}u1F-v6peMke2MWZ1)nkStq
ziT53|{wn!cfUoYM(`>xbT2wPCiXAN0H;$J1Ahv4I-CEoQOM>%YOX$aaaV|^oG9IQk
zL%UZO_UzZ}I_^|zp<)u(n;EUFzc<^GvD}mqI!Z8d9d=rfo1jYIo1^+RrZ8;#7sx_t
zz*hkLNPA_-cGoNKL<v0Y=&e*HTqhxSqVzl`<<ho5cCa-5%4e;10k)9=TA^8V6+=J%
z`bH1-uA&C=8AEr!{J@j|TFnB!pHtsHy$49H5&wnwwS?n*&%F<<kj~2@kL4R$(=aep
zT(Qcs%XG8)PICH1MoWx!!8B6Z7V8p1Oe6ZZdv`B_MTl`QZUG0yvJ&JYuQb7h1C{uW
z9-L_C4dLjUQ%-J$Kq~rfd<;&s>%n~&O0=>BUSb=5Mw`M(%ciuK{Zwg-nN)-VsvFBR
z4H5G;TQk<R=5=5C50*i3Q_Rio%+bY<_ti}kcVag!bTlm{QpunFNk-@xyF}rPAG?S?
z6vLq*3<(*A9~gXib}#^+H2DiFSdGU_Ot6%$5Y*Nj0kj5k*N&c4N=8UYg!Z!qYZ~`}
z_ZXs=Cw;|o5z*(1w)Kzt-L>9C+C}z5m0Ez@PjD{P%Kl3Wc~_CiVw2mSkWbh8w-_`%
zYP<ilRetHJh$eq(W*<(Pp}G5kxT6LP#fdsEh&r^LPa{g^nLsm)(fqc}gO5N1^SKPE
zNVyM8iUT|nxtcRV=|}W+MDnEeG_L;@F1B@$c6T4WWXjpEr-Cwe|6`NYsIZ0b#rmm;
zz(16azc;}vMIYgzw=A2*hlQOaCpT`AmI2a#e5?cHLPUpt`ohtOzAkyX`9bw}r+_Gl
z>+E(LGqzz;MUo&eCQc10#r~Wy$_0kPh5j&GJ%zl~VJPq21?|#(U|biau5<<nifUhn
z1}w!0Z%nZ?)^Bid`n|(>rBOqJuAIrcV&FgoL=V#5=I|9er8~#_3<PxP`qH=ggmI?@
zK3n&j>GFN`8tTZ6DGWml(JVXl+R$CupDBym0>RS;<8p&VXd)6PC-VGr$^Ew1uX&F8
zwq@AQl%{&`qV_uz%qlOzHo(&h`%2LdAd>X3gFQZR2i^Mw9>>Mw-x^H_0k|Ak99+0G
z`1i6H7GChpd6P)iG2-pme2<P^v|V!yeH#CxO!((UH(BbP9bJ7Vhryk^JwkGI2j7c1
zYU<lELcsL}NPGRApC^GmZVtsiVN&=ew!WnT^v*{Rd9TI|^@QhZMkCpqsCj}XbKr2j
ztyX&)^@`_P4vfeC*$i&T|9~rVAW?Cq)j!J4Yn_ez(%QkN9Dlp7cY84p<mme6ra*qA
z7!eN35;*Hm!~lj@ov1S3^?4J#Kd`ypc1gHLxS^{qq}_3-owH8d%^?d+US2pjB<ynd
z9fa<i9&>a`^FHilRhijdqkZE^`9!M+2vc;tj6>yuDKtc0_!Fsm&>u7au~XXXjeV-s
zB`XQM@@>jk)M7SjqyxG$$!ZV?-;24nArWDYMLk>Ke4wuDJ@<7dvuwvH7+1Un-|yt$
z_7BAgW`I7Z1C7I>aHakOVa+6gIHk!OPWq_BLE8CEc#7RUaNB(M<6olm_0tR>)WW@A
z#xz-E<AW1splE5G{o|~KR&QiY9V8%66G0cjwUM_)@Y=O{+f@nwfMr5ALw3k2O%<Ar
zeK9PSx;o%|XnIT=0NGFu>yup5KT<$g9)$;;W8m?EyaV67!6$EO_G%tqST^|wS&LId
z?|v$OmO<Y+!I8*JIRE?;j7yLf<xY=f;CEiB+y8;q<|kqM^vEyCCm3hkw7q%f+03l$
zKN#QX?*laoQ!UJ!`{?_};{UGM`@3D>%GUnBv_#qe$6AknPhtA+6>u^yAPZyKmK!4n
zyF8e9#g_iYsOoRqPkt>5QW3GDFn*73r|LwFhK8WjH!ap)<#OH`Go9}%QLk@bU<&)v
zcyw66GZDWATEc=C=w!b)KdPXwbV&D8tO;%Y9V(Ar{E)uP!*uusDx?kgwLU?`!Z>49
z<+T9|DvzRWB?q_bCQ@Dnt%T?PPKeN+ZEb4*0Wb04e;&xaoMTCayKBUKbpLbE;RC%k
z3iKi9__D1VD7ZsFfORJdX!mPa%IJ)k*Aa4O+hNhR{obbKz~I)D?&gZ%VT<2rpZ5CC
zb#en1%GpL>8dgjFMc3e@8_xuv&7saM7i@dufTNx>>U4nPS85c;aOAZPyTqvGNlNff
zj<79RqgI&76&57%?sq>D2+<b}N@e#62q0MrNNDGWiXHFoX9%j8e)CB?T%rgZ=uh(A
z2tGych?C#bMxC|vm$Ka8{ql2T;Su+Q0S%0J-V;ocaQJiw6*{A)wW)X{FM+sDm%~mZ
zt2-9V+#Nx9*P)}1Pz8qv<v4HAyJf?+FMmj`&0?JAcT>Bzxw!KiP*$rh7fM#M!arZK
zL4Aojy}xT7D|<6!jnUbyfI|^nYs6*h1$NoG<!=P{?WtSq6dmo?zpT|ZY6hob*=PUU
zp1VhCwjT;*N0MEG9<1)-!+Cb-7VJKzHoXxmxT7H2P#>TuNbLya(HT}Y+~AHUx!u5j
zVRA;pIDGD-LY?A(iB=8tBJr)tkOru#<hG9}#;w^WdkE2ywyfvqzjpr?&o2>j&HUaH
zO5FQFk~qc9seTn4hzpo9NLrVnX1iP#k#t}Bw7oXxUkygc9~W551ve6Z?lllz5kp|R
z6CtpxnVRP6cI24!4kK(P!VbF)9EegujpdsjF|QE~xlb!<@(pN|Rz40V={v}X23*RV
z_2?H~{u=dTkus?|M63Qu=@Ykh=|KMw=ZE9)P4dddII}HmP|+bPB*NuD@#wrAU}a+O
z;Ze)#DaDp+<$2H!mjma;1M{L!ZAsX^Q&za^#DJv`O28B!M2avtR8mDh;eKm=(rwX2
zeIzoQN4Xpb74VG`VP#Q?VxBM|K?hcv%A;}aa&*b2sD*Fc;Da90`oPeELCcJDx^*T+
z520Ug_KVip1u;YU&4E(~Ybch?B6v)G1#hU260ht>f-_w)(XGK7Je%3g?lzNImuf8c
zF+E7_+oo&Xk;rlh$+5Zx?GeL*{c7!BV04cDh034M3s95Q-?a^`#0G@3yQEjuTT38o
zp1r@qND${``^~LQPEGfPiBr0rv!iNtN${?GJ0QET14Q&Qshj7YDiHFrR}+xJrO8*6
zuvna{W;4COLW**wV9?o+6eSI3(I#0=#tz;xXt9I4x3neE0aJ55)f0X#5ixhKOW=jQ
zyPO13n&Xb<?VW_aAwP~e`BE9o0W<X$RGRM<u7h!rPdHS7I|qO8`mSmB@Y8Lh*YHii
zRBeGB`RbKr0|%YWh;7%8*8%&YWOLC~;cE?T*jES+SRC@a;3REbkR9x}CxWfv_xQUW
z#NL19X(MD_cyAj3uR54jF<WHt(Sqo_hIvqaX22Sv_n)PmjGWZTc;*)lf`SHTMHSfv
z1S_TL{{(lv<&cC72|A$%eo)BT!hBH#9E%7=MAL_#-BD0^Eokp;fbTh-o5y;y8_D#F
z+`;!cdu&~>cZBr4(zaO##)62&Zc=yQmmaTS&z`T@jlblq16z-(=Q|E0T%9IkrLt@0
zyAT`^2cg+O94|WNOij4G;|$4A^CNdn=xU3)C$>&EG`f)-P-x-~+0nJH&ftTacFY=@
zdPd+n&b+FP>i(=Lhq!al(&GhjGoH^slQ^?maVKkAhnUfl?d|mF_?z_ypJ_bD{7UU4
zafLk4K1hILeQpjYYKQm6?E$ze#{KeFF~U4(yOnQx$jlF3-<&kqku=BY{gqbryFO@*
z%Tb+FM2Xiu!z1&E;`UdhMLm=(REj}k$(71Y6p_Pj0#PwNboqJ3vNh^Xl3Q)FNs|Z9
zQ&GQGDmHrg8q!fSiW_b~nuxn4`P=2lvAa&woxA+;t(^q5_S`rq7K1YgJ!0eiDw_DK
z;YyUC{*XjsDo&R)m{KO>-)(=^cR#1Tm*>}wPI%4N2p_p?>~1^F0YXkOHL1yC3DqZN
zYA)B%2z7BG4GVHd!ne2$*5{Qa<NwKW32TOHG)Kh)*d0%?tCrJ$Uhdr$juh&g0o4nh
zi(2RwrG5+8&gbjUsrR;=S(rI}EM`inq3MyfmBE@kdCzL29<2X`va_%+>h@XV2Upn0
zHVeDYis|c_p(ObF_dbA)5wQazN}f`ULq7klN$nQ&dHgy@1ydq5YGHBhZTyKByU{$-
zd|$ss-RbMdx7EIMDM86L2RjfD6pg#@I@DZf(CvQA95>LSPT^)O!;#)zb;G-;Lnk{G
zj}=-;mMwpc=t#zucepgq8iX9pHzZphap=oQycB`f)o;K2e$j=pepZz~;31ads-y8N
zF9S+en_PR9vzxvw)Y)~(z}i<zhstBmeA3JZuBPem$+g?H2I}c3f%mc3q(5%=-Qek9
zJVkcW!zLF<_SbgjX#KrZ*Y*_}XiK=FlRVd&K9C>cM6~swaQngno$KKlk<OGp^;&o}
z8b*6U5w)eu>B6r(2s{Gazg{aW-xJ4MPo=Y^Po-1xIwLQFe<AcFt>*Lht=SW6Oeu_k
z$;pW+u(HKs7BJRn^a5k<z5(^Vt-8C}bwm>gpf-eaC^G1x04wwlHbtG1eZ4c8`Nu*v
zlRVjpN<F+2*O#a_H1EyvnSOE+xHF}s=FYqm3lbP;FBqi4F_QaXJo_xmS$=&Lj@rqq
zld4m!|Iu@OHJ5%W3Q%U~xtuez_I0J0q|Tr`7@2Y$-$BlJ)<G;%<g)E|(O0GpD$AmO
znpwJd;J3KMbXH4HrwN17V5MczKq0&tcXCatv_3bUWJUVu!SC|x!2F&5K4UdWE}{@i
z3@KTU96+6~3~|=ZWdJt)KTg)&RJlTWWIsweFC?IkFIa?SIzfiDLAT|PvI{DzZ|8A`
z3tl~+R_=zsbiX*XbMSiSpO!C{NpciXU51BBY{w`|#wlsZd7KUjT}8x(ECc@^3!fD=
z>S70!l)3u*2fq6;)ykMnDSGM*x1oB|K<TRqNQb|gC++Faeyz=QH)#zvR|ed-cM)HZ
zm<KpQ@*^Cl(K($h_2MY&e1j8fLZWzWkA=O;@9j&7zNFqW7hk3))x2{!H#UCeT$8T5
z7U^s?>S&O*l6|K|lOwztGy;;G7^&jSCciHP5X03lz2=?*SK$<)x=awP@yK-m*4Tta
ziA-27`CAu;EO#OL)G4AAhDJ(GYwvLYsDF&tFn?(>$)ro#wO*rPNgjO4@`VVMU#fcO
zHlKu&kch4=Bfw73s*Z;WP~3ZNxu$yt!b45}1m&)SGC~PPg{I*!jHU71#+6y<P(Yy9
zIXQZ1yJWu^U(a<SD^H&*VUR_}dS>8CpM`QxgW;*AH+QuExuXw%W=rFQ-md&%dA$`R
zLJB{x(uAaL^-(1|_2W6@#pSw#|0p!w{l?{5@A^nC6kN53fobj4){I(ig<QCoCe%qT
zfkZq%Mb4llIdZ*1^VrQXbk6L))lVw+vis{n8PS7<t`PG}U__fFCQJygk1&cPyMzrV
zxZVyNK`vq+2A2e8|1ZwoJF2N|YadpR^<YB;RGLynN(7~sSU^NTjv&26=}3w6P?Fe?
z4kDcZks^pRX$d6|3q^XbfuIQxAQVGF3nck%&pr3P_Z{E(e%}~h{!8}CT5Hd}_9}Be
z&ohI$&=%L`J^pjNEU&?x0~VP>FZm@`F*mIkp<w<WsdUcVd!cnJKHA5e^hjlI*S;9o
zr{;(AhG7lb<F35PZ)TMTiJ?CuhwK$ym%<*sJyz)x*>S-1n4sy*pQ6Q%*nr6IHRS3S
zQuC0VKNXs%)1D{i+N+9c>-uMGR()#Hb0GDV@6*TR{MsI=cDH@gVm{cs5H6vAJbofj
zSSYqnG*4y%ahauYE58(JWov-<wa@8zo2LG~a@XYKR`_`mC;B2n33<Oh8=M9_)#O@>
z7oVkr+NEYhxSs~HN4N~?yhVRZ`MP(F88hEfKP+_w3vbb<?rWvFl&=}o8`{_8w$yL?
z!&UuEUkhQQr!BW{o!-skHftUf!JmPKEvJUE2sHirWHrEJYF&y{0MLP%kaAZRfj_<u
z2<E_Vw)1M{Ol=B6)^DAbCf5yX3hXaxK?|Xj>V<_$zc_$oxo`5Lkl6UG!L#uLaQCR+
zb@t*Ly-fJVDrpmGFDwV<HbL{zS7kmv)S~_L+^q&GjYjP@WVL=FtWO$*X&x4sxgT$=
z#c?H>l0aFU+BvR`<A)BEMpj#;M=tp6T7VAvw5>Hg3~#?U)~<cKu!UQsOLKfZ`^$C`
zf6m?W2{wp^FU>XLqxkejxuIxn4ZMaJL71!6<{Wy6i0NWL_df8y9><K1N1*EI0G=*W
z#9}sP->mAoy(Q_4%Zvl9gK&{vUX6nD{B(h|<q<D-AdQRBhkiJ&QTBCS1`g!7IN#Q)
zq=hsj0^=X{j=v8lcZPN5$iAkFm=zy=(F5W9BL<tNTs5@!R)#D-ls*)2I3X)4dy%<O
zbYP#Qtj-iK>)|tl-@voVWgF~S&co09)pv9&3TY7SY`P-P>A{g}(3q%}1D{Q9cm>yR
zMBC75qW0#oMtp+?QBT8GaIb~CL04p)Do!Br4b!7X=F!c&iQfD8l9a2w;l~gKjg*Uj
z>jnHi0saBF^!_;sUfuWq`{x_~7ut!{{N__FYLS}3^E6w4>(%BDk9_?kU?&^$PqV-N
zgW$URUkENM!0~9sSV^4sawJ<miF{UayXhaDmIEQ1-GnJm6e+Gt%Am!L`%ITf!3f|i
z{O7Xbzi7ihQ`?^soc!K;=H0J!ecLL;Rp->@^q2ju>+ur%TK-Xgxp-$QL#5+g)Gx(|
z`hyv&9m7#>9+G{XfNsTdE~^{m&~U#t_tHltoB4zPbk#p2ILUQX=ZzVU?fQ!(EBZvF
z3dM5#+C;&xCaK^$A6be2fbLoruF{qDs%2}Y{ucvLbQI@0H7g+{kd>C><`eY6mH0P~
z>38+N@g{#GPG`V*fLuiqg`oJ=@R;(Da34<k7e@bIJ-89OiVn6B2h4aFLHUOy^hVDD
z7+#XUhu6P8vIqYkRHWY*{^y{-@AJO@mTCC^F8zNI#?kHc98|7#S;&8OJ)XTET=h?<
z(n0^?-h5h7aQH9oO`qowzn@=q&vSn0i=%bw!~Z;pckPU2fa?ueiNwD7t2-r;zujTz
z$>@JfqgABRhl~2lqxWr^3V@P{R_!t<t(684UhhSo!~Q`zb|3jo`-K^qcVzM&tuY)e
zKaioy3|~|qdHwpssAmSxKep-DvU3Iww!eMan+2J;A5NoTfr*s4NrIv@pz3q<-|xb+
zop;~51-Q!fIVX}HZyMS^8q6{>UjDg$qLN{K#76RvkRAu{_x!8Q^i%(Sa!ToZFaRrj
zUX#al!Ktg@VkJWiMiBp3sYluizjZwFAf%^X{ziHH{&_y;Z@mt2zJHb3r~2@23*jQb
z;raU`c;$EvER+-_1^9se1?4!m{N`$;%D=xHKP<2ha3B445&eDn)BaBqAfhmG&&Rn^
z@xKb4wHX8;VVD4_dBbW0HE-@ewH;T!j1g}oe0h0fu^r4-fU~F&V_EZ4|Au|M{BpQ8
zH4E0rVegt1_V9ZD2kde6oM*pRM-*1h2IKTfphe)H&H?@L|KI(&>WO;wU){Tk`*iTX
zD*v2X0IQaC{APUU^M7vP{UZZwT+~m!hR6Tz<+#c3yZUzlplRuI|JCBl6VGnq{?o+O
z@qOuM{ta&VT>%v;5G}3#Kel+ek@mlf|6l7mXK#7nf6V*GBfwd>iN%Ni-JJ4gNhBJ)
zF>HF(vhTQu&bd;~angz3H}^^65#Wup7Z{DaYRNhOPge~-13twj<x=j)pV-&_d4j3G
zdmiFtDfaad%k>}I%I*K{51jmT=hYWx9}v&&fg=8RDdfAkA=9?dw%9kgJRHuLrh*EE
z#*`Kt%F_Ck#=>%%X+5Apd>HFq0|`=|2MXKqV1SawhNgE<>aYOAT7rRlEFF8TBzk-l
zX>Nqm+B-d6PhRmSj6n<=o7eIEWfp9`N%#mS!JEV6yh9S$%4Wh|N2v8tB=&Y2bZ~;7
zC6n#*b8Nwp0?`%G-s<va7L~sn(xyqMZ)9~X%w7W<Q<cNw+q4yLO0kAi%Xa$jsM8oe
z%;_}*|3>hKNwc)FTn+R#59d(8o0HnKb6V@;j*MSl+83VJw=WXw+dpg&+7~BLtNB`|
zwC|{}Nc-$UMz9O691LiQ`WGg<aV=LRhwd1{ngCy^#fGi7*X*Wk>9GW_Y_@_BmBS7z
zClBp*oItxaULOnO{Q5Iw`{$8-ugBInt)1DV2fM2$R0gQp>bey>-~Ef57R6=?r?DK(
zr6YlER-SRNjuGr&Fl5IkSBvrMTSnm3bEPWg`nYL!kC^+G1^e3cwO#8FhK0y`Ft{=U
z5xQ&hAz&KPUk(V2`M0aY$6{V?5Or|bp&jN=?0tmbs~=bVC&pYB^190NWtw;3OlAPM
z0P2m;z!<LOQJaG_ywI#eJ$t{>7%bG*fV3;*FpUowtj6q&9K^g@$dXDz|8qa>+hC0_
ziRm58RzC}`dE=D!&T*qvTm)ice5ht^06f&Iv6@=}2{j!a%k?bL_T^!Du^gC<l`;2L
zpaKGm<G|g1;|52(m^)TYvEL2~VN~j+C|x+AzW3{!Re@kg3_4pwKCdVz<RR<jy>0}K
zZ0b)x0eKf8ycbZz%JtSId2K|v?u0fdFq@7WH5Zi_qjTHQFwrHhzy(XdTx>INu4@iF
zCnuC0{=x3o2<j5?(Wl@V6WZplms^89kUfYShcUod+seMx$J)6UvTB?*B(erh6yE(Q
zv$*QPwlI_5a9Bz4vN66*#kO{ZI5nyW8?<{*W$)Nd_&QD*2_y*R<z8~c#1CUBvBB`y
z?1wBzce1zNRBjY3v`?CUFckchTL?MXS~Gtewa|jRP*f}8ZNkb&nYDSbGSUWj`<O2}
zzv?Tm-bUlHv>C-RB$Aw9yC$S4(6F4`Un$@oR8Y!4AB`P#7b)De4?fG<38*NFh!5kP
z-oZlqBC!M!*xR;>^k}BZXnwtH15%>_OPPq@`+PryYR%Vt1T9w^q}*F1R)=VYk=F=5
z->xrrlPmextglvLSDaQuw=ElBV|%iir7#<x?ZleB6g{Wq=Mk&1F<37tKih9?mITII
zW{keO5WYh67d#;nq_DE|bid?E0^$2uS<iU0A4`%sghsyyYq4yHH`g?iyc^g<jBjhm
z#6z|>52sN)W@pQA&tjA1UfX$)rO|yJ#=^OYKQD!Bv|=$r$V8aNQjNfNXo&nYZQ<I$
zTA|09b<lpKF<yipYFac<#m%t~>uJIJOuw^#6so-y62g+0+hBev)CQ>6Zf#qc$C#~-
zTrv(@w#>sy771FyAjsXAGHx(@=hR}N6kF#An&~&J?KhV&c@{c0C@qsZbl2CoDU-cZ
zbSh+vQ(IK}ZZ6Eop4A7Q151f)#zQBztwUDYY=(CTZ4kXKh#xnL@jRk|6H1;mb!%Nu
zoYfY2Vr#So9w(xs&h-5mPnmxUE65SLU5XRnnW-Rrpvak@6woY54&LqHXLGGgBMwe)
zj&KaINEUt|2P$+EOQ#UZna2KOBOKk7_DKo8KQjV7x9+H8z8*j{w-7KJ&6f#HK4M-;
zeHl!GTT@v5iW=6o?P`rZmO2Q$vwk#{@6KC&TSIFcR>J({_@fQBwE0*u!L=9Gtsc`d
zeyxO6&Ppp|fl9oa(Dc)&Gmz8TwZPs*P@=FRYpfG?%#pP_aB1p*?a54-#vW^kfvNNx
z$*YJBD$_vQcWjW6QM91D$C5W{ly(e41-xuO)8q?a0v9HG0#UA~G;4~)=@^u<P>`V4
z>^9%M{y>N9Xspmo<~t;ZT+K*<PTx#J(D@p+KZ*?2L&o)8<BUt__=d%6HJn}7{)BRl
zIO|8K#Zy!k_4)^r*Op{R&(RqU?)%a#lP?M@c0c3i+G6lMTsY2sC3Injf>zPfS+hq1
zBcW)$mEcZTtSs=1zHg&^>syh1pMw}=_V{ZUD({p$<W2=@`%pq(7J;tDoQjuPh&mM8
zXhHJYS)cR&_2Y|MhNV^g?@6H7x!V877pcXZ2z2zM8U&Xp{89F-PMuu-hQ*BWsmV2}
z7nw0RSCJb7Xcv&j<iHu2Db72#{lRv+WS9=F327iYu5-0{lZ2ueJp$IhkJb{j&3prn
z0Be!GHG~#nUu7-QB(-)vw=WithAwoV65dH685SQkN98L8A3yBiGsEeR%1PPqm4OcQ
z4g<ItKl|xk!xlmYKO)Y%p0(P--k20vJfgz+s==S>H2X0UTV8I4#e62M$UsytaYKVd
zz<03tz01rUg^D7LlMwwN!L35Wd9U^!d_kG1ir&AUYN#&RhLMv5`ir!ZkIv3b>LcD;
z+wI5qbF?-Og?wuS74q9(mAN&)Z3-$#^J9&F^(TaM)b*gBestiHBen(doKT5;y`&R?
zOk8*#amtIfS>~12Z;FJyM$i{T7JllnvxR{La`E|$w(b^ZUZ1HIgOx)62%je>4Duvs
z-I{6MJma0@mwng@CxlH545<5l<qWjKcFSu+i<h=p$%QAwjXCRb(0eU}m#4Jv>fz?(
z_OzPH^I$xV1J%m`{vCL&`^I$vLIMYA7>eJW>vF!aP2IS;V>MU48p&erXp;SAiZnLC
zx4=-X3$@qeC#<Y+Q5QpcTEXv+d}E4cZENJeN{Xx<^DeOQ+`OV{f0>r5wwen(tw{w5
zqEq5uPT(m3Rzm8TR(5YiB?GS?#*DABs)_P=@;x8gW~eL@XPH)}rmuy{Nu@W6J37Zt
zj6zQ=MP7cZYe=%-<6^s3mQhVb7I*A=1MhBDE5r>b(cWL%&khbUEZ2wVXHiu?Y7bZ1
zfCP(d%pH6OW+fz$dqYD}ET=6y#`iB$IKNya?ZBuri1X;5s#%U&2?ADOzFG5G8fo6k
z8Pp|P)TeA@b}W*w<7aKTzffa(mjP-+2d9PMSTJH#aN&Wh3eFK0X>CS-YmJGGh3&4V
zt_`f<#|1Xtd+wiC)R0=-lt9`KZ7ylHJ77i2XF8ETGd5R)qkfu)HlGOHne=e$apyPd
z#AP#b;}{mU5WdQ0beTFeaJkiAhI5FV7Dd|{A>8jcKsDDonehiVyj_ys>S%7$TQ_Y!
z;hmlX!OvaNhWzM?R#_V#wb8Q?_MI>PgNG)`0YoVst?OD_2)75C1KnwDw0ggVw&J#4
zzdD0smzyat`-m~GTpN{68ecPrF>a=%1ZE0b7tP<<9rCgR+blR}xGr7D7_Vd9t;#m-
zLv24L5YCm1kIJEoq}Z(UlY4RK!Bz`adM|$O2&2OwXwa%>tzS=m@@7%?mK^gt7-=&y
zf!Ux>Nm4@Y+DbtmaNPnSYK3TgO5Orn4RWUn{iO3i6KA%cG;H%}NGYy~xy0}6weB@p
zf!F~YLbth|yA=C91SBjW8Njkb**lUH*!FOR>Gg}d6B1S`*V;cqh0>@R#Z+cs>0Mlt
zgD00z-^`HZ{0c8}Yf=X{{gf=*1PtfdwZ#zzOg0Ii2vv<dJ)ezXDM<7Jj`Rud;$f1P
zh8H4RsL#WV#BaS^?{ynutX&@gEM(LKf??J@F_9l^GtWfDsGgQG(p3QEe~rc^72o5(
zbhO>46PxXMX*c8>)#{mcs?3enb<D#jKZCRq`dY}|*evEnIfx5prkI|f2WL%X0YM;}
zHKiUjR&ME5E)Y+TY8{Of`ME}xl1FVUzvMTTLXTw!X=zzLMHgaFZIE5-$?c&dRWljW
zZ{Zpl<@GxbAwBEP9a|){6lgsGig@NdEl!X`cIGWznk>_Kv%7PlVL?d~>HB-F^ry2H
zx8SIW5NSGd3CWInLT>$J)H12W@ZgS0BwL0~Q*>**&(9o*(`R<HKK8Vr!X7p{E})UV
z_zR(h%jw8MG^|p%ZR^b{HJ$5a^LB`QiSZzO42M2Ru79w_v)kNHV>QI7P-&I>jFtrb
za>tH%J}rF<+r3HWTs{jm5%x&4{1j=#pKpD`-A4@Ao8lF@p9aUQcaz5La7%qr=_X!-
z+QVPftlX%ogXGTzBJ~5)s#(o;2F#e+2dUS3qa{fzj6Pfapw%W&rPmNR*A6^G$>#D}
z(NnQF4PD*%G<JozoSl{7>$eQC@bhfT2|96$^RqT}okSg<wzc>FDSa<1cys!X3BR^x
zZMZ89a+m9B8awNFHB!J>wF1)=DLS3T{3Fv*@cw*V<4*Nvnn&~f4di4&3*iuCdp|sE
zC}<&{f{fArMJsV%&gb0XY(EDLG@O83(*In>xtnC)wo#3=Ivb-R(_iy}RG0Fmzr7)r
zVjY!G=3ix<Tu~&4U1y!86~DTl2ofVQT*bz>iJJGrDvv&~cw^~t{WZ@`msY~hH1`3Q
zGT`MTC+^-5DVl4vEIo0Gu6YhD>aaY%tQqyel+4W?Bp4G^R4zjt3?!j<{SGB4aqEPk
z-@hn^)0EYAbk(<qulaMPL>AaHtmg<80a>&=zRK!ZTO>vxD=<#eyZuRw@ip}Ok_x^{
zf+#V}4P4yay?c?gI^B!3x)Fy5OSC#@R8Or~UGg()WE``Uk{+I>mRgnMtUYX5zpSLl
z3CkiIw4mbkmhQ5+wa|n12eWLC&?&Jq6|vz2^*n_dQQD@3)SgTnZR=qR!4=dfy_3`I
z+|$1JYe@SJ2sw6+#me+^V|a0-Sl>d8DQk;k>Sv$?W3KSDqj~L%_xX&v?%1P_Dsw;R
zvd3m1zQC6+pN8Ih2LJ5caowTEb3j(6D}nrwsmvt-Dd<S;7;@YFvbUUj=7Gqzvd}}8
z`xne|4RxfFS=uH=GJbV&3(2~<3eom>unkKD@p*QYf%I_tO+%O%5qE5aeBAa&2lG$0
zVpA<2OSxM|5liYJ?-thDKX~Pvnr9i>AXqmx`*^U1uckay28~6o&XoLsZNZvhHqFbH
zd@xZ5%gfE)YT02u6=sL;y&CJVVYo_&LZn1ZVzR{#8<*+}d6R39_?=}8K46II)>;*C
z-M#H>TlFoHk4}o_DX9Zb(kxt<4NUE!vBy(s?B&gXm>R2I_%we8IG%Mio~0dc#~dJe
z@nhjaUJvQq!x1Nv&CWnoyhHSJVM*(jjV)QaWsTd%6GmPrcl$cGpzP05kyejlWS<Q6
z-L!M0V(;*k5x;79KY8Hk*inp*JZ>s1Kj@VW1dlQ!v8NF02G_S1c%r6%<)eq-Tbxl~
z?}F<Wx4^0SB?WmgY+rN1?VqB2wgaZCa2}`KlVphNaQ|y*XeJ}#Z8t$A2N9pc9P(p~
zi=f}@u~}Bko;5U05o!u<c-b6EmU{q>>Y>=Ki`wSAlt%Md)$lWTyDN{LRYKbB-DD4L
zAu@>g$G4)e?K#_GAZ_-I1kKI0>p{P^7>#;4c{9CQH}Rni{f!D{QBLr&rdL!~PM-W3
zC?I8O2*b&auZL&57tOb&3iw#e8XQZJ`&h;vvupQlan8_hTfq>#9R=pe5eql@+=EAR
zU`b@pthc&D8JLGRc3xch^)2sb-2s~Ua{D3#5T-hdH!|BDgWBIu(A>GM)p=kQ*kJy}
zNtl3&aCqQ55P3&VrJ4g*i_M4+)fe^HG0AZ3HkRqtaz=q~SOv41d4yHzstCnO#z?J=
z^FWt1#13LBw`UK@r4><3WX?cA8f+PVMWnGVu8I9BXvl}NeagQDg3q+4<z8DX0dFg3
zf7<a3Sy4mhIri=yZ2CDow1{uo$!9F#k=M9XvrpVB{R_tuuzTw?YND~hY+vX#`0ABZ
zHS)IgOw0#B+akJL=Px7pUt1iQ)p@S^i}9w-LNVX0uT%jCQ1hHWo1b>Q4=ugsTGWr_
zd3=uNadCwPFsE#!>a{&?wGACDf69Y`(AsW^gXqANT8moX<94%)RN1u(D{L2a*v>pi
zpG>cD=uQWG&EXR6f%judEO%`zajm=EZTW4m@!iD>d=M;OTc*o)>4?-?7sHu+Zz;1o
z^XP3<aHF~Nucdpxv_4FJK0I(G3M(Mb?z%;joU06sQoAK(n|_D?qwmnfp+fBVFxRN~
zALQhE#NHuq+3Jh%8W2$vS;;(;z<Wl&K7K+Z-}3738Q;i$m(vdwF<Nd^nZ92Zx^qf_
zV(Yekr}G{`Qb^|P{!XXkhhE%HnX`L&7j=uqx0PE~UB9=GAwq8<e9<B-27&9|Ac8n{
z=V^f>N~VlzWTVWIROL=q*9TaMV#Up-NAa~$NG)WxCn{_aY=2WkoiYei%vt1tyi0q^
zTK<|u_E&s_$Y>dhf(;@33**|`fn67W+AhGvi*V>sYBV#!NUai^G`OFN4lZqkw5^Au
zEH7#+wil<wl!tMXM|-x!w6kQh$YP$RBkVZ{-Vh&3a%c9}b8dl<L+<|TYH20p6eQ;X
z6K%g^&y+(Fk(`_AOHge?5S$g#;LR-jfbMNUCGS{WwUeAs_fGQ6b)vV}gt4`#M`)YP
zGK(+wfbDtU418<g5WnpWF)=YlbU@Jn0i>Q1uSJF~YJdS}&<yPl=J;5%P6R-sWDFRG
zW#t@2hsFT%>*)Nkgs^Af<JtcDmp{Zhqk~ICi3PpXg)>QIrYmQjWAfk0iY~|UYK2OP
zQa4c9X(_PLdYA#V>fJ+v<XWr~dA-HPvf>)5qMHz*Vl=eKZ#=-HH<z(Rs`G?o-hK&*
z!yOba75f^6zjlSH1R}}3b?1^Bm&~xvWc^8?Y)!<$N(R&Typ?UF{1^>mUJnU6cC<o*
zeHW_=284y5*WM$o?D99`{=(@8m)be><|*5zyYPQ>=zTF{<JoyY#V+!mP2-~=Rvp?g
z0~AVx{AgRE?QE>x8e0V0evtV&f7GDDc2dnbN{!6ur2t@`4OiQHyNHcP4m0dkIR+iV
z?$qF)JXQo<GQl}hOnUHY=IFAgCpy>5Z!Ul#xVhcCWvqJ5Idt#+76LgGZ*U5}=*DE7
zGJbyqq!CTo*>8-qqs37dK5K31vcyNJ_1BSq5fpd+7)A5&U7E*jI8uw?hv;W`p$;p3
z=vw@&?oU1Kfb-ct#JQTEQy|{SMKmtf-shGqy{)aXxGYL@Q?3JIV&g8BAR*uH-=s(@
zjOC|a(WpXjd39{Gl8m%8B%QID^xo=6f-`O1L3QJ$Pm@XsL&o52?x&AgzFpCWc&{bM
zoV~Y`XAmEw6AMw9N#Is$i^xht^Do@N+3k!Vb7y{St>8;Fqwr~cgE0^~qG6Y)H=^9h
zfo<<DJeJ>`8f!Kd(O#4BXKg{>2cg&dru3YIHO@er?<NI`X$nu6)NjmkZ~DZLzs)Ew
zdV9rM>ygR;lz9UFt@d)5F&Ji^L}Xys-mBoN?^kHDWE<uz8jCj?n;4%Rw+6FMtrW^2
ze<?9bOu?O^MLIa?pUQbv9-@(ZWwGuWwMM?P4;zS{77yF!CzQ6T`2xfk``c%z@&kP(
z())Qm<7BUN#fTZ>IxLBw;pin7jOA&>({{3CD#<GeFpVs)BY`lkyf{j2x%Fi<{+W!8
z*uA!uUciY9S#4B#B9-Dis}ouI!H(k6^stSpx}k|;+B0l%v@P=-3mfoxZkPBufFGys
zxDvxm?t5bJeq#PZym7Po#ALb%_3=kgaoSAU(Jy0zt!g%kaGIo~!zr7~M@E2&ZO!f}
z?4O1BN|cI~<~pg9%~g-x9;PgtHx~3VH#%+&Z=mdOg$UE<xrb7;+hazv_)|ZOCxtv$
z_3LfyyDTwR4mO2ACkzSoeh*r8aB;u_3p$9_UoOH<>mCsa9R$WFO=N1k7S`46e8(+=
z-}j(%@YuWW_@>d;vC2jO5|nH#MOyg@64C3MLWn+~UT3m;@kJ3qVc&V6Hm-=fjcaw+
zfXVJqde@-ZPw=bi0N{NMqPwmL3}FX4D@)vJ*u0LA!c0eB&n|PN^1L;+lksqZa+0$_
zTZ>O|TEf%PJBEw&V%08Qs12;Nyh1$2{D%xTv>as9hb-3PO<J~Ya9&pq=>1c5>wMi(
zDle3oB~XO84?6qTh;A$d`#m8HV{<sc4tsyvW;zlJvfoe&S@?lo{KcXAhp@x>7w)n-
zs}NtqJ)yI-&5LPp2L~BD*n~3k7^i+Kl}Eb=t~3AXn^%&_So8PUkzSa8Tg)L;<_ti2
zzSEr%{ALILnqQv?AIycBHbZzk$qc?n&Ad_Aiu;Hd`v4NZ`&?Ao`P~Dok(un9pkT7V
zf<#eRY~V}~*Yw6OklU`%*nr&D-m^d=OYS3bQIk*Zx!Y;lJgb-B;r6xbX;h>y_WqME
zg}O<I2a%b29XBLi=x`}(;fn@~i%t$5d25(u!`J*t+@w$uYU0vjd!}7_Nez38=Hi%m
zJmbl?z-XB)@!k1tfBSoHLBu`fb<$bZS8i_K=8A1D1IB2Fe@spzt7>Zfm9^P^3tb#<
zRP_^r^xTf}RuJ(*$2G@~VZtpaKF})eFg%(!QU$dL%$2VFo|;T&$*zJpy2;yh%jvSD
z5Q1YVsIiBh9+DlewMx23pND>_0cq>E9~3xSB2H}fBR*iX@7|A(#<oM5GG@XaN2pCv
zuq3%RD)g_dlVjMP!DVB_YBtvjyvXh>KUAx#RCN)#+a5qEB&|e?MR{x3PE>;thqPdG
zAEb!h%$@xWBmq+fbyp;ksXpx0*BrVS4`D}p&(LK;Sc^7`t!p^(N|n7KWi!dZpc3)W
z`W8bV#X@cRv&LmW>~SoDL?WNAHK^D-to7?VdJ(LVfz>#rdFzz+$5sN%7PC__lRpHe
z&yl(bnl?ow|K_s0xTqCqty>>xEl*=>@2x#82x$Of;fZ%S8yiIf5Zu}y!&%MIfgGkD
zV_Iu$D0mG1qB)q#TjMjIw~Xn~#fZ|(<hxJG$?&+D9keB0puLt!$QMDdDi#|pns2f=
zz}KH^x0zey-mZiP6g($IC3Ahmf9AP{rbt9)_<<9Lsl&;zB>Y~q=h&EWo{c<GOKW?G
zXQ3lPW#AefA1mBM*YaP(CKRAOwKKizm*4||pNPN;gPc9oHLjN#l?`J`TJQe!S+jLp
zQ=7Z>M5s<>shU5K+A2#NmgywVvD`$72*j^bj0mu{(H3;CB1h|2>jf6`2YNwZM&rgV
zg?%@OwR(zCOGVy=5CZ>>yljDgT?B$jV}mg>8}%lkbiMNY&{>Fz7$P+<$}Nc#<F-4P
z)|`A)RSk6DKEhPS)9*5&WiE+9tp^W8^s=t7Mo`)};;Neq;%6Am#(*>!ahqs70J7{B
z+DSb%6a#6@DbHr!9PZnjG`9vT-lCQxZfJG!R8*^)ixY)|XSqY@5m?EYR{S=>cnHbC
zgG;nIl(Er8Wa;izIC->UkdafeSS6q`cnkuQg6}n)fJ+kh<Wfeu9%Wkt=GWse`9fS)
zOBl;y5Q|MR5`GOSvTEEJgebn#x7RC&<Y7HQmvWy3qzVE%xbDa|XCUOxf4i%8+#Hx^
zRd%X?c+(JLsDz#9J+-I8$fhUc3!tER=rG#aB4tK90{<zZ*P&Eo*)?eO{Vr_(RJ=ZF
z$JXumM_6@WXR(NZ=k#vhdgm=v#x(E7k(c!@;i#SlJO2UQrD|NV&MQ=8{imK>ZvhbD
z))sr!*negUy;u$b=d#hR{I-hqk4XB&sSI|`^ywyM9(ptTDNho<PuH{7LvOFghmm7?
z|9x7H|CMa^hMzm>HP-?Isg+{yyMBx@PEODz$O0^-OVeJN%%I-Ru18G@kys)SoGrC5
ze*lD!4f<B=$S0X2&l^*O+*Zs+9SC?Nx(dEGJ&uKfSDYI|p#~FnK_c(|z-Qu*OM1Fz
z^wdOIhIZKy-^wbYLs#qFShuI;_x6+<W@Uy_maEgesRsLN(?^S8p*u6puf6Lx%$e-o
zZ2#`e4F)HeBF#V(Yvb>ODM?F32OPW}MZBIX<Hm<hB>EGu#EwmQ=NZ;lnZ++h5ZTFn
zVXL4;3^i-L%RcyfjLNxE3xFB*J0v}R=F|OFf#a#J|L@^rAmaQx1a5QSH<_yTaoo|D
z8))f&1Q$Gi$zk?Id{M6**8-~J1E|9>@W=lNxt}{e3jU{^e}&Hf07BaTj>n%nuDoxw
z8P-S+Q}<oCtImnPYn8APPfsvJ@JrV!)#!F&OZ9t&g4sgujX1j31fO;F4aa2=b)fEB
zXp4Q4SYMIG*M^s#t93X94SL<PnY7K7lj@xBj3eG-o|Sk?3njs~7g8OGr`OCKydbPR
zL!o;YwO<Rs^dI}CiN_vMSjUO*YM>R|0d>@zA?#kgUr!#3n9`zZ_-#H!-0uO?v1Q6z
zCqYgF+H{o$@)6Rf1jDCp@(U`xZ3~?Z_P0?DvN3*_$Wxx%{+7-=$@son_rvXil^7hg
zoe)GmP54Mz!L%><G#|MWua5Q(m6e+b`Fd0+fbCV-#8ch<vPOSTRg!FkFl53SGFcn5
znNvRdg?cLIHwmVMds6b(=x5|0mV^+JqB;{dtp@Itt6Z;DV?R+}u+{Jg3fnZR@tnP@
z9<+5cL{<%X1ioq3<<^3rB&enZiX@9%c7=2$6w(X42Q8Mfpf<*~_q#?72lErz1eKu~
z`PH@fzQK}Ix>8!-H2dOCU&s#+s(#H2N{HJ6{QA!KFU@r-%o)speneArZ{x-T^>e+u
zeewm9(fNy0HX<FVwO$0bvL(H+dbDEze0L&T$=c_uc#qZ-L;;dut?t=+f!Q{d-w||Y
zNf0x|uHUq0wyg~+dJsmNBiBx`veijL*nY7iVlziJS$Vu&53|2bs|Qnr{8;?8K#r_$
z%uQS^+D`Regb-YQT9Go<)ybcrB?y_{>s$Cb@Xc)}a+5=Z?#1IdnALNv2!M_K5rK_k
z9kCA1VP|cZy1`R}W%y2s&evQ_3c1q~?6uyD^_Z@)yNpUWN9ig$`qI*>!s}9n%|IQW
zV!XE}msce%=DXnhT0HaiG#L5N7m;hE6T^^0e(G?|8H)YhsR0=&fa~CBd6sS2N0qgk
zpS7-#WDN(;1#o;zb@eKrkwfV6#QX0&rdo@;1!9tw#(o``X>*~-N146K<gs|Ktfjaa
zdOfC+VAgG2AQW7%_eb~;Rt>5f9a+oc8C=j*R;2Uufvozs)`pt%KSQJX2(;d=@%d>$
zps$d%dY=%{wly%IM`;m*3mNGNU3#H@_N<9y-frYWAizTMYd2mOm90MlHb74^&xr{9
z9FB+aJn&~6^WRGpEe*X`<%zW8!j$=&81KDmLs<Mm17exHWjAme!|G?Dq8BZ4j2nkH
zoNk)f-0Vs~f|`ORE;LMXLkXk8cckpK(*8{DAMXpdIW>6Gte1t4+{H^QK$z;w2QwcY
z_+BQ4HZCZ+Mqy!mIF%Sb|6lQ+*!SStG---?f7LgzR||^UF>pEeZdcaK9Rh}iK2JhB
z3e5Syhgz;~im2wyXgg^8;lU}0R^zx{{777G=7~43Bt@}qadM@Lfk|p)1CBml*Kp!1
zDOB4*@GLrb6a<%Z5^Pe8SGzK}-V1uBKOckd3rq}c-_aX*K#7=8;a1cfYvO|rVr2&@
z(z`=B?-e8+UHAWp>iQs80~?yS205y0=m5T1UQ$ysUZ^K|z+m-K(^Dl9HIu&ZLFbJc
zrR?(4g9JW{@8KJY?QP=Jwv}sn-aia=jGe+#dTQ1(V2*?kQ8|sDnRf$?nCh9Mf=F?p
z0HPQB>)l$b+wJrqKmjeuN8{%cdnetsFX?~9WE-rwqT#$kaJH8}VMnio(_V#K$yhu^
z@`|!V#;A0BDAPn*yjgs^DSJbcW}eR#n6^74Df(0*8{btSvY8?X@jKJB&{DD(BuVE_
z=um3sjMipgYU$7uindli#jUVOz2LNXb!J$@NDr=KoJYZ}k2ibF`Q9h}45#V8%}YLS
zL_XH}F_RnWQdzHyfi3!@oz6~q#S|a)<u5cNbZpADM?}ijhTq3RswJzm2Du+&;=It@
z&Ub3&vMtafn@UuO&b#=IuB?MT?;T@X$ky>|%P_})GLh-Fcu4zP+fQLsdZ}*vLc9@n
zi0-*7yU<skLkzo9cG(+zpX$+-$*@(gcpTf-zM~m*Gt@aFn6cZ5w{L^`$38wDCa+$E
zfHXAGs0h%yw#B)jL<+isO7!l`lk^ku4xQE3>4wd8w2uUR^IlVb%U^LG5|(f!_u&T4
zRZ!0pC6Pe+;6%vvBeu9vb}sC^94aZ%)xT4E$f1|n+4r4x>p`{(x{qAxgn9IO119Jp
zp8p=Khn2eW)2ZF#Q1Yf&OzY!#Jy$=$f!XMr6)alfX6|JJN5hJjk-{D38GgCOwU)Di
z=2_`g{+fldwDW=*?0C&J&1U<@91NguT@Z(Q+*zq1uNr;-Q$|s=7XxPem(@5-%hY}$
zf+{EnXj95@JL?sfy<rDa4WgR!w-IIpxs}@D%f#Uw?i@YpjDj#{AY&!Dt#2f3+Q~*L
zO$fQ+=&!-`oZHVR{%%+}yz`vi+g;nqiQrVkGJ1bUFu@+Ka%%?BDAuc$HYyH3e`!8;
z8a(PVE*IGjfh%DHFzJ&ku_TD|7f&sjD>Yyi{70}FOd}iV-|$|cSZXCWH-)!t(7t74
z#Q&>ev)-vfA7G7mTb4w>6INVGIOeYeV(d{}dX5%#l2%*`Pmyg%227O|$(_ke9}%Bb
zZ?V-Y$<hOJ2n<Wkczzz9WPRFG%Uw<G8Ug#7sG^@BJoWW!9ObY0Cck(<jz_Uccr=ZH
z9Q?C<D}(HQzFpE;FU%#aTH-nc=cVNy&L3eP=7IDe28qYT3K}OR*cwSibD%i}dVaBk
zoZaS~qtk(3?pZf?Po>!-bECj~lEE}uoq2gbVx(>y+tchsn9dwpg)@xo`(61L+kBV2
zH5NZP2~79zHg|9ZS``QYa0?6Oh@a_ACw)vOBcR1q`*wR*lP8sudLBQZZO8FRd`|}S
zUWdt*8N2d922Kt>xk*yQz-;^=zekns0JoEs0c%G1&n7p*Md6O*Z2uEIE6vaDcxau5
z-bvO+=UDo6CO=9-Tw|r6#E9DmET3O;c$ttC{$3pu!+xqTd0(UPdtjg1hSp4Zhp3X)
zOf!sYZ#~!yBt1saeYj1!&6V`<V5@99R$W8@T3Wu^9;0PGqY<B?R(a*pkI?`Y{|3Lk
zu|R0|#}=WsMQckCj$f586pphzfF@>9Hk^C5H(LCCj_>#_ZO#ev!{slSPNwnMOvzi|
zI;N`Yz8_WC6ho%3Z+oXXuHI(9xxZBGxH`0#n-!@Z-@$r$`&E*5->o(^v{L*_-B;UN
zRDD5f+YgQsy-u3GrCo!{fPY{+J-qip*a`2oVi@_*vop@Nl87_Uov1WYKkw~Mozn~9
z9wFKrCGlpZqLKb0QL2ABO#2?#<@;h%+qBeW-G3(|-l*N|hbbRXFNYG@mfuZxH<@Sp
zCj@AuoFFT5+lby}Q6@c6H*hH~GP<%0%ALTP=%7dhWTX4B>hlT$3THUW7&+}U0IQ$D
zcg+{%{3>aqe&e8eW`)hV>@;^<TClx5x7iXb4#Y@4H67uU8Fuqhz#X?ZsaYotbj^Ts
zNA=^kDQ9Nxd49V4<Iu_lvE<?HSv7?rdBDxrp1UNTp)!Myj_K+%ooiUW%m)hT);+Im
zkyTF#9_S=3W%8sA92*<gE=_Nclny$Sx<<ILd*->R8Ln6^$^4Vrt@|TMfwJ$q>HY}@
zc1kYNAQ>`Skz~Y$&F>}fW(au5dS+n|^K!B_cRWfTC9&(5ZEf0KJZgysZDbKHP|mAJ
zC&Z|LPI9|7!g4+Kgi?;ms=4eBX>oN+i*%#)i7IkGT7sQ`UmL@__mM6~fr=}-MlHV-
z>ZEF#O8Nj~wiO_)oB=RXD@!at)beh3mH~}ti(MQtv_?$V^xj<Dx~fIA5t%!e@}sJ+
zzVhPl5Uf0TfWWJ}TxuC84wrPdu`OgK2h*6xS=ASP6g^uhMpm9S`Ht(Q6D7SdM>W^#
z737X4Xm6!rT2WN+*;kbq6Qso0*o)4NuhZN-ri#r^0MF#L_N~?aP<$f80#NYLYj{7n
zVdn|lal`2o33P1>Rr44#i{mM^Dv~iSjX#_{Y;eW8Dj*#@Q+5H^mFCIMkUY6F=GO=5
z_J#XdsS<GwOJRoMygkN$@zD}_^LGMjc{lhg?;%9ByK!z^B^FP!R1zwi6;sQGdNt=p
zH{wgePR75lm}OsczLz`>M2r!u%6eG4iAsfLNo(1|$WNWYW?)rl963^(hhGiv9LIOn
z{dil^M*;gj4@;cT#BG`jxV?)Jq$Qa(p1h!Nt0U~B;|R%McbL{$89T$^?rAu0+E*UF
zVi%JVWvM{%4ST}<1DkYwC{wQL!||>xM;hhW$5J#S>>{zV%1u_Y=F|0dXRn9BSCuj3
zY}fujyR?v^57%1+L}D(RfF3B2jb23suX^>+WM56oCU9%(<9Y3MzS8bV8XS|RPsC*F
z0<xnQ@2`#Tl<w)yEBb1~<3Usn?#F|@T~E}UiIlq<i^fs>)95i`v`TUNfzM@9pi2X{
zWXywJ#n(9W4v*!jZcd?UTysBv)gx6pJc*$|(qw3(-?~xLiteq+f*@e;QT?V5lf;k8
zdU^uH7B8$)6f1b!cV>4zYuxIytja{erQl`|x7%pKuwOh6WHHaIDrk^%(>?7dXh_~K
zON^M@Z$REQ6eV`<%?bKt$r#Pwb*?+}yp8mw1R8I~;z{nWi6MJQmA&JsaLJ`vUhIcm
z$`&+e6i;$7RCH|weRmm8z8E{K^x`kM`H^5@q$trJY`$F3mk@RLtzvva_^Y10#b#A`
zZo3Y;o4QK{!rn^fY#Sg_9<max0@2)YF22~6>dsU7p0e;XWG&RW#1HTym2w;a83Us+
ztV1V{#em_tgr=l!LUjJZ5Vm{33oUmB8vpeA*!$=cuYf&oD&{c{*!*IKuSC^)Z`UoA
zD~f~a&A*aVVG2P7o=N&F-LlU?F1X+TKls_dhu=UNsmDWXB0nocw>^cNU}dT2da9&%
z=q)eQv$*bKp#Ayw0Rb8xjXSb-GbcESj@ve+O!rk*J*L~KWT^EeD5t>Rt`h=E*_b<(
z!$on%lXr3YhaZH|Z&f|n{i---oM>#7F14cbE9z)UxV`1PU=03#3u+z0*vk}CZSvFg
zD%Ee=>Be;^?5bKjB+4NG4t;>82v#ewQ5swMdZi?z^AW9yac;)oL1hZf#(z0%o{Dj5
zMJZ%7I?KXXD=A?{`EZ&8I{Z!N!F!tqm|V&8m>S$(hDzhe;ATf*m4m`S3qm5akdII<
zWSsP|JQ;;Z0d-_eG|MEL@j|b7xP+%~2lMv5_ilVgGSZ3Len?S^PdO7IZ0uagr*+b(
z(<&Xd(c@!5>Vc+t+{5|Uar*+(Wn!}lMyw5R{Jr7z|LSy+=-{}L0rT6v-khWiq<rA1
z2qcz&Fl0u0ed)%w^zlJToeySI338tICt<8Lr@6WBY03xd>u+Zp*;_kMYov+evtDmQ
zWXmnNT10mj=-sHW)bLL%q!rv_^}tNJtgWN=dT&Xen$>l8zu61c-wFv)@4cKWg}u@&
zbD(N}FQR;>gDE_;qLz0Wl0o)0YjuAer_OKi(-(y!%v+6r+-`fM8y&4sWdE}=to#hJ
z`voSPUO+WRnQql}QY@_|)Ak&1Q3#ULfrr{{FzJ<bl$}#aIC_WLm}-0{psuqzERvpU
zY^OpNa~IpNysSgMtlk#`4j{6!Ad)0SZYvx4cU-HVV(hpNOKPWFBKKvi#1*5Rj^k9d
zmM+s>V`N@^#PrDxY44R~sc8r2qO#IpI<v*Cb;3z}zP4?*f?rV-rcE+Z&4|j<lD8rp
zX^N1kijc-U1{(LFB{`*5zs<45%7PR}oz<iROmReja(DA4*e$=a>~wTZ!o=tg`+JzP
zoqMf0;az`z+;iNjrfM+S^M(k@87hbQ4(HrR3dH@I4P;~Vr^hZd3O(?`YIe#{JqZQ8
zevhYz#ZPv4N_c5LcQ=}(J3yjTw9RQJP_Ezid}|+(e1kDpfWKw$w+T$1=>6_o5B}pK
zB3>I*T+|mK>X|Wl@}r_(4j{wGLlgCzL-5TcdL_LLAgc!I3zDRE)b{Zy0VK^7K#Zs?
zoku}@V|lM7UW{Z^<$We1_U5bOTLI-yOj{uXK5izxUt3C`33Gk`p)|Vc08R2h3_)m4
z`;Rma(KgWl3F_uY`hvL<5O0Ar+0oyfffhD*Yz`-XCnI!EpN&#6Bko}I7fp{{tS}x!
z=KOfOsYWsi6KZobATqwwx+p(dFBPi=Olj-*)54}jw-VPoXwr{oM=O4SCsPmEcx%S{
z5aO>v377#)`*z=P(?a4EcObnYGmmEhm4P+#ZE{1-VQ!FmAhTUxAA){-V_M5fkD#1z
z1@U%QHC(E!9Xd!23pi_-z>Gr_QV4~9>_=9dvo{z26uU<EYZEA;#$VGyx2(pxx}5+W
z%--qjPN8T73N2S^xl$+DUqdPbb5nMX!P9q=nfI<!Mr71_GuoF~Jeiy1W^t(|<FDEi
zSNe)WZRh-E$LmK&fOJ+X*(<LR=3Qk;ezJ~THDeSq2#6!lGQ2tKhfo`0aBHyg;dW?$
z5hpBDJ1v@btmdiyWTkXvUN4<wyzvu(@p5p+EjZoGM{{%SHCU<HUkxFgHM3fhiW}JW
zBeYFgOez<nn})nZZ(Sh`dE<vb@P10ljViC?N7uaK!8!3NV>&4s&M6Q}2O~g3;CqV$
zqPi#aL6i;tol&8Enj{yJdm->Sg)6Nl{!CSvJtiZ*G!Y2;$-b=}q%ihguT&Sdn)<Sa
zNAHpo+E=`?mt6=>1-}C&@mIfIU)A6R^v`q>w$~w+q_o9dDF(SMxAle7=x~fu9!NIP
zV(W;{xFZ%ck@KWAF$)AEIEfOs8@DGcvKO2d2G^YwBr~L><{3dm&rqI5(?a-r$-MiB
zQ2AJ?Y4v*DV<1-ENiE@E>BuILuFfh_PwM*#ZPr_Exuu6VJzN4&3T*XG5h|DuPt*re
zfGwLFOzYLxMMtV|m&??HK_}h+Jx;m&E1LaE52B_KAF%REA-W_vPbohah^lD7A2Fj%
z+3onRBH(uazoM`fJ7bea<t2{(*(x9qBiP7$jsErRl;VmCu?R|&O{)oNMF|$2FJY+k
z==49`lP&k&E#1|4I?0GJ!W14bj8dRVZI`KX`)L-nvR6_cdUZgXbSEYI@(*n!t;9M2
zTG8%Z2PP2wbOdFkX=Ynai<A>o<!jaxiG^J{j$y=xzm$5%98UgJm2u^qC%4fnv$vIg
zUA`?g!<OO?LlinRQIUtD@=dxwv6VZ1wdMp-U9WxZI$FHs%<>jZbs(KkUX!#cbUR$e
z#e*hp8jSCa_o;^vjgFJbsygz(X4Fa1^Is&~U#f26(vRH66(1)h;=U)tR`8n*WXoUp
zDT$LDNWb{WjZBUkeB4B#Lt6q`%IMU==_^ZN-d7=woJVIfs$2QqekwO~rj4c_w=ckf
zy^aA%9Kyxd{D|7eY%V9I#mj;_aW@rBh{eTnX;OxRwgY^(EiEMbTFNZxahPiF-*QB^
zyP0M<HG)3B(Yr5?99$Y*eL<5maWNwRduqh1*oF9r%ay4s_C%(TNuR)lD_Zko9vVeq
zOUfJm{2zwcBe>_if73emEkOUn2Yj^e*}rLjKyrhBWqk)2bBFgu{!jXY--Z7$C{eqY
z-@{_OP_uthBiX&3y0q^%OSGRa2j-#VF&`v$tK>no^Ofheh13|Z1sCs2hN;ak${};f
z7dl$h<AbdHi#}9(XsltRz`9I7CfJ!T-u=n(vEo{%lsMrTcJR3we@7?%=&$A;uM4Fv
zTvt~&1Tp|K!koh7Oy!aHN87e`1FU&!?y-K=u^OSVfltHVheQDFpVL$%^~kF_)7-~e
zGY*M<e=josai+qe+MMF-u2RD|S7h~pQ4F&={X1DS`4oTZ(8go-YVVA1wOZ#`#VPaZ
z?+wpw|IYC$FGl=I51aadQp<4UhQ_(ew-*3x$OU6xtWg80Ctt~EBa>y(amikjfSgmw
zIBlO!^|k*&BUQQdMwJu-E97dS8niPk_X`(2By)eV><fX{y|Rm?Z8aO@=>7KnMYNF*
zM8=T^<Mvg2h1qe^9d~>M%|7LcM%@h4Aj`yogAgYq$ULPmx?-^)-g|6%kG^kE5&w7)
zQntEl-0ngd!v=1QU2iX%jKt?eVb^yp9w>9ByNyxTJ=8?YP|G$0+v95spX3b6v}@0C
zP2)-hX7F(X;(jLNP~CaaGQQ-owvN{zho|Z7QshL_p%dB96PIJX)^bo!lo?^al(Ozw
zaBTH<AJD(C+h=#{W}KkgyVK{4u4&X((z3d((*u$my)6VBk%NfX!osB~&eg3i=%js8
zf*hp_0_gSD8UL4e3gXVcw2^dpKq`-md(bmX3$w^;4Y@4H`EUX0=l_cQ5lAH+Mc5Wm
z@w@iGBxev%N*|sZU$1gdBY728Mmq*;d+|go{XOE~_X(u34lZM4SznAj-#KQrY;W1q
zH@dPQ!yUh>aTlG>(l_6#Cd%Gxc-G?-mz;j=Q0(@+?s*L}L|r@;#LBuBpSM}{XzPz(
zB$?aC9!8Bd{6#ym!o>t-L3DSvmj$o*T!w3Gger1kQSSpD9B5rMEr(lHpt?~@V<+p1
zNO8qaIllUfdt88Rm9`sxk+gI6Cn*Z+=_ORZCU^bWg7}V`bDn1GNLSs4R$87HHfpu;
zd&m`auo*94>untM>_Nsk_;;Sq2aK3L8fVa&_*(TXF(I`X2)Fl!yOcZ)T)La=#}oG&
zr1~CriB>PPpsL8K`gG12BI;r-D;TlEG^V*u7saX7A>z277^*q6zDXMal5$m4Z1c7R
zX-t5*z2l^d5FW3?`4W)m^I)!;0d+2Ojh}*REp9K@q)eNym^4RsX(f&V*}ld&!8Q-#
zr@fMZ^lC@i&#e4r_F5jC<bgi%@nD<KyJg$nXYn)t<i6bYT-LIzB3AX$+x4v0I@za9
z%IBZYebyfA-p|F$&hoJu&f!0tQ+K%(_5NvCswUc*F7xml`iz@N1lL#l5WtQ#QE|DQ
zmW|v{UkKChn`2y$v()!(gH=eeX$VpQFr$RHc}lQi`~G^Xdc{04oh9208?utN`sK>4
z;8LZ0h5az{`}iG!q4SqS548T-@R1}Z?^bjAc(~<@GZ1&r;_!*GESi2O6>?B~=I;7H
zOa>K}p;eFYipCDoC?<*sm#>ko*gJz#=qyX#NSQ*V%IK~g+xO>=10<l+(t4670f~K)
z0hf6r*T=rapSH6BXdq30zC~ZjnXl{C#0xcPC<qWbksx-?V@caOxDjU$d-2Vi{7_*v
zTA<+~%ZA2wXvjd{yIZQo57pG}7&odX4_OJNwemqz&;cH`b@DNS&4o`^di*dzzAMfl
zu{qM6?>y$B;?b=FyJ@m%PR(y6zrc<M!yjHi$~%+Sj?i2b6F0+(RZ+QZS90i@KEGM!
zvRA#Js9^$5^w6zEk_A_cy(cPG9%jbDy;IVaqwn2!5q!_(B9s)OT-p;JQ0Hah@ICUw
z3w9R;8y40fcn^GYhO5$RFh)wz9D8aa$5P-=OtiV~yqKTQ`BhWr>kiJk$<uhVHBcdH
zr}*{JGEHu09m~Cv>{AKQ>+0Hgp4yH64{fu|+FFcPF$F80FNn<f_`$Z*X^M7I0@EO4
zuM_s`6=BH?V`N=9m^qqJ^=smj=uNS{mU?HoIdr)Hx?uM9shr#NH?&2sSLZuhP**CX
zY)(obrm>*cuU1=7o;KI}rVr)aX#r9&LQEh;c6!Ta3STIpp!>0-s8E6v$yPy%<l`T9
zh1>6%tf1b~q5@!$PvgLO&u&|i@8~IZjOoEUCcRufRyXKYtglBDtg<hFtFERTDhW#+
zdh%0S?!{+}D~C9l()x-F;Tv-2+!u+Dg2b#C80vJ^sAC*}<X<nM!GfU*AqME!1pb{-
zyh`&G7Nw^`T22tn^M^>I&GE7u=?Obn`fOWa+w7D<`3?7|M+%hxQ{8z+HPyB28hvfA
zs0c`qqDUwT1|mg}A`t`x0YOn91c~$-B2{{X7c_K{8bAU{ks6vbfhf|e(vhY_5;}%Z
z6G-+_&-ab7$2j|(z5kt)U+c$A)?71du5~|iKF@stAZif>g(#Jy!|rY3A1RZ>SChz<
zKEnrOQn=V+?l}c<IW@nCu*<$fn|hWy9}gC-#F~9R6IrvjUQvul2){AKV3u)Q^x1j=
z*d>N@B6Ja;{6(?zw?B;6Mx$*^Qk~3Pu`JtQXZcmEt_1G^N^+3{ML%5it{p|Q^?KWE
z9N}XbXEI|ht(+(m`w0j&)@2hVynAFGA61s4sP-qNg473UP%7e~&%zOVbK)b0CJl43
zGxv>gH3brd6D6<KHO}y)OQiH5)a$e}-{>)(bZe@{jy07~ulm$zbr~McAsxru`Orq_
zHYGP1{i?e+69i~jG%fMUK|;qt?+XV}aUnQXY`i`g+LgR@;cCTG2cEOiQWQr1K@dk%
zwH4V_R~Y_wxw<)5b9~2oLj0Escu@g_(<ljj=GChC@zT~$z4KN0#s%~dE9QIciaRKK
z{iC?(vuog`BOd}^V~&4d?#gA3N2W^j+in71W-I&lZ>SR6%3uHT0s!nZ{$(x!_$~Yf
zul#C!h3yyd?f2&c8vs4{bNHVd^HE2f4F&=D&%00|WjCD1?zt-cbDJs%a*coDIb~wq
z@YO+Yhw12HUuJ`ZoBKQSdzMA-i3<QDh=BTua0ALa89nc8x;K!~EET`D#g&-@v-!NR
zs+bw|KL!bU05Dzvy74B8@>5kPz{L|T_@vD2XqrhSxz|DOP>M)%;@j`Xw~p(321#s}
zCURZ^uUCBi$}<T*^jEy-zTW{XVe4@1j3L^|B<7-ElzLk<;>_=hIKdA<grYATYvpf2
zn6|VaWbxXG;bGRIRL;6*e0O;)3D|E7n%LH_9ZH=o!;69`y<OoN`gBD*oSK-)L|^%Z
z$uN7V{GJ{gCf2;Lq>H~s(e=;RkYlE?il#<k?8^!@Q|A`VLK4iCXX?OR?@hp}i`uDG
zZ_i`T^xx83q+82~Prs(z?udF2(_eS-M{rjW@43syQZ#Xd4sB5vu5cL#a4dfRCOvLC
z3V6bzJ-=fviM(!2F9ormq)nQXX1NoR0(}HKHa+$ws6UQ<@_%~=1ei3IdaPDXmdhr!
zq|<L5x@=wf!+!2h*XE6tPoqD+uDPEPv2*G4lMf;yZmB;kVvT$V5rJEBPh4Yfnw}|;
z!eyV1JoXMOoOH`Fy=UiN()Fz-0}A=B<vyh(Wbb1uU|E98G=dI&kgSvCa~HuWf*x&U
zVC6Www~p^XAzw7ciO&84x!!z{V5O~6!%Nfm$%Ig|;~m^c9MxNNda@*EQcZHec6xm_
zxdPzE$Q})u|6~05sJs(f<46|o*y;!!PqyP7gMiFFi_J|JEvpNg$R8s2yZ0F$eyg8=
zvI&m>1$Qb@>T^ejRA<E6GdU1(hm8by%P!IK!(J=p-%%nB+jx<@dwH!SZ5+Fg&vy5g
z;!fC4mu;L~uk`sd9FQXb&5!NKInL*to8q$&Siy*7U)P_Ty-epuLx|YP;G91;9}u2@
z4FlV61wV|v&Ow1QpjZFb)VP22zn1d9&-LHc{r|r-2F06HYQMAZ7L>Yf-g}oHwVKb{
z^XqK_Xm~+ngTbhiqhm*n`KvtM$0((TDZwuCGfVIV2Y}lkmF6~5U{9_wL9gSOHreES
zID4sgk?uQlOrP!=E{|>$^mm+h8viHNCSHH&RGz@6K#a83-40I$W&KZ!{m=PhVrXZl
z)T_lEy?n3j)BGHx^Ip@sgSE%{$Ie{mm^%S<0Kz8=SmPWT%I`fe$fe#MwGb{^nYAXD
z26pVF_<X@7S8Z-^^*5XWZzl&-@@ZbvzOx<eR*HnGkDLtCkB+?;)YStovWrFPP>fm-
zHeY$Y_`KeehkV29e_h;JQ6OVC%~Qjn&C5i*^Fm$J_YwJFmn~$t2rhRa&$us0Nu$lW
zHNC}pANzD@rM9QjLU)EquhHuD|9Yq5nkB~_9~p~l6HI6C8H{qZz5jH%TbTA*OS-MQ
zSA&1%HMcq+vIUp*_}&va7;50hwb!1b23sYLfwaf_$`_$aph@B$oD3FYF_V`>_|oZB
zmL-Ox-p{mU?}N52HW=N^{BdI6o(`@jKTs&<pwY~lJS_pQbgI(Lqh}?9;prU75E|LP
zI4;r@9Mb|r65T;U^_p^1SyoOWL3$=#86Zu_ZMZL{P0rZw=Z?VgMJHHkJCSwRxiH!+
zrmfvzvW;~uMVEuyvX1X23L;_XR8V)euQx5XN?g0(<EUK01N8>oy_1}QNnyWn-bW(d
z`Wi>Ywt(SR-hH5Gi4pt}fC0YpCMK_w{UV_rvgI3+GH`qGcsV4B2T;ip?0Wf4ke%UB
z8Kw3pc!5o9#l%nBa@@m5UDs@0>&DD17=tRT8dv2MxhgY}re~kTa9#A@1GrFP-*zga
zg;(+dnc;5ae17wWmRnG}ct$DLgH?hX0l9sqmtbp;ir6D{UYQ~K>_7Cu=mnq~z~4Rv
zw>b1|KdD#iT6smE#iTN94}cRf#H(L`SPuWD&9tZ!tba%lGZJJT!ev2hD>F`h`S0LB
zv!g0uKY_mKl&=;?kWnqPPWA+UcHRWYJbQW#+;2{%DI6~-wlKci5V6CL8VIhrQ;T}p
zntiV5f;GBIE%z|sIs@EEWj`+UHY~JSl|71<Iu@SM^Nfo#FLFp%P6T4P<LTpG(%+q2
zUzReK+$Czi0P||Ye!#x#M5qQxgKbvEXYr;5FSR22erbO{VZyiCm1Iq{J`Wi=Zyzj~
zy_#f3)=fxHy8LEf{~Z9QZE*^Dyg5tp_jxHfIg?asxO$b;1c&^PJhzG&vd>UWSWYf@
z^BJy)EW0w0xnDH+Qk`^TuJTx^W|sRoKXNyHWR!57O13Mk#V^jrkW8p^qb3uaF)u1>
z$IC0$yUKn#M{AR>h@N&FjENVO!}VD0ObY&*DMyk$sBQO=W!=qk48l)|3dgzdJQ|cy
z&KzofSgoc78rQqs@Hdy|_l$QTOHri0$QoxF)JqPQ^ujDadb)5LdA%X{+zZ7TKC?-^
zBUd{c9@G*qKg7RYgGTYbzbP9f4K)__YrZ`T1);%oUx@JnUN^JMGu062X#GJDz<UtC
zW}hi=G27%u$hSW>(q<m_0r$ki><giX-%o3&RLYyohVm}^WIJUVt_R9R+(-xx4X^I%
z>ZMh8R+QHVb<Ec5goWu$cp=hMk@AT_PDi&3n8T{7NNt>N(KVKaBF*?@9bsKFG4b$d
z`SF!$z>sFtaPZ#D-_SS9ckfHv`K>fFZnk}(D*bioJ#lJDoRp$p%jije{VlCi&V3aj
zi0C3t*I()Y-P#SNEFBo<>L5i$MsQK{x|TYVZwBpuol$^GPj6Ify^`6`vrW${MBnM&
z5#x45^2-t@#Oq33C%hOOVR-DPFYc96FLC5`tM>-7hkn0<%sh5xdn2EH7zodV6jshL
zXU&K|FGx^Y^B`(dt+`U%oZ`^s$M~W_sOFb;dS!u&<lEMzD##@R+%d%^i+GmC^M;)j
z@I;J3Oec=xNqG@yh#XkW;xdEpKyy0WPI^<al>5d+eYxMapxVXqqbNjRB$6hV(>SwN
z0w4nTyrQN2GFlErOfR^u#Z=yOorBYRjltb!>V=WJ?W%+tu8i{t`Vr7)7mtbqq<J7~
zecxLScTaj|Z6o~r_idGnOKm|eA6rnGu0O9DL3zK|GwNdezZ_>5b;38i2-A9DLHzzb
z&41xJ<&&IL^JMjH*N7WcO${mOW-vHV>&ks#E*3r6v>$&L_wJ86S8ZRFb8xF$=TuF|
z`?Bu<u$oFbcG{vJ52fxb*S48m(jLOS4+3a{z<c~@?oDyp$eOJzhu*19(XQ7%x*YdJ
zW}&aAt)YBy{*HGKySAZm{jLHQg)P-u^@&Np#i`e)8xe|}S38M16DuKlCp(qr?7foD
zkE5Qxa+qREHQX>fFb2c4Nd)#47yq6XS%@Rj|2D9D4*nm>?Eih^)12tR`-Z`WCEA-m
zdBca-UW|K`H_V9I!0t84V^DlQzU_nXvXt{Lh32-pp1n2M#ta$+XQ>q1tO+CRF&`R+
zi~WA#!v<zvcDFe|C~CcXr(T`=XAnvMXOWy)V?sxS0i3Yw(n2G1?9?c$LBRY@_WYVE
zvt!H4hRJvv+9LCHl9&a{n@T&IYt@u#WBj};&bQylCXDa|m63@&Nc0;`Gt(0J^hv&&
z<R>Z6r*9LsMhH@M^E{}Tn(f`K?dUrv?qAv)kp{1cHi=owrT8lmk17m0N3`$D1<svC
z&6ox^hU2`uoyI$Ef34cC6APT;_}oJuR_(L83cC#0Je(sHIZh2#Ha3D@t-so=vfjnU
zqDT)nmNk&rwG92|049f!|Gv$1Fw*XmWUK?-+T+7Lu*kG+xj9}T>WfLYAOO|I)?|dc
zMY@e<l-}coNNK*P4b5B&_m_OC`7_5lKlc{}Y<5%+XODcBMw;1sTf<DP<|J;7upc%X
zL_vQ{4T6W7X=EYjeVBHKU0PbI=t#%ugT772wJhO&Xs6g%j~*I>H)hc$z7?UUAFbe6
ze^Um0uv8}KEmkq)@T8+>R&ZIMH4hSLxzP#+9T-h5txi4q;L(AaLf!uDsINHqt!@G1
zh1bYK=VmxmGD$9_{bwrtHl3ZNt*8o>%(H1@HH!P~sec{y?ddKkC*hc<(a-)$kINqJ
zZDfZ|pR&*(Se8dSgU9klE(_Oe(esr~8K1flcWFvNg54VJZm!<Y`TD_w0wZ3?RShb6
z<|g0WWY4%W5jFOenbk5H5-mJvVIbVyw2J=)_S$eboX&-Yha^X*bjvXj;I?Jr^6kbO
z8A(YgGE*B9ik{-aN5zUQgj7w9`%ZiL+-X?~UX5&bSQ;aW)S*P?VS7u1ku@fs^82Q4
z=idx?sxsT&GEE<rn+I9oO;@}|+yXNgNzpmZrupuM$Ex3s>+Fv>lbl?ey>8kO?~m64
z_N>&$@DjH+yFJ74@Y^n-+Rq2aiiamUawMoDKLR@F1u?nI!*WbfrVJm(yi_)YOQc<G
z*md65LP%^y510A)>!_nEu=F9R;Atixa2bniNgE7yj$A}x6+6uqs4KMg_NXhbc3<}7
zf(+dr!rzRqUuimLVOH>@si{cR_^q{JZ*LWFCP7o?gT|x~7Vb#QSaDcHcfJ<42PSRD
zB(~)Kwxc(7U{>1eh*4{eM@F0TUeL?s7ZP~08p+N6IoT&rQFy6LX@5C~=cLM_yd>#?
zP3miHt2!!7PJKyxvw`DeNlv+eai@kQSzA{2a_?R~jdV<?2{uyV@pWxXebc8!AgV}o
zsj{=b;hn-#v{4=_vRRKaQjyHYcsM+nU-{2bQKsFZnz2mrlqMsCDnYHb$>3Mje&8@?
z{<SHkJ6D1M@1vGXF^lV=UU_%g+-nm2GO{;|VlYnKm=P%fKKI4MM9)Cox=6Ihigz2V
zGB|wIlQdQJrY_c`z@;xZGpIB$)~w@vWh^DnD7QS-Q_l0U;=m~r=>d&H!ycWQB@gwk
z#=?>KcV3O7Pd*1rVPM*dUn(Q$o%ESLyKIFn9`w<sn@e%<)!zOo9D~GG0&_habHd%#
zu=2E`!)2?MiPU4&PMys$18wi=dSjuv>gB$2>^A6KClJAdbU7is%vfCtI~DFdeViY1
z7buddC?lVz9x{ehx&Y}&fy$lEKv+4zP((I_lgc{$^4Ijw@flAAI+)+~&ApC2?MZ~b
z^ed)5>1e|v0s9JOvc5|wwuJ7rIRbu>_@Y8BG4)5P1ux|GFO;*zW~=vr+CNOVs}9S}
z^YV0_pL=P=+t?jv1^yuqUOHHNv0`WRITxPG>kg*(ZiJy}hwWx!s!a1|^)P_Bk;8RK
za4^y7N-sO=L4Uy*ij4BTHp9|%-HW-gcj-VE4W^*A7>E81`Vwsj78yESZX5exPj!a6
zV^=CrsGkz~`NgiQCxDg7vJIS!<=vA?IN4|0RJ$s<I#c>3@hNG?C-to6yiVYE{!(m3
z&%`k#@f;&^U$1cwKz+$QO^!}l>o1(uqPys?=(R(w{X}`t+(jlz8nJf)dKQLCXj!-v
z<GIU>@U|E1r(HdRZ(uvseJj*sZioo8Wmwxt_R4qeB*#=dl9lNxd!b(wcN-FjNtLCF
z&567r0Sl+D24~fI<c$yZE#a5SvE)s^tKiw*>otB;%9(l&!CE~f>ED*}RbD`bbWq$Y
zS3?bw2te9h$c2M$ujG)k0h?i*&!RyX@k+Z2^Eae|+RIW%V&)c%f)(cU6rAx~J=9a2
z&(p09uU*b+^bn$5_jpP~c3w|9TBGvJMyksmrA9vMygEF4KfC3}p%u@=lZ7(%!O-@O
zi=qh^Cy*tEQuWn?mTD%Ob^UWi?F)P&(-lm3v=UPaNEIME;1yw$!9iDGz!oR{)T_WT
zXMWy~BV59xSu2K~4S8ptTE;`CeXR;XDDst*8(HI&mll`1+@9t>!wPKEqps-@h}en-
z*%Y9lc2$K_`&@)#5R$T)eWe+aFE7+t?KgAuR))5m`<*?zxd-3qPUIM3z%RB}8~evc
zz+ZW;|7g2Xm%DbO2P@`_E6eSn*ay+JdWdzeZpcKTew0%>P|Dnem1}F;4y{7#JAj12
zSFBNq^H<tINqn9pyX?eBZUaMLV~pu38*uZcy3b{<gx7C_%CMc`LB2SZy5JDs?id&-
zrDbTfy7R`1I_$Me+-R-22aYZiwy_wg0qgyWvD>nSFO|Nz*o4_D4J1=!(O1AtJ4}x7
ztYbOk;8=JUDp^m~45DNE1>K$laKh?p=aiku`m-sFPrPW|havl-G*WPNZBVs(yl+&L
zh^NJo(S9B@L%gEDMIxami5c5cZBQRtV${$H>K*Ql{_I>7xLc92Blz*2dYG1kOG*N6
z5Fz#M8`s^r>q05iPV4^O=mOw5>tBFjSqHaM(Gih~PDHR8Y$Zyb4V3cM*9C`i8U~fF
zLupD;dT?KIN!CX$trkSn{FB4RdM&s6V5bu!6}1<K?AXnp&z797d!lk0U5;tc$xKgw
zLT&Se_0YJnb3RR>QxP>z&N?h0JdlVelWh;h*3`QiBrbiETJ%otQQ;}eX+_8~xn}%k
zOPm=NwT-s!J9IdQRl{lY@SfgvTvT%~LaKE14%r||F-a>*Si{k>b>c(`qHVi9R-TrU
z+#1(vC<|M4=w`##cunNkei5JFJ?H>Hyfk+n2L^*AH1dJqi(?z_W_3cRC26e&2%{c<
zV;iL`m3P@rbFqS1Nv#5%$-0^NhMfavFC3(f+=He(2I$PvOxtusuV1WvS~qQas69q_
z`jk>(l%jg9&9<Rg|7@h<aEk3?^38^%Xpz_!ZdbYFwfh73L>eVWBd*3kGP^x=unLWH
z7*+lLKm*vo2y~3kjpG64K+B-xL$r6o&3sIa8~B55IUz7V7hLtDKP4LdNdur;@uKTF
zl*FRezF3{FL}WjeW%&zO(=*_SsXDUvo$=4P%vU!D;k5u;s|geE4gf?yu6zCV>KnP3
zEu134Qp<uTyZU@20PX7Byn3pT{)FVkIpfXQPHu2b7qlxk@(ymUF{`$n_)*9i6tiRT
z8`uSGUks{k5gvB$3kT&}ix#;2^$51Grvsr8Q5C?@RbRi|vwWj(9+j%%K{{Dua6Z-Q
zW9!O_=M(U>=6#)j2i2}&&V6#|%$@ED1-OwkIusIV_@%(trWqCMj}V<1RH?d9RDr4h
z^5qz9K7Pq}g}SRdhnIXrL(3*m3E&NrI5dT(rKqV0h<~%QfmHD*;uV(<WN7F+fDHY;
zXj;nc2)O##Q9bTIR4(5y_(ZW#tri1+i^RIfY2#TrYHMxKB~o~!1lMsAI~FM9<yd=f
zvmeQJqOUwvr{-!GEIL)$=fvSTCi14c=7u-|$SEmcx@*z<#?P(1+`IqbOkArgf{HY2
zF4wG{>tt7ffFqlkQ}`o~F_Ise_B4m**t}1IiYvY;Ql9@1irN9ElFc&;QfgDLiK-#q
zeq(extr5$@TD3k_>+`(ltMUK)Z+7m0VoNvcy(-PJS{i8+A9qY>$#CD|={kX=yPr<I
z$h6z@2`Gq#;RI}$h8$<!LEB^QTKdT5l5_Gbk5|q_;d%80i9zn#Ttw{Opg?79S~FKf
z-OHz>YLRm4LU>v8CKM{mmqLNQdpgly{&mG<QA~sj?^hmj`Dd3=XjhvswBcKMQFi&2
za?;8DrIZ96vTSw99m}uT0JNLXhrxVZTBQ@7HT)C}67Kxml_bBa_SeNZ2-X8>21d2{
z0>pG9XwZw=?W=?C5}w_PI1i?3HAyMcW*@$Lej;ZzE<T<gWvyO~r?fZeXe_G1dJldc
znG)@H()9&DTdvjvI*XIrt$S(|VfVQ08(+V0)SG=sbR->mMn2TGT7wlBw1+=_E$dMJ
z=4Bf3CB;REaVJ>v5#w5nIm~$iKWN$VC><*L1rA%vDxSn75o6HM{*uYRmP{WbR1|Dn
zYHGGMV*W~p8!3?{YHzOYSFtL98hkYZMJ<17ls;1uHbS-;F}BfPl@NfjUjp*W<2Uh5
zWntly$3guAeR5mWXdbJhiBA)F+Z|c5u!(<ClFRU)S~yw+=8=&|vx6V9qmO;;T099k
zRE|mj)`rgDtkmnfY!|J6MVZOsK;U=gG*es9OcgK!Unk7IbEhC}4zN9P%Br?ZJ5jNB
zCWKLl$p!+<vW<V6O8+0TOMWfT{8`BUeZ=-(HU?b!<cFQGESEG=tDSl4y`nQQLnWNS
zS?n8f`<wfV9Q^XdWX&cbBY@7(8eH~EZH(Riup}QN?tVMRduu#m^zW^~<+7E%&sm&u
z6_)+1==EX>5kZuz>PlzHDBllWl5AAF>?9QD@1B}%7f9*5#p13?BXTlM#TWg}#`elc
zm?qBN!-QKX>_VL(-bd5LXYLM?Y4>s(shgwQMMy{Lz+?Gas`7pZRHnYqmO8cEdxHH@
zxP9X*)@a2`l&1G(&(KST)yP@I4VkKc#X7<mBRbCJBg_T&3k9k2>PtKH6Q!v<R%2O|
zBU@kD*ggbubD|XlvRGQi$=N>c<FhtO2-92l21j0=NI{tzGNBHx?C>6iVFym+R)e&Q
z!$M(ey06XzgWXQiA4an$rQEICXRJJ&@!ykPP5ne&nD;6;m#|W)`u?68h96<bx_Euz
zt_f=9_UlAePNPcmt^oeqRtD2y?P7K<CVPjjqd8~^rMv09=}spW0<%*HKhw0#NV2~%
z;G;DIEr4>4)9Q=6lFH|o6ITZ}VRl{Kf^K~x8L&E{_A8HF!)`IoLjwc$1(Qw9;r14)
zSV|R|fmDG*rKWA46$|mD1FAzUppQkUv=}%PaNOnzBV$&qIMxvoan~99qf$oad^bKf
znM=p1ZZDryNvebwIwgE6P-B%+Hi$kImgWsHMZ2S!8)G?*8%nb~gE-{~D38Qe25ZJw
zfYVHw8vqcuT!KRbf}SbOp0hu3Poy_Sx3OXAjZ5(l)(^VQ9=IbXS0#AD^sIwLL&RHy
zwUdk)GlYiD?4Y|2LQ$<(IoYHday~`MkTuc5IkqLY{P7juV7c{`(1CFvFX|GoPEv8j
zGv$+yaE3I?He>;+Z5QH@g0ApQ%1s+e5X;;84M%#+3^fD=xZztJ56o9{*o^38WczY&
z98@ZSmhWzwPsd~b#x#UR8+<iLuis*Yst9SQkcwn6-u>^-6C2eYd|g^UGc~=fgd`0n
zjzSlSJM{T-`c=_cuV;~DLWU#|A6^7ZtX7+l6&wAQ$nn&X-EBRQCK3&)5ZpMACm$}l
z4l0Nz*XK;~LMqtY?2?__saXrsH1Q||+z6q0qI1s?Gl2dF#-O%p>euXw5RpPSIUGxZ
z+d-N2a&z55Z!f2O-mh|HV7De}&6#vLleH>~u6Qi;1Vje=@W(zywB9N`6BPl8=Uqru
zegmY?bX4-QbKo2zte%lZ$~z`^D#b~jgnmiSpyl!%4}xJ5b;+L=?>88n|8i;Y-o2s)
zEY!e~r4SrssZbN^KVWE0yCq^R-H`0z##6%{0BpWpI5aC|k;=~WAO^ZSEp;8VfbvZ#
zz^SKcIK$yB4{4R;ik-vo%tZ`OBCBeMJ)$>3%mTtYd{QDECSho!$Ew_a;Dpz<k8dTy
z@@ZGq4C+!Zt;4;7y28xH_mb|G1TlB4Ngf3F`2GSC`F^esMG6?S4NAUC5i>P0irALV
zCo?GyWKHQYt_Aq50?xZ-wX;(1Q3E{k!z~6-00Y!9LFeVYUvH2){DKuJ>*)p`rt<V-
z2sH+D8Vo;c;E=amT*rF1-D8pYGs>Z<B_P;dx>xo1a2sK%x4*jcikvvrE{JE75MTE*
zJY6k?eutH?OJ$^f7y;k}9fxOQ;s*RHc6eD%?|og%{XEL&R_w2LkqMF4B%Gmh<BmWc
zk48Re&`Wx{t28bXzt_=CCPX{0q?G!)JYkCJNxJ8G=z<ncqcs3y(f*1g5!wtISJQe1
zE$w%(__!ZxAA(S#(>ILCy%%E$&SV2_ZZ{1`H|Y14;rfE(M9xF~u7QWQX`mE$z*|dY
zW`xHreMrW3;x|{vvANZ~UP}KK+1>}@H{1Lg0i@otOW<L3=VcD$709x_&#H<0l3fD%
z6_3Ct-+PvJ(h$KxwgR~mCNQOx5s6G#)U+4B{ZQDi8+deKyF2*S!+t>T0;-2JYoL9l
zi|I|2(G;wEsR_bV#K>O1qZOhljsr{-Eu1V4<n2EP>*#@Aac*1>n-KRt!SqBs1u73F
z#SnOBOMo=l=`lN|-NZx7{%p}9TEYlHL5*mMRiW?grksevKi~p<#*~j-XMM#(81Kw<
zN$gr0#7M>Xi~a~(OII$xW%?Ms^wNMKzO`*_l-=Fo7NGayf(;t^?!k&Ew+cymE=ybE
z)F0c>^A_L9><T#i1Dxf}G87jZTjblFW-h7JiaDnO$k{<!#r-PE9CB^LUJm|fan51>
z4L{uuG++sJu3ULYT*cJK#@xhn4t!k*RVsNSGTiNOonDJERn5`qU9_PEQTp+kZ*3}A
zPBK<nkxW8uqe_5(bn>nsG7=<Tu<b*KHmQZe)E*|^#_jETmb<sKnNF)=0z*~W`iMyA
z&-e~*xNmocW5IeTe?a~Wbn1zZal($l7m}gNE-9rrRmI<FMepPjL~dd($@k-BPZlj`
z{xG^h-)4Wuj&65hoqiZ6rueLo+BqO!EhR0W&Qk&God-Q<wl^rtvR28uIMxQAVrOLM
zUj7`AEKlT+NT9+G@UFo&vMDno$M@M^hH$}}eDyGrk5mWMHXluVl@-8)j+^f-Brjlg
zttIe4DF|`y3zLGbb9I;Uqehm95%zTO?s^16Wu}*2?zb&rJ8$nC<SFpzMTOv*=OBv(
zLHpd7Q~Z3~=p`_9T$!2NwLz?kZIHNUSux9^G?i(R+ox0|XqErmqTCAMO$PG4)HtaM
z#QD~_?5BG?^LnAVt@ngx9(l|M5|a;tjWw}<u|akU0FiOYUcEJadfkzQNYm426b9_i
z3?Ou%o79uC^!$U%7#|zqd51|UEE}8c6dPMs^x^)0C7rN20<_aVDuwMV+kf7L|G$Wr
h|JNF4tqIdvf3<=eY~Ei`g#PNhuGSsRQjJHU{{c?%d87aU

literal 0
HcmV?d00001

diff --git a/docs/source/backends/nxp/nxp-overview.md b/docs/source/backends/nxp/nxp-overview.md
index 22499aea7ad..b8739046351 100644
--- a/docs/source/backends/nxp/nxp-overview.md
+++ b/docs/source/backends/nxp/nxp-overview.md
@@ -64,6 +64,8 @@ here https://www.nxp.com/design/design-center/software/eiq-ai-development-enviro
 
 **→{doc}`nxp-kernel-selection` — Neutron Firmware Kernel Selection support.**
 
+**→{doc}`nxp-profiling` — Neutron model profiling support.**
+
 ```{toctree}
 :maxdepth: 2
 :hidden:
@@ -74,4 +76,5 @@ nxp-quantization
 tutorials/nxp-tutorials
 nxp-dim-order
 nxp-kernel-selection
+nxp-profiling
 ```
diff --git a/docs/source/backends/nxp/nxp-profiling.md b/docs/source/backends/nxp/nxp-profiling.md
new file mode 100644
index 00000000000..17e352e479d
--- /dev/null
+++ b/docs/source/backends/nxp/nxp-profiling.md
@@ -0,0 +1,205 @@
+# NXP eIQ Profiling Support
+
+
+The eIQ Neutron Backend is integrated with the
+[Developer Tools](https://docs.pytorch.org/executorch/stable/delegate-debugging.html)
+to provide visibility into delegated operator execution time.
+
+There are three steps required to obtain profiling results for an NXP‑delegated model:
+
+* Convert the model with profiling support enabled.
+* Generate the artifacts consumed by the Developer Tools (`ETRecord`, `ETDump`).
+* Create and run the Inspector class to consume these artifacts and print the results.
+
+---
+
+## Convert a model with profiling support
+
+Profiling data is generated only for a **profilable** model. 
+To convert a model with profiling enabled, the `--use_profiling` flag must be set.
+
+See the `aot_neutron_compile.py` example and its
+[README](https://github.com/pytorch/executorch/blob/main/examples/nxp/README.md)
+for additional details.
+
+The following command creates a profilable `cifar10_nxp_delegate_profile.pte` model and the corresponding `ETRecord` for the
+**i.MX RT700** board:
+
+```bash
+python -m examples.nxp.aot_neutron_compile --quantize \
+    --delegate -m cifar10 \
+    --use_profiling
+```
+
+For installation details, see {doc}`nxp-overview`.
+
+---
+
+## Generate ETRecord (Optional)
+
+`ETRecord` is an optional artifact that contains model graphs and metadata used to link runtime profiling results 
+back to the eager model.
+
+The recommended approach is to enable `ETRecord` generation by passing `generate_etrecord=True` to export API calls.
+After export completes, retrieve the `ETRecord` using the `get_etrecord()` method, and save it using the `save()` method:
+
+### Example
+
+```python
+from executorch.devtools.etrecord import generate_etrecord
+
+# 1. Open a model and export the model to ATEN
+model = model.eval()
+exported_program = torch.export.export(model, example_inputs, strict=True)
+module = exported_program.module()
+
+# 2. Transform and lower
+compile_spec = generate_neutron_compile_spec("imxrt700")
+partitioners = (
+    [
+        NeutronPartitioner(
+            compile_spec,
+            NeutronTargetSpec(target="imxrt700"),
+            post_quantization_state_dict=module.state_dict(),
+        )
+    ]
+)
+edge_program_manager = to_edge_transform_and_lower(
+    export(module, example_inputs, strict=True),
+    transform_passes=NeutronEdgePassManager(),
+    generate_etrecord=True,
+    partitioner=partitioners,
+    compile_config=EdgeCompileConfig(
+        _core_aten_ops_exception_list=core_aten_ops_exception_list,
+    ),
+)
+
+# 3. Export to ExecuTorch program
+exec_prog = edge_program_manager.to_executorch(
+    config=ExecutorchBackendConfig(extract_delegate_segments=False)
+)
+# Save ETRecord
+exec_prog.get_etrecord().save("etrecord.bin")
+
+```
+
+### Complete Example
+
+A full implementation is available
+in [aot_neutron_compile.py](https://github.com/pytorch/executorch/blob/main/examples/nxp/aot_neutron_compile.py).
+
+The `--use_profiling` flag is used to create a **profilable** model and the corresponding `ETRecord` file  
+(see [Convert a model with profiling support](#convert-a-model-with-profiling-support) for the full command).
+
+
+---
+
+## Generate ETDump
+
+
+The next step is to generate an `ETDump`. An `ETDump` contains runtime data collected during model inference execution.
+
+To generate an `ETDump`, ensure that the ExecuTorch runtime library is integrated with the Developer Tools and built 
+with the `ET_EVENT_TRACER_ENABLED` flag enabled.
+
+Only models converted with profiling support will produce an `ETDump` containing execution times for all Neutron 
+operators. Otherwise, the dump will include only the final delegate execution time.
+
+Neutron software provides a profiling mechanism that logs individual operator execution times to a dedicated runtime 
+output. This data is then used to generate post‑time events after the inference has completed.
+
+
+### Example
+
+```c
+#include <executorch/devtools/etdump/etdump_flatcc.h>
+```
+```c
+// 1. Create ETDumpGen BEFORE inference.
+auto etdump_gen_ptr = std::make_unique<executorch::etdump::ETDumpGen>();
+executorch::etdump::ETDumpGen* etdump_gen = etdump_gen_ptr.get();
+
+// 2. Load a method from the program by name with ETDump generator for profiling.
+Result<Method> method = program->load_method(method_name, &memory_manager, etdump_gen);
+
+// 3. Input tensor setup.
+Tensor::SizesType sizes[] = {1, 1, 32, 32};
+Tensor::DimOrderType dim_order[] = {0, 2, 3, 1};
+TensorImpl impl(ScalarType::Float, 4, sizes, image_data, dim_order);
+Tensor tensor(&impl);
+Error status = method->set_input(tensor, 0);
+
+// 4. Execute.
+status = method->execute();
+
+// Get ETDump.
+if (etdump_gen != nullptr) {
+    executorch::etdump::ETDumpResult result = etdump_gen->get_etdump_data();
+    if (result.buf != nullptr && result.size > 0) {
+        PRINTF("Add a breakpoint here and run this command in the Debug Console: "
+    	       "dump binary memory trace.etdump result.buf (result.buf + result.size)\r\n");
+    }
+}
+```
+
+
+To save an `ETDump` file from the board to a PC, use the **Debug Console** in the MCUXpresso IDE:
+
+- Set a breakpoint at the `PRINTF(...)` line in the example above.
+- Enter the following command in the Debug Console and press **Enter**:
+
+  ```
+  dump binary memory trace.etdump result.buf (result.buf + result.size)
+  ```
+
+
+<figure style="border:1px solid #ccc; padding:8px; display:inline-block;">
+  <img src="../../_static/img/nxp/nxp-mcuxpresso-etdump.png" width="500" alt="Save ETDump in MCUXpresso project" />
+  <figcaption>
+        <b>Figure 1:</b> Save ETDump in MCUXpresso Project.
+  </figcaption>
+</figure>
+
+
+The resulting `ETDump` file is generated in the project folder within the MCUXpresso workspace.
+
+> **Note:**  
+> Profilable models print profiling data to the terminal. Generating this dump may take longer than executing the 
+> Neutron kernels themselves, but this overhead can be ignored as it affects only models with profiling support 
+> enabled. The dump generation time is included in the `ETDump` as the final kernel entry.
+
+---
+
+## Creating an Inspector
+
+The [Inspector](https://docs.pytorch.org/executorch/1.0/model-inspector.html) APIs provide a way to analyze the 
+contents of `ETRecord` and `ETDump`, enabling developers to gain insights into model architecture 
+and performance statistics.
+
+`ETRecord` is an optional argument used to obtain a mapping between the original model and the converted Neutron model.
+
+An `ETDump` generated on the board contains metadata for each Neutron operator, including its unique identifier.  
+To visualize this metadata in the Inspector results table, pass `include_delegate_debug_data=True` to `print_data_tabular()`.
+
+### Example
+
+```python
+from executorch.devtools import Inspector
+
+inspector = Inspector(etdump_path="/path/to/etdump.etdp", etrecord="/path/to/etrecord.bin")
+inspector.print_data_tabular(include_delegate_debug_data = True)
+```
+
+### Complete Example
+
+A full implementation is available
+in [analyzing_with_inspector.py](https://github.com/pytorch/executorch/blob/main/examples/nxp/analyzing_with_inspector.py). @lint-ignore
+
+---
+
+## Summary
+
+* Build the model with the `--use_profiling` flag enabled.
+* Build the ExecuTorch runtime library with the `ET_EVENT_TRACER_ENABLED` flag and the ETDump Developer Tool.
+* Use the Debug Console in MCUXpresso to save the `ETDump` file from the board to a PC.
+* Visualize the profiling results using the Inspector.
diff --git a/examples/nxp/analyzing_with_inspector.py b/examples/nxp/analyzing_with_inspector.py
new file mode 100644
index 00000000000..b339af79d6e
--- /dev/null
+++ b/examples/nxp/analyzing_with_inspector.py
@@ -0,0 +1,58 @@
+# Copyright 2026 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Print profiling table for the NXP Neutron NPU model
+
+from typing import Any, Union
+
+from executorch.devtools import Inspector
+
+
+def parse_delegate_metadata(
+    delegate_metadatas: list[bytes],
+) -> Union[list[str], dict[str, Any]]:
+    """Metadata parser for Neutron Backend metadata.
+
+    The parser is a callable that maps each one-byte metadata entry to a label ("Profiling dump" or "Neutron kernel <n>").
+    The deserialized data is then added back to the corresponding event in the event block for user consumption.
+    """
+
+    metadata_list = []
+    for metadata_bytes in delegate_metadatas:
+        if len(metadata_bytes) == 1:
+            function_code = metadata_bytes[0]
+            if function_code == 0:
+                metadata_list.append("Profiling dump")
+            else:
+                metadata_list.append("Neutron kernel " + str(function_code))
+        else:
+            metadata_list.append("Invalid metadata size")
+    return metadata_list
+
+
+if __name__ == "__main__":
+
+    try:
+        etrecord_path = "etrecord/etrecord.bin"
+        etdump_path = "etdump/trace.etdump"
+        inspector = Inspector(
+            etdump_path=etdump_path,
+            etrecord=etrecord_path,
+            delegate_metadata_parser=parse_delegate_metadata,
+        )
+
+        # Access raw event data and filter quantized_decomposed nodes
+        for event_block in inspector.event_blocks:
+            for event in event_block.events:
+                if hasattr(event, "op_types") and isinstance(event.op_types, list):
+                    # Filter out quantized_decomposed ops from the actual list
+                    filtered = [
+                        op for op in event.op_types if "quantized_decomposed" not in op
+                    ]
+                    event.op_types = filtered if filtered else event.op_types
+
+        inspector.print_data_tabular(include_delegate_debug_data=True)
+    except Exception as e:
+        print(f"Error during inspection: {type(e).__name__}: {e}")
diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py
index f5f92d36541..258b4c87772 100644
--- a/examples/nxp/aot_neutron_compile.py
+++ b/examples/nxp/aot_neutron_compile.py
@@ -8,6 +8,7 @@
 import argparse
 import io
 import logging
+import os
 from collections import defaultdict
 
 import executorch.extension.pybindings.portable_lib
@@ -167,6 +168,13 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
         default=False,
         help="Use QAT mode for quantization (performs two QAT training epochs)",
     )
+    parser.add_argument(
+        "--use_profiling",
+        action="store_true",
+        required=False,
+        default=False,
+        help="Enable profiling for eIQ Neutron NPU delegated model",
+    )
     parser.add_argument(
         "-s",
         "--so_library",
@@ -322,6 +330,7 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
         operators_not_to_delegate=args.operators_not_to_delegate,
         fetch_constants_to_sram=args.fetch_constants_to_sram,
         dump_kernel_selection_code=args.dump_kernel_selection_code,
+        use_profiling=args.use_profiling,
     )
     partitioners = (
         [
@@ -338,6 +347,7 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
     edge_program_manager = to_edge_transform_and_lower(
         export(module, example_inputs, strict=True),
         transform_passes=NeutronEdgePassManager(),
+        generate_etrecord=args.use_profiling,
         partitioner=partitioners,
         compile_config=EdgeCompileConfig(
             _core_aten_ops_exception_list=core_aten_ops_exception_list,
@@ -360,6 +370,21 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
         exec_prog = edge_program_manager.to_executorch(
             config=ExecutorchBackendConfig(extract_delegate_segments=False)
         )
+
+        # Generate ETRecord if profiling flag is set
+        if args.use_profiling:
+            etrecord_path = os.path.join("etrecord", f"{args.model_name}_etrecord.bin")
+            # Create directory if it doesn't exist
+            os.makedirs(os.path.dirname(etrecord_path), exist_ok=True)
+            # Save ETRecord
+            exec_prog.get_etrecord().save(etrecord_path)
+            # Notify the user about profiling enablement and ETRecord generation.
+            logging.info(
+                "The model was converted with profiling enabled. The time spent generating the profiling dump is traced as the "
+                "final delegate operation and can be ignored, as no dump is produced for non‑profilable models."
+            )
+            logging.info(f"The ETRecord for the model was saved to {etrecord_path}.")
+
     except RuntimeError as e:
         if "Missing out variants" in str(e.args[0]):
             raise RuntimeError(
@@ -378,8 +403,10 @@ def executorch_program_to_str(ep, verbose=False):
     logging.debug(f"Executorch program:\n{executorch_program_to_str(exec_prog)}")
 
     # 6. Serialize to *.pte
-    model_name = f"{args.model_name}" + (
-        "_nxp_delegate" if args.delegate is True else ""
+    model_name = (
+        f"{args.model_name}"
+        + ("_nxp_delegate" if args.delegate is True else "")
+        + ("_profile" if args.use_profiling is True else "")
     )
     save_pte_program(exec_prog, model_name)
 

From 3447d08964881e3d2ef34123e8c985bbc800a01b Mon Sep 17 00:00:00 2001
From: Andrew <pullinandrew@meta.com>
Date: Tue, 23 Jun 2026 13:53:13 -0700
Subject: [PATCH 4/7] Quantize moveaxis/movedim so they delegate to Ethos-U
 (#20314)

Differential Revision: D108478011

Pull Request resolved: https://github.com/pytorch/executorch/pull/20453
---
 .../arm/quantizer/quantization_annotator.py   | 10 ++++++
 backends/arm/test/ops/test_permute.py         | 17 +++++++++
 .../test/quantizer/test_generic_annotater.py  | 35 +++++++++++++++++++
 3 files changed, 62 insertions(+)

diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
index 3b713659e84..13693bd235d 100644
--- a/backends/arm/quantizer/quantization_annotator.py
+++ b/backends/arm/quantizer/quantization_annotator.py
@@ -631,6 +631,16 @@ def _get_fixed_qparams_qspec(
 if _transpose_dimname is not None:
     _one_to_one_shared_input_qspec.add(_transpose_dimname)
 
+for _op in (
+    getattr(torch.ops.aten.moveaxis, "int", None),
+    getattr(torch.ops.aten.moveaxis, "intlist", None),
+    getattr(torch.ops.aten.movedim, "int", None),
+    getattr(torch.ops.aten.movedim, "intlist", None),
+):
+    if _op is not None:
+        _one_to_one_shared_input_qspec.add(_op)
+
+
 _one_to_one_shared_input_or_input_act_qspec: set[OpOverload] = {
     torch.ops.aten.alias.default,
     torch.ops.aten.clone.default,
diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py
index 8864324dbd5..6819929104e 100644
--- a/backends/arm/test/ops/test_permute.py
+++ b/backends/arm/test/ops/test_permute.py
@@ -78,6 +78,12 @@ def forward(self, x):
         return torch.permute(x, self.dims)
 
 
+class SimpleMoveAxis(torch.nn.Module):
+
+    def forward(self, x):
+        return torch.moveaxis(x, 1, -1)
+
+
 @common.parametrize(
     "test_data", test_data_suite | test_data_suite_fp16 | test_data_suite_bf16
 )
@@ -118,6 +124,17 @@ def test_permute_u55_INT(test_data):
     pipeline.run()
 
 
+def test_moveaxis_u55_INT():
+    pipeline = EthosU55PipelineINT[input_t1](
+        SimpleMoveAxis(),
+        (torch.rand(1, 4, 5, 6),),
+        "torch.ops.aten.moveaxis.int",
+        exir_ops="executorch_exir_dialects_edge__ops_aten_permute_copy_default",
+        run_on_fvp=False,
+    )
+    pipeline.run()
+
+
 @common.parametrize("test_data", test_data_suite_u55_reject)
 def test_permute_u55_INT_not_delegated(test_data: torch.Tensor):
     test_data, dims = test_data()
diff --git a/backends/arm/test/quantizer/test_generic_annotater.py b/backends/arm/test/quantizer/test_generic_annotater.py
index dd883e72b1f..b5cfd1efdc6 100644
--- a/backends/arm/test/quantizer/test_generic_annotater.py
+++ b/backends/arm/test/quantizer/test_generic_annotater.py
@@ -89,6 +89,41 @@ def test_transpose_tosa_INT():
     )
 
 
+def test_moveaxis_movedim_tosa_INT():
+    check_annotation(
+        SingleOpModel(
+            torch.moveaxis,
+            (torch.randn(2, 3, 4),),
+            source=1,
+            destination=-1,
+        ),
+    )
+    check_annotation(
+        SingleOpModel(
+            torch.moveaxis,
+            (torch.randn(2, 3, 4),),
+            source=(0, 1),
+            destination=(-1, -2),
+        ),
+    )
+    check_annotation(
+        SingleOpModel(
+            torch.movedim,
+            (torch.randn(2, 3, 4),),
+            source=1,
+            destination=-1,
+        ),
+    )
+    check_annotation(
+        SingleOpModel(
+            torch.movedim,
+            (torch.randn(2, 3, 4),),
+            source=(0, 1),
+            destination=(-1, -2),
+        ),
+    )
+
+
 def test_tile_tosa_INT():
     check_annotation(
         SingleOpModel(torch.tile, (torch.randn(4, 4),), dims=(2,)),

From 65bc0cafe96c5e3c63e57f976b9e05a25027a53a Mon Sep 17 00:00:00 2001
From: Jon Janzen <jonjanzen@meta.com>
Date: Tue, 23 Jun 2026 13:53:27 -0700
Subject: [PATCH 5/7] drop redundant TARGETS files that duplicate sister BUCK
 files (#20403)

Differential Revision: D109082060

Pull Request resolved: https://github.com/pytorch/executorch/pull/20403
---
 backends/qualcomm/aot/wrappers/TARGETS        |   5 -
 backends/qualcomm/builders/TARGETS            |   5 -
 codegen/test/TARGETS                          |   8 --
 configurations/TARGETS                        |   8 --
 examples/devtools/example_runner/TARGETS      |   8 --
 examples/models/gemma4/BUCK                   | 120 ++++++++++++++++++
 examples/qualcomm/executor_runner/TARGETS     |   8 --
 extension/aten_util/TARGETS                   |   8 --
 extension/aten_util/test/TARGETS              |   8 --
 extension/cuda/TARGETS                        |   8 --
 extension/image/TARGETS                       |   5 -
 extension/image/benchmark/TARGETS             |   5 -
 extension/image/test/TARGETS                  |   5 -
 extension/pytree/aten_util/TARGETS            |   7 -
 extension/pytree/aten_util/test/TARGETS       |   8 --
 extension/runner_util/TARGETS                 |   8 --
 extension/tensor/TARGETS                      |   8 --
 extension/tensor/test/TARGETS                 |   8 --
 extension/testing_util/TARGETS                |   8 --
 extension/testing_util/test/TARGETS           |   8 --
 extension/threadpool/TARGETS                  |   8 --
 extension/threadpool/test/TARGETS             |   8 --
 kernels/optimized/cpu/TARGETS                 |   8 --
 kernels/optimized/test/TARGETS                |   8 --
 kernels/portable/cpu/util/TARGETS             |   8 --
 kernels/portable/cpu/util/test/TARGETS        |   8 --
 kernels/prim_ops/TARGETS                      |   7 -
 runtime/backend/TARGETS                       |   8 --
 runtime/backend/test/TARGETS                  |   8 --
 runtime/core/TARGETS                          |   8 --
 runtime/core/exec_aten/TARGETS                |   8 --
 runtime/core/exec_aten/testing_util/TARGETS   |   8 --
 .../core/exec_aten/testing_util/test/TARGETS  |   8 --
 runtime/core/exec_aten/util/TARGETS           |   8 --
 runtime/platform/TARGETS                      |   8 --
 runtime/platform/test/TARGETS                 |   8 --
 schema/TARGETS                                |   8 --
 schema/test/TARGETS                           |   8 --
 test/TARGETS                                  |   8 --
 39 files changed, 120 insertions(+), 287 deletions(-)
 delete mode 100644 backends/qualcomm/aot/wrappers/TARGETS
 delete mode 100644 backends/qualcomm/builders/TARGETS
 delete mode 100644 codegen/test/TARGETS
 delete mode 100644 configurations/TARGETS
 delete mode 100644 examples/devtools/example_runner/TARGETS
 delete mode 100644 examples/qualcomm/executor_runner/TARGETS
 delete mode 100644 extension/aten_util/TARGETS
 delete mode 100644 extension/aten_util/test/TARGETS
 delete mode 100644 extension/cuda/TARGETS
 delete mode 100644 extension/image/TARGETS
 delete mode 100644 extension/image/benchmark/TARGETS
 delete mode 100644 extension/image/test/TARGETS
 delete mode 100644 extension/pytree/aten_util/TARGETS
 delete mode 100644 extension/pytree/aten_util/test/TARGETS
 delete mode 100644 extension/runner_util/TARGETS
 delete mode 100644 extension/tensor/TARGETS
 delete mode 100644 extension/tensor/test/TARGETS
 delete mode 100644 extension/testing_util/TARGETS
 delete mode 100644 extension/testing_util/test/TARGETS
 delete mode 100644 extension/threadpool/TARGETS
 delete mode 100644 extension/threadpool/test/TARGETS
 delete mode 100644 kernels/optimized/cpu/TARGETS
 delete mode 100644 kernels/optimized/test/TARGETS
 delete mode 100644 kernels/portable/cpu/util/TARGETS
 delete mode 100644 kernels/portable/cpu/util/test/TARGETS
 delete mode 100644 kernels/prim_ops/TARGETS
 delete mode 100644 runtime/backend/TARGETS
 delete mode 100644 runtime/backend/test/TARGETS
 delete mode 100644 runtime/core/TARGETS
 delete mode 100644 runtime/core/exec_aten/TARGETS
 delete mode 100644 runtime/core/exec_aten/testing_util/TARGETS
 delete mode 100644 runtime/core/exec_aten/testing_util/test/TARGETS
 delete mode 100644 runtime/core/exec_aten/util/TARGETS
 delete mode 100644 runtime/platform/TARGETS
 delete mode 100644 runtime/platform/test/TARGETS
 delete mode 100644 schema/TARGETS
 delete mode 100644 schema/test/TARGETS
 delete mode 100644 test/TARGETS

diff --git a/backends/qualcomm/aot/wrappers/TARGETS b/backends/qualcomm/aot/wrappers/TARGETS
deleted file mode 100644
index 0a42614a385..00000000000
--- a/backends/qualcomm/aot/wrappers/TARGETS
+++ /dev/null
@@ -1,5 +0,0 @@
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/backends/qualcomm/builders/TARGETS b/backends/qualcomm/builders/TARGETS
deleted file mode 100644
index 0a42614a385..00000000000
--- a/backends/qualcomm/builders/TARGETS
+++ /dev/null
@@ -1,5 +0,0 @@
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/codegen/test/TARGETS b/codegen/test/TARGETS
deleted file mode 100644
index 1e8cc179228..00000000000
--- a/codegen/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain xplat-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/configurations/TARGETS b/configurations/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/configurations/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/examples/devtools/example_runner/TARGETS b/examples/devtools/example_runner/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/examples/devtools/example_runner/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/examples/models/gemma4/BUCK b/examples/models/gemma4/BUCK
index e587370ece0..19f0ff90c93 100644
--- a/examples/models/gemma4/BUCK
+++ b/examples/models/gemma4/BUCK
@@ -1,4 +1,5 @@
 load("@fbcode_macros//build_defs:build_file_migration.bzl", "fbcode_target", "non_fbcode_target")
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 load(":targets.bzl", "define_common_targets")
 
 oncall("executorch")
@@ -6,3 +7,122 @@ oncall("executorch")
 non_fbcode_target(_kind = define_common_targets,)
 
 fbcode_target(_kind = define_common_targets,)
+
+# Text decoder module
+fbcode_target(_kind = runtime.python_library,
+    name = "text_decoder",
+    srcs = [
+        "text_decoder/__init__.py",
+        "text_decoder/convert_weights.py",
+        "text_decoder/gemma4_attention.py",
+        "text_decoder/gemma4_config.py",
+        "text_decoder/gemma4_cross_decoder.py",
+        "text_decoder/gemma4_decoder_layer.py",
+        "text_decoder/gemma4_model.py",
+        "text_decoder/gemma4_self_decoder.py",
+        "text_decoder/gemma4_transformer.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.examples.models.gemma4",
+    resources = {
+        "config/e2b_config.json": "config/e2b_config.json",
+        "config/e4b_config.json": "config/e4b_config.json",
+    },
+    deps = [
+        "//caffe2:torch",
+        "fbsource//third-party/pypi/safetensors:safetensors",
+        "fbsource//third-party/pypi/transformers:transformers",
+    ],
+    visibility = ["PUBLIC"],
+)
+
+# Speech transform module
+fbcode_target(_kind = runtime.python_library,
+    name = "speech_transform",
+    srcs = [
+        "speech_transform.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.examples.models.gemma4",
+    deps = [
+        "//caffe2:torch",
+    ],
+    visibility = ["PUBLIC"],
+)
+
+# Export utilities (shared quantization code)
+fbcode_target(_kind = runtime.python_library,
+    name = "quant_utils",
+    srcs = ["quant_utils.py"],
+    _is_external_target = True,
+    base_module = "executorch.examples.models.gemma4",
+    deps = [
+        "//caffe2:torch",
+        "//executorch/examples/models/llama:source_transformation",
+        "//executorch/extension/llm/export:export_lib",
+        "//pytorch/ao:torchao",
+    ],
+    visibility = ["PUBLIC"],
+)
+
+# Single PTE export
+fbcode_target(_kind = runtime.python_binary,
+    name = "export_gemma4",
+    srcs = ["export_gemma4.py"],
+    main_function = "executorch.examples.models.gemma4.export_gemma4.main",
+    preload_deps = [
+        "//pytorch/ao/torchao/csrc/cpu/shared_kernels/linear_8bit_act_xbit_weight:op_linear_8bit_act_xbit_weight_aten",
+        "//pytorch/ao/torchao/csrc/cpu/shared_kernels/embedding_xbit:op_embedding_xbit_aten",
+        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
+        "//executorch/kernels/quantized:aot_lib",
+    ],
+    deps = [
+        ":text_decoder",
+        ":speech_transform",
+        ":quant_utils",
+        "//caffe2:torch",
+        "//executorch/exir:lib",
+        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
+        "//executorch/backends/xnnpack/quantizer:xnnpack_quantizer",
+        "//executorch/extension/llm/export:export_lib",
+        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
+        "//executorch/extension/llm/custom_ops:custom_ops_aot_py",
+        "//executorch/kernels/quantized:aot_lib",
+        "//pytorch/ao:torchao",
+        "fbsource//third-party/pypi/safetensors:safetensors",
+        "fbsource//third-party/pypi/transformers:transformers",
+    ],
+)
+
+# Image preprocessing utilities
+fbcode_target(_kind = runtime.python_library,
+    name = "image_utils",
+    srcs = ["image_utils.py"],
+    _is_external_target = True,
+    base_module = "executorch.examples.models.gemma4",
+    deps = [
+        "//caffe2:torch",
+        "fbsource//third-party/pypi/pillow:pillow",
+    ],
+    visibility = ["PUBLIC"],
+)
+
+# Python runner (single PTE, audio + vision + text-only)
+fbcode_target(_kind = runtime.python_binary,
+    name = "run_gemma4",
+    srcs = ["run_gemma4.py"],
+    main_function = "executorch.examples.models.gemma4.run_gemma4.main",
+    preload_deps = [
+        "//executorch/backends/xnnpack:xnnpack_backend",
+        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
+        "//executorch/kernels/quantized:aot_lib",
+        "//pytorch/ao/torchao/csrc/cpu/shared_kernels/embedding_xbit:op_embedding_xbit_aten",
+        "//pytorch/ao/torchao/csrc/cpu/shared_kernels/linear_8bit_act_xbit_weight:op_linear_8bit_act_xbit_weight_aten",
+    ],
+    deps = [
+        ":image_utils",
+        "//caffe2:torch",
+        "//executorch/runtime:runtime",
+        "fbsource//third-party/pypi/sentencepiece:sentencepiece",
+    ],
+)
diff --git a/examples/qualcomm/executor_runner/TARGETS b/examples/qualcomm/executor_runner/TARGETS
deleted file mode 100644
index 1e8cc179228..00000000000
--- a/examples/qualcomm/executor_runner/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain xplat-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/aten_util/TARGETS b/extension/aten_util/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/aten_util/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/aten_util/test/TARGETS b/extension/aten_util/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/aten_util/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/cuda/TARGETS b/extension/cuda/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/cuda/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/image/TARGETS b/extension/image/TARGETS
deleted file mode 100644
index 0a42614a385..00000000000
--- a/extension/image/TARGETS
+++ /dev/null
@@ -1,5 +0,0 @@
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/image/benchmark/TARGETS b/extension/image/benchmark/TARGETS
deleted file mode 100644
index 0a42614a385..00000000000
--- a/extension/image/benchmark/TARGETS
+++ /dev/null
@@ -1,5 +0,0 @@
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/image/test/TARGETS b/extension/image/test/TARGETS
deleted file mode 100644
index 0a42614a385..00000000000
--- a/extension/image/test/TARGETS
+++ /dev/null
@@ -1,5 +0,0 @@
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/pytree/aten_util/TARGETS b/extension/pytree/aten_util/TARGETS
deleted file mode 100644
index 77b38349334..00000000000
--- a/extension/pytree/aten_util/TARGETS
+++ /dev/null
@@ -1,7 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/pytree/aten_util/test/TARGETS b/extension/pytree/aten_util/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/pytree/aten_util/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/runner_util/TARGETS b/extension/runner_util/TARGETS
deleted file mode 100644
index 1e8cc179228..00000000000
--- a/extension/runner_util/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain xplat-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/tensor/TARGETS b/extension/tensor/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/tensor/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/tensor/test/TARGETS b/extension/tensor/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/tensor/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/testing_util/TARGETS b/extension/testing_util/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/testing_util/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/testing_util/test/TARGETS b/extension/testing_util/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/testing_util/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/threadpool/TARGETS b/extension/threadpool/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/threadpool/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/extension/threadpool/test/TARGETS b/extension/threadpool/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/extension/threadpool/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/kernels/optimized/cpu/TARGETS b/kernels/optimized/cpu/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/kernels/optimized/cpu/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/kernels/optimized/test/TARGETS b/kernels/optimized/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/kernels/optimized/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/kernels/portable/cpu/util/TARGETS b/kernels/portable/cpu/util/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/kernels/portable/cpu/util/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/kernels/portable/cpu/util/test/TARGETS b/kernels/portable/cpu/util/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/kernels/portable/cpu/util/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/kernels/prim_ops/TARGETS b/kernels/prim_ops/TARGETS
deleted file mode 100644
index 77b38349334..00000000000
--- a/kernels/prim_ops/TARGETS
+++ /dev/null
@@ -1,7 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/runtime/backend/TARGETS b/runtime/backend/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/runtime/backend/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/runtime/backend/test/TARGETS b/runtime/backend/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/runtime/backend/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/runtime/core/TARGETS b/runtime/core/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/runtime/core/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/runtime/core/exec_aten/TARGETS b/runtime/core/exec_aten/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/runtime/core/exec_aten/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/runtime/core/exec_aten/testing_util/TARGETS b/runtime/core/exec_aten/testing_util/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/runtime/core/exec_aten/testing_util/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/runtime/core/exec_aten/testing_util/test/TARGETS b/runtime/core/exec_aten/testing_util/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/runtime/core/exec_aten/testing_util/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/runtime/core/exec_aten/util/TARGETS b/runtime/core/exec_aten/util/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/runtime/core/exec_aten/util/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/runtime/platform/TARGETS b/runtime/platform/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/runtime/platform/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/runtime/platform/test/TARGETS b/runtime/platform/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/runtime/platform/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/schema/TARGETS b/schema/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/schema/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/schema/test/TARGETS b/schema/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/schema/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/test/TARGETS b/test/TARGETS
deleted file mode 100644
index 2341af9282f..00000000000
--- a/test/TARGETS
+++ /dev/null
@@ -1,8 +0,0 @@
-# Any targets that should be shared between fbcode and xplat must be defined in
-# targets.bzl. This file can contain fbcode-only targets.
-
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()

From 8b145b5fc6d3d18b3d5702ea0823402a7699f401 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@fb.com>
Date: Tue, 23 Jun 2026 15:21:25 -0700
Subject: [PATCH 6/7] [executorch][cuda] gemma4_31b: fuse gate/up MLP
 projections (default-on)

Summary:
Fuse each gemma4_31b MLP's gate_proj|up_proj into a single
[2*intermediate, hidden] coalesced-int4 matmul, applied by default in the CUDA
export. This issues one activation-quant + one W4A8 matvec per layer instead of
two, cutting per-token launch + activation-quant overhead in the launch-bound
decode path. Only Q4_K (CudaCoalescedInt4Tensor) gate/up pairs are fused; any
other quant type (e.g. Q6_K) is left as two matmuls (guarded, still correct).

Builds on the already-landed kv_len-bounded tq4_sdpa kernel + gemma4_31b
call-site (kv_len + mask_is_causal), which recovered 128k decode from ~2.8 to
~43 tok/s. With both, ET gemma4_31b 128k+TurboQuant decode beats llama.cpp at
every measured context length (decode tok/s, cuda_graph ON):

  ctx    ET      llama
  512    44.80   42.77
  2K     43.20   41.97
  8K     42.23   41.23
  32K    41.64   40.27
  127K   38.41   35.97

TurboQuant KV compression kept; prefill restored (6-8x) with no regression;
output quality preserved.

Test Plan:
- Fusion numerics: fused vs unfused MLP through the real W4A8 int4_plain_mm
  kernel = bit-exact (max_abs_diff 0.0, cos 1.000000) for decode (T=1) and
  prefill (T=4).
- Export + run: fused module exported via CudaPartitioner and executed through
  executor_runner (RC=0, cos 0.999915 vs eager). Full 31B export logs
  "Fused gate+up on 60 MLP layers".
- Decode A/B (gemma4_31b 128k+TQ, cuda_graph ON, median of 5 runs): see the
  table above; beats llama.cpp at every context length from 512 to 127K.
  nsys: tq4_sdpa drops from 91.7% to 2.9% of decode.
---
 .../gemma4_31b/cuda_source_transformations.py | 107 ++++++++++++++++++
 examples/models/gemma4_31b/export.py          |   9 +-
 2 files changed, 111 insertions(+), 5 deletions(-)

diff --git a/examples/models/gemma4_31b/cuda_source_transformations.py b/examples/models/gemma4_31b/cuda_source_transformations.py
index 666d0c44e9d..6609178e084 100644
--- a/examples/models/gemma4_31b/cuda_source_transformations.py
+++ b/examples/models/gemma4_31b/cuda_source_transformations.py
@@ -30,6 +30,7 @@
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from executorch.examples.models.gemma4.text_decoder import apply_rotary_emb
 from executorch.extension.llm.modules.turboquant import TurboQuantKVCache
@@ -110,6 +111,105 @@ def _turboquant_attention_forward(
     return self.o_proj(y)
 
 
+def _fused_mlp_forward(self, x: torch.Tensor) -> torch.Tensor:
+    """Drop-in ``Gemma4MLP.forward`` over a fused gate|up projection.
+
+    Identical math to ``down(gelu(gate(x)) * up(x))``: the single
+    ``gate_up_proj`` emits ``[gate | up]`` concatenated on the last dim,
+    which is then split. One W4A8 matmul (and one activation-quant of ``x``)
+    instead of two.
+    """
+    h = self.gate_up_proj(x)
+    gate = h[..., : self.intermediate_size]
+    up = h[..., self.intermediate_size :]
+    return self.down_proj(F.gelu(gate, approximate="tanh") * up)
+
+
+def _concat_coalesced_int4_along_n(a, b):
+    """Concatenate two ``CudaCoalescedInt4Tensor`` along the output (N) dim.
+
+    qdata is ``[N, K/2]`` and scale/zero_point are ``[N, n_groups]`` in the
+    coalesced layout, so a per-output-row concat on dim 0 is exact: the W4A8
+    dp4a matvec reads each output row's qdata/scale/zero independently, so
+    out[:N_a] reproduces ``a`` and out[N_a:] reproduces ``b`` bit-for-bit.
+    """
+    from executorch.backends.cuda.coalesced_int4_tensor import CudaCoalescedInt4Tensor
+
+    return CudaCoalescedInt4Tensor(
+        torch.cat([a.qdata, b.qdata], dim=0),
+        torch.cat([a.scale, b.scale], dim=0),
+        torch.cat([a.zero_point, b.zero_point], dim=0),
+        a.block_size,
+        torch.Size([a.shape[0] + b.shape[0], a.shape[1]]),
+        None,
+        a.activation_dtype,
+    )
+
+
+def _is_fuseable_int4_pair(gate_w, up_w) -> bool:
+    """True iff gate/up are both coalesced-int4 with matching K + block_size.
+
+    Q4_K MLP weights become ``CudaCoalescedInt4Tensor`` (fuseable); a Q6_K
+    weight becomes ``CudaDp4aPlanarInt6Tensor`` (left alone). ``act_pre_scale``
+    is unused on this path but we require it absent so the concat stays exact.
+    """
+    from executorch.backends.cuda.coalesced_int4_tensor import CudaCoalescedInt4Tensor
+
+    return (
+        isinstance(gate_w, CudaCoalescedInt4Tensor)
+        and isinstance(up_w, CudaCoalescedInt4Tensor)
+        and list(gate_w.block_size) == list(up_w.block_size)
+        and gate_w.shape[1] == up_w.shape[1]
+        and gate_w.act_pre_scale is None
+        and up_w.act_pre_scale is None
+    )
+
+
+def _fuse_gate_up_proj(model: nn.Module) -> None:
+    """Fuse each MLP's ``gate_proj | up_proj`` into one ``gate_up_proj``.
+
+    gate and up share the same input, so the unfused path quantizes ``x`` to
+    int8 twice and launches two W4A8 matvecs per layer. Fusing the weights
+    into one ``[2*inter, hidden]`` tensor halves both. Weight bytes read are
+    unchanged, so the win is launch + activation-quant overhead (decode is
+    launch-bound). Only Q4_K (coalesced-int4) layers are fused; any layer
+    with a non-int4 weight is left as two matmuls (still correct).
+
+    Must run AFTER weights are packed to ``CudaCoalescedInt4Tensor`` (i.e.
+    inside ``_export_cuda``), and is independent of TurboQuant.
+    """
+    n_fused = 0
+    n_skipped = 0
+    for layer in model.layers:
+        mlp = getattr(layer, "mlp", None)
+        if mlp is None or not (hasattr(mlp, "gate_proj") and hasattr(mlp, "up_proj")):
+            continue
+        gate_w = mlp.gate_proj.weight
+        up_w = mlp.up_proj.weight
+        if not _is_fuseable_int4_pair(gate_w, up_w):
+            n_skipped += 1
+            continue
+        inter = up_w.shape[0]
+        hidden = up_w.shape[1]
+        fused_w = _concat_coalesced_int4_along_n(gate_w, up_w)
+
+        # Container built on meta to avoid materializing a dense
+        # [2*inter, hidden] weight before we overwrite it with fused_w.
+        gate_up = nn.Linear(hidden, 2 * inter, bias=False, device="meta")
+        gate_up.weight = nn.Parameter(fused_w, requires_grad=False)
+        mlp.gate_up_proj = gate_up
+        mlp.intermediate_size = inter
+        del mlp.gate_proj
+        del mlp.up_proj
+        mlp.forward = types.MethodType(_fused_mlp_forward, mlp)
+        n_fused += 1
+
+    msg = f"[gemma4_31b cuda] Fused gate+up on {n_fused} MLP layers"
+    if n_skipped:
+        msg += f" ({n_skipped} skipped: non-int4 weights)"
+    print(msg)
+
+
 def cuda_source_transformations(
     model: nn.Module,
     *,
@@ -117,6 +217,11 @@ def cuda_source_transformations(
 ) -> None:
     """Apply CUDA source transformations to a Gemma 4 31B model in place.
 
+    Always fuses each MLP's ``gate_proj|up_proj`` into a single matmul (one
+    activation-quant + one W4A8 matvec per layer instead of two; Q4_K
+    coalesced-int4 layers only — other quant types are left untouched).
+    Optionally also swaps full-attention KV caches for TurboQuant TQ4.
+
     Args:
         model: ``Gemma4_31B`` instance to transform.
         use_turboquant: When True, swap full-attention layers' KV caches
@@ -125,6 +230,8 @@ def cuda_source_transformations(
             ``torch.ops.triton.tq4_sdpa``. Sliding-window layers are
             unaffected.
     """
+    _fuse_gate_up_proj(model)
+
     if not use_turboquant:
         return
 
diff --git a/examples/models/gemma4_31b/export.py b/examples/models/gemma4_31b/export.py
index d9e16bc34df..b2b2264178a 100644
--- a/examples/models/gemma4_31b/export.py
+++ b/examples/models/gemma4_31b/export.py
@@ -182,12 +182,11 @@ def _export_cuda(
 
     materialize_runtime_buffers(model, dtype=torch.bfloat16)
 
-    if use_turboquant:
-        from executorch.examples.models.gemma4_31b.cuda_source_transformations import (
-            cuda_source_transformations,
-        )
+    from executorch.examples.models.gemma4_31b.cuda_source_transformations import (
+        cuda_source_transformations,
+    )
 
-        cuda_source_transformations(model, use_turboquant=True)
+    cuda_source_transformations(model, use_turboquant=use_turboquant)
 
     # Int4Tensor weights are used directly — no format conversion.
     # F.linear dispatches to executorch_cuda::int4_plain_mm (CUDA shim).

From 638f07ae1d3aad4f00122217dde062d5a0a4b3a8 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@fb.com>
Date: Tue, 23 Jun 2026 17:08:39 -0700
Subject: [PATCH 7/7] [executorch][gemma4] fuse MLP gate/up at GGUF load
 (single point, cuda+mlx)

Summary:
Move the gemma4 MLP gate_proj|up_proj fusion to a single backend-agnostic point
in the GGUF loader, and make the model forward consume it. Supersedes the
earlier CUDA-only export-time fusion (reverted here).

- gguf_loader.py: before any backend conversion (_convert_weight), buffer each
  layer's raw gate/up ExportableGGUFTensor and, once both arrive, row-concat
  their raw GGUF blocks along the output dim into one fused gate_up
  ExportableGGUFTensor (gate rows then up rows). Both backends then pack the
  already-fused weight with NO per-type concat: CUDA (Q4_K ->
  CudaCoalescedInt4Tensor, Q6_K -> CudaDp4aPlanarInt6Tensor) and MLX
  (ExportableGGUFTensor). Guards: same ggml_type + K; non-fuseable pairs and
  unpaired leftovers fall through unfused.
- Gemma4MLP: when a fused gate_up_proj is present, run one matmul and split the
  [.., 2*intermediate_size] output back into gate/up; otherwise use the separate
  projections. The shared MLP stays safe for unfused checkpoints and the
  prequant/HF load paths (no gate_up_proj -> original path, no crash).
- Revert the previous CUDA-localized fusion (cuda_source_transformations.py and
  export.py back to their original form). The kv_len-bounded tq4_sdpa kernel +
  call-site (already on main) are unchanged.

Single fusion point widens applicability (CUDA + MLX, incl. Q6_K) and keeps the
model def backend-agnostic. Decode win is unchanged (same fused matmul, produced
at load instead of at export).

Test Plan:
- Raw concat (real GGUF blk.0 ffn, q4_k): fused.dequantize() == [gate; up]
  stacked, bit-exact; fused CudaCoalescedInt4Tensor rows [:N]/[N:]
  qdata+scale+zero bit-identical to gate/up.
- Model-def fused vs unfused forward through real W4A8 int4_plain_mm: decode
  (T=1) bit-exact (cos 1.000000); prefill (T=4) cos 0.999988 -- the only delta
  is cuBLAS GEMM shape-dependent fp ordering (N=43008 vs 21504, identical
  weights), benign and inherent to any gate/up fusion.
- Full CUDA GGUF export (gemma4_31b, --turboquant, max-seq-len 131072): loader
  logs "Fused gate+up on 60 MLP layers", TurboQuant swaps 10 layers, AOTI build
  clean (model.pte + 26.18GB aoti_cuda_blob.ptd, "Done.").
- Decode via gemma4_31b_runner on the new build: coherent output, no NaN;
  prefill 1375 tok/s, decode 38.3 tok/s (sanity run without cuda_graph).
---
 .../text_decoder/gemma4_decoder_layer.py      |  17 ++-
 .../gemma4_31b/cuda_source_transformations.py | 107 ----------------
 examples/models/gemma4_31b/export.py          |   9 +-
 examples/models/gemma4_31b/gguf_loader.py     | 119 ++++++++++++++++++
 4 files changed, 138 insertions(+), 114 deletions(-)

diff --git a/examples/models/gemma4/text_decoder/gemma4_decoder_layer.py b/examples/models/gemma4/text_decoder/gemma4_decoder_layer.py
index e10c1c7e415..fe3e3bb94cb 100644
--- a/examples/models/gemma4/text_decoder/gemma4_decoder_layer.py
+++ b/examples/models/gemma4/text_decoder/gemma4_decoder_layer.py
@@ -34,14 +34,25 @@ class Gemma4MLP(nn.Module):
 
     def __init__(self, hidden_size: int, intermediate_size: int):
         super().__init__()
+        self.intermediate_size = intermediate_size
         self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
         self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
         self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return self.down_proj(
-            F.gelu(self.gate_proj(x), approximate="tanh") * self.up_proj(x)
-        )
+        # If a loader fused gate_proj|up_proj into one gate_up_proj (single
+        # matmul; e.g. the GGUF loader's coalesced fusion), use it and split the
+        # [.., 2*intermediate_size] output back into gate/up. Otherwise fall back
+        # to the separate projections (unfused checkpoints / non-fusing loaders).
+        gate_up = getattr(self, "gate_up_proj", None)
+        if gate_up is not None:
+            fused = gate_up(x)
+            gate = fused[..., : self.intermediate_size]
+            up = fused[..., self.intermediate_size :]
+        else:
+            gate = self.gate_proj(x)
+            up = self.up_proj(x)
+        return self.down_proj(F.gelu(gate, approximate="tanh") * up)
 
 
 class Gemma4DecoderLayer(nn.Module):
diff --git a/examples/models/gemma4_31b/cuda_source_transformations.py b/examples/models/gemma4_31b/cuda_source_transformations.py
index 6609178e084..666d0c44e9d 100644
--- a/examples/models/gemma4_31b/cuda_source_transformations.py
+++ b/examples/models/gemma4_31b/cuda_source_transformations.py
@@ -30,7 +30,6 @@
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 
 from executorch.examples.models.gemma4.text_decoder import apply_rotary_emb
 from executorch.extension.llm.modules.turboquant import TurboQuantKVCache
@@ -111,105 +110,6 @@ def _turboquant_attention_forward(
     return self.o_proj(y)
 
 
-def _fused_mlp_forward(self, x: torch.Tensor) -> torch.Tensor:
-    """Drop-in ``Gemma4MLP.forward`` over a fused gate|up projection.
-
-    Identical math to ``down(gelu(gate(x)) * up(x))``: the single
-    ``gate_up_proj`` emits ``[gate | up]`` concatenated on the last dim,
-    which is then split. One W4A8 matmul (and one activation-quant of ``x``)
-    instead of two.
-    """
-    h = self.gate_up_proj(x)
-    gate = h[..., : self.intermediate_size]
-    up = h[..., self.intermediate_size :]
-    return self.down_proj(F.gelu(gate, approximate="tanh") * up)
-
-
-def _concat_coalesced_int4_along_n(a, b):
-    """Concatenate two ``CudaCoalescedInt4Tensor`` along the output (N) dim.
-
-    qdata is ``[N, K/2]`` and scale/zero_point are ``[N, n_groups]`` in the
-    coalesced layout, so a per-output-row concat on dim 0 is exact: the W4A8
-    dp4a matvec reads each output row's qdata/scale/zero independently, so
-    out[:N_a] reproduces ``a`` and out[N_a:] reproduces ``b`` bit-for-bit.
-    """
-    from executorch.backends.cuda.coalesced_int4_tensor import CudaCoalescedInt4Tensor
-
-    return CudaCoalescedInt4Tensor(
-        torch.cat([a.qdata, b.qdata], dim=0),
-        torch.cat([a.scale, b.scale], dim=0),
-        torch.cat([a.zero_point, b.zero_point], dim=0),
-        a.block_size,
-        torch.Size([a.shape[0] + b.shape[0], a.shape[1]]),
-        None,
-        a.activation_dtype,
-    )
-
-
-def _is_fuseable_int4_pair(gate_w, up_w) -> bool:
-    """True iff gate/up are both coalesced-int4 with matching K + block_size.
-
-    Q4_K MLP weights become ``CudaCoalescedInt4Tensor`` (fuseable); a Q6_K
-    weight becomes ``CudaDp4aPlanarInt6Tensor`` (left alone). ``act_pre_scale``
-    is unused on this path but we require it absent so the concat stays exact.
-    """
-    from executorch.backends.cuda.coalesced_int4_tensor import CudaCoalescedInt4Tensor
-
-    return (
-        isinstance(gate_w, CudaCoalescedInt4Tensor)
-        and isinstance(up_w, CudaCoalescedInt4Tensor)
-        and list(gate_w.block_size) == list(up_w.block_size)
-        and gate_w.shape[1] == up_w.shape[1]
-        and gate_w.act_pre_scale is None
-        and up_w.act_pre_scale is None
-    )
-
-
-def _fuse_gate_up_proj(model: nn.Module) -> None:
-    """Fuse each MLP's ``gate_proj | up_proj`` into one ``gate_up_proj``.
-
-    gate and up share the same input, so the unfused path quantizes ``x`` to
-    int8 twice and launches two W4A8 matvecs per layer. Fusing the weights
-    into one ``[2*inter, hidden]`` tensor halves both. Weight bytes read are
-    unchanged, so the win is launch + activation-quant overhead (decode is
-    launch-bound). Only Q4_K (coalesced-int4) layers are fused; any layer
-    with a non-int4 weight is left as two matmuls (still correct).
-
-    Must run AFTER weights are packed to ``CudaCoalescedInt4Tensor`` (i.e.
-    inside ``_export_cuda``), and is independent of TurboQuant.
-    """
-    n_fused = 0
-    n_skipped = 0
-    for layer in model.layers:
-        mlp = getattr(layer, "mlp", None)
-        if mlp is None or not (hasattr(mlp, "gate_proj") and hasattr(mlp, "up_proj")):
-            continue
-        gate_w = mlp.gate_proj.weight
-        up_w = mlp.up_proj.weight
-        if not _is_fuseable_int4_pair(gate_w, up_w):
-            n_skipped += 1
-            continue
-        inter = up_w.shape[0]
-        hidden = up_w.shape[1]
-        fused_w = _concat_coalesced_int4_along_n(gate_w, up_w)
-
-        # Container built on meta to avoid materializing a dense
-        # [2*inter, hidden] weight before we overwrite it with fused_w.
-        gate_up = nn.Linear(hidden, 2 * inter, bias=False, device="meta")
-        gate_up.weight = nn.Parameter(fused_w, requires_grad=False)
-        mlp.gate_up_proj = gate_up
-        mlp.intermediate_size = inter
-        del mlp.gate_proj
-        del mlp.up_proj
-        mlp.forward = types.MethodType(_fused_mlp_forward, mlp)
-        n_fused += 1
-
-    msg = f"[gemma4_31b cuda] Fused gate+up on {n_fused} MLP layers"
-    if n_skipped:
-        msg += f" ({n_skipped} skipped: non-int4 weights)"
-    print(msg)
-
-
 def cuda_source_transformations(
     model: nn.Module,
     *,
@@ -217,11 +117,6 @@ def cuda_source_transformations(
 ) -> None:
     """Apply CUDA source transformations to a Gemma 4 31B model in place.
 
-    Always fuses each MLP's ``gate_proj|up_proj`` into a single matmul (one
-    activation-quant + one W4A8 matvec per layer instead of two; Q4_K
-    coalesced-int4 layers only — other quant types are left untouched).
-    Optionally also swaps full-attention KV caches for TurboQuant TQ4.
-
     Args:
         model: ``Gemma4_31B`` instance to transform.
         use_turboquant: When True, swap full-attention layers' KV caches
@@ -230,8 +125,6 @@ def cuda_source_transformations(
             ``torch.ops.triton.tq4_sdpa``. Sliding-window layers are
             unaffected.
     """
-    _fuse_gate_up_proj(model)
-
     if not use_turboquant:
         return
 
diff --git a/examples/models/gemma4_31b/export.py b/examples/models/gemma4_31b/export.py
index b2b2264178a..d9e16bc34df 100644
--- a/examples/models/gemma4_31b/export.py
+++ b/examples/models/gemma4_31b/export.py
@@ -182,11 +182,12 @@ def _export_cuda(
 
     materialize_runtime_buffers(model, dtype=torch.bfloat16)
 
-    from executorch.examples.models.gemma4_31b.cuda_source_transformations import (
-        cuda_source_transformations,
-    )
+    if use_turboquant:
+        from executorch.examples.models.gemma4_31b.cuda_source_transformations import (
+            cuda_source_transformations,
+        )
 
-    cuda_source_transformations(model, use_turboquant=use_turboquant)
+        cuda_source_transformations(model, use_turboquant=True)
 
     # Int4Tensor weights are used directly — no format conversion.
     # F.linear dispatches to executorch_cuda::int4_plain_mm (CUDA shim).
diff --git a/examples/models/gemma4_31b/gguf_loader.py b/examples/models/gemma4_31b/gguf_loader.py
index 90839ea6f6a..6a4a70ced18 100644
--- a/examples/models/gemma4_31b/gguf_loader.py
+++ b/examples/models/gemma4_31b/gguf_loader.py
@@ -104,6 +104,89 @@ def _convert_weight(model, model_key: str, gtensor, backend: str):
     return gtensor
 
 
+# ---------------------------------------------------------------------------
+# Single-point gate/up fusion (backend-agnostic, at the raw GGUF level)
+#
+# gate_proj and up_proj share the same input, so the MLP can issue ONE matmul
+# over a [2*intermediate, hidden] weight instead of two. We fuse here -- before
+# any backend conversion (_convert_weight) -- by concatenating the two raw GGUF
+# block blobs along the output (row) dim. ExportableGGUFTensor.raw is
+# (N, row_bytes) row-major with each output row self-contained, so the concat is
+# an exact row-stack (no re-quant, no scale recompute). Both CUDA and MLX then
+# pack the already-fused weight, so there is no per-backend-type concat. The
+# model's Gemma4MLP.forward splits the [.., 2*intermediate] output back into
+# gate/up only when a fused gate_up_proj is present (graceful for unfused loads).
+
+
+def _gate_up_layer_kind(model_key: str):
+    """If ``model_key`` is an MLP gate/up proj weight, return ``(layer_idx, kind)``
+    with ``kind`` in ``{"gate", "up"}``; otherwise ``None``."""
+    prefix = "layers."  # expected key: layers.<idx>.mlp.<kind>_proj.weight
+    for kind in ("gate", "up"):
+        suffix = f".mlp.{kind}_proj.weight"
+        if model_key.startswith(prefix) and model_key.endswith(suffix):
+            mid = model_key[len(prefix) : len(model_key) - len(suffix)]
+            if mid.isdigit():  # bare layer index only; rejects e.g. "3.foo" or ""
+                return int(mid), kind
+    return None  # not a per-layer MLP gate/up projection weight
+
+
+def _gate_up_fuseable(gate, up) -> bool:
+    """True iff gate/up share GGUF quant type, packed row width, and logical K
+    (column count), so a row-concat along the output (N) dim is valid."""
+    return (
+        gate.ggml_type == up.ggml_type  # same GGUF quant type
+        and gate.raw.shape[1] == up.raw.shape[1]  # same packed row width
+        and int(gate.shape[1]) == int(up.shape[1])  # same logical K
+    )
+
+
+def _fuse_gate_up_raw(gate, up):
+    """Row-concat gate|up raw GGUF blocks (gate rows first) into one fused
+    ExportableGGUFTensor of shape (2*N, K); type/dtype metadata taken from gate."""
+    from executorch.extension.llm.export.gguf import ExportableGGUFTensor
+
+    fused_raw = torch.cat([gate.raw, up.raw], dim=0)  # stack rows: gate, then up
+    return ExportableGGUFTensor.from_raw(fused_raw, gate.ggml_type, gate.orig_dtype)
+
+
+def _assign_gate_up_unfused(model, layer_idx, kind, gtensor, backend, packers):
+    """Convert and pack one gate/up tensor into its own ``<kind>_proj`` (no fusion)."""
+    from executorch.examples.models.gemma4_31b.quant import pack_one
+
+    key = f"layers.{layer_idx}.mlp.{kind}_proj.weight"  # rebuild the model key
+    pack_one(model, key, _convert_weight(model, key, gtensor, backend), packers)
+
+
+def _install_and_pack_fused_gate_up(model, layer_idx, gate, up, backend, packers):
+    """Fuse gate|up at the raw level, replace the layer's gate_proj/up_proj with
+    one meta-device ``gate_up_proj`` Linear, then convert and pack the fused weight."""
+    import torch.nn as nn
+
+    from executorch.examples.models.gemma4_31b.quant import pack_one
+
+    fused = _fuse_gate_up_raw(gate, up)
+    inter, hidden = int(gate.shape[0]), int(gate.shape[1])  # gate is (out, in)
+
+    mlp = model.get_submodule(f"layers.{layer_idx}.mlp")
+    mlp.gate_up_proj = nn.Linear(hidden, 2 * inter, bias=False, device="meta")
+    del mlp.gate_proj  # superseded by gate_up_proj
+    del mlp.up_proj  # superseded by gate_up_proj
+
+    key = f"layers.{layer_idx}.mlp.gate_up_proj.weight"
+    pack_one(model, key, _convert_weight(model, key, fused, backend), packers)
+
+
+def _process_gate_up_pair(model, layer_idx, gate, up, backend, packers) -> bool:
+    """Fuse gate|up if compatible (returns True), else assign them unfused."""
+    if _gate_up_fuseable(gate, up):
+        _install_and_pack_fused_gate_up(model, layer_idx, gate, up, backend, packers)
+        return True  # pair was fused into gate_up_proj
+    _assign_gate_up_unfused(model, layer_idx, "gate", gate, backend, packers)
+    _assign_gate_up_unfused(model, layer_idx, "up", up, backend, packers)
+    return False  # incompatible pair: kept as separate gate_proj / up_proj
+
+
 def _resolve_tied_lm_head(model, lm_head_weight, packers):
     """Assign a tied lm_head (GGUF ties it to the token embedding)."""
     from executorch.examples.models.gemma4_31b.quant import pack_one
@@ -217,11 +300,32 @@ def load_gguf_model(
     n_processed = 0
 
     print(f"Streaming GGUF from {gguf_path}...")
+    pending_gate_up: dict = {}  # layer_idx -> {"gate": raw, "up": raw}
+    n_fused = 0
+    n_unfused = 0
     for gguf_name, value in iter_gguf(gguf_path):
         model_key = gguf_to_model_key(gguf_name)
         if model_key is None:
             continue
 
+        # Buffer the RAW gate/up ExportableGGUFTensor (pre-conversion) and fuse
+        # once both arrive -- the single common point upstream of _convert_weight.
+        gu = _gate_up_layer_kind(model_key)
+        if gu is not None and isinstance(value, ExportableGGUFTensor):
+            layer_idx, kind = gu
+            slot = pending_gate_up.setdefault(layer_idx, {})
+            slot[kind] = value
+            if "gate" in slot and "up" in slot:
+                if _process_gate_up_pair(
+                    model, layer_idx, slot["gate"], slot["up"], backend, packers
+                ):
+                    n_fused += 1
+                else:
+                    n_unfused += 1
+                pending_gate_up.pop(layer_idx, None)
+                n_processed += 2
+            continue
+
         if isinstance(value, ExportableGGUFTensor):
             weight = _convert_weight(model, model_key, value, backend)
             if model_key == "embed_tokens.weight":
@@ -238,6 +342,21 @@ def load_gguf_model(
         if n_processed % 100 == 0:
             print(f"  Processed {n_processed} tensors...")
 
+    # Flush any unpaired gate/up (partial/malformed) as separate unfused
+    # projections so no weight is left on meta.
+    for layer_idx, slot in pending_gate_up.items():
+        for kind in ("gate", "up"):
+            if kind in slot:
+                _assign_gate_up_unfused(
+                    model, layer_idx, kind, slot[kind], backend, packers
+                )
+                n_unfused += 1
+
+    print(
+        f"[gemma4_31b gguf] Fused gate+up on {n_fused} MLP layers"
+        + (f" ({n_unfused} left unfused)" if n_unfused else "")
+    )
+
     _resolve_tied_lm_head(model, lm_head_weight, packers)
 
     # Fill RoPE tables / KV caches / scalar constants (left on meta by the