From 386638f842a5bc6084c4ebc8ff52f6235e12bbd5 Mon Sep 17 00:00:00 2001
From: JeffreyChen <zenxcvwait@gmail.com>
Date: Sat, 20 Jun 2026 00:03:00 +0800
Subject: [PATCH 1/2] Add fuzzy string matching and dedupe (difflib default,
 optional rapidfuzz)

---
 README.md                                     |  7 ++
 README/README_zh-CN.md                        |  7 ++
 README/README_zh-TW.md                        |  7 ++
 .../Eng/doc/new_features/v40_features_doc.rst | 51 ++++++++++
 docs/source/Eng/eng_index.rst                 |  1 +
 .../Zh/doc/new_features/v40_features_doc.rst  | 48 ++++++++++
 docs/source/Zh/zh_index.rst                   |  1 +
 je_auto_control/__init__.py                   |  5 +
 .../gui/script_builder/command_schema.py      | 35 +++++++
 .../utils/executor/action_executor.py         | 34 +++++++
 je_auto_control/utils/fuzzy/__init__.py       |  9 ++
 je_auto_control/utils/fuzzy/fuzzy_match.py    | 85 ++++++++++++++++
 .../utils/mcp_server/tools/_factories.py      | 43 ++++++++-
 .../utils/mcp_server/tools/_handlers.py       | 20 ++++
 pyproject.toml                                |  1 +
 .../headless/test_fuzzy_match_batch.py        | 96 +++++++++++++++++++
 16 files changed, 449 insertions(+), 1 deletion(-)
 create mode 100644 docs/source/Eng/doc/new_features/v40_features_doc.rst
 create mode 100644 docs/source/Zh/doc/new_features/v40_features_doc.rst
 create mode 100644 je_auto_control/utils/fuzzy/__init__.py
 create mode 100644 je_auto_control/utils/fuzzy/fuzzy_match.py
 create mode 100644 test/unit_test/headless/test_fuzzy_match_batch.py

diff --git a/README.md b/README.md
index 154ca942..b232208d 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@
 
 ## Table of Contents
 
+- [What's new (2026-06-20) — Fuzzy String Matching & Dedupe](#whats-new-2026-06-20--fuzzy-string-matching--dedupe)
 - [What's new (2026-06-19) — Video Step-Overlay Report](#whats-new-2026-06-19--video-step-overlay-report)
 - [What's new (2026-06-19) — Agent Observability (GenAI OpenTelemetry Spans)](#whats-new-2026-06-19--agent-observability-genai-opentelemetry-spans)
 - [What's new (2026-06-19) — Compliance Control Report (SOC2 / ISO 27001)](#whats-new-2026-06-19--compliance-control-report-soc2--iso-27001)
@@ -92,6 +93,12 @@
 
 ---
 
+## What's new (2026-06-20) — Fuzzy String Matching & Dedupe
+
+Match noisy OCR/UI text robustly. Full reference: [`docs/source/Eng/doc/new_features/v40_features_doc.rst`](docs/source/Eng/doc/new_features/v40_features_doc.rst).
+
+- **`fuzzy_ratio` / `fuzzy_best_match` / `fuzzy_matches` / `fuzzy_dedupe`** (`AC_fuzzy_ratio` / `AC_fuzzy_best_match` / `AC_fuzzy_dedupe`, `ac_*`): score similarity (0..1), pick the closest candidate from a list, or collapse near-duplicates — so a flow can act on "the button that *looks like* Submit" rather than an exact label. The default backend is stdlib `difflib` (**zero extra deps**); the optional `[fuzzy]` extra adds `rapidfuzz` for speed, with scores normalised either way. `ignore_case` and `score_cutoff` supported.
+
 ## What's new (2026-06-19) — Video Step-Overlay Report
 
 Caption screenshots into a walkthrough video. Full reference: [`docs/source/Eng/doc/new_features/v39_features_doc.rst`](docs/source/Eng/doc/new_features/v39_features_doc.rst).
diff --git a/README/README_zh-CN.md b/README/README_zh-CN.md
index 4888eb50..de8a39ab 100644
--- a/README/README_zh-CN.md
+++ b/README/README_zh-CN.md
@@ -12,6 +12,7 @@
 
 ## 目录
 
+- [本次更新 (2026-06-20) — 模糊字符串匹配与去重](#本次更新-2026-06-20--模糊字符串匹配与去重)
 - [本次更新 (2026-06-19) — 视频步骤叠加报告](#本次更新-2026-06-19--视频步骤叠加报告)
 - [本次更新 (2026-06-19) — Agent 可观测性(GenAI OpenTelemetry Spans)](#本次更新-2026-06-19--agent-可观测性genai-opentelemetry-spans)
 - [本次更新 (2026-06-19) — 合规控制报告(SOC2 / ISO 27001)](#本次更新-2026-06-19--合规控制报告soc2--iso-27001)
@@ -91,6 +92,12 @@
 
 ---
 
+## 本次更新 (2026-06-20) — 模糊字符串匹配与去重
+
+稳健匹配含噪声的 OCR/UI 文本。完整参考:[`docs/source/Zh/doc/new_features/v40_features_doc.rst`](../docs/source/Zh/doc/new_features/v40_features_doc.rst)。
+
+- **`fuzzy_ratio` / `fuzzy_best_match` / `fuzzy_matches` / `fuzzy_dedupe`**(`AC_fuzzy_ratio` / `AC_fuzzy_best_match` / `AC_fuzzy_dedupe`、`ac_*`):为相似度评分(0..1)、从列表挑最接近的候选,或收合近似重复 —— 让流程可针对「*看起来像* Submit 的按钮」动作,而非精确标签。默认后端为标准库 `difflib`(**无额外依赖**);可选的 `[fuzzy]` extra 加入 `rapidfuzz` 以加速,两者分数皆归一化。支持 `ignore_case` 与 `score_cutoff`。
+
 ## 本次更新 (2026-06-19) — 视频步骤叠加报告
 
 将屏幕截图加上字幕制成走查视频。完整参考:[`docs/source/Zh/doc/new_features/v39_features_doc.rst`](../docs/source/Zh/doc/new_features/v39_features_doc.rst)。
diff --git a/README/README_zh-TW.md b/README/README_zh-TW.md
index 6a158c53..4a5c8570 100644
--- a/README/README_zh-TW.md
+++ b/README/README_zh-TW.md
@@ -12,6 +12,7 @@
 
 ## 目錄
 
+- [本次更新 (2026-06-20) — 模糊字串比對與去重](#本次更新-2026-06-20--模糊字串比對與去重)
 - [本次更新 (2026-06-19) — 影片步驟疊加報告](#本次更新-2026-06-19--影片步驟疊加報告)
 - [本次更新 (2026-06-19) — Agent 可觀測性(GenAI OpenTelemetry Spans)](#本次更新-2026-06-19--agent-可觀測性genai-opentelemetry-spans)
 - [本次更新 (2026-06-19) — 合規控制報告(SOC2 / ISO 27001)](#本次更新-2026-06-19--合規控制報告soc2--iso-27001)
@@ -91,6 +92,12 @@
 
 ---
 
+## 本次更新 (2026-06-20) — 模糊字串比對與去重
+
+穩健比對含雜訊的 OCR/UI 文字。完整參考:[`docs/source/Zh/doc/new_features/v40_features_doc.rst`](../docs/source/Zh/doc/new_features/v40_features_doc.rst)。
+
+- **`fuzzy_ratio` / `fuzzy_best_match` / `fuzzy_matches` / `fuzzy_dedupe`**(`AC_fuzzy_ratio` / `AC_fuzzy_best_match` / `AC_fuzzy_dedupe`、`ac_*`):為相似度評分(0..1)、從清單挑最接近的候選,或收合近似重複 —— 讓流程可針對「*看起來像* Submit 的按鈕」動作,而非精確標籤。預設後端為標準函式庫 `difflib`(**無額外相依**);選用的 `[fuzzy]` extra 加入 `rapidfuzz` 以加速,兩者分數皆正規化。支援 `ignore_case` 與 `score_cutoff`。
+
 ## 本次更新 (2026-06-19) — 影片步驟疊加報告
 
 將螢幕截圖加上字幕製成走查影片。完整參考:[`docs/source/Zh/doc/new_features/v39_features_doc.rst`](../docs/source/Zh/doc/new_features/v39_features_doc.rst)。
diff --git a/docs/source/Eng/doc/new_features/v40_features_doc.rst b/docs/source/Eng/doc/new_features/v40_features_doc.rst
new file mode 100644
index 00000000..8822f52d
--- /dev/null
+++ b/docs/source/Eng/doc/new_features/v40_features_doc.rst
@@ -0,0 +1,51 @@
+Fuzzy String Matching & Dedupe
+==============================
+
+Exact string comparison is brittle when text comes from OCR or shifting UI copy.
+These helpers score similarity, pick the best candidate from a list, and collapse
+near-duplicates — so a flow can act on "the button that *looks like* Submit"
+rather than an exact label.
+
+The default backend is the standard library :mod:`difflib`, so the feature works
+with **zero extra dependencies**. If the optional ``rapidfuzz`` package is
+installed (``pip install je_auto_control[fuzzy]``) it is used instead for speed;
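+scores are normalised to ``0.0..1.0`` either way, but exact values can differ slightly between backends, so compare against a threshold rather than an exact float.
+``je_auto_control.utils.fuzzy.BACKEND`` names the active backend. The module imports no ``PySide6``.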
+
+Headless API
+------------
+
+.. code-block:: python
+
+    from je_auto_control import (
+        fuzzy_ratio, fuzzy_best_match, fuzzy_matches, fuzzy_dedupe)
+
+    fuzzy_ratio("Sumbit", "Submit")          # ~0.83 (case-insensitive default)
+
+    fuzzy_best_match("Sve", ["Cancel", "Save", "Submit"])
+    # -> ("Save", 0.86, 1)   (choice, score, index) — or None below score_cutoff
+
+    fuzzy_matches("login", ["login", "logon", "logout"], limit=2)
+    # -> [("login", 1.0, 0), ("logon", 0.8, 1)]  sorted best-first
+
+    fuzzy_dedupe(["Invoice", "invoice ", "Receipt"], threshold=0.85)
+    # -> ["Invoice", "Receipt"]   near-duplicates collapse, first kept
+
+All functions take ``ignore_case`` (default ``True``); ``fuzzy_best_match`` /
+``fuzzy_matches`` take ``score_cutoff`` to drop weak candidates.
+
+Executor commands
+-----------------
+
+================================ ===================================================
+Command                          Effect
+================================ ===================================================
+``AC_fuzzy_ratio``               ``{score}`` similarity between two strings.
+``AC_fuzzy_best_match``          ``{match, score, index}``; match is null if none.
+``AC_fuzzy_dedupe``              ``{unique}`` with near-duplicates collapsed.
+================================ ===================================================
+
+``choices`` / ``items`` accept a list or a JSON-string list (so the visual
+builder works). The same operations are exposed as MCP tools (``ac_fuzzy_ratio``
+/ ``ac_fuzzy_best_match`` / ``ac_fuzzy_dedupe``) and as Script Builder commands
+under **Data**.
diff --git a/docs/source/Eng/eng_index.rst b/docs/source/Eng/eng_index.rst
index dafd14d0..27b3fa9c 100644
--- a/docs/source/Eng/eng_index.rst
+++ b/docs/source/Eng/eng_index.rst
@@ -62,6 +62,7 @@ Comprehensive guides for all AutoControl features.
    doc/new_features/v37_features_doc
    doc/new_features/v38_features_doc
    doc/new_features/v39_features_doc
+   doc/new_features/v40_features_doc
    doc/ocr_backends/ocr_backends_doc
    doc/observability/observability_doc
    doc/operations_layer/operations_layer_doc
diff --git a/docs/source/Zh/doc/new_features/v40_features_doc.rst b/docs/source/Zh/doc/new_features/v40_features_doc.rst
new file mode 100644
index 00000000..49eec3a1
--- /dev/null
+++ b/docs/source/Zh/doc/new_features/v40_features_doc.rst
@@ -0,0 +1,48 @@
+模糊字串比對與去重
+==================
+
+當文字來自 OCR 或時常變動的 UI 文案時,精確字串比對很脆弱。這些輔助函式為相似度評分、
+從清單中挑出最佳候選,並收合近似重複項 —— 讓流程可以針對「*看起來像* Submit 的按鈕」
+動作,而非精確標籤。
+
+預設後端為標準函式庫 :mod:`difflib`,因此本功能**無需任何額外相依**即可運作。若安裝了
+選用的 ``rapidfuzz`` 套件(``pip install je_auto_control[fuzzy]``)則改用其以加速;無論
+何者,分數皆正規化為 ``0.0..1.0``,但不同後端的確切數值可能略有差異,故請以門檻值比較而非比對精確浮點數。
+``je_auto_control.utils.fuzzy.BACKEND`` 標示目前作用中的後端。不匯入 ``PySide6``。
+
+無頭 API
+--------
+
+.. code-block:: python
+
+    from je_auto_control import (
+        fuzzy_ratio, fuzzy_best_match, fuzzy_matches, fuzzy_dedupe)
+
+    fuzzy_ratio("Sumbit", "Submit")          # ~0.83(預設不分大小寫)
+
+    fuzzy_best_match("Sve", ["Cancel", "Save", "Submit"])
+    # -> ("Save", 0.86, 1)   (choice, score, index) —— 低於 score_cutoff 則為 None
+
+    fuzzy_matches("login", ["login", "logon", "logout"], limit=2)
+    # -> [("login", 1.0, 0), ("logon", 0.8, 1)]  由高分至低分排序
+
+    fuzzy_dedupe(["Invoice", "invoice ", "Receipt"], threshold=0.85)
+    # -> ["Invoice", "Receipt"]   近似重複收合,保留第一個
+
+所有函式皆接受 ``ignore_case``(預設 ``True``);``fuzzy_best_match`` /
+``fuzzy_matches`` 接受 ``score_cutoff`` 以濾除弱候選。
+
+執行器指令
+----------
+
+================================ ===================================================
+指令                             效果
+================================ ===================================================
+``AC_fuzzy_ratio``               兩字串相似度的 ``{score}``。
+``AC_fuzzy_best_match``          從候選中取 ``{match, score, index}``(無符合時 match 為 null)。
+``AC_fuzzy_dedupe``              收合近似重複後的 ``{unique}``。
+================================ ===================================================
+
+``choices`` / ``items`` 接受清單或 JSON 字串清單(因此視覺化建構器可用)。相同操作亦提供
+為 MCP 工具(``ac_fuzzy_ratio`` / ``ac_fuzzy_best_match`` / ``ac_fuzzy_dedupe``),以及
+Script Builder 中 **Data** 分類下的指令。
diff --git a/docs/source/Zh/zh_index.rst b/docs/source/Zh/zh_index.rst
index b9a8ccb1..eb68bf66 100644
--- a/docs/source/Zh/zh_index.rst
+++ b/docs/source/Zh/zh_index.rst
@@ -62,6 +62,7 @@ AutoControl 所有功能的完整使用指南。
    doc/new_features/v37_features_doc
    doc/new_features/v38_features_doc
    doc/new_features/v39_features_doc
+   doc/new_features/v40_features_doc
    doc/ocr_backends/ocr_backends_doc
    doc/observability/observability_doc
    doc/operations_layer/operations_layer_doc
diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py
index e8f8eadc..f8c18a8f 100644
--- a/je_auto_control/__init__.py
+++ b/je_auto_control/__init__.py
@@ -230,6 +230,10 @@
 from je_auto_control.utils.video_report import (
     VideoStep, build_overlay_plan, render_overlay_frame, write_step_video,
 )
+# Fuzzy string matching / dedupe (difflib default, optional rapidfuzz)
+from je_auto_control.utils.fuzzy import (
+    fuzzy_best_match, fuzzy_dedupe, fuzzy_matches, fuzzy_ratio,
+)
 # Background popup/interrupt watchdog (unattended automation)
 from je_auto_control.utils.watchdog import (
     PopupWatchdog, WatchdogRule, default_popup_watchdog,
@@ -676,6 +680,7 @@ def start_autocontrol_gui(*args, **kwargs):
     "AgentTrace", "default_trace", "reset_trace",
     "VideoStep", "build_overlay_plan", "render_overlay_frame",
     "write_step_video",
+    "fuzzy_best_match", "fuzzy_dedupe", "fuzzy_matches", "fuzzy_ratio",
     # MCP server
     "AuditLogger", "HttpMCPServer", "MCPContent", "MCPPrompt",
     "MCPPromptArgument", "MCPResource", "MCPServer", "MCPTool",
diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py
index a12ef96e..a726cbfe 100644
--- a/je_auto_control/gui/script_builder/command_schema.py
+++ b/je_auto_control/gui/script_builder/command_schema.py
@@ -867,6 +867,41 @@ def _add_misc_specs(specs: List[CommandSpec]) -> None:
         ),
         description="Render captioned screenshots into a walkthrough video.",
     ))
+    specs.append(CommandSpec(
+        "AC_fuzzy_ratio", "Data", "Fuzzy: Similarity Ratio",
+        fields=(
+            FieldSpec("left", FieldType.STRING),
+            FieldSpec("right", FieldType.STRING),
+            FieldSpec("ignore_case", FieldType.BOOL, optional=True,
+                      default=True),
+        ),
+        description="Similarity score (0..1) between two strings.",
+    ))
+    specs.append(CommandSpec(
+        "AC_fuzzy_best_match", "Data", "Fuzzy: Best Match",
+        fields=(
+            FieldSpec("query", FieldType.STRING),
+            FieldSpec("choices", FieldType.STRING,
+                      placeholder='["Save", "Cancel", "Submit"]'),
+            FieldSpec("score_cutoff", FieldType.FLOAT, optional=True,
+                      default=0.0),
+            FieldSpec("ignore_case", FieldType.BOOL, optional=True,
+                      default=True),
+        ),
+        description="Best fuzzy match of query within choices (JSON list).",
+    ))
+    specs.append(CommandSpec(
+        "AC_fuzzy_dedupe", "Data", "Fuzzy: Dedupe",
+        fields=(
+            FieldSpec("items", FieldType.STRING,
+                      placeholder='["foo", "foo ", "bar"]'),
+            FieldSpec("threshold", FieldType.FLOAT, optional=True,
+                      default=0.9),
+            FieldSpec("ignore_case", FieldType.BOOL, optional=True,
+                      default=True),
+        ),
+        description="Collapse near-duplicate strings (JSON list).",
+    ))
     specs.append(CommandSpec(
         "AC_generate_sop", "Report", "Generate SOP Document",
         fields=(
diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py
index 23f6555c..41d6f0a0 100644
--- a/je_auto_control/utils/executor/action_executor.py
+++ b/je_auto_control/utils/executor/action_executor.py
@@ -3067,6 +3067,37 @@ def _write_step_video(steps: Any, output: str, fps: int = 10,
                             seconds_per_step=seconds_per_step)
 
 
+def _coerce_list(value: Any) -> List[Any]:  # str -> json.loads, else list()
+    import json  # NOTE(review): JSON decoding to a non-list is returned as-is
+    return json.loads(value) if isinstance(value, str) else list(value)
+
+
+def _fuzzy_ratio(left: Any, right: Any,
+                 ignore_case: bool = True) -> Dict[str, Any]:
+    """Adapter: similarity score (0..1) between two values."""
+    from je_auto_control.utils.fuzzy import fuzzy_ratio
+    return {"score": fuzzy_ratio(left, right, ignore_case=ignore_case)}
+
+
+def _fuzzy_best_match(query: Any, choices: Any, score_cutoff: float = 0.0,
+                      ignore_case: bool = True) -> Dict[str, Any]:
+    """Adapter: best fuzzy match from choices, or a null match."""
+    from je_auto_control.utils.fuzzy import fuzzy_best_match
+    best = fuzzy_best_match(query, _coerce_list(choices),
+                            score_cutoff=score_cutoff, ignore_case=ignore_case)
+    if best is None:
+        return {"match": None, "score": 0.0, "index": -1}
+    return {"match": best[0], "score": best[1], "index": best[2]}
+
+
+def _fuzzy_dedupe(items: Any, threshold: float = 0.9,
+                  ignore_case: bool = True) -> Dict[str, Any]:
+    """Adapter: drop near-duplicate items, keeping the first of each cluster."""
+    from je_auto_control.utils.fuzzy import fuzzy_dedupe
+    return {"unique": fuzzy_dedupe(_coerce_list(items), threshold=threshold,
+                                   ignore_case=ignore_case)}
+
+
 class Executor:
     """
     Executor
@@ -3319,6 +3350,9 @@ def __init__(self):
             "AC_trace_export": _trace_export,
             "AC_trace_reset": _trace_reset,
             "AC_write_step_video": _write_step_video,
+            "AC_fuzzy_ratio": _fuzzy_ratio,
+            "AC_fuzzy_best_match": _fuzzy_best_match,
+            "AC_fuzzy_dedupe": _fuzzy_dedupe,
             "AC_a11y_record_start": _a11y_record_start,
             "AC_a11y_record_stop": _a11y_record_stop,
             "AC_a11y_record_events": _a11y_record_events,
diff --git a/je_auto_control/utils/fuzzy/__init__.py b/je_auto_control/utils/fuzzy/__init__.py
new file mode 100644
index 00000000..e7c2b0fd
--- /dev/null
+++ b/je_auto_control/utils/fuzzy/__init__.py
@@ -0,0 +1,9 @@
+"""Fuzzy string matching and dedupe (difflib by default, rapidfuzz if present)."""
+from je_auto_control.utils.fuzzy.fuzzy_match import (
+    BACKEND, fuzzy_best_match, fuzzy_dedupe, fuzzy_matches, fuzzy_ratio,
+)
+
+__all__ = [
+    "BACKEND", "fuzzy_best_match", "fuzzy_dedupe", "fuzzy_matches",
+    "fuzzy_ratio",
+]
diff --git a/je_auto_control/utils/fuzzy/fuzzy_match.py b/je_auto_control/utils/fuzzy/fuzzy_match.py
new file mode 100644
index 00000000..026ffc7b
--- /dev/null
+++ b/je_auto_control/utils/fuzzy/fuzzy_match.py
@@ -0,0 +1,85 @@
+"""Fuzzy string matching for noisy automation text (OCR labels, table cells).
+
+Exact string comparison is brittle when text comes from OCR or shifting UI
+copy. These helpers score similarity, pick the best candidate from a list, and
+collapse near-duplicates. The default backend is the standard library
+``difflib`` (so the feature works with **zero** extra dependencies); if the
+optional ``rapidfuzz`` package is installed it is used instead for speed — the
+scores are normalised to ``0.0..1.0`` either way, but exact values may differ
+slightly by backend, so use thresholds. :data:`BACKEND` names the active one.
+
+Pure Python; imports no ``PySide6``.
+"""
+from typing import Any, List, Optional, Sequence, Tuple
+
+try:  # optional acceleration; the difflib fallback is always correct
+    from rapidfuzz import fuzz as _rf
+
+    BACKEND = "rapidfuzz"
+
+    def _similarity(left: str, right: str) -> float:
+        return _rf.ratio(left, right) / 100.0
+except ImportError:  # pragma: no cover - exercised wherever rapidfuzz is absent
+    from difflib import SequenceMatcher
+
+    BACKEND = "difflib"
+
+    def _similarity(left: str, right: str) -> float:
+        return SequenceMatcher(None, left, right).ratio()
+
+
+def _prepare(value: Any, ignore_case: bool) -> str:
+    text = str(value)
+    return text.lower() if ignore_case else text
+
+
+def fuzzy_ratio(left: Any, right: Any, *, ignore_case: bool = True) -> float:
+    """Return a similarity score in ``0.0..1.0`` for two values."""
+    return _similarity(_prepare(left, ignore_case),
+                       _prepare(right, ignore_case))
+
+
+def fuzzy_matches(query: Any, choices: Sequence[Any], *, limit: int = 5,
+                  score_cutoff: float = 0.0, ignore_case: bool = True
+                  ) -> List[Tuple[Any, float, int]]:
+    """Return up to ``limit`` ``(choice, score, index)`` tuples, best first.
+
+    Drops scores below ``score_cutoff``; a negative ``limit`` returns them all.
+    """
+    prepared_query = _prepare(query, ignore_case)
+    scored = [
+        (choice, _similarity(prepared_query, _prepare(choice, ignore_case)),
+         index)  # the original choice is returned, not its prepared form
+        for index, choice in enumerate(choices)
+    ]
+    scored = [item for item in scored if item[1] >= score_cutoff]
+    scored.sort(key=lambda item: item[1], reverse=True)  # ties keep input order
+    return scored[:limit] if limit >= 0 else scored
+
+
+def fuzzy_best_match(query: Any, choices: Sequence[Any], *,
+                     score_cutoff: float = 0.0, ignore_case: bool = True
+                     ) -> Optional[Tuple[Any, float, int]]:
+    """Return the single best ``(choice, score, index)`` or ``None``."""
+    ranked = fuzzy_matches(query, choices, limit=1, score_cutoff=score_cutoff,
+                           ignore_case=ignore_case)
+    return ranked[0] if ranked else None
+
+
+def fuzzy_dedupe(items: Sequence[Any], *, threshold: float = 0.9,
+                 ignore_case: bool = True) -> List[Any]:
+    """Collapse near-duplicate items, keeping the first of each cluster.
+
+    An item is dropped when it scores at least ``threshold`` against an item
+    already kept, so the outcome depends on the order of ``items``.
+    """
+    kept: List[Any] = []
+    kept_prepared: List[str] = []  # prepared forms, parallel to ``kept``
+    for item in items:
+        prepared = _prepare(item, ignore_case)
+        if any(_similarity(prepared, seen) >= threshold
+               for seen in kept_prepared):
+            continue  # near-duplicate of an earlier kept item
+        kept.append(item)  # callers get the original, unprepared value
+        kept_prepared.append(prepared)
+    return kept
diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py
index 091683ff..0069d947 100644
--- a/je_auto_control/utils/mcp_server/tools/_factories.py
+++ b/je_auto_control/utils/mcp_server/tools/_factories.py
@@ -2844,6 +2844,47 @@ def video_report_tools() -> List[MCPTool]:
     ]
 
 
+def fuzzy_tools() -> List[MCPTool]:
+    _CHOICES = {"type": "array", "items": {"type": "string"}}
+    return [
+        MCPTool(
+            name="ac_fuzzy_ratio",
+            description=("Similarity score (0..1) between two strings, robust "
+                         "to OCR/UI noise (difflib, or rapidfuzz if "
+                         "installed). 'ignore_case' defaults true. Returns "
+                         "{score}."),
+            input_schema=schema(
+                {"left": {"type": "string"}, "right": {"type": "string"},
+                 "ignore_case": {"type": "boolean"}}, ["left", "right"]),
+            handler=h.fuzzy_ratio,
+            annotations=READ_ONLY,
+        ),
+        MCPTool(
+            name="ac_fuzzy_best_match",
+            description=("Best fuzzy match of 'query' within 'choices' scoring "
+                         ">= 'score_cutoff'. Returns {match, score, index} or "
+                         "{match: null} when nothing qualifies."),
+            input_schema=schema(
+                {"query": {"type": "string"}, "choices": _CHOICES,
+                 "score_cutoff": {"type": "number"},
+                 "ignore_case": {"type": "boolean"}}, ["query", "choices"]),
+            handler=h.fuzzy_best_match,
+            annotations=READ_ONLY,
+        ),
+        MCPTool(
+            name="ac_fuzzy_dedupe",
+            description=("Collapse near-duplicate strings, keeping the first "
+                         "of each cluster (items >= 'threshold' similar are "
+                         "dropped). Returns {unique}."),
+            input_schema=schema(
+                {"items": _CHOICES, "threshold": {"type": "number"},
+                 "ignore_case": {"type": "boolean"}}, ["items"]),
+            handler=h.fuzzy_dedupe,
+            annotations=READ_ONLY,
+        ),
+    ]
+
+
 def unattended_tools() -> List[MCPTool]:
     return [
         MCPTool(
@@ -3904,7 +3945,7 @@ def media_assert_tools() -> List[MCPTool]:
     process_doc_tools, tween_drag_tools, plugin_sdk_tools, governance_tools,
     credential_lease_tools, egress_tools, approval_testing_tools,
     trajectory_eval_tools, compliance_tools, agent_trace_tools,
-    video_report_tools,
+    video_report_tools, fuzzy_tools,
     screen_record_tools,
     process_and_shell_tools, remote_desktop_tools, gamepad_tools,
     usb_passthrough_tools, assertion_tools, data_source_tools,
diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py
index d099003f..2621783a 100644
--- a/je_auto_control/utils/mcp_server/tools/_handlers.py
+++ b/je_auto_control/utils/mcp_server/tools/_handlers.py
@@ -1372,6 +1372,26 @@ def write_step_video(steps, output, fps=10, seconds_per_step=2.0):
     return _write(steps, output, fps=fps, seconds_per_step=seconds_per_step)
 
 
+def fuzzy_ratio(left, right, ignore_case=True):
+    from je_auto_control.utils.fuzzy import fuzzy_ratio as _ratio
+    return {"score": _ratio(left, right, ignore_case=ignore_case)}
+
+
+def fuzzy_best_match(query, choices, score_cutoff=0.0, ignore_case=True):
+    from je_auto_control.utils.fuzzy import fuzzy_best_match as _best
+    best = _best(query, choices, score_cutoff=score_cutoff,
+                 ignore_case=ignore_case)
+    if best is None:
+        return {"match": None, "score": 0.0, "index": -1}
+    return {"match": best[0], "score": best[1], "index": best[2]}
+
+
+def fuzzy_dedupe(items, threshold=0.9, ignore_case=True):
+    from je_auto_control.utils.fuzzy import fuzzy_dedupe as _dedupe
+    return {"unique": _dedupe(items, threshold=threshold,
+                              ignore_case=ignore_case)}
+
+
 def vlm_locate(description: str,
                screen_region: Optional[List[int]] = None,
                model: Optional[str] = None) -> Optional[List[int]]:
diff --git a/pyproject.toml b/pyproject.toml
index e35619e8..a7136a51 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,6 +72,7 @@ signaling = ["fastapi>=0.115", "uvicorn>=0.32"]
 discovery = ["zeroconf>=0.130"]
 pdf = ["pypdf>=4.0"]
 office = ["openpyxl>=3.1", "python-docx>=1.1", "python-pptx>=0.6"]
+fuzzy = ["rapidfuzz>=3.0"]
 
 [tool.bandit]
 exclude_dirs = [
diff --git a/test/unit_test/headless/test_fuzzy_match_batch.py b/test/unit_test/headless/test_fuzzy_match_batch.py
new file mode 100644
index 00000000..c561f884
--- /dev/null
+++ b/test/unit_test/headless/test_fuzzy_match_batch.py
@@ -0,0 +1,96 @@
+"""Headless tests for fuzzy matching/dedupe. The difflib backend is always
+present, so these run with no extra dependency; assertions check ordering and
+thresholds, not exact backend-specific float values. Pure stdlib, no Qt."""
+import je_auto_control as ac
+from je_auto_control.utils.fuzzy import (
+    BACKEND, fuzzy_best_match, fuzzy_dedupe, fuzzy_matches, fuzzy_ratio)
+
+
+def test_backend_is_known():
+    assert BACKEND in ("difflib", "rapidfuzz")
+
+
+def test_ratio_bounds_and_identity():
+    assert fuzzy_ratio("hello", "hello") == 1.0
+    assert fuzzy_ratio("hello", "xxxxx") < 0.5
+    assert 0.0 <= fuzzy_ratio("Save", "save", ignore_case=False) <= 1.0
+
+
+def test_ratio_ignore_case():
+    assert fuzzy_ratio("SAVE", "save", ignore_case=True) == 1.0
+    assert fuzzy_ratio("SAVE", "save", ignore_case=False) < 1.0
+
+
+def test_best_match_picks_closest():
+    choices = ["Cancel", "Save As...", "Save", "Submit"]
+    match, score, index = fuzzy_best_match("Sve", choices)
+    assert match == "Save"
+    assert choices[index] == "Save"
+    assert score > 0.5
+
+
+def test_best_match_cutoff_returns_none():
+    assert fuzzy_best_match("zzzzz", ["alpha", "beta"],
+                            score_cutoff=0.8) is None
+
+
+def test_matches_sorted_and_limited():
+    choices = ["login", "logon", "logout", "register"]
+    ranked = fuzzy_matches("login", choices, limit=2)
+    assert len(ranked) == 2
+    scores = [score for _, score, _ in ranked]
+    assert scores == sorted(scores, reverse=True)
+    assert ranked[0][0] == "login"
+
+
+def test_dedupe_collapses_near_duplicates():
+    items = ["Invoice", "invoice ", "Receipt", "INVOICE"]
+    unique = fuzzy_dedupe(items, threshold=0.85)
+    assert "Invoice" in unique
+    assert "Receipt" in unique
+    assert len(unique) == 2          # the invoice variants collapse
+
+
+def test_dedupe_keeps_distinct():
+    items = ["apple", "banana", "cherry"]
+    assert fuzzy_dedupe(items) == items
+
+
+# --- wiring ---------------------------------------------------------------
+
+def test_executor_round_trip():
+    rec = ac.execute_action([[
+        "AC_fuzzy_best_match",
+        {"query": "Sumbit", "choices": ["Cancel", "Submit", "Save"]},
+    ]])
+    result = next(v for v in rec.values() if isinstance(v, dict))
+    assert result["match"] == "Submit"
+
+    rec2 = ac.execute_action([[
+        "AC_fuzzy_dedupe",
+        {"items": ["a", "a", "b"], "threshold": 0.95},
+    ]])
+    deduped = next(v for v in rec2.values() if isinstance(v, dict))
+    assert deduped["unique"] == ["a", "b"]
+
+
+def test_wiring():
+    known = ac.executor.known_commands()
+    assert {"AC_fuzzy_ratio", "AC_fuzzy_best_match",
+            "AC_fuzzy_dedupe"} <= known
+    from je_auto_control.utils.mcp_server.tools import (
+        build_default_tool_registry)
+    names = {t.name for t in build_default_tool_registry()}
+    assert {"ac_fuzzy_ratio", "ac_fuzzy_best_match",
+            "ac_fuzzy_dedupe"} <= names
+    from je_auto_control.gui.script_builder.command_schema import _build_specs
+    cmds = {s.command for s in _build_specs()}
+    assert {"AC_fuzzy_ratio", "AC_fuzzy_best_match",
+            "AC_fuzzy_dedupe"} <= cmds
+
+
+def test_facade_exports():
+    for attr in ("fuzzy_ratio", "fuzzy_best_match", "fuzzy_matches",
+                 "fuzzy_dedupe"):
+        assert hasattr(ac, attr)
+        assert attr in ac.__all__

From 81394ca611df011b4d3f9e255adfa1fae27a3705 Mon Sep 17 00:00:00 2001
From: JeffreyChen <zenxcvwait@gmail.com>
Date: Sat, 20 Jun 2026 00:09:33 +0800
Subject: [PATCH 2/2] Use pytest.approx for fuzzy ratio comparisons (Sonar
 S1244)

---
 test/unit_test/headless/test_fuzzy_match_batch.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/unit_test/headless/test_fuzzy_match_batch.py b/test/unit_test/headless/test_fuzzy_match_batch.py
index c561f884..6f3ffde0 100644
--- a/test/unit_test/headless/test_fuzzy_match_batch.py
+++ b/test/unit_test/headless/test_fuzzy_match_batch.py
@@ -1,6 +1,8 @@
 """Headless tests for fuzzy matching/dedupe. The difflib backend is always
 present, so these run with no extra dependency; assertions check ordering and
 thresholds, not exact backend-specific float values. Pure stdlib, no Qt."""
+import pytest
+
 import je_auto_control as ac
 from je_auto_control.utils.fuzzy import (
     BACKEND, fuzzy_best_match, fuzzy_dedupe, fuzzy_matches, fuzzy_ratio)
@@ -11,13 +13,13 @@ def test_backend_is_known():
 
 
 def test_ratio_bounds_and_identity():
-    assert fuzzy_ratio("hello", "hello") == 1.0
+    assert fuzzy_ratio("hello", "hello") == pytest.approx(1.0)
     assert fuzzy_ratio("hello", "xxxxx") < 0.5
     assert 0.0 <= fuzzy_ratio("Save", "save", ignore_case=False) <= 1.0
 
 
 def test_ratio_ignore_case():
-    assert fuzzy_ratio("SAVE", "save", ignore_case=True) == 1.0
+    assert fuzzy_ratio("SAVE", "save", ignore_case=True) == pytest.approx(1.0)
     assert fuzzy_ratio("SAVE", "save", ignore_case=False) < 1.0