From 70122c4d87515ef4487dd65e9b7ea2d75d117c5d Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 19 Jun 2026 22:52:50 +0800 Subject: [PATCH] Add approval testing: verify artifacts against approved baselines --- README.md | 7 ++ README/README_zh-CN.md | 7 ++ README/README_zh-TW.md | 7 ++ .../Eng/doc/new_features/v35_features_doc.rst | 52 +++++++++ docs/source/Eng/eng_index.rst | 1 + .../Zh/doc/new_features/v35_features_doc.rst | 46 ++++++++ docs/source/Zh/zh_index.rst | 1 + je_auto_control/__init__.py | 6 + .../gui/script_builder/command_schema.py | 29 +++++ je_auto_control/utils/approval/__init__.py | 9 ++ .../utils/approval/approval_test.py | 93 ++++++++++++++++ .../utils/executor/action_executor.py | 27 +++++ .../utils/mcp_server/tools/_factories.py | 39 ++++++- .../utils/mcp_server/tools/_handlers.py | 20 ++++ .../headless/test_approval_testing_batch.py | 104 ++++++++++++++++++ 15 files changed, 447 insertions(+), 1 deletion(-) create mode 100644 docs/source/Eng/doc/new_features/v35_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v35_features_doc.rst create mode 100644 je_auto_control/utils/approval/__init__.py create mode 100644 je_auto_control/utils/approval/approval_test.py create mode 100644 test/unit_test/headless/test_approval_testing_batch.py diff --git a/README.md b/README.md index 12746670..79e5e536 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ ## Table of Contents +- [What's new (2026-06-19) — Approval Testing (Golden-Master Baselines)](#whats-new-2026-06-19--approval-testing-golden-master-baselines) - [What's new (2026-06-19) — Network Egress Allowlist Guard](#whats-new-2026-06-19--network-egress-allowlist-guard) - [What's new (2026-06-19) — Just-In-Time Credential Leases](#whats-new-2026-06-19--just-in-time-credential-leases) - [What's new (2026-06-19) — Maker-Checker Approval Gate](#whats-new-2026-06-19--maker-checker-approval-gate) @@ -87,6 +88,12 @@ --- +## What's new (2026-06-19) — Approval Testing (Golden-Master 
Baselines) + +Lock outputs against a human-approved baseline. Full reference: [`docs/source/Eng/doc/new_features/v35_features_doc.rst`](docs/source/Eng/doc/new_features/v35_features_doc.rst). + +- **`verify_artifact` / `approve_artifact`** (`AC_verify_artifact` / `AC_approve_artifact` / `AC_pending_artifacts`, `ac_*`): golden-master / snapshot testing for *any* artifact (text, JSON, OCR output, screenshot bytes). `verify_artifact` compares produced content to `<name>.approved.<ext>`; a mismatch or missing baseline writes `<name>.received.<ext>` for review and fails, and `approve_artifact` promotes a reviewed received file to the baseline. Complements pixel diffing with a review-gated baseline you commit alongside the test; names are path-traversal-checked. + ## What's new (2026-06-19) — Network Egress Allowlist Guard Pin which hosts automation may reach. Full reference: [`docs/source/Eng/doc/new_features/v34_features_doc.rst`](docs/source/Eng/doc/new_features/v34_features_doc.rst). diff --git a/README/README_zh-CN.md b/README/README_zh-CN.md index 8519e03a..aa380069 100644 --- a/README/README_zh-CN.md +++ b/README/README_zh-CN.md @@ -12,6 +12,7 @@ ## 目录 +- [本次更新 (2026-06-19) — 核准式测试(Golden-Master 基准)](#本次更新-2026-06-19--核准式测试golden-master-基准) - [本次更新 (2026-06-19) — 网络出口允许清单守卫](#本次更新-2026-06-19--网络出口允许清单守卫) - [本次更新 (2026-06-19) — 即时凭证租约](#本次更新-2026-06-19--即时凭证租约) - [本次更新 (2026-06-19) — Maker-Checker 审批闸门](#本次更新-2026-06-19--maker-checker-审批闸门) @@ -86,6 +87,12 @@ --- +## 本次更新 (2026-06-19) — 核准式测试(Golden-Master 基准) + +将输出锁定到人工核准的基准。完整参考:[`docs/source/Zh/doc/new_features/v35_features_doc.rst`](../docs/source/Zh/doc/new_features/v35_features_doc.rst)。 + +- **`verify_artifact` / `approve_artifact`**(`AC_verify_artifact` / `AC_approve_artifact` / `AC_pending_artifacts`、`ac_*`):对*任何*产物(文本、JSON、OCR 输出、屏幕截图字节)进行 golden-master / snapshot 测试。`verify_artifact` 将产出内容与 `<name>.approved.<ext>` 比对;不符或缺少基准会写入 `<name>.received.<ext>` 供审查并失败,`approve_artifact` 则将审查后的 received 文件晋升为基准。以与测试一起提交、受审查把关的基准补强逐像素比对;名称会经过路径穿越检查。 + ## 
本次更新 (2026-06-19) — 网络出口允许清单守卫 钉选自动化可连线的主机。完整参考:[`docs/source/Zh/doc/new_features/v34_features_doc.rst`](../docs/source/Zh/doc/new_features/v34_features_doc.rst)。 diff --git a/README/README_zh-TW.md b/README/README_zh-TW.md index 1bd0376a..a59dc4f3 100644 --- a/README/README_zh-TW.md +++ b/README/README_zh-TW.md @@ -12,6 +12,7 @@ ## 目錄 +- [本次更新 (2026-06-19) — 核准式測試(Golden-Master 基準)](#本次更新-2026-06-19--核准式測試golden-master-基準) - [本次更新 (2026-06-19) — 網路出口允許清單守衛](#本次更新-2026-06-19--網路出口允許清單守衛) - [本次更新 (2026-06-19) — 即時憑證租約](#本次更新-2026-06-19--即時憑證租約) - [本次更新 (2026-06-19) — Maker-Checker 審批閘門](#本次更新-2026-06-19--maker-checker-審批閘門) @@ -86,6 +87,12 @@ --- +## 本次更新 (2026-06-19) — 核准式測試(Golden-Master 基準) + +將輸出鎖定到人工核准的基準。完整參考:[`docs/source/Zh/doc/new_features/v35_features_doc.rst`](../docs/source/Zh/doc/new_features/v35_features_doc.rst)。 + +- **`verify_artifact` / `approve_artifact`**(`AC_verify_artifact` / `AC_approve_artifact` / `AC_pending_artifacts`、`ac_*`):對*任何*產物(文字、JSON、OCR 輸出、螢幕截圖位元組)進行 golden-master / snapshot 測試。`verify_artifact` 將產出內容與 `<name>.approved.<ext>` 比對;不符或缺少基準會寫入 `<name>.received.<ext>` 供審查並失敗,`approve_artifact` 則將審查後的 received 檔晉升為基準。以與測試一起提交、受審查把關的基準補強逐像素比對;名稱會經過路徑穿越檢查。 + ## 本次更新 (2026-06-19) — 網路出口允許清單守衛 釘選自動化可連線的主機。完整參考:[`docs/source/Zh/doc/new_features/v34_features_doc.rst`](../docs/source/Zh/doc/new_features/v34_features_doc.rst)。 diff --git a/docs/source/Eng/doc/new_features/v35_features_doc.rst b/docs/source/Eng/doc/new_features/v35_features_doc.rst new file mode 100644 index 00000000..42557b8e --- /dev/null +++ b/docs/source/Eng/doc/new_features/v35_features_doc.rst @@ -0,0 +1,52 @@ +Approval Testing (Golden-Master Baselines) +========================================== + +Approval testing (a.k.a. golden-master / snapshot testing) reframes "is this +output still correct?" as "does it still match the version a human approved?". 
+:func:`verify_artifact` compares produced content to a stored +``<name>.approved.<ext>`` baseline: + +* **match** → the check passes; +* **mismatch or missing baseline** → the produced bytes are written to + ``<name>.received.<ext>`` and the check fails, so a reviewer can diff the two + and, if the change is intended, promote it with :func:`approve_artifact`. + +It works for *any* artifact — rendered text, JSON, OCR output, screenshot bytes +— so it complements pixel diffing with a review-gated baseline you commit +alongside the test. Pure standard library; imports no ``PySide6``. Names are +validated against path traversal. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import verify_artifact, approve_artifact + + result = verify_artifact("invoice_render", produced_text, + approvals_dir="tests/.approvals") + if not result.match: + # first run is "new", a changed output is "mismatch"; review the + # .received file, then bless it: + approve_artifact("invoice_render", approvals_dir="tests/.approvals") + +``content`` may be ``str`` or ``bytes`` (pass ``extension="png"`` for binary +snapshots). A verified run clears any stale received file. +``pending_artifacts(dir)`` lists names still awaiting approval. ``ApprovalResult`` +carries ``status`` (``verified`` / ``mismatch`` / ``new``), ``match``, and both +file paths. + +Executor commands +----------------- + +================================ =================================================== +Command Effect +================================ =================================================== +``AC_verify_artifact`` Compare ``content`` to the approved baseline. +``AC_approve_artifact`` Promote the received artifact to the baseline. +``AC_pending_artifacts`` List artifacts awaiting approval. 
+================================ =================================================== + +The same operations are exposed as MCP tools (``ac_verify_artifact`` / +``ac_approve_artifact`` / ``ac_pending_artifacts``) and as Script Builder +commands under **Testing**. diff --git a/docs/source/Eng/eng_index.rst b/docs/source/Eng/eng_index.rst index ecbb2a4c..2e4fe404 100644 --- a/docs/source/Eng/eng_index.rst +++ b/docs/source/Eng/eng_index.rst @@ -57,6 +57,7 @@ Comprehensive guides for all AutoControl features. doc/new_features/v32_features_doc doc/new_features/v33_features_doc doc/new_features/v34_features_doc + doc/new_features/v35_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/docs/source/Zh/doc/new_features/v35_features_doc.rst b/docs/source/Zh/doc/new_features/v35_features_doc.rst new file mode 100644 index 00000000..348cfd67 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v35_features_doc.rst @@ -0,0 +1,46 @@ +核准式測試(Golden-Master 基準) +================================ + +核准式測試(又稱 golden-master / snapshot 測試)把「這個輸出還正確嗎?」重新表述為 +「它是否仍與人工核准過的版本相符?」。:func:`verify_artifact` 將產出的內容與儲存的 +``<name>.approved.<ext>`` 基準比對: + +* **相符** → 檢查通過; +* **不符或缺少基準** → 產出的位元組會被寫入 ``<name>.received.<ext>`` 且檢查失敗,讓 + 審查者可比對兩者,若變更為預期,即以 :func:`approve_artifact` 晉升。 + +它適用於*任何*產物 —— 渲染後的文字、JSON、OCR 輸出、螢幕截圖位元組 —— 因此以一個受 +審查把關、與測試一起提交的基準,補強逐像素比對。純標準函式庫,不匯入 ``PySide6``。 +名稱會經過路徑穿越驗證。 + +無頭 API +-------- + +.. 
code-block:: python + + from je_auto_control import verify_artifact, approve_artifact + + result = verify_artifact("invoice_render", produced_text, + approvals_dir="tests/.approvals") + if not result.match: + # 首次執行為 "new",輸出變更為 "mismatch";審查 .received 檔後再核可: + approve_artifact("invoice_render", approvals_dir="tests/.approvals") + +``content`` 可為 ``str`` 或 ``bytes``(二進位快照請傳 ``extension="png"``)。相符的執 +行會清除任何過期的 received 檔。``pending_artifacts(dir)`` 列出仍待核准的名稱。 +``ApprovalResult`` 帶有 ``status``(``verified`` / ``mismatch`` / ``new``)、 +``match`` 及兩個檔案路徑。 + +執行器指令 +---------- + +================================ =================================================== +指令 效果 +================================ =================================================== +``AC_verify_artifact`` 將 ``content`` 與已核准基準比對。 +``AC_approve_artifact`` 將 received 產物晉升為基準。 +``AC_pending_artifacts`` 列出待核准的產物。 +================================ =================================================== + +相同操作亦提供為 MCP 工具(``ac_verify_artifact`` / ``ac_approve_artifact`` / +``ac_pending_artifacts``),以及 Script Builder 中 **Testing** 分類下的指令。 diff --git a/docs/source/Zh/zh_index.rst b/docs/source/Zh/zh_index.rst index 1b957c0a..97015308 100644 --- a/docs/source/Zh/zh_index.rst +++ b/docs/source/Zh/zh_index.rst @@ -57,6 +57,7 @@ AutoControl 所有功能的完整使用指南。 doc/new_features/v32_features_doc doc/new_features/v33_features_doc doc/new_features/v34_features_doc + doc/new_features/v35_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 2c1684e5..6db8b058 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -212,6 +212,10 @@ from je_auto_control.utils.egress import ( EgressBlocked, EgressPolicy, get_egress_policy, set_egress_policy, ) +# Approval testing: verify artifacts against a human-approved baseline +from je_auto_control.utils.approval import ( + 
ApprovalResult, approve_artifact, pending_artifacts, verify_artifact, +) # Background popup/interrupt watchdog (unattended automation) from je_auto_control.utils.watchdog import ( PopupWatchdog, WatchdogRule, default_popup_watchdog, @@ -650,6 +654,8 @@ def start_autocontrol_gui(*args, **kwargs): "ApprovalGate", "CredentialBroker", "CredentialBrokerError", "default_broker", "set_secret_resolver", "EgressBlocked", "EgressPolicy", "get_egress_policy", "set_egress_policy", + "ApprovalResult", "approve_artifact", "pending_artifacts", + "verify_artifact", # MCP server "AuditLogger", "HttpMCPServer", "MCPContent", "MCPPrompt", "MCPPromptArgument", "MCPResource", "MCPServer", "MCPTool", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index f9bfe82e..60ba6b0f 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -772,6 +772,35 @@ def _add_misc_specs(specs: List[CommandSpec]) -> None: fields=(), description="Clear the egress policy back to allow-all.", )) + specs.append(CommandSpec( + "AC_verify_artifact", "Testing", "Approval: Verify Artifact", + fields=( + FieldSpec("name", FieldType.STRING, placeholder="login_screen"), + FieldSpec("content", FieldType.STRING), + FieldSpec("approvals_dir", FieldType.STRING, optional=True, + default=".approvals"), + FieldSpec("extension", FieldType.STRING, optional=True, + default="txt"), + ), + description="Compare content to its approved baseline (snapshot test).", + )) + specs.append(CommandSpec( + "AC_approve_artifact", "Testing", "Approval: Promote Received", + fields=( + FieldSpec("name", FieldType.STRING), + FieldSpec("approvals_dir", FieldType.STRING, optional=True, + default=".approvals"), + FieldSpec("extension", FieldType.STRING, optional=True, + default="txt"), + ), + description="Promote a received artifact to the approved baseline.", + )) + specs.append(CommandSpec( + 
"AC_pending_artifacts", "Testing", "Approval: List Pending", + fields=(FieldSpec("approvals_dir", FieldType.STRING, optional=True, + default=".approvals"),), + description="List artifacts awaiting approval.", + )) specs.append(CommandSpec( "AC_generate_sop", "Report", "Generate SOP Document", fields=( diff --git a/je_auto_control/utils/approval/__init__.py b/je_auto_control/utils/approval/__init__.py new file mode 100644 index 00000000..20981a5e --- /dev/null +++ b/je_auto_control/utils/approval/__init__.py @@ -0,0 +1,9 @@ +"""Approval testing: verify artifacts against an approved baseline.""" +from je_auto_control.utils.approval.approval_test import ( + ApprovalResult, approve_artifact, pending_artifacts, verify_artifact, +) + +__all__ = [ + "ApprovalResult", "approve_artifact", "pending_artifacts", + "verify_artifact", +] diff --git a/je_auto_control/utils/approval/approval_test.py b/je_auto_control/utils/approval/approval_test.py new file mode 100644 index 00000000..869554f4 --- /dev/null +++ b/je_auto_control/utils/approval/approval_test.py @@ -0,0 +1,93 @@ +"""Approval testing — lock an artifact against a human-approved baseline. + +The approval-testing workflow (a.k.a. golden-master / snapshot testing) turns +"is this output still correct?" into "does this output still match the version +a human approved?". :func:`verify_artifact` compares produced ``content`` to a +stored ``.approved.`` baseline: + +* match → the check passes; +* mismatch or missing baseline → the produced bytes are written to + ``.received.`` and the check fails, so a reviewer can diff the two + and, if the change is intended, promote it with :func:`approve_artifact`. + +It works for any artifact — rendered text, JSON, OCR output, screenshot bytes — +complementing pixel diffing with a review-gated baseline. Pure standard +library; imports no ``PySide6``. 
+""" +import os +from dataclasses import dataclass +from pathlib import Path +from typing import List, Union + +DEFAULT_DIR = ".approvals" + + +@dataclass(frozen=True) +class ApprovalResult: + """Outcome of :func:`verify_artifact`.""" + + name: str + status: str # "verified" | "mismatch" | "new" + match: bool + approved_path: str + received_path: str + + +def _safe_name(name: str) -> str: + """Reject path-traversal in ``name`` and return it unchanged if safe.""" + if not name or name != os.path.basename(name) or name in (".", ".."): + raise ValueError(f"unsafe approval name: {name!r}") + return name + + +def _paths(name: str, approvals_dir: str, extension: str): + base = Path(approvals_dir) + ext = extension.lstrip(".") + return (base / f"{_safe_name(name)}.approved.{ext}", + base / f"{name}.received.{ext}") + + +def _as_bytes(content: Union[str, bytes]) -> bytes: + return content.encode("utf-8") if isinstance(content, str) else bytes(content) + + +def verify_artifact(name: str, content: Union[str, bytes], + approvals_dir: str = DEFAULT_DIR, + extension: str = "txt") -> ApprovalResult: + """Compare ``content`` to the approved baseline for ``name``. + + On match the received file is cleared and ``match`` is ``True``; otherwise + the produced bytes are written to the received file for review. 
+ """ + approved, received = _paths(name, approvals_dir, extension) + produced = _as_bytes(content) + if approved.is_file() and approved.read_bytes() == produced: + if received.is_file(): + received.unlink() + return ApprovalResult(name, "verified", True, + str(approved), str(received)) + received.parent.mkdir(parents=True, exist_ok=True) + received.write_bytes(produced) + status = "mismatch" if approved.is_file() else "new" + return ApprovalResult(name, status, False, str(approved), str(received)) + + +def approve_artifact(name: str, approvals_dir: str = DEFAULT_DIR, + extension: str = "txt") -> str: + """Promote the received artifact for ``name`` to be the approved baseline.""" + approved, received = _paths(name, approvals_dir, extension) + if not received.is_file(): + raise FileNotFoundError( + f"no received artifact to approve for {name!r}") + os.replace(received, approved) + return str(approved) + + +def pending_artifacts(approvals_dir: str = DEFAULT_DIR) -> List[str]: + """Return the names of artifacts with a received file awaiting approval.""" + base = Path(approvals_dir) + if not base.is_dir(): + return [] + names = [path.name.split(".received.", 1)[0] + for path in base.glob("*.received.*")] + return sorted(names) diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index 0dd0f7b2..d4bce220 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -2967,6 +2967,30 @@ def _egress_reset() -> Dict[str, Any]: return {"allow": None, "deny": []} +def _verify_artifact(name: str, content: Any, + approvals_dir: str = ".approvals", + extension: str = "txt") -> Dict[str, Any]: + """Adapter: verify an artifact against its approved baseline.""" + from je_auto_control.utils.approval import verify_artifact + result = verify_artifact(name, content, approvals_dir, extension) + return {"status": result.status, "match": result.match, + 
"approved_path": result.approved_path, + "received_path": result.received_path} + + +def _approve_artifact(name: str, approvals_dir: str = ".approvals", + extension: str = "txt") -> Dict[str, Any]: + """Adapter: promote a received artifact to the approved baseline.""" + from je_auto_control.utils.approval import approve_artifact + return {"approved": approve_artifact(name, approvals_dir, extension)} + + +def _pending_artifacts(approvals_dir: str = ".approvals") -> Dict[str, Any]: + """Adapter: list artifacts awaiting approval.""" + from je_auto_control.utils.approval import pending_artifacts + return {"pending": pending_artifacts(approvals_dir)} + + class Executor: """ Executor @@ -3209,6 +3233,9 @@ def __init__(self): "AC_egress_allow": _egress_allow, "AC_egress_check": _egress_check, "AC_egress_reset": _egress_reset, + "AC_verify_artifact": _verify_artifact, + "AC_approve_artifact": _approve_artifact, + "AC_pending_artifacts": _pending_artifacts, "AC_a11y_record_start": _a11y_record_start, "AC_a11y_record_stop": _a11y_record_stop, "AC_a11y_record_events": _a11y_record_events, diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index fd5147ca..96cad3d4 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -2692,6 +2692,43 @@ def egress_tools() -> List[MCPTool]: ] +def approval_testing_tools() -> List[MCPTool]: + _ND = {"name": {"type": "string"}, + "approvals_dir": {"type": "string"}, + "extension": {"type": "string"}} + return [ + MCPTool( + name="ac_verify_artifact", + description=("Approval testing: compare produced 'content' (text) " + "to the approved baseline .approved. under " + "'approvals_dir'. On mismatch/new, the content is " + "written to .received. for review. 
Returns " + "{status (verified/mismatch/new), match, " + "approved_path, received_path}."), + input_schema=schema({**_ND, "content": {"type": "string"}}, + ["name", "content"]), + handler=h.verify_artifact, + annotations=SIDE_EFFECT_ONLY, + ), + MCPTool( + name="ac_approve_artifact", + description=("Promote the received artifact for 'name' to be the " + "approved baseline. Returns {approved} path."), + input_schema=schema(dict(_ND), ["name"]), + handler=h.approve_artifact, + annotations=SIDE_EFFECT_ONLY, + ), + MCPTool( + name="ac_pending_artifacts", + description=("List artifact names with a received file awaiting " + "approval under 'approvals_dir'. Returns {pending}."), + input_schema=schema({"approvals_dir": {"type": "string"}}), + handler=h.pending_artifacts, + annotations=READ_ONLY, + ), + ] + + def unattended_tools() -> List[MCPTool]: return [ MCPTool( @@ -3750,7 +3787,7 @@ def media_assert_tools() -> List[MCPTool]: input_macro_tools, resilience_tools, ci_annotation_tools, clipboard_history_tools, audit_analysis_tools, process_doc_tools, tween_drag_tools, plugin_sdk_tools, governance_tools, - credential_lease_tools, egress_tools, + credential_lease_tools, egress_tools, approval_testing_tools, screen_record_tools, process_and_shell_tools, remote_desktop_tools, gamepad_tools, usb_passthrough_tools, assertion_tools, data_source_tools, diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index 24bc8cc8..e42b44bc 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -1305,6 +1305,26 @@ def egress_reset(): return {"allow": None, "deny": []} +def verify_artifact(name: str, content, approvals_dir: str = ".approvals", + extension: str = "txt"): + from je_auto_control.utils.approval import verify_artifact as _verify + result = _verify(name, content, approvals_dir, extension) + return {"status": result.status, "match": result.match, 
+ "approved_path": result.approved_path, + "received_path": result.received_path} + + +def approve_artifact(name: str, approvals_dir: str = ".approvals", + extension: str = "txt"): + from je_auto_control.utils.approval import approve_artifact as _approve + return {"approved": _approve(name, approvals_dir, extension)} + + +def pending_artifacts(approvals_dir: str = ".approvals"): + from je_auto_control.utils.approval import pending_artifacts as _pending + return {"pending": _pending(approvals_dir)} + + def vlm_locate(description: str, screen_region: Optional[List[int]] = None, model: Optional[str] = None) -> Optional[List[int]]: diff --git a/test/unit_test/headless/test_approval_testing_batch.py b/test/unit_test/headless/test_approval_testing_batch.py new file mode 100644 index 00000000..83be6cbf --- /dev/null +++ b/test/unit_test/headless/test_approval_testing_batch.py @@ -0,0 +1,104 @@ +"""Headless tests for approval testing (golden-master baselines). All files go +under a tmp dir; pure stdlib, no Qt imports.""" +import pytest + +import je_auto_control as ac +from je_auto_control.utils.approval import ( + approve_artifact, pending_artifacts, verify_artifact) + + +def test_first_run_is_new_and_writes_received(tmp_path): + d = str(tmp_path) + result = verify_artifact("greeting", "hello", approvals_dir=d) + assert result.status == "new" + assert result.match is False + assert pending_artifacts(d) == ["greeting"] + + +def test_approve_then_match(tmp_path): + d = str(tmp_path) + verify_artifact("greeting", "hello", approvals_dir=d) # writes received + approve_artifact("greeting", approvals_dir=d) # promote baseline + assert pending_artifacts(d) == [] # received cleared + result = verify_artifact("greeting", "hello", approvals_dir=d) + assert result.status == "verified" and result.match is True + + +def test_mismatch_after_baseline(tmp_path): + d = str(tmp_path) + verify_artifact("greeting", "hello", approvals_dir=d) + approve_artifact("greeting", approvals_dir=d) + 
result = verify_artifact("greeting", "HELLO", approvals_dir=d) + assert result.status == "mismatch" and result.match is False + assert pending_artifacts(d) == ["greeting"] # received re-written + + +def test_verified_clears_stale_received(tmp_path): + d = str(tmp_path) + verify_artifact("g", "v1", approvals_dir=d) + approve_artifact("g", approvals_dir=d) + verify_artifact("g", "DIFF", approvals_dir=d) # leaves a received + assert pending_artifacts(d) == ["g"] + verify_artifact("g", "v1", approvals_dir=d) # matches again + assert pending_artifacts(d) == [] # received removed + + +def test_bytes_content_supported(tmp_path): + d = str(tmp_path) + blob = b"\x89PNG\r\n" + verify_artifact("img", blob, approvals_dir=d, extension="png") + approve_artifact("img", approvals_dir=d, extension="png") + assert verify_artifact("img", blob, approvals_dir=d, + extension="png").match is True + + +def test_approve_without_received_raises(tmp_path): + with pytest.raises(FileNotFoundError): + approve_artifact("nope", approvals_dir=str(tmp_path)) + + +def test_path_traversal_rejected(tmp_path): + with pytest.raises(ValueError): + verify_artifact("../escape", "x", approvals_dir=str(tmp_path)) + + +# --- wiring --------------------------------------------------------------- + +def test_executor_round_trip(tmp_path): + d = str(tmp_path) + rec = ac.execute_action([ + ["AC_verify_artifact", {"name": "a", "content": "x", + "approvals_dir": d}], + ]) + assert any(v.get("status") == "new" for v in rec.values() + if isinstance(v, dict)) + ac.execute_action([["AC_approve_artifact", {"name": "a", + "approvals_dir": d}]]) + rec2 = ac.execute_action([ + ["AC_verify_artifact", {"name": "a", "content": "x", + "approvals_dir": d}], + ]) + assert any(v.get("match") is True for v in rec2.values() + if isinstance(v, dict)) + + +def test_wiring(): + known = ac.executor.known_commands() + assert {"AC_verify_artifact", "AC_approve_artifact", + "AC_pending_artifacts"} <= known + from 
je_auto_control.utils.mcp_server.tools import ( + build_default_tool_registry) + names = {t.name for t in build_default_tool_registry()} + assert {"ac_verify_artifact", "ac_approve_artifact", + "ac_pending_artifacts"} <= names + from je_auto_control.gui.script_builder.command_schema import _build_specs + cmds = {s.command for s in _build_specs()} + assert {"AC_verify_artifact", "AC_approve_artifact", + "AC_pending_artifacts"} <= cmds + + +def test_facade_exports(): + for attr in ("verify_artifact", "approve_artifact", "pending_artifacts", + "ApprovalResult"): + assert hasattr(ac, attr) + assert attr in ac.__all__