From 5920f710b3e8303b9bbf59d66ecc1462ffa93584 Mon Sep 17 00:00:00 2001
From: JeffreyChen <zenxcvwait@gmail.com>
Date: Fri, 19 Jun 2026 23:34:48 +0800
Subject: [PATCH] Add video step-overlay walkthrough report

---
 README.md                                     |   7 +
 README/README_zh-CN.md                        |   7 +
 README/README_zh-TW.md                        |   7 +
 .../Eng/doc/new_features/v39_features_doc.rst |  44 +++++++
 docs/source/Eng/eng_index.rst                 |   1 +
 .../Zh/doc/new_features/v39_features_doc.rst  |  39 ++++++
 docs/source/Zh/zh_index.rst                   |   1 +
 je_auto_control/__init__.py                   |   6 +
 .../gui/script_builder/command_schema.py      |  12 ++
 .../utils/executor/action_executor.py         |  12 ++
 .../utils/mcp_server/tools/_factories.py      |  23 ++++
 .../utils/mcp_server/tools/_handlers.py       |   6 +
 .../utils/video_report/__init__.py            |   9 ++
 .../utils/video_report/video_report.py        | 124 ++++++++++++++++++
 .../headless/test_video_report_batch.py       | 122 +++++++++++++++++
 15 files changed, 420 insertions(+)
 create mode 100644 docs/source/Eng/doc/new_features/v39_features_doc.rst
 create mode 100644 docs/source/Zh/doc/new_features/v39_features_doc.rst
 create mode 100644 je_auto_control/utils/video_report/__init__.py
 create mode 100644 je_auto_control/utils/video_report/video_report.py
 create mode 100644 test/unit_test/headless/test_video_report_batch.py

diff --git a/README.md b/README.md
index 6f325ea..154ca94 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@
 
 ## Table of Contents
 
+- [What's new (2026-06-19) — Video Step-Overlay Report](#whats-new-2026-06-19--video-step-overlay-report)
 - [What's new (2026-06-19) — Agent Observability (GenAI OpenTelemetry Spans)](#whats-new-2026-06-19--agent-observability-genai-opentelemetry-spans)
 - [What's new (2026-06-19) — Compliance Control Report (SOC2 / ISO 27001)](#whats-new-2026-06-19--compliance-control-report-soc2--iso-27001)
 - [What's new (2026-06-19) — Agent Trajectory Evaluation](#whats-new-2026-06-19--agent-trajectory-evaluation)
@@ -91,6 +92,12 @@
 
 ---
 
+## What's new (2026-06-19) — Video Step-Overlay Report
+
+Caption screenshots into a walkthrough video. Full reference: [`docs/source/Eng/doc/new_features/v39_features_doc.rst`](docs/source/Eng/doc/new_features/v39_features_doc.rst).
+
+- **`write_step_video`** (`AC_write_step_video`, `ac_write_step_video`): turns per-step screenshots into a shareable video where each frame is held for a few seconds with its caption and a pass/fail colour banner burned in. The assembly logic (`build_overlay_plan` / `render_overlay_frame`) is separated from OpenCV via injectable `loader`/`drawer`/`writer_factory` hooks — unit-testable with fakes and no `cv2`/`numpy` dependency; the real path lazily imports `cv2` only when those hooks are absent. The visual companion to the HTML/JSON reports.
+
 ## What's new (2026-06-19) — Agent Observability (GenAI OpenTelemetry Spans)
 
 OTel GenAI-convention spans for LLM runs. Full reference: [`docs/source/Eng/doc/new_features/v38_features_doc.rst`](docs/source/Eng/doc/new_features/v38_features_doc.rst).
diff --git a/README/README_zh-CN.md b/README/README_zh-CN.md
index df1578b..4888eb5 100644
--- a/README/README_zh-CN.md
+++ b/README/README_zh-CN.md
@@ -12,6 +12,7 @@
 
 ## 目录
 
+- [本次更新 (2026-06-19) — 视频步骤叠加报告](#本次更新-2026-06-19--视频步骤叠加报告)
 - [本次更新 (2026-06-19) — Agent 可观测性(GenAI OpenTelemetry Spans)](#本次更新-2026-06-19--agent-可观测性genai-opentelemetry-spans)
 - [本次更新 (2026-06-19) — 合规控制报告(SOC2 / ISO 27001)](#本次更新-2026-06-19--合规控制报告soc2--iso-27001)
 - [本次更新 (2026-06-19) — Agent 轨迹评估](#本次更新-2026-06-19--agent-轨迹评估)
@@ -90,6 +91,12 @@
 
 ---
 
+## 本次更新 (2026-06-19) — 视频步骤叠加报告
+
+将屏幕截图加上字幕制成走查视频。完整参考:[`docs/source/Zh/doc/new_features/v39_features_doc.rst`](../docs/source/Zh/doc/new_features/v39_features_doc.rst)。
+
+- **`write_step_video`**(`AC_write_step_video`、`ac_write_step_video`):将各步骤的屏幕截图转成可分享的视频,每个画面停留数秒并烧入其字幕与通过/失败色彩横幅。组装逻辑(`build_overlay_plan` / `render_overlay_frame`)通过可注入的 `loader`/`drawer`/`writer_factory` 钩子与 OpenCV 分离 —— 可用假对象进行单元测试、无 `cv2`/`numpy` 依赖;真实路径仅在缺少这些钩子时才延迟导入 `cv2`。它是 HTML/JSON 报告的视觉伙伴。
+
 ## 本次更新 (2026-06-19) — Agent 可观测性(GenAI OpenTelemetry Spans)
 
 LLM 运行的 OTel GenAI 惯例 spans。完整参考:[`docs/source/Zh/doc/new_features/v38_features_doc.rst`](../docs/source/Zh/doc/new_features/v38_features_doc.rst)。
diff --git a/README/README_zh-TW.md b/README/README_zh-TW.md
index b2dd44e..6a158c5 100644
--- a/README/README_zh-TW.md
+++ b/README/README_zh-TW.md
@@ -12,6 +12,7 @@
 
 ## 目錄
 
+- [本次更新 (2026-06-19) — 影片步驟疊加報告](#本次更新-2026-06-19--影片步驟疊加報告)
 - [本次更新 (2026-06-19) — Agent 可觀測性(GenAI OpenTelemetry Spans)](#本次更新-2026-06-19--agent-可觀測性genai-opentelemetry-spans)
 - [本次更新 (2026-06-19) — 合規控制報告(SOC2 / ISO 27001)](#本次更新-2026-06-19--合規控制報告soc2--iso-27001)
 - [本次更新 (2026-06-19) — Agent 軌跡評估](#本次更新-2026-06-19--agent-軌跡評估)
@@ -90,6 +91,12 @@
 
 ---
 
+## 本次更新 (2026-06-19) — 影片步驟疊加報告
+
+將螢幕截圖加上字幕製成走查影片。完整參考:[`docs/source/Zh/doc/new_features/v39_features_doc.rst`](../docs/source/Zh/doc/new_features/v39_features_doc.rst)。
+
+- **`write_step_video`**(`AC_write_step_video`、`ac_write_step_video`):將各步驟的螢幕截圖轉成可分享的影片,每個畫面停留數秒並燒入其字幕與通過/失敗色彩橫幅。組裝邏輯(`build_overlay_plan` / `render_overlay_frame`)透過可注入的 `loader`/`drawer`/`writer_factory` 掛鉤與 OpenCV 分離 —— 可用假物件單元測試、無 `cv2`/`numpy` 相依;真實路徑僅在缺少這些掛鉤時才延遲匯入 `cv2`。為 HTML/JSON 報告的視覺夥伴。
+
 ## 本次更新 (2026-06-19) — Agent 可觀測性(GenAI OpenTelemetry Spans)
 
 LLM 執行的 OTel GenAI 慣例 spans。完整參考:[`docs/source/Zh/doc/new_features/v38_features_doc.rst`](../docs/source/Zh/doc/new_features/v38_features_doc.rst)。
diff --git a/docs/source/Eng/doc/new_features/v39_features_doc.rst b/docs/source/Eng/doc/new_features/v39_features_doc.rst
new file mode 100644
index 0000000..1f99510
--- /dev/null
+++ b/docs/source/Eng/doc/new_features/v39_features_doc.rst
@@ -0,0 +1,44 @@
+Video Step-Overlay Report
+=========================
+
+A run already produces per-step screenshots; :func:`write_step_video` turns them
+into a shareable walkthrough video where each step's frame is held for a few
+seconds with its caption — and a pass/fail colour banner — burned in. It is the
+visual companion to the HTML/JSON reports: a reviewer watches what the automation
+did, step by step.
+
+The orchestration (which frames, how many repeats per step, which caption) is
+separated from OpenCV: the ``loader``, ``drawer``, and ``writer_factory`` hooks
+are injectable, so the assembly logic is unit-testable with fakes and **no**
+``cv2`` / ``numpy`` dependency. The real path lazily imports ``cv2`` only when
+those hooks are not supplied. Imports no ``PySide6``.
+
+Headless API
+------------
+
+.. code-block:: python
+
+    from je_auto_control import VideoStep, write_step_video
+
+    steps = [
+        VideoStep("step1.png", caption="Open the app", status="ok"),
+        VideoStep("step2.png", caption="Submit the form", status="error"),
+    ]
+    result = write_step_video(steps, "walkthrough.mp4",
+                              fps=10, seconds_per_step=2.5)
+    print(result)   # {output, steps, fps, frame_count}
+
+A step's ``image`` may be a file path (read with ``cv2.imread``) or an in-memory
+frame. ``status`` of ``ok`` / ``error`` colours the caption banner green / red.
+``build_overlay_plan(steps, fps, seconds_per_step)`` returns the per-step frame
+plan without any I/O, and ``render_overlay_frame(frame, caption, status)`` burns a
+single banner — both useful on their own.
+
+Executor command
+----------------
+
+``AC_write_step_video`` takes ``steps`` (a list of ``{image, caption, status}``,
+or a JSON string from the visual builder), an ``output`` path, and optional
+``fps`` / ``seconds_per_step``; it returns ``{output, steps, fps, frame_count}``.
+The same operation is exposed as the MCP tool ``ac_write_step_video`` and as a
+Script Builder command under **Report**.
diff --git a/docs/source/Eng/eng_index.rst b/docs/source/Eng/eng_index.rst
index 34cf42d..dafd14d 100644
--- a/docs/source/Eng/eng_index.rst
+++ b/docs/source/Eng/eng_index.rst
@@ -61,6 +61,7 @@ Comprehensive guides for all AutoControl features.
    doc/new_features/v36_features_doc
    doc/new_features/v37_features_doc
    doc/new_features/v38_features_doc
+   doc/new_features/v39_features_doc
    doc/ocr_backends/ocr_backends_doc
    doc/observability/observability_doc
    doc/operations_layer/operations_layer_doc
diff --git a/docs/source/Zh/doc/new_features/v39_features_doc.rst b/docs/source/Zh/doc/new_features/v39_features_doc.rst
new file mode 100644
index 0000000..78f8c76
--- /dev/null
+++ b/docs/source/Zh/doc/new_features/v39_features_doc.rst
@@ -0,0 +1,39 @@
+影片步驟疊加報告
+================
+
+一次執行已產生各步驟的螢幕截圖;:func:`write_step_video` 將它們轉成可分享的逐步走查影
+片,每個步驟的畫面停留數秒,並燒入其字幕 —— 以及通過/失敗的色彩橫幅。它是 HTML/JSON
+報告的視覺夥伴:審查者可逐步觀看自動化做了什麼。
+
+其編排(哪些畫面、每步重複幾幀、哪段字幕)與 OpenCV 分離:``loader``、``drawer`` 與
+``writer_factory`` 三個掛鉤皆可注入,因此組裝邏輯可用假物件進行單元測試,**無需**
+``cv2`` / ``numpy`` 相依。真實路徑僅在未提供這些掛鉤時才延遲匯入 ``cv2``。不匯入
+``PySide6``。
+
+無頭 API
+--------
+
+.. code-block:: python
+
+    from je_auto_control import VideoStep, write_step_video
+
+    steps = [
+        VideoStep("step1.png", caption="開啟應用", status="ok"),
+        VideoStep("step2.png", caption="送出表單", status="error"),
+    ]
+    result = write_step_video(steps, "walkthrough.mp4",
+                              fps=10, seconds_per_step=2.5)
+    print(result)   # {output, steps, fps, frame_count}
+
+步驟的 ``image`` 可為檔案路徑(以 ``cv2.imread`` 讀取)或記憶體中的畫面。``status`` 為
+``ok`` / ``error`` 會將字幕橫幅著色為綠 / 紅。``build_overlay_plan(steps, fps,
+seconds_per_step)`` 回傳各步驟的幀計畫而不進行任何 I/O,``render_overlay_frame(frame,
+caption, status)`` 則燒入單一橫幅 —— 兩者皆可單獨使用。
+
+執行器指令
+----------
+
+``AC_write_step_video`` 接受 ``steps``(``{image, caption, status}`` 的清單,或視覺化
+建構器傳入的 JSON 字串)、``output`` 路徑,以及選用的 ``fps`` / ``seconds_per_step``;
+回傳 ``{output, steps, fps, frame_count}``。相同操作亦提供為 MCP 工具
+``ac_write_step_video``,以及 Script Builder 中 **Report** 分類下的指令。
diff --git a/docs/source/Zh/zh_index.rst b/docs/source/Zh/zh_index.rst
index 9e8854c..b9a8ccb 100644
--- a/docs/source/Zh/zh_index.rst
+++ b/docs/source/Zh/zh_index.rst
@@ -61,6 +61,7 @@ AutoControl 所有功能的完整使用指南。
    doc/new_features/v36_features_doc
    doc/new_features/v37_features_doc
    doc/new_features/v38_features_doc
+   doc/new_features/v39_features_doc
    doc/ocr_backends/ocr_backends_doc
    doc/observability/observability_doc
    doc/operations_layer/operations_layer_doc
diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py
index 92b543a..e8f8ead 100644
--- a/je_auto_control/__init__.py
+++ b/je_auto_control/__init__.py
@@ -226,6 +226,10 @@
 from je_auto_control.utils.agent_trace import (
     AgentTrace, default_trace, reset_trace,
 )
+# Video step-overlay report: caption screenshots into a walkthrough video
+from je_auto_control.utils.video_report import (
+    VideoStep, build_overlay_plan, render_overlay_frame, write_step_video,
+)
 # Background popup/interrupt watchdog (unattended automation)
 from je_auto_control.utils.watchdog import (
     PopupWatchdog, WatchdogRule, default_popup_watchdog,
@@ -670,6 +674,8 @@ def start_autocontrol_gui(*args, **kwargs):
     "build_compliance_report", "render_compliance_html",
     "write_compliance_report",
     "AgentTrace", "default_trace", "reset_trace",
+    "VideoStep", "build_overlay_plan", "render_overlay_frame",
+    "write_step_video",
     # MCP server
     "AuditLogger", "HttpMCPServer", "MCPContent", "MCPPrompt",
     "MCPPromptArgument", "MCPResource", "MCPServer", "MCPTool",
diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py
index fd2eef9..a12ef96 100644
--- a/je_auto_control/gui/script_builder/command_schema.py
+++ b/je_auto_control/gui/script_builder/command_schema.py
@@ -855,6 +855,18 @@ def _add_misc_specs(specs: List[CommandSpec]) -> None:
         fields=(),
         description="Clear the default agent trace.",
     ))
+    specs.append(CommandSpec(
+        "AC_write_step_video", "Report", "Step-Overlay Video",
+        fields=(
+            FieldSpec("steps", FieldType.STRING,
+                      placeholder='[{"image": "s1.png", "caption": "Step 1"}]'),
+            FieldSpec("output", FieldType.STRING, default="walkthrough.mp4"),
+            FieldSpec("fps", FieldType.INT, optional=True, default=10),
+            FieldSpec("seconds_per_step", FieldType.FLOAT, optional=True,
+                      default=2.0),
+        ),
+        description="Render captioned screenshots into a walkthrough video.",
+    ))
     specs.append(CommandSpec(
         "AC_generate_sop", "Report", "Generate SOP Document",
         fields=(
diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py
index fe1cc59..23f6555 100644
--- a/je_auto_control/utils/executor/action_executor.py
+++ b/je_auto_control/utils/executor/action_executor.py
@@ -3056,6 +3056,17 @@ def _trace_reset() -> Dict[str, Any]:
     return {"reset": True}
 
 
+def _write_step_video(steps: Any, output: str, fps: int = 10,
+                      seconds_per_step: float = 2.0) -> Dict[str, Any]:
+    """Adapter for ``write_step_video``; ``steps`` may be JSON text."""
+    import json
+    from je_auto_control.utils import video_report
+    # A string is decoded as JSON first; anything else passes through as is.
+    parsed = json.loads(steps) if isinstance(steps, str) else steps
+    return video_report.write_step_video(
+        parsed, output, fps=fps, seconds_per_step=seconds_per_step)
+
+
 class Executor:
     """
     Executor
@@ -3307,6 +3318,7 @@ def __init__(self):
             "AC_trace_summary": _trace_summary,
             "AC_trace_export": _trace_export,
             "AC_trace_reset": _trace_reset,
+            "AC_write_step_video": _write_step_video,
             "AC_a11y_record_start": _a11y_record_start,
             "AC_a11y_record_stop": _a11y_record_stop,
             "AC_a11y_record_events": _a11y_record_events,
diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py
index 451cfe1..091683f 100644
--- a/je_auto_control/utils/mcp_server/tools/_factories.py
+++ b/je_auto_control/utils/mcp_server/tools/_factories.py
@@ -2822,6 +2822,28 @@ def agent_trace_tools() -> List[MCPTool]:
     ]
 
 
+def video_report_tools() -> List[MCPTool]:
+    """Return the MCP tool that renders a captioned walkthrough video."""
+    step_video_schema = schema(
+        {"steps": {"type": "array", "items": {"type": "object"}},
+         "output": {"type": "string"},
+         "fps": {"type": "integer"},
+         "seconds_per_step": {"type": "number"}},
+        ["steps", "output"])
+    step_video_help = (
+        "Render captioned screenshots into a walkthrough video. 'steps' is "
+        "a list of {image (path), caption, status (ok/error)}; each frame "
+        "is held for 'seconds_per_step' at 'fps' with a caption banner "
+        "burned in. Writes 'output' (mp4/avi). Returns {output, steps, "
+        "fps, frame_count}.")
+    # A single tool, still returned as a list to match the declared type.
+    return [MCPTool(name="ac_write_step_video",
+                    description=step_video_help,
+                    input_schema=step_video_schema,
+                    handler=h.write_step_video,
+                    annotations=SIDE_EFFECT_ONLY)]
+
+
 def unattended_tools() -> List[MCPTool]:
     return [
         MCPTool(
@@ -3882,6 +3904,7 @@ def media_assert_tools() -> List[MCPTool]:
     process_doc_tools, tween_drag_tools, plugin_sdk_tools, governance_tools,
     credential_lease_tools, egress_tools, approval_testing_tools,
     trajectory_eval_tools, compliance_tools, agent_trace_tools,
+    video_report_tools,
     screen_record_tools,
     process_and_shell_tools, remote_desktop_tools, gamepad_tools,
     usb_passthrough_tools, assertion_tools, data_source_tools,
diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py
index 472cfdf..d099003 100644
--- a/je_auto_control/utils/mcp_server/tools/_handlers.py
+++ b/je_auto_control/utils/mcp_server/tools/_handlers.py
@@ -1366,6 +1366,12 @@ def trace_reset():
     return {"reset": True}
 
 
+def write_step_video(steps, output, fps=10, seconds_per_step=2.0):
+    """Render ``steps`` into a captioned walkthrough video at ``output``."""
+    from je_auto_control.utils.video_report import write_step_video as _wsv
+    return _wsv(steps, output, fps=fps, seconds_per_step=seconds_per_step)
+
+
 def vlm_locate(description: str,
                screen_region: Optional[List[int]] = None,
                model: Optional[str] = None) -> Optional[List[int]]:
diff --git a/je_auto_control/utils/video_report/__init__.py b/je_auto_control/utils/video_report/__init__.py
new file mode 100644
index 0000000..31cf1c6
--- /dev/null
+++ b/je_auto_control/utils/video_report/__init__.py
@@ -0,0 +1,9 @@
+"""Video step-overlay report: caption each screenshot into a walkthrough video."""
+from je_auto_control.utils.video_report.video_report import (
+    VideoStep, build_overlay_plan, render_overlay_frame, write_step_video,
+)
+
+__all__ = [
+    "VideoStep", "build_overlay_plan", "render_overlay_frame",
+    "write_step_video",
+]
diff --git a/je_auto_control/utils/video_report/video_report.py b/je_auto_control/utils/video_report/video_report.py
new file mode 100644
index 0000000..44f0181
--- /dev/null
+++ b/je_auto_control/utils/video_report/video_report.py
@@ -0,0 +1,207 @@
+"""Assemble captioned screenshots into a step-by-step walkthrough video.
+
+A run already produces per-step screenshots; this turns them into an MP4/AVI
+where each step's frame is held for a few seconds with its caption (and a
+pass/fail colour banner) burned in — a shareable visual report of what the
+automation did. The orchestration (which frames, how many repeats per step,
+which caption) is separated from OpenCV: ``loader`` / ``drawer`` /
+``writer_factory`` are injectable, so the assembly logic is unit-testable with
+fakes and **no** ``cv2``/``numpy`` dependency. The real path lazily imports
+``cv2`` only when those hooks are not supplied.
+
+Imports no ``PySide6``.
+"""
+import os
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, List, Optional, Sequence
+
+# Banner colours keyed by step status, as OpenCV BGR triples ("ok" is green,
+# "error" is red); the "" entry is the fallback for any other status.
+STATUS_COLORS = {
+    "ok": (40, 160, 40), "error": (40, 40, 200), "": (60, 60, 60),
+}
+
+
+@dataclass
+class VideoStep:
+    """One step: an image (path or frame) plus a caption and optional status."""
+
+    image: Any
+    caption: str = ""
+    status: str = ""
+
+
+def _text(value: Any) -> str:
+    """Return ``value`` as text, mapping ``None`` to an empty string."""
+    return "" if value is None else str(value)
+
+
+def _coerce_step(step: Any) -> VideoStep:
+    """Normalise ``step`` to a :class:`VideoStep`.
+
+    A ``VideoStep`` is returned unchanged. A dict is read through its
+    ``image`` / ``caption`` / ``status`` keys; caption and status are
+    stringified, and a missing or ``None`` value (e.g. a JSON ``null``)
+    becomes ``""`` so the drawer always receives text.
+
+    Raises:
+        TypeError: if ``step`` is neither a ``VideoStep`` nor a dict.
+    """
+    if isinstance(step, VideoStep):
+        return step
+    if isinstance(step, dict):
+        return VideoStep(step.get("image"), _text(step.get("caption")),
+                         _text(step.get("status")))
+    raise TypeError(f"unsupported step type: {type(step).__name__}")
+
+
+def build_overlay_plan(steps: Sequence[Any], fps: int = 10,
+                       seconds_per_step: float = 2.0) -> List[Dict[str, Any]]:
+    """Return per-step ``{caption, status, frames}`` (no I/O, no cv2).
+
+    ``frames`` is how many times the step's frame is written
+    (``round(fps * seconds_per_step)``, at least 1); every step gets the
+    same count.
+
+    Raises:
+        TypeError: if a step is neither a ``VideoStep`` nor a dict.
+    """
+    frames = max(1, round(fps * seconds_per_step))
+    return [{"caption": item.caption, "status": item.status, "frames": frames}
+            for item in map(_coerce_step, steps)]
+
+
+def _default_drawer(frame: Any, caption: str, status: str) -> Any:
+    """Paint the status banner and caption onto ``frame`` with OpenCV.
+
+    ``frame`` is modified in place and returned. The banner fills the bottom
+    60 pixels (the whole frame when it is shorter) in the ``STATUS_COLORS``
+    entry for ``status``, or the ``""`` entry when the status is unknown.
+
+    Note: ``cv2.putText`` draws with Hershey fonts and replaces characters
+    the font cannot render (non-ASCII captions, for instance) with ``?``.
+    """
+    import cv2
+    height, width = frame.shape[0], frame.shape[1]
+    color = STATUS_COLORS.get(status, STATUS_COLORS[""])
+    banner_top = max(0, height - 60)
+    cv2.rectangle(frame, (0, banner_top), (width, height), color, thickness=-1)
+    cv2.putText(frame, caption, (15, height - 20), cv2.FONT_HERSHEY_SIMPLEX,
+                0.7, (255, 255, 255), 2, cv2.LINE_AA)
+    return frame
+
+
+def render_overlay_frame(frame: Any, caption: str, status: str = "",
+                         drawer: Optional[Callable[..., Any]] = None) -> Any:
+    """Burn ``caption`` and a status banner onto ``frame`` and return it.
+
+    ``drawer(frame, caption, status)`` does the drawing; when it is omitted
+    the OpenCV drawer is used, which paints onto ``frame`` in place.
+    """
+    return (drawer or _default_drawer)(frame, caption, status)
+
+
+def _default_loader(image: Any) -> Any:
+    """Turn a step's ``image`` into a frame the drawer may paint on.
+
+    A ``str`` or ``os.PathLike`` is read with ``cv2.imread``. Anything else
+    is treated as an in-memory frame and copied when it has a ``copy()``
+    method: the default drawer paints in place, so without the copy the
+    caller's array would be modified and steps sharing one array would all
+    show the last caption.
+
+    Raises:
+        ValueError: if ``image`` is ``None`` (a step without an image).
+        FileNotFoundError: if ``cv2.imread`` cannot read the path.
+    """
+    if image is None:
+        raise ValueError("step has no image")
+    if isinstance(image, (str, os.PathLike)):
+        import cv2
+        path = os.fspath(image)
+        frame = cv2.imread(path)
+        if frame is None:
+            raise FileNotFoundError(f"could not read image: {path!r}")
+        return frame
+    copy = getattr(image, "copy", None)
+    return copy() if callable(copy) else image
+
+
+def _default_writer_factory(path: str, fps: int, size: Any) -> Any:
+    """Open a ``cv2.VideoWriter`` for ``path`` using the ``mp4v`` codec."""
+    import cv2
+    fourcc = cv2.VideoWriter.fourcc(*"mp4v")
+    return cv2.VideoWriter(path, fourcc, fps, size)
+
+
+def _frame_size(frame: Any) -> Any:
+    """Return ``(width, height)`` from an array-like frame's ``shape``."""
+    return (frame.shape[1], frame.shape[0])
+
+
+def write_step_video(steps: Sequence[Any], output_path: str, *,
+                     fps: int = 10, seconds_per_step: float = 2.0,
+                     size: Optional[Any] = None,
+                     loader: Optional[Callable[[Any], Any]] = None,
+                     drawer: Optional[Callable[..., Any]] = None,
+                     writer_factory: Optional[Callable[..., Any]] = None
+                     ) -> Dict[str, Any]:
+    """Render ``steps`` into a captioned walkthrough video at ``output_path``.
+
+    Every step is loaded and captioned first; only then is the writer
+    created and each rendered frame written ``round(fps * seconds_per_step)``
+    times (at least once). The ``loader`` / ``drawer`` / ``writer_factory``
+    hooks default to OpenCV; injecting them makes the assembly testable
+    without cv2.
+
+    Args:
+        steps: ``VideoStep`` objects or ``{image, caption, status}`` dicts.
+        output_path: destination handed to the writer factory.
+        fps: frame rate handed to the writer; also sets the hold length.
+        seconds_per_step: how long each step stays on screen.
+        size: ``(width, height)`` for the writer; taken from the first
+            rendered frame when omitted.
+        loader: ``loader(image) -> frame``.
+        drawer: ``drawer(frame, caption, status) -> frame``.
+        writer_factory: ``writer_factory(path, fps, size)`` returning an
+            object with ``write(frame)`` and ``release()``.
+
+    Returns:
+        ``{output, steps, fps, frame_count}``.
+
+    Raises:
+        TypeError: if a step is neither a ``VideoStep`` nor a dict.
+        ValueError: if the default writer would have to be created with no
+            steps and no explicit ``size`` to take the frame size from.
+        OSError: if the writer has an ``isOpened()`` method and it reports
+            that the output could not be opened.
+    """
+    load = loader or _default_loader
+    # Coerce once so a one-shot iterable of steps is not consumed twice.
+    coerced = [_coerce_step(step) for step in steps]
+    plan = build_overlay_plan(coerced, fps, seconds_per_step)
+    rendered = [render_overlay_frame(load(step.image), step.caption,
+                                     step.status, drawer)
+                for step in coerced]
+    if size is None and rendered:
+        size = _frame_size(rendered[0])
+    if size is None and writer_factory is None:
+        raise ValueError("cannot infer the video size: no steps were given "
+                         "and no size was passed")
+    make_writer = writer_factory or _default_writer_factory
+    writer = make_writer(output_path, fps, size)
+    frame_count = 0
+    try:
+        # cv2.VideoWriter does not raise when the codec or path is unusable;
+        # it only reports it through isOpened(). Injected fakes may lack it.
+        is_opened = getattr(writer, "isOpened", None)
+        if callable(is_opened) and not is_opened():
+            raise OSError(f"could not open video writer for {output_path!r}")
+        for frame, entry in zip(rendered, plan):
+            for _ in range(entry["frames"]):
+                writer.write(frame)
+                frame_count += 1
+    finally:
+        writer.release()
+    return {"output": output_path, "steps": len(plan), "fps": fps,
+            "frame_count": frame_count}
diff --git a/test/unit_test/headless/test_video_report_batch.py b/test/unit_test/headless/test_video_report_batch.py
new file mode 100644
index 0000000..7d807cf
--- /dev/null
+++ b/test/unit_test/headless/test_video_report_batch.py
@@ -0,0 +1,140 @@
+"""Headless tests for the video step-overlay report. The assembly logic is
+exercised with injected fakes (no cv2/numpy needed); one test exercises the
+real OpenCV path under importorskip. Pure stdlib otherwise; no Qt imports."""
+import pytest
+
+import je_auto_control as ac
+from je_auto_control.utils.video_report import (
+    VideoStep, build_overlay_plan, render_overlay_frame, write_step_video)
+
+STEPS = [
+    {"image": "a.png", "caption": "Open app", "status": "ok"},
+    {"image": "b.png", "caption": "Save", "status": "error"},
+]
+
+
+class _FakeWriter:
+    """Stand-in for ``cv2.VideoWriter`` that records what it is given."""
+
+    def __init__(self):
+        self.frames = []
+        self.released = False
+
+    def write(self, frame):
+        self.frames.append(frame)
+
+    def release(self):
+        self.released = True
+
+
+def test_plan_frame_count():
+    """Each step is held for ``fps * seconds_per_step`` frames."""
+    plan = build_overlay_plan(STEPS, fps=10, seconds_per_step=2.0)
+    assert [p["frames"] for p in plan] == [20, 20]
+    assert plan[0]["caption"] == "Open app"
+    assert plan[1]["status"] == "error"
+
+
+def test_plan_minimum_one_frame():
+    """A zero-length hold still writes the step once."""
+    plan = build_overlay_plan([VideoStep("x")], fps=1, seconds_per_step=0.0)
+    assert plan[0]["frames"] == 1
+
+
+def test_plan_rejects_unsupported_step():
+    """Steps must be ``VideoStep`` objects or dicts."""
+    with pytest.raises(TypeError):
+        build_overlay_plan(["a.png"])
+
+
+def test_render_overlay_uses_injected_drawer():
+    """An injected drawer receives the frame, caption and status."""
+    seen = {}
+
+    def drawer(frame, caption, status):
+        seen["args"] = (frame, caption, status)
+        return f"{frame}+{caption}"
+
+    out = render_overlay_frame("FRAME", "hi", "ok", drawer=drawer)
+    assert out == "FRAME+hi"
+    assert seen["args"] == ("FRAME", "hi", "ok")
+
+
+def test_write_step_video_with_fakes(tmp_path):
+    """The assembly runs end to end on fakes, without cv2."""
+    writer = _FakeWriter()
+    loaded = []
+
+    def loader(image):
+        loaded.append(image)
+        return f"frame:{image}"
+
+    def drawer(frame, caption, status):
+        return f"{frame}|{caption}|{status}"
+
+    result = write_step_video(
+        STEPS, str(tmp_path / "out.mp4"), fps=5, seconds_per_step=2.0,
+        size=(640, 480), loader=loader, drawer=drawer,
+        writer_factory=lambda path, fps, size: writer)
+
+    assert loaded == ["a.png", "b.png"]
+    assert result["steps"] == 2
+    assert result["frame_count"] == 20            # 2 steps * (5fps * 2s)
+    assert len(writer.frames) == 20
+    assert writer.frames[0] == "frame:a.png|Open app|ok"
+    assert writer.frames[-1] == "frame:b.png|Save|error"
+    assert writer.released is True                 # released in finally
+
+
+def test_writer_released_on_error(tmp_path):
+    """A failing write still releases the writer."""
+    writer = _FakeWriter()
+
+    def exploding_write(_frame):
+        raise RuntimeError("disk full")
+
+    writer.write = exploding_write
+    with pytest.raises(RuntimeError):
+        write_step_video(
+            [VideoStep("a.png", "x")], str(tmp_path / "o.mp4"),
+            size=(8, 8), loader=lambda i: "f", drawer=lambda f, c, s: f,
+            writer_factory=lambda p, fp, sz: writer)
+    assert writer.released is True
+
+
+def test_real_opencv_path(tmp_path):
+    """The default OpenCV hooks draw a banner and write the frames."""
+    pytest.importorskip("cv2")
+    np = pytest.importorskip("numpy")
+    frame = np.zeros((120, 160, 3), dtype=np.uint8)
+    drawn = render_overlay_frame(frame.copy(), "hello", "ok")
+    assert drawn.shape == frame.shape         # banner drawn in place
+    assert drawn.any()                        # banner pixels were painted
+
+    out = str(tmp_path / "real.mp4")
+    result = write_step_video([VideoStep(frame, "step", "ok")], out,
+                              fps=5, seconds_per_step=0.4)
+    assert result["frame_count"] == 2
+    assert result["output"] == out
+
+
+# --- wiring ---------------------------------------------------------------
+
+def test_wiring():
+    """The command is registered in the executor, MCP and Script Builder."""
+    assert "AC_write_step_video" in ac.executor.known_commands()
+    from je_auto_control.utils.mcp_server.tools import (
+        build_default_tool_registry)
+    names = {t.name for t in build_default_tool_registry()}
+    assert "ac_write_step_video" in names
+    from je_auto_control.gui.script_builder.command_schema import _build_specs
+    cmds = {s.command for s in _build_specs()}
+    assert "AC_write_step_video" in cmds
+
+
+def test_facade_exports():
+    """The public names are re-exported from the package facade."""
+    for attr in ("VideoStep", "build_overlay_plan", "render_overlay_frame",
+                 "write_step_video"):
+        assert hasattr(ac, attr)
+        assert attr in ac.__all__