From 2b76dd5cbcdfc73e970967b32cca412fdfce6b13 Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Tue, 23 Jun 2026 06:33:39 +0800
Subject: [PATCH 1/6] Extract shared export engine; wire CLI to summary cache
 and drop mypy override

---
 pyproject.toml                |   6 -
 scripts/export.py             | 460 +++++++------------------------
 services/export_engine.py     | 491 ++++++++++++++++++++++++++++++++++
 services/workspace_listing.py |  34 +--
 4 files changed, 602 insertions(+), 389 deletions(-)
 create mode 100644 services/export_engine.py

diff --git a/pyproject.toml b/pyproject.toml
index 678f218..a49ac40 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -98,12 +98,6 @@ pretty = true
 # Anchored regexes — unanchored `venv/` would match any path segment containing "venv/".
 exclude = ["^venv/", "^\\.venv/", "^build/", "^dist/"]
 
-# Standalone CLI export script (~985 LOC) duplicates utils/ helpers; typed
-# incrementally — issue #100 allows per-module override until consolidated.
-[[tool.mypy.overrides]]
-module = "scripts.export"
-ignore_errors = true
-
 # Test modules use unittest/pytest patterns that are not worth strict-checking
 # alongside production code; route handlers and utils are fully strict.
 [[tool.mypy.overrides]]
diff --git a/scripts/export.py b/scripts/export.py
index cd36454..7f8abd1 100644
--- a/scripts/export.py
+++ b/scripts/export.py
@@ -10,14 +10,16 @@
 The guard below is only necessary for direct invocation (``python scripts/export.py``).
 """
 
+from __future__ import annotations
+
 import json
 import logging
 import os
-import sqlite3
 import sys
 import zipfile
 from datetime import datetime
 from pathlib import Path
+from typing import Literal, TypedDict
 
 # sys.path guard: only needed when the script is invoked directly
 # (``python scripts/export.py``). When installed via the pyproject.toml
@@ -28,48 +30,27 @@
     if str(_project_root) not in sys.path:
         sys.path.insert(0, str(_project_root))
 
+from models import ExportEntry, SchemaError  # noqa: E402
+from services.export_engine import collect_export_entries  # noqa: E402
 from utils.exclusion_rules import (  # noqa: E402
-    resolve_exclusion_rules_path,
     load_rules,
-    build_searchable_text,
-    is_excluded_by_rules,
+    resolve_exclusion_rules_path,
 )
 from utils.path_helpers import to_epoch_ms  # noqa: E402
-from utils.text_extract import (  # noqa: E402
-    extract_text_from_bubble,
-    slug,
-)
-from utils.workspace_path import (  # noqa: E402
-    get_cli_chats_path,
-    resolve_workspace_path,
-)
-from utils.cli_chat_reader import (  # noqa: E402
-    list_cli_projects,
-    traverse_blobs,
-    messages_to_bubbles,
-)
-from utils.cursor_md_exporter import (  # noqa: E402
-    cursor_cli_session_to_markdown,
-    cursor_ide_chat_to_markdown,
-)
-from models import Bubble, ExportEntry, SchemaError  # noqa: E402
-from services.workspace_context import (  # noqa: E402
-    enrich_workspace_context_from_global_db,
-    resolve_workspace_context,
-)
-from services.workspace_db import (  # noqa: E402
-    load_code_block_diff_map,
-    open_global_db,
-)
-from services.workspace_resolver import (  # noqa: E402
-    determine_project_for_conversation,
-    infer_invalid_workspace_aliases,
-    lookup_workspace_display_name,
-)
+from utils.workspace_path import resolve_workspace_path  # noqa: E402
 
 _logger = logging.getLogger(__name__)
 
 
+class ExportCliOptions(TypedDict):  # option dict returned by parse_args(), read by main()
+    since: Literal["all", "last"]  # "last" skips chats not newer than the saved timestamp
+    out_dir: str  # main() makes this absolute before use
+    include_composer: bool  # forwarded to collect_export_entries(include_composer=...)
+    zip: bool  # main() reads this as use_zip
+    exclusion_rules_path: str | None  # passed to resolve_exclusion_rules_path()
+    base_dir: str | None  # when set, main() exports it as WORKSPACE_PATH
+
+
 def configure_cli_logging() -> None:
     """Route log records to stderr so stdout stays for export progress lines."""
     root = logging.getLogger()
@@ -82,17 +63,9 @@ def configure_cli_logging() -> None:
     )
 
 
-def json_dump_safe(value) -> str:
-    """Best-effort JSON serialization for exclusion matching."""
-    try:
-        return json.dumps(value, ensure_ascii=False, sort_keys=True)
-    except Exception:
-        return str(value) if value is not None else ""
-
-
-def load_manifest_entries(manifest_path: str) -> dict:
+def load_manifest_entries(manifest_path: str) -> dict[str, dict[str, object]]:
     """Load manifest entries keyed by log_id from a JSONL file."""
-    existing: dict = {}
+    existing: dict[str, dict[str, object]] = {}
     if not os.path.isfile(manifest_path):
         return existing
     try:
@@ -113,7 +86,10 @@ def load_manifest_entries(manifest_path: str) -> dict:
     return existing
 
 
-def write_manifest_entries(manifest_path: str, entries_by_id: dict):
+def write_manifest_entries(
+    manifest_path: str,
+    entries_by_id: dict[str, dict[str, object]],
+) -> None:
     """Write manifest entries to JSONL."""
     os.makedirs(os.path.dirname(manifest_path), exist_ok=True)
     with open(manifest_path, "w", encoding="utf-8") as f:
@@ -132,8 +108,9 @@ def get_global_state_dir() -> str:
     return os.path.join(str(Path.home()), ".cursor-chat-browser")
 
 
-def parse_args():
+def parse_args() -> ExportCliOptions:
     import argparse
+
     parser = argparse.ArgumentParser(
         description="Export Cursor chat history to Markdown files.",
         epilog=(
@@ -143,23 +120,42 @@ def parse_args():
         ),
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
-    parser.add_argument("--since", choices=["all", "last"], default="all",
-                        help="Export all chats or only those updated since last export. Default: all")
-    parser.add_argument("--out", default=".",
-                        help="Output directory. Default: current working directory (.)")
-    parser.add_argument("--no-zip", action="store_true", default=False,
-                        help="Write individual Markdown files instead of a zip archive.")
-    parser.add_argument("--no-composer", action="store_true", default=False,
-                        help="Exclude composer logs (export only chat logs).")
-    parser.add_argument("--base-dir", default=None,
-                        help="Override Cursor workspaceStorage path (also settable via WORKSPACE_PATH env var).")
     parser.add_argument(
-        "--exclude-rules", "-e",
+        "--since",
+        choices=["all", "last"],
+        default="all",
+        help="Export all chats or only those updated since last export. Default: all",
+    )
+    parser.add_argument(
+        "--out",
+        default=".",
+        help="Output directory. Default: current working directory (.)",
+    )
+    parser.add_argument(
+        "--no-zip",
+        action="store_true",
+        default=False,
+        help="Write individual Markdown files instead of a zip archive.",
+    )
+    parser.add_argument(
+        "--no-composer",
+        action="store_true",
+        default=False,
+        help="Exclude composer logs (export only chat logs).",
+    )
+    parser.add_argument(
+        "--base-dir",
+        default=None,
+        help="Override Cursor workspaceStorage path (also settable via WORKSPACE_PATH env var).",
+    )
+    parser.add_argument(
+        "--exclude-rules",
+        "-e",
         default=None,
         metavar="PATH",
         dest="exclude_rules",
         help="Path to exclusion rules file (sensitive projects/chats are omitted). "
-             "If omitted, uses ~/.cursor-chat-browser/exclusion-rules.txt if present.",
+        "If omitted, uses ~/.cursor-chat-browser/exclusion-rules.txt if present.",
     )
     args = parser.parse_args()
     return {
@@ -172,311 +168,52 @@ def parse_args():
     }
 
 
-def main():
+def _read_last_export_ms(state_path: str, since: Literal["all", "last"]) -> int:
+    """Return the last export time in epoch ms, or 0 when no usable value exists."""
+    if since != "last" or not os.path.isfile(state_path):
+        return 0
+    try:
+        with open(state_path, "r", encoding="utf-8") as f:
+            st = json.load(f)
+        ts = st.get("lastExportTime")  # AttributeError if root is not an object
+        if ts:  # a non-string value fails below and is treated as malformed
+            parsed = datetime.fromisoformat(ts.replace("Z", "+00:00"))
+            return int(parsed.timestamp() * 1000)
+    except (json.JSONDecodeError, ValueError, OSError, AttributeError) as e:
+        _logger.warning(
+            "Could not read last export timestamp; defaulting to full export: %s",
+            e,
+        )
+    return 0
+
+
+def main() -> None:
     configure_cli_logging()
     opts = parse_args()
     since = opts["since"]
     out_dir = os.path.abspath(opts["out_dir"])
     use_zip = opts["zip"]
-    exclusion_rules = load_rules(resolve_exclusion_rules_path(opts.get("exclusion_rules_path")))
-    if opts.get("base_dir"):
-        os.environ["WORKSPACE_PATH"] = opts["base_dir"]
+    exclusion_rules = load_rules(
+        resolve_exclusion_rules_path(opts.get("exclusion_rules_path")),
+    )
+    base_dir = opts.get("base_dir")
+    if base_dir:
+        os.environ["WORKSPACE_PATH"] = base_dir
     workspace_path = resolve_workspace_path()
 
     state_dir = get_global_state_dir()
     state_path = os.path.join(state_dir, "export_state.json")
-    last_export = 0
-    if since == "last" and os.path.isfile(state_path):
-        try:
-            with open(state_path, "r", encoding="utf-8") as f:
-                st = json.load(f)
-            ts = st.get("lastExportTime")
-            if ts:
-                last_export = int(datetime.fromisoformat(ts.replace("Z", "+00:00")).timestamp() * 1000)
-        except (json.JSONDecodeError, ValueError, OSError) as e:
-            _logger.warning(
-                "Could not read last export timestamp; defaulting to full export: %s",
-                e,
-            )
-
-    # ── Workspace scanning via service layer ──────────────────────────────────
-    ctx = resolve_workspace_context(workspace_path)
-    workspace_entries = ctx.workspace_entries
-    invalid_workspace_ids = ctx.invalid_workspace_ids
-    project_name_map = ctx.project_name_to_workspace_id
-    workspace_path_map = ctx.workspace_path_to_id
-    composer_id_to_ws = ctx.composer_id_to_workspace_id
-
-    # Build display-name and slug maps from workspace entries.
-    # Entries whose workspace.json cannot be resolved are omitted so the
-    # usage-site fallback (slug(ws_id[:12])) applies — matching original
-    # behaviour where unresolvable workspaces were skipped.
-    workspace_id_to_display_name: dict[str, str] = {}
-    workspace_id_to_slug: dict[str, str] = {}
-    for entry in workspace_entries:
-        display = lookup_workspace_display_name(workspace_path, entry["name"])
-        if display != entry["name"]:  # successfully resolved a human-readable name
-            workspace_id_to_display_name[entry["name"]] = display
-            workspace_id_to_slug[entry["name"]] = slug(display)
-
-    # ── Database reading via service layer ────────────────────────────────────
-    project_layouts_map: dict = {}
-    bubble_map: dict[str, Bubble] = {}
-    code_block_diff_map: dict = {}
-    ide_composer_rows: list = []
-    invalid_workspace_aliases: dict = {}
-
-    with open_global_db(workspace_path) as (global_db, global_db_path):
-        if global_db is None:
-            _logger.info(
-                "Cursor IDE global storage not found at %s — skipping IDE chats.",
-                global_db_path,
-            )
-        else:
-            ctx = enrich_workspace_context_from_global_db(
-                ctx,
-                global_db,
-                populate_project_layouts=True,
-                populate_bubble_map=True,
-            )
-            project_layouts_map = ctx.project_layouts_map
-            bubble_map = ctx.bubble_map
-            code_block_diff_map = load_code_block_diff_map(global_db)
-
-            try:
-                ide_composer_rows = global_db.execute(
-                    "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'"
-                    " AND value LIKE '%fullConversationHeadersOnly%'"
-                ).fetchall()
-            except sqlite3.Error:
-                pass
-
-            invalid_workspace_aliases = infer_invalid_workspace_aliases(
-                composer_rows=ide_composer_rows,
-                project_layouts_map=project_layouts_map,
-                project_name_map=project_name_map,
-                workspace_path_map=workspace_path_map,
-                workspace_entries=workspace_entries,
-                bubble_map=bubble_map,
-                composer_id_to_ws=composer_id_to_ws,
-                invalid_workspace_ids=invalid_workspace_ids,
-            )
-
-    today = datetime.now().strftime("%Y-%m-%d")
-    exported = []
-    count = 0
-
-    # ── Process IDE composers ────────────────────────────────────────────────
-    include_composer = opts.get("include_composer", True)
-    for row in ide_composer_rows if include_composer else []:
-        composer_id = row["key"].split(":")[1]
-        try:
-            cd = json.loads(row["value"])
-        except (json.JSONDecodeError, ValueError) as parse_err:
-            _logger.debug(
-                "Skipping corrupt composerData row %s: %s",
-                composer_id,
-                parse_err,
-            )
-            continue
-
-        headers = cd.get("fullConversationHeadersOnly") or []
-        if not headers:
-            continue
-
-        updated_at = to_epoch_ms(cd.get("lastUpdatedAt"))
-        if updated_at is None:
-            updated_at = to_epoch_ms(cd.get("createdAt"))
-        if updated_at is None:
-            updated_at = 0
-        if since == "last" and updated_at <= last_export:
-            continue
-
-        # Workspace assignment via service layer
-        pid = determine_project_for_conversation(
-            cd, composer_id, project_layouts_map,
-            project_name_map, workspace_path_map,
-            workspace_entries, bubble_map, composer_id_to_ws, invalid_workspace_ids,
-        )
-        mapped_ws = composer_id_to_ws.get(composer_id)
-        if not pid and mapped_ws in invalid_workspace_ids:
-            pid = invalid_workspace_aliases.get(mapped_ws)
-        ws_id = pid if pid else "global"
-
-        ws_slug = "other-chats" if ws_id == "global" else (workspace_id_to_slug.get(ws_id) or slug(ws_id[:12]))
-        ws_display_name = "Other chats" if ws_id == "global" else (workspace_id_to_display_name.get(ws_id) or ws_slug)
-        title = cd.get("name") or f"Chat {composer_id[:8]}"
-        model_config = cd.get("modelConfig") or {}
-        model_name = model_config.get("modelName")
-        model_names = [model_name] if model_name and model_name != "default" else None
-
-        # Build broad text for exclusion checks so any visible output term can match.
-        # CLI export intentionally includes metadata/tool payload text in addition to
-        # bubble text because these fields are emitted into exported markdown.
-        bubble_texts = []
-        bubble_meta_parts = []
-        for h in headers:
-            b = bubble_map.get(h.get("bubbleId"))
-            if not b:
-                continue
-            text = extract_text_from_bubble(b)
-            if text:
-                bubble_texts.append(text)
-            bubble_meta_parts.append(json_dump_safe(b))
-
-        code_diff_parts = [json_dump_safe(d) for d in code_block_diff_map.get(composer_id, [])]
-        searchable = build_searchable_text(
-            project_name=ws_display_name,
-            chat_title=title,
-            model_names=model_names,
-            chat_content_snippet="\n\n".join(
-                p
-                for p in (
-                    bubble_texts
-                    + bubble_meta_parts
-                    + code_diff_parts
-                    + [json_dump_safe(model_config), json_dump_safe(cd)]
-                )
-                if p
-            ),
-        )
-        if is_excluded_by_rules(exclusion_rules, searchable):
-            continue
-
-        title_slug = slug(title)
-        ts = updated_at or int(datetime.now().timestamp() * 1000)
-        ts_str = datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%dT%H-%M-%S")
-        filename = f"{ts_str}__{title_slug}__{composer_id[:8]}.md"
-        out_path = os.path.join(out_dir, today, ws_slug, "chat", filename)
-
-        # Markdown generation via shared exporter
-        md = cursor_ide_chat_to_markdown(
-            composer_data=cd,
-            composer_id=composer_id,
-            bubble_map=bubble_map,
-            code_block_diff_map=code_block_diff_map,
-            workspace_info={"ws_slug": ws_slug, "ws_display_name": ws_display_name},
-        )
-
-        rel_path = os.path.join(today, ws_slug, "chat", filename)
-        exported.append({
-            "id": composer_id,
-            "rel_path": rel_path,
-            "content": md,
-            "out_path": out_path,
-            "updatedAt": updated_at,
-            "title": title,
-            "workspace": ws_display_name,
-        })
-        count += 1
-
-    # ── Cursor CLI sessions ──────────────────────────────────────────────────
-    try:
-        cli_projects = list_cli_projects(get_cli_chats_path())
-    except Exception as e:
-        _logger.warning(
-            "Could not enumerate CLI chats: %s (%s) — skipping",
-            e,
-            type(e).__name__,
-            exc_info=True,
-        )
-        cli_projects = []
-
-    for cp in cli_projects:
-        ws_name = cp["workspace_name"] or cp["project_id"][:12]
-        ws_slug_cli = slug(ws_name)
-
-        if is_excluded_by_rules(exclusion_rules, build_searchable_text(project_name=ws_name)):
-            continue
-
-        for session in cp["sessions"]:
-            meta = session.get("meta", {})
-            session_id = session["session_id"]
-            created_ms: int = meta.get("createdAt") or int(datetime.now().timestamp() * 1000)
-            session_name = meta.get("name") or f"Session {session_id[:8]}"
-
-            # Use the store.db mtime as a proxy for "last updated" — createdAt
-            # is immutable and would cause sessions with new turns to be skipped.
-            try:
-                db_mtime_ms = int(os.path.getmtime(session["db_path"]) * 1000)
-            except OSError:
-                db_mtime_ms = created_ms
-            updated_ms = max(created_ms, db_mtime_ms)
-
-            if since == "last" and updated_ms <= last_export:
-                continue
-
-            try:
-                messages = traverse_blobs(session["db_path"])
-                bubbles = messages_to_bubbles(messages, created_ms)
-            except Exception as e:
-                _logger.warning(
-                    "Could not read CLI session %s: %s (%s)",
-                    session_id,
-                    e,
-                    type(e).__name__,
-                    exc_info=True,
-                )
-                continue
-
-            if not bubbles:
-                continue
-
-            # Derive title for the filename (shared exporter does it too, but
-            # we need it here first to build the output path).
-            title = session_name
-            if not title or title.startswith("New Agent"):
-                for b in bubbles:
-                    if b["type"] == "user" and b.get("text"):
-                        first_lines = [ln for ln in b["text"].split("\n") if ln.strip()]
-                        if first_lines:
-                            title = first_lines[0][:100]
-                            if len(title) == 100:
-                                title += "..."
-                        break
-
-            bubble_texts = [b["text"] for b in bubbles if b.get("text")]
-            tool_call_texts = [
-                tc.get("input", "") or tc.get("summary", "")
-                for b in bubbles
-                for tc in (b.get("metadata") or {}).get("toolCalls") or []
-            ]
-            searchable = build_searchable_text(
-                project_name=ws_name,
-                chat_title=title,
-                chat_content_snippet="\n\n".join(bubble_texts + tool_call_texts),
-            )
-            if is_excluded_by_rules(exclusion_rules, searchable):
-                continue
-
-            title_slug = slug(title)
-            ts_str = datetime.fromtimestamp(created_ms / 1000).strftime("%Y-%m-%dT%H-%M-%S")
-            filename = f"{ts_str}__{title_slug}__{session_id[:8]}.md"
-            out_path = os.path.join(out_dir, today, ws_slug_cli, "cli", filename)
-
-            md = cursor_cli_session_to_markdown(
-                session["db_path"],
-                session_meta=meta,
-                workspace_info={
-                    "workspace": ws_slug_cli,
-                    "workspace_name": ws_name,
-                    "workspace_path": cp.get("workspace_path"),
-                    "project_id": cp["project_id"],
-                },
-                bubbles=bubbles,
-                title_override=title,
-            )
-            rel_path = os.path.join(today, ws_slug_cli, "cli", filename)
-            exported.append({
-                "id": session_id,
-                "rel_path": rel_path,
-                "content": md,
-                "out_path": out_path,
-                "updatedAt": updated_ms,
-                "title": title,
-                "workspace": ws_name,
-            })
-            count += 1
+    last_export = _read_last_export_ms(state_path, since)
+
+    exported = collect_export_entries(
+        workspace_path=workspace_path,
+        exclusion_rules=exclusion_rules,
+        since=since,
+        last_export_ms=last_export,
+        out_dir=out_dir,
+        include_composer=opts.get("include_composer", True),
+    )
+    count = len(exported)
 
     if count == 0:
         label = " since last export" if since == "last" else ""
@@ -484,6 +221,7 @@ def main():
         sys.exit(0)
 
     os.makedirs(out_dir, exist_ok=True)
+    today = datetime.now().strftime("%Y-%m-%d")
 
     if use_zip:
         zip_name = f"cursor-export-{today}.zip"
@@ -506,7 +244,11 @@ def main():
                 "title": entry["title"],
                 "workspace": entry["workspace"],
                 "path": os.path.relpath(entry["out_path"], out_dir),
-                "updated_at": datetime.fromtimestamp(entry["updatedAt"] / 1000).isoformat() if entry["updatedAt"] else datetime.now().isoformat(),
+                "updated_at": (
+                    datetime.fromtimestamp(entry["updatedAt"] / 1000).isoformat()
+                    if entry["updatedAt"]
+                    else datetime.now().isoformat()
+                ),
             }
         if existing:
             write_manifest_entries(manifest_path, existing)
@@ -519,7 +261,11 @@ def main():
                 "title": entry["title"],
                 "workspace": entry["workspace"],
                 "path": entry["out_path"],
-                "updated_at": datetime.fromtimestamp(entry["updatedAt"] / 1000).isoformat() if entry["updatedAt"] else datetime.now().isoformat(),
+                "updated_at": (
+                    datetime.fromtimestamp(entry["updatedAt"] / 1000).isoformat()
+                    if entry["updatedAt"]
+                    else datetime.now().isoformat()
+                ),
             }
         if global_existing:
             write_manifest_entries(global_manifest_path, global_existing)
diff --git a/services/export_engine.py b/services/export_engine.py
new file mode 100644
index 0000000..ca7c086
--- /dev/null
+++ b/services/export_engine.py
@@ -0,0 +1,491 @@
+"""Shared export orchestration for CLI and web paths."""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import sqlite3
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, Literal, TypedDict
+
+from models import Bubble
+from services.summary_cache import fingerprint_workspace_storage
+from services.workspace_context import (
+    WorkspaceContext,
+    enrich_workspace_context_from_global_db,
+    resolve_workspace_context_cached,
+)
+from services.workspace_db import (
+    COMPOSER_ROWS_WITH_HEADERS_SQL,
+    collect_workspace_entries,
+    global_storage_db_path,
+    load_code_block_diff_map,
+    open_global_db,
+    safe_fetchall,
+)
+from services.workspace_resolver import (
+    determine_project_for_conversation,
+    infer_invalid_workspace_aliases,
+    lookup_workspace_display_name,
+)
+from utils.cli_chat_reader import (
+    list_cli_projects,
+    messages_to_bubbles,
+    traverse_blobs,
+)
+from utils.cursor_md_exporter import (
+    cursor_cli_session_to_markdown,
+    cursor_ide_chat_to_markdown,
+)
+from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules
+from utils.path_helpers import to_epoch_ms
+from utils.text_extract import extract_text_from_bubble, slug
+from utils.workspace_path import get_cli_chats_path
+
+_logger = logging.getLogger(__name__)
+
+SinceMode = Literal["all", "last"]
+
+
+class ExportEntry(TypedDict):
+    """One exportable conversation with rendered markdown."""
+    # NOTE(review): scripts/export.py imports ExportEntry from models; confirm both agree.
+    id: str
+    rel_path: str  # presumably relative to the export root — TODO confirm
+    content: str  # rendered markdown (per the class docstring)
+    out_path: str  # scripts/export.py takes relpath(out_path, out_dir) for the manifest
+    updatedAt: int  # epoch ms: scripts/export.py divides by 1000 for fromtimestamp()
+    title: str
+    workspace: str
+
+
+@dataclass(frozen=True)
+class WorkspaceOrchestration:
+    """Precomputed workspace maps shared by listing and export."""
+
+    workspace_path: str
+    workspace_entries: list[dict[str, Any]]  # caller-supplied or collect_workspace_entries()
+    fingerprint: dict[str, Any]  # result of fingerprint_workspace_storage()
+    ctx: WorkspaceContext  # result of resolve_workspace_context_cached()
+    workspace_id_to_display_name: dict[str, str]  # only ids whose display name resolved
+    workspace_id_to_slug: dict[str, str]  # slug(display name) for those same ids
+
+
+@dataclass(frozen=True)
+class GlobalDbExportData:
+    """Global KV data loaded for export orchestration."""
+
+    project_layouts_map: dict[str, list[str]]  # taken from the enriched WorkspaceContext
+    bubble_map: dict[str, Bubble]  # taken from the enriched context; looked up by bubbleId
+    code_block_diff_map: dict[str, list[Any]]  # load_code_block_diff_map(); read per composer id
+    ide_composer_rows: list[sqlite3.Row]  # rows fetched with COMPOSER_ROWS_WITH_HEADERS_SQL
+    invalid_workspace_aliases: dict[str, str]  # output of infer_invalid_workspace_aliases()
+
+
+def json_dump_safe(value: object) -> str:
+    """Serialize *value* as sorted-key JSON, falling back to ``str`` on failure."""
+    try:
+        return json.dumps(value, sort_keys=True, ensure_ascii=False)
+    except Exception:
+        return "" if value is None else str(value)
+
+
+def build_workspace_display_maps(
+    workspace_path: str,
+    workspace_entries: list[dict[str, Any]],
+) -> tuple[dict[str, str], dict[str, str]]:
+    """Map workspace ids to human-readable names and to their slugs.
+
+    Ids whose lookup echoes the id back are omitted (caller falls back).
+    """
+    names: dict[str, str] = {}
+    slugs: dict[str, str] = {}
+    for ws_id in (entry["name"] for entry in workspace_entries):
+        resolved = lookup_workspace_display_name(workspace_path, ws_id)
+        if resolved == ws_id:
+            continue
+        names[ws_id] = resolved
+        slugs[ws_id] = slug(resolved)
+    return names, slugs
+
+
+def prepare_workspace_orchestration(
+    workspace_path: str,
+    rules: list[Any],
+    *,
+    nocache: bool = False,
+    workspace_entries: list[dict[str, Any]] | None = None,
+) -> WorkspaceOrchestration:
+    """Bundle entries, storage fingerprint, context and display maps.
+
+    Workspace storage is scanned only when ``workspace_entries`` is None.
+    """
+    if workspace_entries is None:
+        workspace_entries = collect_workspace_entries(workspace_path)
+    global_db = global_storage_db_path(workspace_path)
+    cli_chats = get_cli_chats_path()  # paths missing on disk are fingerprinted as None
+    storage_fingerprint = fingerprint_workspace_storage(
+        workspace_path,
+        workspace_entries,
+        global_db_path=global_db if os.path.isfile(global_db) else None,
+        rules=rules,
+        cli_chats_path=cli_chats if os.path.isdir(cli_chats) else None,
+    )
+    context = resolve_workspace_context_cached(
+        workspace_path,
+        rules,
+        workspace_entries=workspace_entries,
+        nocache=nocache,
+    )
+    names, slugs = build_workspace_display_maps(workspace_path, workspace_entries)
+    return WorkspaceOrchestration(
+        workspace_path=workspace_path,
+        workspace_entries=workspace_entries,
+        fingerprint=storage_fingerprint,
+        ctx=context,
+        workspace_id_to_display_name=names,
+        workspace_id_to_slug=slugs,
+    )
+
+
+def load_global_db_export_data(
+    orch: WorkspaceOrchestration,
+) -> GlobalDbExportData | None:
+    """Read the global-DB maps that IDE composer export depends on.
+
+    Returns None (after an info log) when the global storage DB is absent.
+    """
+    base_ctx = orch.ctx
+
+    with open_global_db(orch.workspace_path) as (global_db, global_db_path):
+        if global_db is None:
+            _logger.info(
+                "Cursor IDE global storage not found at %s — skipping IDE chats.",
+                global_db_path,
+            )
+            return None
+
+        # Layouts and bubbles are read from the context returned by enrichment.
+        enriched_ctx = enrich_workspace_context_from_global_db(
+            base_ctx,
+            global_db,
+            populate_project_layouts=True,
+            populate_bubble_map=True,
+        )
+        layouts = enriched_ctx.project_layouts_map
+        bubbles = enriched_ctx.bubble_map
+        diffs = load_code_block_diff_map(global_db)
+        composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL)
+
+        # The id maps below are taken from orch.ctx, not from enriched_ctx.
+        aliases = infer_invalid_workspace_aliases(
+            composer_rows=composer_rows,
+            project_layouts_map=layouts,
+            project_name_map=base_ctx.project_name_to_workspace_id,
+            workspace_path_map=base_ctx.workspace_path_to_id,
+            workspace_entries=orch.workspace_entries,
+            bubble_map=bubbles,
+            composer_id_to_ws=base_ctx.composer_id_to_workspace_id,
+            invalid_workspace_ids=base_ctx.invalid_workspace_ids,
+        )
+
+    return GlobalDbExportData(
+        project_layouts_map=layouts,
+        bubble_map=bubbles,
+        code_block_diff_map=diffs,
+        ide_composer_rows=composer_rows,
+        invalid_workspace_aliases=aliases,
+    )
+
+
+def _collect_ide_export_entries(
+    *,
+    orch: WorkspaceOrchestration,
+    db_data: GlobalDbExportData,
+    exclusion_rules: list[Any],
+    since: SinceMode,
+    last_export_ms: int,
+    today: str,
+    out_dir: str,
+) -> list[ExportEntry]:
+    ctx = orch.ctx
+    exported: list[ExportEntry] = []
+    for row in db_data.ide_composer_rows:
+        composer_id = row["key"].split(":")[1]
+        try:
+            cd = json.loads(row["value"])
+        except (json.JSONDecodeError, ValueError) as parse_err:
+            _logger.debug(
+                "Skipping corrupt composerData row %s: %s",
+                composer_id,
+                parse_err,
+            )
+            continue
+
+        headers = cd.get("fullConversationHeadersOnly") or []
+        if not headers:
+            continue
+
+        updated_at = to_epoch_ms(cd.get("lastUpdatedAt"))
+        if updated_at is None:
+            updated_at = to_epoch_ms(cd.get("createdAt"))
+        if updated_at is None:
+            updated_at = 0
+        if since == "last" and updated_at <= last_export_ms:
+            continue
+
+        pid = determine_project_for_conversation(
+            cd,
+            composer_id,
+            db_data.project_layouts_map,
+            ctx.project_name_to_workspace_id,
+            ctx.workspace_path_to_id,
+            orch.workspace_entries,
+            db_data.bubble_map,
+            ctx.composer_id_to_workspace_id,
+            ctx.invalid_workspace_ids,
+        )
+        mapped_ws = ctx.composer_id_to_workspace_id.get(composer_id)
+        if not pid and mapped_ws in ctx.invalid_workspace_ids:
+            pid = db_data.invalid_workspace_aliases.get(mapped_ws)
+        ws_id = pid if pid else "global"
+
+        ws_slug = (
+            "other-chats"
+            if ws_id == "global"
+            else (orch.workspace_id_to_slug.get(ws_id) or slug(ws_id[:12]))
+        )
+        ws_display_name = (
+            "Other chats"
+            if ws_id == "global"
+            else (orch.workspace_id_to_display_name.get(ws_id) or ws_slug)
+        )
+        title = cd.get("name") or f"Chat {composer_id[:8]}"
+        model_config = cd.get("modelConfig") or {}
+        model_name = model_config.get("modelName")
+        model_names = [model_name] if model_name and model_name != "default" else None
+
+        bubble_texts: list[str] = []
+        bubble_meta_parts: list[str] = []
+        for h in headers:
+            b = db_data.bubble_map.get(h.get("bubbleId"))
+            if not b:
+                continue
+            text = extract_text_from_bubble(b)
+            if text:
+                bubble_texts.append(text)
+            bubble_meta_parts.append(json_dump_safe(b))
+
+        code_diff_parts = [
+            json_dump_safe(d) for d in db_data.code_block_diff_map.get(composer_id, [])
+        ]
+        searchable = build_searchable_text(
+            project_name=ws_display_name,
+            chat_title=title,
+            model_names=model_names,
+            chat_content_snippet="\n\n".join(
+                p
+                for p in (
+                    bubble_texts
+                    + bubble_meta_parts
+                    + code_diff_parts
+                    + [json_dump_safe(model_config), json_dump_safe(cd)]
+                )
+                if p
+            ),
+        )
+        if is_excluded_by_rules(exclusion_rules, searchable):
+            continue
+
+        title_slug = slug(title)
+        ts = updated_at or int(datetime.now().timestamp() * 1000)
+        ts_str = datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%dT%H-%M-%S")
+        filename = f"{ts_str}__{title_slug}__{composer_id[:8]}.md"
+        out_path = os.path.join(out_dir, today, ws_slug, "chat", filename)
+
+        md = cursor_ide_chat_to_markdown(
+            composer_data=cd,
+            composer_id=composer_id,
+            bubble_map=db_data.bubble_map,
+            code_block_diff_map=db_data.code_block_diff_map,
+            workspace_info={"ws_slug": ws_slug, "ws_display_name": ws_display_name},
+        )
+
+        rel_path = os.path.join(today, ws_slug, "chat", filename)
+        exported.append({
+            "id": composer_id,
+            "rel_path": rel_path,
+            "content": md,
+            "out_path": out_path,
+            "updatedAt": updated_at,
+            "title": title,
+            "workspace": ws_display_name,
+        })
+    return exported
+
+
+def _collect_cli_export_entries(
+    *,
+    exclusion_rules: list[Any],
+    since: SinceMode,
+    last_export_ms: int,
+    today: str,
+    out_dir: str,
+) -> list[ExportEntry]:
+    exported: list[ExportEntry] = []
+    try:
+        cli_projects = list_cli_projects(get_cli_chats_path())
+    except Exception as e:
+        _logger.warning(
+            "Could not enumerate CLI chats: %s (%s) — skipping",
+            e,
+            type(e).__name__,
+            exc_info=True,
+        )
+        cli_projects = []
+
+    for cp in cli_projects:
+        ws_name = cp["workspace_name"] or cp["project_id"][:12]
+        ws_slug_cli = slug(ws_name)
+
+        if is_excluded_by_rules(
+            exclusion_rules, build_searchable_text(project_name=ws_name),
+        ):
+            continue
+
+        for session in cp["sessions"]:
+            meta = session.get("meta", {})
+            session_id = session["session_id"]
+            created_ms: int = meta.get("createdAt") or int(
+                datetime.now().timestamp() * 1000,
+            )
+            session_name = meta.get("name") or f"Session {session_id[:8]}"
+
+            try:
+                db_mtime_ms = int(os.path.getmtime(session["db_path"]) * 1000)
+            except OSError:
+                db_mtime_ms = created_ms
+            updated_ms = max(created_ms, db_mtime_ms)
+
+            if since == "last" and updated_ms <= last_export_ms:
+                continue
+
+            try:
+                messages = traverse_blobs(session["db_path"])
+                bubbles = messages_to_bubbles(messages, created_ms)
+            except Exception as e:
+                _logger.warning(
+                    "Could not read CLI session %s: %s (%s)",
+                    session_id,
+                    e,
+                    type(e).__name__,
+                    exc_info=True,
+                )
+                continue
+
+            if not bubbles:
+                continue
+
+            title = session_name
+            if not title or title.startswith("New Agent"):
+                for b in bubbles:
+                    if b["type"] == "user" and b.get("text"):
+                        first_lines = [
+                            ln for ln in b["text"].split("\n") if ln.strip()
+                        ]
+                        if first_lines:
+                            title = first_lines[0][:100]
+                            if len(title) == 100:
+                                title += "..."
+                        break
+
+            bubble_texts = [b["text"] for b in bubbles if b.get("text")]
+            tool_call_texts = [
+                tc.get("input", "") or tc.get("summary", "")
+                for b in bubbles
+                for tc in (b.get("metadata") or {}).get("toolCalls") or []
+            ]
+            searchable = build_searchable_text(
+                project_name=ws_name,
+                chat_title=title,
+                chat_content_snippet="\n\n".join(bubble_texts + tool_call_texts),
+            )
+            if is_excluded_by_rules(exclusion_rules, searchable):
+                continue
+
+            title_slug = slug(title)
+            ts_str = datetime.fromtimestamp(created_ms / 1000).strftime(
+                "%Y-%m-%dT%H-%M-%S",
+            )
+            filename = f"{ts_str}__{title_slug}__{session_id[:8]}.md"
+            out_path = os.path.join(out_dir, today, ws_slug_cli, "cli", filename)
+
+            md = cursor_cli_session_to_markdown(
+                session["db_path"],
+                session_meta=meta,
+                workspace_info={
+                    "workspace": ws_slug_cli,
+                    "workspace_name": ws_name,
+                    "workspace_path": cp.get("workspace_path"),
+                    "project_id": cp["project_id"],
+                },
+                bubbles=bubbles,
+                title_override=title,
+            )
+            rel_path = os.path.join(today, ws_slug_cli, "cli", filename)
+            exported.append({
+                "id": session_id,
+                "rel_path": rel_path,
+                "content": md,
+                "out_path": out_path,
+                "updatedAt": updated_ms,
+                "title": title,
+                "workspace": ws_name,
+            })
+    return exported
+
+
+def collect_export_entries(
+    *,
+    workspace_path: str,
+    exclusion_rules: list[Any],
+    since: SinceMode,
+    last_export_ms: int,
+    out_dir: str,
+    include_composer: bool = True,
+    nocache: bool = False,
+) -> list[ExportEntry]:
+    """Collect exportable conversations (IDE + CLI) via shared orchestration."""
+    orch = prepare_workspace_orchestration(
+        workspace_path, exclusion_rules, nocache=nocache,
+    )
+    today = datetime.now().strftime("%Y-%m-%d")
+    exported: list[ExportEntry] = []
+
+    if include_composer:
+        db_data = load_global_db_export_data(orch)
+        if db_data is not None:
+            exported.extend(
+                _collect_ide_export_entries(
+                    orch=orch,
+                    db_data=db_data,
+                    exclusion_rules=exclusion_rules,
+                    since=since,
+                    last_export_ms=last_export_ms,
+                    today=today,
+                    out_dir=out_dir,
+                ),
+            )
+
+    exported.extend(
+        _collect_cli_export_entries(
+            exclusion_rules=exclusion_rules,
+            since=since,
+            last_export_ms=last_export_ms,
+            today=today,
+            out_dir=out_dir,
+        ),
+    )
+    return exported
diff --git a/services/workspace_listing.py b/services/workspace_listing.py
index 891cabe..b0b1c98 100644
--- a/services/workspace_listing.py
+++ b/services/workspace_listing.py
@@ -19,17 +19,14 @@
 )
 from utils.workspace_descriptor import read_json_file
 from models import Bubble, ParseWarningCollector
+from services.export_engine import WorkspaceOrchestration, prepare_workspace_orchestration
 from services.summary_cache import (
-    fingerprint_workspace_storage,
     get_cached_projects,
     nocache_enabled,
     set_cached_projects,
 )
-from services.workspace_context import resolve_workspace_context_cached
 from services.workspace_db import (
     COMPOSER_ROWS_WITH_HEADERS_SQL,
-    collect_workspace_entries,
-    global_storage_db_path,
     load_project_layouts_for_composer,
     load_project_layouts_map,
     open_global_db,
@@ -93,43 +90,28 @@ def list_workspace_projects(
         parse-error dicts (``type``, ``count``, ``detail``) from
         :meth:`models.ParseWarningCollector.to_api_list`; empty when no skips.
     """
-    workspace_entries = collect_workspace_entries(workspace_path)
-    gdb = global_storage_db_path(workspace_path)
-    cli_path = get_cli_chats_path()
-    fingerprint = fingerprint_workspace_storage(
-        workspace_path,
-        workspace_entries,
-        global_db_path=gdb if os.path.isfile(gdb) else None,
-        rules=rules,
-        cli_chats_path=cli_path if os.path.isdir(cli_path) else None,
-    )
+    orch = prepare_workspace_orchestration(workspace_path, rules, nocache=nocache)
     if not nocache_enabled(request_nocache=nocache):
-        cached = get_cached_projects(fingerprint)
+        cached = get_cached_projects(orch.fingerprint)
         if cached is not None:
             return cached
 
     projects, warnings = _build_workspace_projects_uncached(
-        workspace_path, rules, workspace_entries, nocache=nocache,
+        workspace_path, rules, orch,
     )
     if not nocache_enabled(request_nocache=nocache):
-        set_cached_projects(fingerprint, projects, warnings)
+        set_cached_projects(orch.fingerprint, projects, warnings)
     return projects, warnings
 
 
 def _build_workspace_projects_uncached(
     workspace_path: str,
     rules: list[Any],
-    workspace_entries: list[dict[str, Any]],
-    *,
-    nocache: bool,
+    orch: WorkspaceOrchestration,
 ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
     parse_warnings = ParseWarningCollector()
-    ctx = resolve_workspace_context_cached(
-        workspace_path,
-        rules,
-        workspace_entries=workspace_entries,
-        nocache=nocache,
-    )
+    ctx = orch.ctx
+    workspace_entries = orch.workspace_entries
     invalid_workspace_ids = ctx.invalid_workspace_ids
     project_name_map = ctx.project_name_to_workspace_id
     workspace_path_map = ctx.workspace_path_to_id

From d75620d60cbae99145edadf607c75edeb06c1ffd Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Tue, 23 Jun 2026 09:18:43 +0800
Subject: [PATCH 2/6] Harden export orchestration from review feedback

Use to_epoch_ms for lastExportTime parsing, validate composerData shape,
serialize CLI tool-call fields safely, and pass effective nocache flag
through workspace listing orchestration.
---
 scripts/export.py             |  4 +---
 services/export_engine.py     | 25 +++++++++++++++++++------
 services/workspace_listing.py |  9 ++++++---
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/scripts/export.py b/scripts/export.py
index 7f8abd1..f818bba 100644
--- a/scripts/export.py
+++ b/scripts/export.py
@@ -176,9 +176,7 @@ def _read_last_export_ms(state_path: str, since: Literal["all", "last"]) -> int:
             st = json.load(f)
         ts = st.get("lastExportTime")
         if ts:
-            return int(
-                datetime.fromisoformat(ts.replace("Z", "+00:00")).timestamp() * 1000,
-            )
+            return to_epoch_ms(ts)
     except (json.JSONDecodeError, ValueError, OSError) as e:
         _logger.warning(
             "Could not read last export timestamp; defaulting to full export: %s",
diff --git a/services/export_engine.py b/services/export_engine.py
index ca7c086..30eb76e 100644
--- a/services/export_engine.py
+++ b/services/export_engine.py
@@ -88,7 +88,7 @@ def json_dump_safe(value: object) -> str:
     """Best-effort JSON serialization for exclusion matching."""
     try:
         return json.dumps(value, ensure_ascii=False, sort_keys=True)
-    except Exception:
+    except Exception:  # noqa: BLE001 — best-effort fallback when value is not JSON-serializable
         return str(value) if value is not None else ""
 
 
@@ -224,8 +224,16 @@ def _collect_ide_export_entries(
             )
             continue
 
+        if not isinstance(cd, dict):
+            _logger.debug(
+                "Skipping corrupt composerData row %s: expected object, got %s",
+                composer_id,
+                type(cd).__name__,
+            )
+            continue
+
         headers = cd.get("fullConversationHeadersOnly") or []
-        if not headers:
+        if not isinstance(headers, list) or not headers:
             continue
 
         updated_at = to_epoch_ms(cd.get("lastUpdatedAt"))
@@ -270,7 +278,12 @@ def _collect_ide_export_entries(
         bubble_texts: list[str] = []
         bubble_meta_parts: list[str] = []
         for h in headers:
-            b = db_data.bubble_map.get(h.get("bubbleId"))
+            if not isinstance(h, dict):
+                continue
+            bubble_id = h.get("bubbleId")
+            if not isinstance(bubble_id, str):
+                continue
+            b = db_data.bubble_map.get(bubble_id)
             if not b:
                 continue
             text = extract_text_from_bubble(b)
@@ -337,7 +350,7 @@ def _collect_cli_export_entries(
     exported: list[ExportEntry] = []
     try:
         cli_projects = list_cli_projects(get_cli_chats_path())
-    except Exception as e:
+    except Exception as e:  # noqa: BLE001 — log and skip CLI enumeration on any failure
         _logger.warning(
             "Could not enumerate CLI chats: %s (%s) — skipping",
             e,
@@ -375,7 +388,7 @@ def _collect_cli_export_entries(
             try:
                 messages = traverse_blobs(session["db_path"])
                 bubbles = messages_to_bubbles(messages, created_ms)
-            except Exception as e:
+            except Exception as e:  # noqa: BLE001 — log and skip session on read/parse failure
                 _logger.warning(
                     "Could not read CLI session %s: %s (%s)",
                     session_id,
@@ -403,7 +416,7 @@ def _collect_cli_export_entries(
 
             bubble_texts = [b["text"] for b in bubbles if b.get("text")]
             tool_call_texts = [
-                tc.get("input", "") or tc.get("summary", "")
+                json_dump_safe(tc.get("input", "") or tc.get("summary", ""))
                 for b in bubbles
                 for tc in (b.get("metadata") or {}).get("toolCalls") or []
             ]
diff --git a/services/workspace_listing.py b/services/workspace_listing.py
index b0b1c98..9cab01a 100644
--- a/services/workspace_listing.py
+++ b/services/workspace_listing.py
@@ -90,8 +90,11 @@ def list_workspace_projects(
         parse-error dicts (``type``, ``count``, ``detail``) from
         :meth:`models.ParseWarningCollector.to_api_list`; empty when no skips.
     """
-    orch = prepare_workspace_orchestration(workspace_path, rules, nocache=nocache)
-    if not nocache_enabled(request_nocache=nocache):
+    effective_nocache = nocache_enabled(request_nocache=nocache)
+    orch = prepare_workspace_orchestration(
+        workspace_path, rules, nocache=effective_nocache,
+    )
+    if not effective_nocache:
         cached = get_cached_projects(orch.fingerprint)
         if cached is not None:
             return cached
@@ -99,7 +102,7 @@ def list_workspace_projects(
     projects, warnings = _build_workspace_projects_uncached(
         workspace_path, rules, orch,
     )
-    if not nocache_enabled(request_nocache=nocache):
+    if not effective_nocache:
         set_cached_projects(orch.fingerprint, projects, warnings)
     return projects, warnings
 

From 45c36ac5f2bcfd56bc56ac32bda137b3accf4cb6 Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Tue, 23 Jun 2026 09:34:29 +0800
Subject: [PATCH 3/6] Address export consolidation review findings

---
 api/export_api.py                          | 168 +++++----------------
 scripts/export.py                          |   5 +-
 services/export_engine.py                  |  36 ++---
 tests/test_api_export.py                   |   2 +-
 tests/test_export_engine.py                |  94 ++++++++++++
 tests/test_workspace_path_thread_safety.py |   5 +
 utils/workspace_path.py                    |  17 ++-
 7 files changed, 170 insertions(+), 157 deletions(-)
 create mode 100644 tests/test_export_engine.py

diff --git a/api/export_api.py b/api/export_api.py
index ba4ab39..66e53a1 100644
--- a/api/export_api.py
+++ b/api/export_api.py
@@ -4,32 +4,24 @@
 GET  /api/export/state — returns last export time
 """
 
+from __future__ import annotations
+
 import io
 import json
 import logging
 import os
-import sqlite3
 import zipfile
 from datetime import datetime
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 
 from flask import Blueprint, Response, request
 
 from api.flask_config import exclusion_rules, json_response
-
-from utils.workspace_path import resolve_workspace_path
+from services.export_engine import collect_export_entries
+from services.workspace_db import global_storage_db_path
 from utils.path_helpers import to_epoch_ms
-from utils.text_extract import extract_text_from_bubble, slug
-from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules
-from utils.cursor_md_exporter import cursor_ide_chat_to_markdown
-from services.workspace_context import resolve_workspace_context_minimal
-from services.workspace_db import (
-    load_bubble_map,
-    load_code_block_diff_map,
-    open_global_db,
-)
-from services.workspace_resolver import lookup_workspace_display_name
+from utils.workspace_path import resolve_workspace_path
 
 bp = Blueprint("export_api", __name__)
 _logger = logging.getLogger(__name__)
@@ -75,6 +67,15 @@ def _save_export_state(count: int) -> None:
         json.dump(state, f, indent=2)
 
 
+def _read_last_export_ms(since: Literal["all", "last"]) -> int:
+    if since != "last":
+        return 0
+    ts = _get_export_state().get("lastExportTime")
+    if ts:
+        return to_epoch_ms(ts)
+    return 0
+
+
 @bp.route("/api/export/state")
 def get_export_state() -> Response:
     """Return the last export timestamp."""
@@ -93,126 +94,37 @@ def export_chats() -> tuple[Response, int] | Response:
     """
     try:
         body = request.get_json(silent=True) or {}
-        since = "last" if body.get("since") == "last" else "all"
+        since: Literal["all", "last"] = (
+            "last" if body.get("since") == "last" else "all"
+        )
 
         workspace_path = resolve_workspace_path()
-
-        # Determine last export timestamp for filtering
-        last_export_ms = 0
-        if since == "last":
-            state = _get_export_state()
-            ts_str = state.get("lastExportTime")
-            if ts_str:
-                last_export_ms = to_epoch_ms(ts_str)
-
-        # ── Workspace scanning via service layer ──────────────────────────────
-        ctx = resolve_workspace_context_minimal(workspace_path)
-        workspace_entries = ctx.workspace_entries
-        composer_id_to_ws = ctx.composer_id_to_workspace_id
-
-        # Build display-name and slug maps
-        ws_id_to_slug: dict[str, str] = {}
-        ws_id_to_display_name: dict[str, str] = {}
-        for e in workspace_entries:
-            display = lookup_workspace_display_name(workspace_path, e["name"])
-            if display != e["name"]:
-                ws_id_to_display_name[e["name"]] = display
-                ws_id_to_slug[e["name"]] = slug(display)
-
-        today = datetime.now().strftime("%Y-%m-%d")
-        exported: list[dict[str, Any]] = []
-        rules = exclusion_rules()
-
-        # ── Database reading via service layer ────────────────────────────────
-        with open_global_db(workspace_path) as (global_db, _):
-            if global_db is None:
-                return json_response({"error": "Cursor global storage not found"}, 404)
-            bubble_map = load_bubble_map(global_db)
-            code_block_diff_map = load_code_block_diff_map(global_db)
-
-            try:
-                composer_rows = global_db.execute(
-                    "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'"
-                    " AND value LIKE '%fullConversationHeadersOnly%'"
-                    " AND value NOT LIKE '%fullConversationHeadersOnly\":[]%'"
-                ).fetchall()
-            except sqlite3.Error:
-                composer_rows = []
-
-            for row in composer_rows:
-                composer_id = row["key"].split(":")[1]
-                try:
-                    cd = json.loads(row["value"])
-                    headers = cd.get("fullConversationHeadersOnly") or []
-                    if not headers:
-                        continue
-
-                    updated_at_ms = to_epoch_ms(cd.get("lastUpdatedAt"))
-                    if updated_at_ms is None:
-                        updated_at_ms = to_epoch_ms(cd.get("createdAt"))
-                    if updated_at_ms is None:
-                        updated_at_ms = 0
-                    if since == "last" and updated_at_ms and updated_at_ms <= last_export_ms:
-                        continue
-
-                    ws_id = composer_id_to_ws.get(composer_id, "global")
-                    ws_slug = "other-chats" if ws_id == "global" else (ws_id_to_slug.get(ws_id) or slug(ws_id[:12]))
-                    ws_display_name = "Other chats" if ws_id == "global" else (ws_id_to_display_name.get(ws_id) or ws_slug)
-                    title = cd.get("name") or f"Chat {composer_id[:8]}"
-                    model_config = cd.get("modelConfig") or {}
-                    model_name = model_config.get("modelName")
-                    model_names = [model_name] if model_name and model_name != "default" else None
-
-                    bubble_texts = []
-                    for h in headers:
-                        b = bubble_map.get(h.get("bubbleId"))
-                        if b:
-                            bt = extract_text_from_bubble(b)
-                            if bt:
-                                bubble_texts.append(bt)
-
-                    searchable = build_searchable_text(
-                        project_name=ws_display_name,
-                        chat_title=title,
-                        model_names=model_names,
-                        chat_content_snippet="\n\n".join(bubble_texts) if bubble_texts else None,
-                    )
-                    if is_excluded_by_rules(rules, searchable):
-                        continue
-
-                    title_slug = slug(title)
-                    ts_ms = updated_at_ms or int(datetime.now().timestamp() * 1000)
-                    ts_str = datetime.fromtimestamp(ts_ms / 1000).strftime("%Y-%m-%dT%H-%M-%S")
-                    filename = f"{ts_str}__{title_slug}__{composer_id[:8]}.md"
-                    rel_path = os.path.join(today, ws_slug, "chat", filename)
-
-                    md = cursor_ide_chat_to_markdown(
-                        composer_data=cd,
-                        composer_id=composer_id,
-                        bubble_map=bubble_map,
-                        code_block_diff_map=code_block_diff_map,
-                        workspace_info={"ws_slug": ws_slug, "ws_display_name": ws_display_name},
-                    )
-                    exported.append({"path": rel_path, "content": md, "updatedAt": updated_at_ms})
-
-                except Exception as e:
-                    _logger.error(
-                        "Error processing composer %s for export: %s (%s)",
-                        composer_id,
-                        e,
-                        type(e).__name__,
-                        exc_info=True,
-                    )
-
+        gdb = global_storage_db_path(workspace_path)
+        if not os.path.isfile(gdb):
+            return json_response({"error": "Cursor global storage not found"}, 404)
+
+        exported = collect_export_entries(
+            workspace_path=workspace_path,
+            exclusion_rules=exclusion_rules(),
+            since=since,
+            last_export_ms=_read_last_export_ms(since),
+            out_dir="",
+            include_composer=True,
+            include_cli=False,
+        )
         count = len(exported)
         if count == 0:
-            return json_response({"error": "No conversations to export" + (
-                " since last export" if since == "last" else ""
-            )}, 404)
+            return json_response(
+                {"error": "No conversations to export" + (
+                    " since last export" if since == "last" else ""
+                )},
+                404,
+            )
+
         buf = io.BytesIO()
         with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
             for entry in exported:
-                zf.writestr(entry["path"], entry["content"])
+                zf.writestr(entry["rel_path"], entry["content"])
 
         buf.seek(0)
         _save_export_state(count)
@@ -234,4 +146,4 @@ def export_chats() -> tuple[Response, int] | Response:
             type(e).__name__,
             exc_info=True,
         )
-        return json_response({"error": "Export failed"}, 500)
\ No newline at end of file
+        return json_response({"error": "Export failed"}, 500)
diff --git a/scripts/export.py b/scripts/export.py
index f818bba..9dfcffa 100644
--- a/scripts/export.py
+++ b/scripts/export.py
@@ -194,10 +194,7 @@ def main() -> None:
     exclusion_rules = load_rules(
         resolve_exclusion_rules_path(opts.get("exclusion_rules_path")),
     )
-    base_dir = opts.get("base_dir")
-    if base_dir:
-        os.environ["WORKSPACE_PATH"] = base_dir
-    workspace_path = resolve_workspace_path()
+    workspace_path = resolve_workspace_path(override=opts.get("base_dir"))
 
     state_dir = get_global_state_dir()
     state_path = os.path.join(state_dir, "export_state.json")
diff --git a/services/export_engine.py b/services/export_engine.py
index 30eb76e..89a7c9d 100644
--- a/services/export_engine.py
+++ b/services/export_engine.py
@@ -11,7 +11,7 @@
 from typing import Any, Literal, TypedDict
 
 from models import Bubble
-from services.summary_cache import fingerprint_workspace_storage
+from services.summary_cache import fingerprint_workspace_storage, nocache_enabled
 from services.workspace_context import (
     WorkspaceContext,
     enrich_workspace_context_from_global_db,
@@ -236,11 +236,9 @@ def _collect_ide_export_entries(
         if not isinstance(headers, list) or not headers:
             continue
 
-        updated_at = to_epoch_ms(cd.get("lastUpdatedAt"))
-        if updated_at is None:
-            updated_at = to_epoch_ms(cd.get("createdAt"))
-        if updated_at is None:
-            updated_at = 0
+        updated_at = to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(
+            cd.get("createdAt"),
+        )
         if since == "last" and updated_at <= last_export_ms:
             continue
 
@@ -271,7 +269,8 @@ def _collect_ide_export_entries(
             else (orch.workspace_id_to_display_name.get(ws_id) or ws_slug)
         )
         title = cd.get("name") or f"Chat {composer_id[:8]}"
-        model_config = cd.get("modelConfig") or {}
+        raw_model_config = cd.get("modelConfig")
+        model_config = raw_model_config if isinstance(raw_model_config, dict) else {}
         model_name = model_config.get("modelName")
         model_names = [model_name] if model_name and model_name != "default" else None
 
@@ -468,11 +467,13 @@ def collect_export_entries(
     last_export_ms: int,
     out_dir: str,
     include_composer: bool = True,
+    include_cli: bool = True,
     nocache: bool = False,
 ) -> list[ExportEntry]:
     """Collect exportable conversations (IDE + CLI) via shared orchestration."""
+    effective_nocache = nocache_enabled(request_nocache=nocache)
     orch = prepare_workspace_orchestration(
-        workspace_path, exclusion_rules, nocache=nocache,
+        workspace_path, exclusion_rules, nocache=effective_nocache,
     )
     today = datetime.now().strftime("%Y-%m-%d")
     exported: list[ExportEntry] = []
@@ -492,13 +493,14 @@ def collect_export_entries(
                 ),
             )
 
-    exported.extend(
-        _collect_cli_export_entries(
-            exclusion_rules=exclusion_rules,
-            since=since,
-            last_export_ms=last_export_ms,
-            today=today,
-            out_dir=out_dir,
-        ),
-    )
+    if include_cli:
+        exported.extend(
+            _collect_cli_export_entries(
+                exclusion_rules=exclusion_rules,
+                since=since,
+                last_export_ms=last_export_ms,
+                today=today,
+                out_dir=out_dir,
+            ),
+        )
     return exported
diff --git a/tests/test_api_export.py b/tests/test_api_export.py
index 1e46c31..68d89ae 100644
--- a/tests/test_api_export.py
+++ b/tests/test_api_export.py
@@ -115,7 +115,7 @@ def test_no_conversations_returns_404(self, workspace_storage, export_state_dir)
 
     def test_internal_failure_returns_500(self, client, export_state_dir):
         with patch(
-            "api.export_api.resolve_workspace_context_minimal",
+            "api.export_api.collect_export_entries",
             side_effect=RuntimeError("simulated export failure"),
         ):
             response = _post_export(client)
diff --git a/tests/test_export_engine.py b/tests/test_export_engine.py
new file mode 100644
index 0000000..d88540e
--- /dev/null
+++ b/tests/test_export_engine.py
@@ -0,0 +1,94 @@
+"""Unit tests for services.export_engine orchestration."""
+
+from __future__ import annotations
+
+import os
+import sys
+import unittest
+from unittest.mock import MagicMock, patch
+
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if REPO_ROOT not in sys.path:
+    sys.path.insert(0, REPO_ROOT)
+
+from services.export_engine import (  # noqa: E402
+    GlobalDbExportData,
+    WorkspaceOrchestration,
+    collect_export_entries,
+)
+
+
+class TestCollectExportEntriesNocache(unittest.TestCase):
+    def test_nocache_env_passed_to_prepare_workspace_orchestration(self):
+        with patch.dict(os.environ, {"CURSOR_CHAT_BROWSER_NOCACHE": "1"}):
+            with patch(
+                "services.export_engine.prepare_workspace_orchestration",
+            ) as mock_prepare:
+                mock_prepare.return_value = MagicMock(spec=WorkspaceOrchestration)
+                with patch(
+                    "services.export_engine.load_global_db_export_data",
+                    return_value=None,
+                ):
+                    collect_export_entries(
+                        workspace_path="/tmp/ws",
+                        exclusion_rules=[],
+                        since="all",
+                        last_export_ms=0,
+                        out_dir="/tmp/out",
+                        include_composer=False,
+                        include_cli=False,
+                    )
+        mock_prepare.assert_called_once()
+        self.assertTrue(mock_prepare.call_args.kwargs["nocache"])
+
+
+class TestCollectExportEntriesCorruptComposer(unittest.TestCase):
+    def test_non_dict_composer_row_is_skipped(self):
+        ctx = MagicMock()
+        ctx.project_name_to_workspace_id = {}
+        ctx.workspace_path_to_id = {}
+        ctx.composer_id_to_workspace_id = {}
+        ctx.invalid_workspace_ids = set()
+        orch = WorkspaceOrchestration(
+            workspace_path="/tmp/ws",
+            workspace_entries=[],
+            fingerprint={},
+            ctx=ctx,
+            workspace_id_to_display_name={},
+            workspace_id_to_slug={},
+        )
+
+        class FakeRow:
+            def __getitem__(self, key: str) -> str:
+                if key == "key":
+                    return "composerData:bad-row"
+                return "[]"
+
+        db_data = GlobalDbExportData(
+            project_layouts_map={},
+            bubble_map={},
+            code_block_diff_map={},
+            ide_composer_rows=[FakeRow()],
+            invalid_workspace_aliases={},
+        )
+        with patch(
+            "services.export_engine.prepare_workspace_orchestration",
+            return_value=orch,
+        ):
+            with patch(
+                "services.export_engine.load_global_db_export_data",
+                return_value=db_data,
+            ):
+                exported = collect_export_entries(
+                    workspace_path="/tmp/ws",
+                    exclusion_rules=[],
+                    since="all",
+                    last_export_ms=0,
+                    out_dir="/tmp/out",
+                    include_cli=False,
+                )
+        self.assertEqual(exported, [])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_workspace_path_thread_safety.py b/tests/test_workspace_path_thread_safety.py
index 92d2bc9..88ff91c 100644
--- a/tests/test_workspace_path_thread_safety.py
+++ b/tests/test_workspace_path_thread_safety.py
@@ -112,6 +112,11 @@ def reader() -> None:
 
         self.assertEqual(errors, [], "\n".join(errors[:20]))
 
+    def test_explicit_override_takes_precedence_over_module_override(self):
+        set_workspace_path_override(self.path_a)
+        self.assertEqual(resolve_workspace_path(override=self.path_b), self.path_b)
+        self.assertEqual(resolve_workspace_path(), self.path_a)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/utils/workspace_path.py b/utils/workspace_path.py
index cc4e048..447b9e1 100644
--- a/utils/workspace_path.py
+++ b/utils/workspace_path.py
@@ -64,17 +64,20 @@ def get_default_workspace_path() -> str:
         return os.path.join(home, "workspaceStorage")
 
 
-def resolve_workspace_path() -> str:
-    """Return the effective workspace path (override > env var > default).
+def resolve_workspace_path(*, override: str | None = None) -> str:
+    """Return the effective workspace path (call override > module > env > default).
 
-    Override comes from POST /api/set-workspace (validated). ``WORKSPACE_PATH``
-    is only tilde-expanded — trusted-operator escape hatch, not the same checks
-    as the API (issue #15).
+    *override* is for one-shot callers (e.g. CLI ``--base-dir``) and does not
+    mutate ``WORKSPACE_PATH``. Module override comes from POST /api/set-workspace
+    (validated). ``WORKSPACE_PATH`` is only tilde-expanded — trusted-operator
+    escape hatch, not the same checks as the API (issue #15).
     """
-    with _workspace_path_lock:
-        override = _workspace_path_override
     if override:
         return expand_tilde_path(override)
+    with _workspace_path_lock:
+        module_override = _workspace_path_override
+    if module_override:
+        return expand_tilde_path(module_override)
     env_path = os.environ.get("WORKSPACE_PATH", "").strip()
     if env_path:
         return expand_tilde_path(env_path)

From d3b101f723347d8e0379cda49436d747cf704ab8 Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Tue, 23 Jun 2026 09:43:30 +0800
Subject: [PATCH 4/6] Validate export API JSON body and use tempfile paths in
 engine tests

---
 api/export_api.py           |  4 +++-
 tests/test_api_export.py    |  9 +++++++++
 tests/test_export_engine.py | 28 +++++++++++++++++++++-------
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/api/export_api.py b/api/export_api.py
index 66e53a1..a992cba 100644
--- a/api/export_api.py
+++ b/api/export_api.py
@@ -93,7 +93,9 @@ def export_chats() -> tuple[Response, int] | Response:
     exclusion rules file.
     """
     try:
-        body = request.get_json(silent=True) or {}
+        body = request.get_json(silent=True)
+        if not isinstance(body, dict):
+            return json_response({"error": "request body must be a JSON object"}, 400)
         since: Literal["all", "last"] = (
             "last" if body.get("since") == "last" else "all"
         )
diff --git a/tests/test_api_export.py b/tests/test_api_export.py
index 68d89ae..152f496 100644
--- a/tests/test_api_export.py
+++ b/tests/test_api_export.py
@@ -91,6 +91,15 @@ def test_post_returns_zip_with_markdown_entry(self, client, export_state_dir):
 
 
 class TestExportErrorResponses:
+    def test_non_dict_json_body_returns_400(self, client, export_state_dir):
+        response = client.post(
+            "/api/export",
+            json=["not", "an", "object"],
+            content_type="application/json",
+        )
+        assert response.status_code == 400
+        assert response.get_json().get("error") == "request body must be a JSON object"
+
     def test_missing_global_storage_returns_404(self, empty_workspace_client):
         response = _post_export(empty_workspace_client)
         assert response.status_code == 404
diff --git a/tests/test_export_engine.py b/tests/test_export_engine.py
index d88540e..84a4c2a 100644
--- a/tests/test_export_engine.py
+++ b/tests/test_export_engine.py
@@ -4,6 +4,7 @@
 
 import os
 import sys
+import tempfile
 import unittest
 from unittest.mock import MagicMock, patch
 
@@ -18,7 +19,17 @@
 )
 
 
-class TestCollectExportEntriesNocache(unittest.TestCase):
+class _TempExportPathsMixin:
+    def setUp(self):
+        self._tmp = tempfile.TemporaryDirectory()
+        self.addCleanup(self._tmp.cleanup)
+        self.tmp_ws = os.path.join(self._tmp.name, "ws")
+        self.tmp_out = os.path.join(self._tmp.name, "out")
+        os.makedirs(self.tmp_ws, exist_ok=True)
+        os.makedirs(self.tmp_out, exist_ok=True)
+
+
+class TestCollectExportEntriesNocache(_TempExportPathsMixin, unittest.TestCase):
     def test_nocache_env_passed_to_prepare_workspace_orchestration(self):
         with patch.dict(os.environ, {"CURSOR_CHAT_BROWSER_NOCACHE": "1"}):
             with patch(
@@ -30,11 +41,11 @@ def test_nocache_env_passed_to_prepare_workspace_orchestration(self):
                     return_value=None,
                 ):
                     collect_export_entries(
-                        workspace_path="/tmp/ws",
+                        workspace_path=self.tmp_ws,
                         exclusion_rules=[],
                         since="all",
                         last_export_ms=0,
-                        out_dir="/tmp/out",
+                        out_dir=self.tmp_out,
                         include_composer=False,
                         include_cli=False,
                     )
@@ -42,7 +53,10 @@ def test_nocache_env_passed_to_prepare_workspace_orchestration(self):
         self.assertTrue(mock_prepare.call_args.kwargs["nocache"])
 
 
-class TestCollectExportEntriesCorruptComposer(unittest.TestCase):
+class TestCollectExportEntriesCorruptComposer(
+    _TempExportPathsMixin,
+    unittest.TestCase,
+):
     def test_non_dict_composer_row_is_skipped(self):
         ctx = MagicMock()
         ctx.project_name_to_workspace_id = {}
@@ -50,7 +64,7 @@ def test_non_dict_composer_row_is_skipped(self):
         ctx.composer_id_to_workspace_id = {}
         ctx.invalid_workspace_ids = set()
         orch = WorkspaceOrchestration(
-            workspace_path="/tmp/ws",
+            workspace_path=self.tmp_ws,
             workspace_entries=[],
             fingerprint={},
             ctx=ctx,
@@ -80,11 +94,11 @@ def __getitem__(self, key: str) -> str:
                 return_value=db_data,
             ):
                 exported = collect_export_entries(
-                    workspace_path="/tmp/ws",
+                    workspace_path=self.tmp_ws,
                     exclusion_rules=[],
                     since="all",
                     last_export_ms=0,
-                    out_dir="/tmp/out",
+                    out_dir=self.tmp_out,
                     include_cli=False,
                 )
         self.assertEqual(exported, [])

From 38e548d8b89ec63d1d61f543af754d5d7759b5d6 Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Wed, 24 Jun 2026 01:31:31 +0800
Subject: [PATCH 5/6] Address PR #112 review: CollectedExportEntry, shared
 read_last_export_ms, engine tests

---
 api/export_api.py           |  14 +--
 models/__init__.py          |   3 +-
 models/export.py            |  14 ++-
 scripts/export.py           |  22 +----
 services/export_engine.py   |  57 ++++++++----
 tests/test_export_engine.py | 173 +++++++++++++++++++++++++++++++-----
 6 files changed, 209 insertions(+), 74 deletions(-)

diff --git a/api/export_api.py b/api/export_api.py
index a992cba..a2bed91 100644
--- a/api/export_api.py
+++ b/api/export_api.py
@@ -18,9 +18,8 @@
 from flask import Blueprint, Response, request
 
 from api.flask_config import exclusion_rules, json_response
-from services.export_engine import collect_export_entries
+from services.export_engine import collect_export_entries, read_last_export_ms
 from services.workspace_db import global_storage_db_path
-from utils.path_helpers import to_epoch_ms
 from utils.workspace_path import resolve_workspace_path
 
 bp = Blueprint("export_api", __name__)
@@ -67,15 +66,6 @@ def _save_export_state(count: int) -> None:
         json.dump(state, f, indent=2)
 
 
-def _read_last_export_ms(since: Literal["all", "last"]) -> int:
-    if since != "last":
-        return 0
-    ts = _get_export_state().get("lastExportTime")
-    if ts:
-        return to_epoch_ms(ts)
-    return 0
-
-
 @bp.route("/api/export/state")
 def get_export_state() -> Response:
     """Return the last export timestamp."""
@@ -109,7 +99,7 @@ def export_chats() -> tuple[Response, int] | Response:
             workspace_path=workspace_path,
             exclusion_rules=exclusion_rules(),
             since=since,
-            last_export_ms=_read_last_export_ms(since),
+            last_export_ms=read_last_export_ms(since, state=_get_export_state()),
             out_dir="",
             include_composer=True,
             include_cli=False,
diff --git a/models/__init__.py b/models/__init__.py
index 4657ff6..a222bc3 100644
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -3,7 +3,7 @@
 from models.conversation import Bubble, Composer, Conversation, WorkspaceLocalComposer
 from models.errors import SchemaError
 from models.parse_warnings import ParseWarningCollector
-from models.export import ExportEntry
+from models.export import CollectedExportEntry, ExportEntry
 from models.search import ConversationSummary, SearchResult
 from models.workspace import Workspace
 
@@ -16,6 +16,7 @@
     "Composer",
     "Conversation",
     "ConversationSummary",
+    "CollectedExportEntry",
     "ExportEntry",
     "ParseWarningCollector",
     "SchemaError",
diff --git a/models/export.py b/models/export.py
index ff08bc9..50f7421 100644
--- a/models/export.py
+++ b/models/export.py
@@ -1,11 +1,23 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
-from typing import Any
+from typing import Any, TypedDict
 
 from models.from_dict_validation import require_dict, require_non_empty_str_fields
 
 
+class CollectedExportEntry(TypedDict):
+    """One exportable conversation with rendered markdown (engine/CLI collection)."""
+
+    id: str
+    rel_path: str
+    content: str
+    out_path: str
+    updatedAt: int
+    title: str
+    workspace: str
+
+
 @dataclass(frozen=True)
 class ExportEntry:
     """One line of manifest.jsonl; log_id / title / workspace required, timestamps optional."""
diff --git a/scripts/export.py b/scripts/export.py
index 9dfcffa..8274dee 100644
--- a/scripts/export.py
+++ b/scripts/export.py
@@ -31,12 +31,11 @@
         sys.path.insert(0, str(_project_root))
 
 from models import ExportEntry, SchemaError  # noqa: E402
-from services.export_engine import collect_export_entries  # noqa: E402
+from services.export_engine import collect_export_entries, read_last_export_ms  # noqa: E402
 from utils.exclusion_rules import (  # noqa: E402
     load_rules,
     resolve_exclusion_rules_path,
 )
-from utils.path_helpers import to_epoch_ms  # noqa: E402
 from utils.workspace_path import resolve_workspace_path  # noqa: E402
 
 _logger = logging.getLogger(__name__)
@@ -168,23 +167,6 @@ def parse_args() -> ExportCliOptions:
     }
 
 
-def _read_last_export_ms(state_path: str, since: Literal["all", "last"]) -> int:
-    if since != "last" or not os.path.isfile(state_path):
-        return 0
-    try:
-        with open(state_path, "r", encoding="utf-8") as f:
-            st = json.load(f)
-        ts = st.get("lastExportTime")
-        if ts:
-            return to_epoch_ms(ts)
-    except (json.JSONDecodeError, ValueError, OSError) as e:
-        _logger.warning(
-            "Could not read last export timestamp; defaulting to full export: %s",
-            e,
-        )
-    return 0
-
-
 def main() -> None:
     configure_cli_logging()
     opts = parse_args()
@@ -198,7 +180,7 @@ def main() -> None:
 
     state_dir = get_global_state_dir()
     state_path = os.path.join(state_dir, "export_state.json")
-    last_export = _read_last_export_ms(state_path, since)
+    last_export = read_last_export_ms(since, state_path=state_path)
 
     exported = collect_export_entries(
         workspace_path=workspace_path,
diff --git a/services/export_engine.py b/services/export_engine.py
index 89a7c9d..51a62d7 100644
--- a/services/export_engine.py
+++ b/services/export_engine.py
@@ -8,9 +8,10 @@
 import sqlite3
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Any, Literal, TypedDict
+from typing import Any, Literal
 
 from models import Bubble
+from models.export import CollectedExportEntry
 from services.summary_cache import fingerprint_workspace_storage, nocache_enabled
 from services.workspace_context import (
     WorkspaceContext,
@@ -49,16 +50,32 @@
 SinceMode = Literal["all", "last"]
 
 
-class ExportEntry(TypedDict):
-    """One exportable conversation with rendered markdown."""
-
-    id: str
-    rel_path: str
-    content: str
-    out_path: str
-    updatedAt: int
-    title: str
-    workspace: str
+def read_last_export_ms(
+    since: SinceMode,
+    *,
+    state_path: str | None = None,
+    state: dict[str, Any] | None = None,
+) -> int:
+    """Return last-export epoch ms for ``since=last``; 0 for a full export."""
+    if since != "last":
+        return 0
+    ts: Any = None
+    if state is not None:
+        ts = state.get("lastExportTime")
+    elif state_path is not None and os.path.isfile(state_path):
+        try:
+            with open(state_path, "r", encoding="utf-8") as f:
+                st = json.load(f)
+            if isinstance(st, dict):
+                ts = st.get("lastExportTime")
+        except (json.JSONDecodeError, ValueError, OSError) as e:
+            _logger.warning(
+                "Could not read last export timestamp; defaulting to full export: %s",
+                e,
+            )
+    if ts:
+        return to_epoch_ms(ts)
+    return 0
 
 
 @dataclass(frozen=True)
@@ -209,9 +226,9 @@ def _collect_ide_export_entries(
     last_export_ms: int,
     today: str,
     out_dir: str,
-) -> list[ExportEntry]:
+) -> list[CollectedExportEntry]:
     ctx = orch.ctx
-    exported: list[ExportEntry] = []
+    exported: list[CollectedExportEntry] = []
     for row in db_data.ide_composer_rows:
         composer_id = row["key"].split(":")[1]
         try:
@@ -239,6 +256,8 @@ def _collect_ide_export_entries(
         updated_at = to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(
             cd.get("createdAt"),
         )
+        # Intentional behavior change vs legacy CLI: fall back to createdAt when
+        # lastUpdatedAt is absent (affects timestamps, filenames, and --since last).
         if since == "last" and updated_at <= last_export_ms:
             continue
 
@@ -325,7 +344,7 @@ def _collect_ide_export_entries(
             workspace_info={"ws_slug": ws_slug, "ws_display_name": ws_display_name},
         )
 
-        rel_path = os.path.join(today, ws_slug, "chat", filename)
+        rel_path = os.path.relpath(out_path, out_dir)
         exported.append({
             "id": composer_id,
             "rel_path": rel_path,
@@ -345,8 +364,8 @@ def _collect_cli_export_entries(
     last_export_ms: int,
     today: str,
     out_dir: str,
-) -> list[ExportEntry]:
-    exported: list[ExportEntry] = []
+) -> list[CollectedExportEntry]:
+    exported: list[CollectedExportEntry] = []
     try:
         cli_projects = list_cli_projects(get_cli_chats_path())
     except Exception as e:  # noqa: BLE001 — log and skip CLI enumeration on any failure
@@ -446,7 +465,7 @@ def _collect_cli_export_entries(
                 bubbles=bubbles,
                 title_override=title,
             )
-            rel_path = os.path.join(today, ws_slug_cli, "cli", filename)
+            rel_path = os.path.relpath(out_path, out_dir)
             exported.append({
                 "id": session_id,
                 "rel_path": rel_path,
@@ -469,14 +488,14 @@ def collect_export_entries(
     include_composer: bool = True,
     include_cli: bool = True,
     nocache: bool = False,
-) -> list[ExportEntry]:
+) -> list[CollectedExportEntry]:
     """Collect exportable conversations (IDE + CLI) via shared orchestration."""
     effective_nocache = nocache_enabled(request_nocache=nocache)
     orch = prepare_workspace_orchestration(
         workspace_path, exclusion_rules, nocache=effective_nocache,
     )
     today = datetime.now().strftime("%Y-%m-%d")
-    exported: list[ExportEntry] = []
+    exported: list[CollectedExportEntry] = []
 
     if include_composer:
         db_data = load_global_db_export_data(orch)
diff --git a/tests/test_export_engine.py b/tests/test_export_engine.py
index 84a4c2a..2139826 100644
--- a/tests/test_export_engine.py
+++ b/tests/test_export_engine.py
@@ -2,21 +2,28 @@
 
 from __future__ import annotations
 
+import json
 import os
 import sys
 import tempfile
 import unittest
+from datetime import datetime
 from unittest.mock import MagicMock, patch
 
 REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 if REPO_ROOT not in sys.path:
     sys.path.insert(0, REPO_ROOT)
 
+from models import Bubble  # noqa: E402
 from services.export_engine import (  # noqa: E402
     GlobalDbExportData,
     WorkspaceOrchestration,
+    _collect_ide_export_entries,
     collect_export_entries,
+    read_last_export_ms,
 )
+from utils.exclusion_rules import load_rules  # noqa: E402
+from utils.text_extract import slug  # noqa: E402
 
 
 class _TempExportPathsMixin:
@@ -29,6 +36,53 @@ def setUp(self):
         os.makedirs(self.tmp_out, exist_ok=True)
 
 
+def _fake_composer_row(composer_id: str, cd: dict[str, object]) -> object:
+    class FakeRow:
+        def __getitem__(self, key: str) -> str:
+            if key == "key":
+                return f"composerData:{composer_id}"
+            return json.dumps(cd)
+
+    return FakeRow()
+
+
+def _minimal_ctx() -> MagicMock:
+    ctx = MagicMock()
+    ctx.project_name_to_workspace_id = {}
+    ctx.workspace_path_to_id = {}
+    ctx.composer_id_to_workspace_id = {}
+    ctx.invalid_workspace_ids = set()
+    return ctx
+
+
+def _minimal_orch(
+    tmp_ws: str,
+    *,
+    display_name: dict[str, str] | None = None,
+    slug_map: dict[str, str] | None = None,
+) -> WorkspaceOrchestration:
+    return WorkspaceOrchestration(
+        workspace_path=tmp_ws,
+        workspace_entries=[],
+        fingerprint={},
+        ctx=_minimal_ctx(),
+        workspace_id_to_display_name=display_name or {},
+        workspace_id_to_slug=slug_map or {},
+    )
+
+
+class TestReadLastExportMs(unittest.TestCase):
+    def test_since_all_returns_zero(self):
+        self.assertEqual(read_last_export_ms("all", state={"lastExportTime": "2026-01-01"}), 0)
+
+    def test_since_last_reads_state_dict(self):
+        ms = read_last_export_ms(
+            "last",
+            state={"lastExportTime": "2026-01-01T12:00:00"},
+        )
+        self.assertGreater(ms, 0)
+
+
 class TestCollectExportEntriesNocache(_TempExportPathsMixin, unittest.TestCase):
     def test_nocache_env_passed_to_prepare_workspace_orchestration(self):
         with patch.dict(os.environ, {"CURSOR_CHAT_BROWSER_NOCACHE": "1"}):
@@ -58,31 +112,12 @@ class TestCollectExportEntriesCorruptComposer(
     unittest.TestCase,
 ):
     def test_non_dict_composer_row_is_skipped(self):
-        ctx = MagicMock()
-        ctx.project_name_to_workspace_id = {}
-        ctx.workspace_path_to_id = {}
-        ctx.composer_id_to_workspace_id = {}
-        ctx.invalid_workspace_ids = set()
-        orch = WorkspaceOrchestration(
-            workspace_path=self.tmp_ws,
-            workspace_entries=[],
-            fingerprint={},
-            ctx=ctx,
-            workspace_id_to_display_name={},
-            workspace_id_to_slug={},
-        )
-
-        class FakeRow:
-            def __getitem__(self, key: str) -> str:
-                if key == "key":
-                    return "composerData:bad-row"
-                return "[]"
-
+        orch = _minimal_orch(self.tmp_ws)
         db_data = GlobalDbExportData(
             project_layouts_map={},
             bubble_map={},
             code_block_diff_map={},
-            ide_composer_rows=[FakeRow()],
+            ide_composer_rows=[_fake_composer_row("bad-row", [])],  # type: ignore[arg-type]
             invalid_workspace_aliases={},
         )
         with patch(
@@ -104,5 +139,101 @@ def __getitem__(self, key: str) -> str:
         self.assertEqual(exported, [])
 
 
+class TestCollectIdeExportEntries(_TempExportPathsMixin, unittest.TestCase):
+    def _collect(
+        self,
+        cd: dict[str, object],
+        *,
+        composer_id: str = "cmp-1",
+        exclusion_rules: list | None = None,
+        orch: WorkspaceOrchestration | None = None,
+        project_id: str = "ws-unknown-abcdefghijklmnop",
+    ) -> list:
+        bubble_id = "bubble-1"
+        bubble_map = {
+            bubble_id: Bubble.from_dict(
+                {"type": "user", "text": "Hello from the test bubble."},
+                bubble_id=bubble_id,
+            ),
+        }
+        db_data = GlobalDbExportData(
+            project_layouts_map={},
+            bubble_map=bubble_map,
+            code_block_diff_map={},
+            ide_composer_rows=[_fake_composer_row(composer_id, cd)],
+            invalid_workspace_aliases={},
+        )
+        orch = orch or _minimal_orch(self.tmp_ws)
+        with patch(
+            "services.export_engine.determine_project_for_conversation",
+            return_value=project_id,
+        ):
+            with patch(
+                "services.export_engine.cursor_ide_chat_to_markdown",
+                return_value="# exported markdown",
+            ):
+                return _collect_ide_export_entries(
+                    orch=orch,
+                    db_data=db_data,
+                    exclusion_rules=exclusion_rules or [],
+                    since="all",
+                    last_export_ms=0,
+                    today="2026-06-22",
+                    out_dir=self.tmp_out,
+                )
+
+    def test_created_at_fallback_when_last_updated_missing(self):
+        created_ms = 1739200000000
+        exported = self._collect({
+            "name": "Created-only chat",
+            "modelConfig": {},
+            "fullConversationHeadersOnly": [{"bubbleId": "bubble-1", "type": 1}],
+            "createdAt": created_ms,
+        })
+        self.assertEqual(len(exported), 1)
+        entry = exported[0]
+        self.assertEqual(entry["updatedAt"], created_ms)
+        ts_str = datetime.fromtimestamp(created_ms / 1000).strftime("%Y-%m-%dT%H-%M-%S")
+        self.assertIn(ts_str, entry["rel_path"])
+        self.assertEqual(
+            entry["rel_path"],
+            os.path.relpath(entry["out_path"], self.tmp_out),
+        )
+
+    def test_display_name_falls_back_to_slug_of_workspace_id_prefix(self):
+        ws_id = "abcdefghijklmnop"
+        exported = self._collect(
+            {
+                "name": "Workspace fallback chat",
+                "modelConfig": {},
+                "fullConversationHeadersOnly": [{"bubbleId": "bubble-1", "type": 1}],
+                "lastUpdatedAt": 1739300000000,
+            },
+            project_id=ws_id,
+            orch=_minimal_orch(self.tmp_ws),
+        )
+        self.assertEqual(len(exported), 1)
+        expected_display = slug(ws_id[:12])
+        self.assertEqual(exported[0]["workspace"], expected_display)
+        self.assertIn(expected_display, exported[0]["rel_path"])
+
+    def test_exclusion_rules_filter_ide_entry(self):
+        rules_path = os.path.join(self._tmp.name, "rules.txt")
+        with open(rules_path, "w", encoding="utf-8") as f:
+            f.write("roadmap\n")
+        rules = load_rules(rules_path)
+
+        exported = self._collect(
+            {
+                "name": "Roadmap planning",
+                "modelConfig": {},
+                "fullConversationHeadersOnly": [{"bubbleId": "bubble-1", "type": 1}],
+                "lastUpdatedAt": 1739300000000,
+            },
+            exclusion_rules=rules,
+        )
+        self.assertEqual(exported, [])
+
+
 if __name__ == "__main__":
     unittest.main()

From 476bd5da681758b224c5713436419205146f7ad0 Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Wed, 24 Jun 2026 02:23:11 +0800
Subject: [PATCH 6/6] Restore legacy lastUpdatedAt-only behavior; mkdir
 state_dir early

---
 scripts/export.py           |  2 +-
 services/export_engine.py   |  6 +-----
 tests/test_export_engine.py | 26 +++++++++++++++-----------
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/scripts/export.py b/scripts/export.py
index 8274dee..2c820ba 100644
--- a/scripts/export.py
+++ b/scripts/export.py
@@ -179,6 +179,7 @@ def main() -> None:
     workspace_path = resolve_workspace_path(override=opts.get("base_dir"))
 
     state_dir = get_global_state_dir()
+    os.makedirs(state_dir, exist_ok=True)
     state_path = os.path.join(state_dir, "export_state.json")
     last_export = read_last_export_ms(since, state_path=state_path)
 
@@ -253,7 +254,6 @@ def main() -> None:
         "exportedCount": count,
         "exportDir": out_dir,
     }
-    os.makedirs(state_dir, exist_ok=True)
     with open(os.path.join(state_dir, "export_state.json"), "w", encoding="utf-8") as f:
         json.dump(state, f, indent=2)
 
diff --git a/services/export_engine.py b/services/export_engine.py
index 51a62d7..ea05fa6 100644
--- a/services/export_engine.py
+++ b/services/export_engine.py
@@ -253,11 +253,7 @@ def _collect_ide_export_entries(
         if not isinstance(headers, list) or not headers:
             continue
 
-        updated_at = to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(
-            cd.get("createdAt"),
-        )
-        # Intentional behavior change vs legacy CLI: fall back to createdAt when
-        # lastUpdatedAt is absent (affects timestamps, filenames, and --since last).
+        updated_at = to_epoch_ms(cd.get("lastUpdatedAt"))
         if since == "last" and updated_at <= last_export_ms:
             continue
 
diff --git a/tests/test_export_engine.py b/tests/test_export_engine.py
index 2139826..0257e17 100644
--- a/tests/test_export_engine.py
+++ b/tests/test_export_engine.py
@@ -182,22 +182,26 @@ def _collect(
                     out_dir=self.tmp_out,
                 )
 
-    def test_created_at_fallback_when_last_updated_missing(self):
+    def test_last_updated_at_only_no_created_at_fallback(self):
         created_ms = 1739200000000
-        exported = self._collect({
-            "name": "Created-only chat",
-            "modelConfig": {},
-            "fullConversationHeadersOnly": [{"bubbleId": "bubble-1", "type": 1}],
-            "createdAt": created_ms,
-        })
+        fixed_now = datetime(2026, 6, 22, 12, 0, 0)
+        with patch("services.export_engine.datetime") as mock_dt:
+            mock_dt.now.return_value = fixed_now
+            mock_dt.fromtimestamp = datetime.fromtimestamp
+            exported = self._collect({
+                "name": "Created-only chat",
+                "modelConfig": {},
+                "fullConversationHeadersOnly": [{"bubbleId": "bubble-1", "type": 1}],
+                "createdAt": created_ms,
+            })
         self.assertEqual(len(exported), 1)
         entry = exported[0]
-        self.assertEqual(entry["updatedAt"], created_ms)
-        ts_str = datetime.fromtimestamp(created_ms / 1000).strftime("%Y-%m-%dT%H-%M-%S")
+        self.assertEqual(entry["updatedAt"], 0)
+        ts_str = fixed_now.strftime("%Y-%m-%dT%H-%M-%S")
         self.assertIn(ts_str, entry["rel_path"])
-        self.assertEqual(
+        self.assertNotIn(
+            datetime.fromtimestamp(created_ms / 1000).strftime("%Y-%m-%dT%H-%M-%S"),
             entry["rel_path"],
-            os.path.relpath(entry["out_path"], self.tmp_out),
         )
 
     def test_display_name_falls_back_to_slug_of_workspace_id_prefix(self):