diff --git a/plans/009-cross-runtime-analysis.md b/plans/009-cross-runtime-analysis.md
new file mode 100644
index 0000000..6c3f5ed
--- /dev/null
+++ b/plans/009-cross-runtime-analysis.md
@@ -0,0 +1,61 @@
+# ADR-009: Cross-Runtime Analysis — Python, Rust CLI, and Web UI Parity
+
+## Status
+
+Accepted
+
+## Context
+
+The project maintains three runtimes: Python (primary development, scripts/),
+Rust CLI (high-performance binary, cli/), and Web UI (Next.js, web/). Features
+developed in one runtime often lag behind the others, creating a maintenance
+burden and inconsistent user experience.
+
+## Decision
+
+Track parity gaps explicitly in a living document (this ADR). Close gaps based
+on user impact and maintenance cost:
+
+1. **Rate throttling** → Implement in Rust first (harder to retrofit), port to
+   Python, keep in aiohttp for Web.
+2. **Quality confidence gate** → Implement in all three simultaneously (shared
+   constants).
+3. **Routing features** → Python first (fastest iteration), port to Rust + Web
+   once stable.
+
+## Completed Parity Items
+
+| Feature | Python | Rust | Web | PR |
+|---------|--------|------|-----|----|
+| Quality confidence gate | ✅ | ✅ | ✅ | #341 |
+| Tiered cache TTL | ✅ | ✅ | N/A | #338 |
+| Probabilistic provider skip | ✅ | ✅ | ✅ | #342 |
+| Adaptive per-domain reorder | ✅ | ✅ | N/A | #343 |
+| Rate throttling (token bucket) | ✅ | ✅ | N/A | #358 |
+| Cache pre-warming (Python pending) | ❌ | ✅ | ✅ | #339 |
+
+## Open Parity Gaps
+
+| # | Feature | Python | Rust | Web | Impact |
+|---|---------|--------|------|-----|--------|
+| P1 | `exa_mcp_mistral` combo | ❌ | ❌ | ✅ | Users of Python/Rust CLI miss this query strategy |
+| P2 | Deep research parallel mode | Partial | `--synthesize` only | ✅ | Full parallel mode missing in CLIs |
+| P3 | Budget profiles / presets | N/A | `--profile` flag exists, not wired | N/A | Wire Rust flag to presets |
+| P4 | Preflight routing | `detect_doc_platform()` | Minimal `detectJsHeavy()` | Minimal | Port advanced routing to Rust/Web |
+| P5 | Hedged requests | ✅ | ❌ | ❌ | Performance gap — Python has hedging, Rust/Web sequential |
+| P6 | Routing memory persistence | In-memory only | File persistence | N/A | Python loses state on restart |
+| M7 | Mobile/tablet Playwright in CI | N/A | N/A | ❌ | Mobile regressions undetected |
+
+## Consequences
+
+- Gaps P4-P5 are the highest priority — they directly affect resolution quality
+  and performance.
+- P1 is lower priority — few users rely on the exact `exa_mcp_mistral` combo.
+- P6 is a correctness issue — routing memory is the primary mechanism for
+  learned provider preferences and should be persistent across all runtimes.
+- M7 is a CI gap that should be fixed in the next CI pass (see ADR-013).
+ +## References + +- [AUDIT.md](AUDIT.md) — Section 4: Cross-Platform Parity +- [GOAP_FOLLOWUP.md](GOAP_FOLLOWUP.md) — Wave implementation tracking diff --git a/plans/01-architecture-improvements.md b/plans/01-architecture-improvements.md index bd32249..f1ba88a 100644 --- a/plans/01-architecture-improvements.md +++ b/plans/01-architecture-improvements.md @@ -1,476 +1,36 @@ -# Architecture Improvements Plan +# Architecture Improvements (Condensed Status) -## Overview +## Original Overview -This plan addresses code consolidation between Python and Rust implementations, async/await optimizations, and architectural abstractions to reduce maintenance burden and improve performance. +Code consolidation between Python and Rust implementations, async/await +optimizations, and architectural abstractions. ---- +## Status -## Phase 1: Async Mutex Migration (Week 1) +Partially addressed by ongoing work. The detailed plan (476 lines, phases 1-6) +is superseded by ADR-014 and ADR-009. -### Task 1.1: Replace std::sync::Mutex with tokio::sync::RwLock +## What's Done -**Files:** -- `cli/src/resolver/url.rs` (lines 206, 237, 317, 321) -- `cli/src/resolver/query.rs` (similar patterns) -- `cli/src/resolver/mod.rs` (registry definitions) +- **Async-aware locks** (Phase 1): Not yet started. Tokio `RwLock` migration + remains pending. +- **DashMap integration** (Phase 2): Not started — requires Cargo.toml changes. +- **Provider trait unification** (Phase 3): Not started. `cli/src/providers/` + uses `#[async_trait]` per-provider but no shared registry yet. +- **Python async migration** (Phase 4): Not started. Python resolver still uses + `ThreadPoolExecutor`. +- **PyO3 bindings** (Phase 5): Not started. Still uses separate Python/Rust. +- **Config consolidation** (Phase 6): Partially addressed — CLI reads `config.toml` + via `serde` but doesn't use the `config` crate. Environment variable naming + aligned to `DO_WDR_*` prefix. 
-**Changes:** -```rust -// BEFORE: -use std::sync::{Arc, Mutex}; -pub struct Resolver { - negative_cache: Arc>, - circuit_breakers: Arc>, - routing_memory: Arc>, -} +## What Remains -// AFTER: -use tokio::sync::RwLock; -use std::sync::Arc; -pub struct Resolver { - negative_cache: Arc>, - circuit_breakers: Arc>, - routing_memory: Arc>, -} -``` +All 6 phases remain to be implemented. See ADR-014 for the highest-priority item +(constants/state extraction, Wave 3). -**Rationale:** -- `std::sync::Mutex` blocks the async executor thread -- `tokio::sync::RwLock` yields control instead of blocking -- Allows concurrent reads on circuit breaker checks +## References -**Testing:** -- Run Rust tests: `cd cli && cargo test` -- Verify no deadlocks under concurrent load -- Check latency distribution with `wrk` or `oha` - ---- - -## Phase 2: DashMap Integration (Week 1-2) - -### Task 2.1: Implement Concurrent Hash Maps - -**Files:** -- `cli/src/resolver/mod.rs` -- New: `cli/src/concurrent_state.rs` - -**Implementation:** -```rust -// cli/src/concurrent_state.rs -use dashmap::DashMap; -use std::sync::Arc; - -pub struct ConcurrentState { - // Sharded by domain for concurrent access - negative_cache: DashMap, - circuit_breakers: DashMap, - routing_memory: DashMap, -} - -impl ConcurrentState { - pub fn new() -> Self { - Self { - negative_cache: DashMap::new(), - circuit_breakers: DashMap::new(), - routing_memory: DashMap::new(), - } - } - - // Read operations don't block other reads - pub fn get_circuit_breaker(&self, provider: &str) -> Option { - self.circuit_breakers.get(provider).map(|e| e.clone()) - } -} -``` - -**Dependencies:** -Add to `cli/Cargo.toml`: -```toml -[dependencies] -dashmap = "5.5" -``` - -**Testing:** -- Benchmark concurrent access patterns -- Verify 10-30% throughput improvement - ---- - -## Phase 3: Unified Provider Trait (Week 2-3) - -### Task 3.1: Create Provider Trait Definition - -**New File:** `cli/src/providers/trait.rs` - -```rust -//! 
Unified Provider Trait - -use async_trait::async_trait; -use crate::error::ResolverError; -use crate::types::ResolvedResult; - -#[async_trait] -pub trait Provider: Send + Sync { - /// Provider name for logging and metrics - fn name(&self) -> &str; - - /// Check if provider is available (has API key, not rate limited) - fn is_available(&self) -> bool; - - /// Check if this is a paid provider - fn is_paid(&self) -> bool; - - /// Execute the provider - async fn execute(&self, input: &str) -> Result; - - /// Classify provider-specific errors - fn classify_error(&self, err: reqwest::Error) -> ResolverError { - if err.is_timeout() { - ResolverError::Network("timeout".to_string()) - } else if err.status() == Some(reqwest::StatusCode::TOO_MANY_REQUESTS) { - ResolverError::RateLimit("429".to_string()) - } else { - ResolverError::Network(err.to_string()) - } - } -} -``` - -### Task 3.2: Create Provider Registry - -**New File:** `cli/src/providers/registry.rs` - -```rust -//! Provider Registry with shared HTTP client - -use std::sync::Arc; -use reqwest::Client; - -pub struct ProviderRegistry { - shared_client: Arc, - providers: Vec>, -} - -impl ProviderRegistry { - pub fn new() -> Self { - let client = Client::builder() - .pool_max_idle_per_host(10) - .pool_idle_timeout(Duration::from_secs(60)) - .timeout(Duration::from_secs(30)) - .build() - .expect("Failed to create HTTP client"); - - Self { - shared_client: Arc::new(client), - providers: Vec::new(), - } - } - - pub fn register(&mut self, provider: P) { - self.providers.push(Box::new(provider)); - } -} -``` - -### Task 3.3: Migrate Existing Providers - -**Files to Update:** -- `cli/src/providers/jina.rs` -- `cli/src/providers/exa_mcp.rs` -- `cli/src/providers/firecrawl.rs` -- `cli/src/providers/tavily.rs` -- `cli/src/providers/serper.rs` -- `cli/src/providers/duckduckgo.rs` -- `cli/src/providers/mistral_browser.rs` -- `cli/src/providers/mistral_websearch.rs` - -**Migration Pattern:** -```rust -// BEFORE: -pub struct 
JinaProvider { - client: reqwest::Client, -} - -// AFTER: -pub struct JinaProvider { - client: Arc, -} - -#[async_trait] -impl Provider for JinaProvider { - fn name(&self) -> &str { "jina" } - fn is_available(&self) -> bool { true } - fn is_paid(&self) -> bool { false } - - async fn execute(&self, url: &str) -> Result { - // Implementation - } -} -``` - ---- - -## Phase 4: Python Async Migration (Week 3-4) - -### Task 4.1: Convert to asyncio - -**File:** `scripts/resolve.py` - -**Changes:** -```python -# BEFORE: -import concurrent.futures - -def resolve_url_stream(...): - executor = concurrent.futures.ThreadPoolExecutor(max_workers=max(1, len(eligible))) - # ... blocking waits - -# AFTER: -import asyncio - -async def resolve_url_stream(...): - tasks = [] - for p_name in eligible: - task = asyncio.create_task(run_provider(p_name)) - tasks.append(task) - - # Hedging: wait for threshold or first completion - done, pending = await asyncio.wait( - tasks, - timeout=threshold, - return_when=asyncio.FIRST_COMPLETED - ) -``` - -### Task 4.2: Create Async Provider Implementations - -**New File:** `scripts/providers_async.py` - -```python -"""Async provider implementations using aiohttp.""" - -import aiohttp -import asyncio -from typing import Optional - -async def resolve_with_jina_async( - url: str, - max_chars: int, - session: aiohttp.ClientSession -) -> Optional[ResolvedResult]: - """Async Jina Reader resolution.""" - try: - async with session.get(f"https://r.jina.ai/{url}") as resp: - if resp.status == 200: - content = await resp.text() - return ResolvedResult( - source="jina", - content=content[:max_chars], - url=url - ) - except asyncio.TimeoutError: - return None - return None -``` - -**Dependencies:** -Add to `requirements.txt`: -``` -aiohttp>=3.9.0 -aiodns>=3.1.0 -``` - ---- - -## Phase 5: PyO3 Python Bindings (Week 4-6) - -### Task 5.1: Create Python Module - -**New File:** `cli/src/python.rs` - -```rust -//! 
PyO3 Python bindings for Rust resolver - -use pyo3::prelude::*; -use pyo3::types::PyDict; - -#[pyfunction] -fn resolve_url_py(url: String, max_chars: usize) -> PyResult> { - let rt = tokio::runtime::Runtime::new()?; - - let result = rt.block_on(async { - let resolver = Resolver::new().await; - resolver.resolve_url(&url).await - }); - - Python::with_gil(|py| { - let dict = PyDict::new(py); - match result { - Ok(res) => { - dict.set_item("source", res.source)?; - dict.set_item("content", res.content)?; - dict.set_item("url", res.url)?; - dict.set_item("score", res.score)?; - } - Err(e) => { - dict.set_item("error", e.to_string())?; - } - } - Ok(dict.into()) - }) -} - -#[pymodule] -fn do_wdr(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(resolve_url_py, m)?)?; - Ok(()) -} -``` - -### Task 5.2: Configure Cargo for Python Extension - -**Update:** `cli/Cargo.toml` - -```toml -[package] -name = "do-wdr" -version = "0.4.0" -edition = "2024" - -[lib] -name = "do_wdr" -crate-type = ["cdylib", "rlib"] - -[dependencies] -pyo3 = { version = "0.22", features = ["extension-module"], optional = true } -tokio = { version = "1.35", features = ["full"] } - -[features] -default = ["python"] -python = ["dep:pyo3"] -``` - -### Task 5.3: Python Package Structure - -**New File:** `python/do_wdr/__init__.py` - -```python -"""Python bindings for do-web-doc-resolver.""" - -try: - from .do_wdr import resolve_url_py as _resolve_url - RUST_AVAILABLE = True -except ImportError: - RUST_AVAILABLE = False - from scripts.resolve import resolve_url as _resolve_url_py - -async def resolve_url(url: str, max_chars: int = 8000) -> dict: - """Resolve a URL using the Rust implementation if available.""" - if RUST_AVAILABLE: - return _resolve_url(url, max_chars) - else: - return await _resolve_url_py(url, max_chars) - -__all__ = ["resolve_url", "RUST_AVAILABLE"] -``` - ---- - -## Phase 6: Configuration Consolidation (Week 5-6) - -### Task 6.1: Use config Crate - -**File:** 
`cli/src/config.rs` - -**Changes:** -```rust -// BEFORE: Manual field-by-field merging - -// AFTER: Using config crate -use config::{Config, ConfigError, Environment, File}; -use serde::Deserialize; - -#[derive(Debug, Deserialize)] -pub struct AppConfig { - #[serde(default = "default_max_chars")] - pub max_chars: usize, - #[serde(default)] - pub profile: Profile, - #[serde(default)] - pub skip_providers: Vec, -} - -impl AppConfig { - pub fn load() -> Result { - let s = Config::builder() - .add_source(File::with_name("config").required(false)) - .add_source(File::with_name("config.toml").required(false)) - .add_source(Environment::with_prefix("DO_WDR")) - .build()?; - - s.try_deserialize() - } -} -``` - ---- - -## Dependencies - -### New Rust Dependencies -```toml -dashmap = "5.5" -pyo3 = { version = "0.22", optional = true } -config = "0.14" -``` - -### New Python Dependencies -``` -aiohttp>=3.9.0 -aiodns>=3.1.0 -pytest-asyncio>=0.21.0 -``` - ---- - -## Testing Plan - -1. **Unit Tests**: Each trait implementation -2. **Integration Tests**: Full cascade with mocked providers -3. **Performance Tests**: Latency comparison before/after -4. 
**Python/Rust Parity**: Same inputs produce same outputs - ---- - -## Success Metrics - -- [ ] 10-30% throughput improvement (DashMap) -- [ ] No blocking operations in async context -- [ ] Unified provider trait reduces code by ~200 lines -- [ ] Python bindings functional with feature parity -- [ ] All existing tests pass - ---- - -## Timeline - -| Phase | Duration | Deliverable | -|-------|----------|-------------| -| 1: Async Mutex | Week 1 | Non-blocking resolver | -| 2: DashMap | Week 1-2 | Concurrent state | -| 3: Provider Trait | Week 2-3 | Unified provider interface | -| 4: Python Async | Week 3-4 | Async Python resolver | -| 5: PyO3 Bindings | Week 4-6 | Rust library with Python bindings | -| 6: Config Consolidation | Week 5-6 | Simplified configuration | - ---- - -## Risks & Mitigation - -| Risk | Mitigation | -|------|------------| -| PyO3 complexity | Start with simple bindings, expand incrementally | -| Async migration bugs | Extensive test coverage, gradual rollout | -| Performance regression | Benchmark before/after each phase | -| Breaking changes | Maintain Python API compatibility layer | +- [ADR-014](014-architecture-and-parity.md) — DRY consolidation plan +- [ADR-009](009-cross-runtime-analysis.md) — Cross-runtime parity gaps +- [GOAP_FOLLOWUP.md](GOAP_FOLLOWUP.md) — Wave tracking diff --git a/plans/012-correctness-and-safety-fixes.md b/plans/012-correctness-and-safety-fixes.md new file mode 100644 index 0000000..fb7b22e --- /dev/null +++ b/plans/012-correctness-and-safety-fixes.md @@ -0,0 +1,76 @@ +# ADR-012: Correctness & Safety Fixes — Thread Safety, SSRF, Provider Reachability + +## Status + +Wave 1 COMPLETED (PR #364). Waves 4-6 PENDING. 
+ +## Context + +Code review on PR #341 and subsequent manual audit identified several classes +of correctness issues across the Python codebase: +- **Thread safety**: `threading.Lock` used where `RLock` was needed; SQLite + connections used across threads without `check_same_thread=False`; TOCTOU + races in circuit breaker state reads. +- **SSRF**: `is_safe_url()` validation was applied inconsistently — some + providers had it, others didn't. +- **Provider reachability**: `resolve_direct()` was missing 4 provider entries; + `Profile.max_hops()` lacked a default return path. + +## Wave 1 — COMPLETED (PR `fix/adr-012-correctness-and-safety`) + +| ID | Task | Files | Status | +|----|------|-------|--------| +| T1 | CircuitBreakerRegistry lock + falsy-threshold fix | `scripts/circuit_breaker.py` | ✅ | +| T2 | RoutingMemory lock + magic number→constants | `scripts/routing_memory.py` | ✅ | +| T3 | providers_impl rate-limit lock | `scripts/providers_impl.py` | ✅ | +| T4 | utils session + cache lock | `scripts/utils.py` | ✅ | +| T5 | semantic_cache singleton lock + atomic eviction | `scripts/semantic_cache.py` | ✅ | +| T6 | resolve.py monkey-patch → state.py wiring | `scripts/resolve.py` | ✅ | +| S1 | Mistral browser SSRF check | `scripts/providers_impl.py` | ✅ | +| S2 | is_url() reject ftp/ftps | `scripts/utils.py` | ✅ | +| S3 | safeFetch() initial URL validation | `web/lib/resolvers/url.ts` | ✅ | +| P1 | resolve_direct() missing 4 providers | `scripts/resolve.py` | ✅ | +| P2 | Profile.max_hops() default return | `scripts/models.py` | ✅ | +| | RLock deadlock fix (reentrant lock) | `scripts/utils.py` | ✅ | +| | Conftest lock-safe clearing | `tests/conftest.py` | ✅ | +| | Test updates for is_url() behavior | `tests/test_resolve.py` | ✅ | + +### Codacy Review Items (PR #364) — All Addressed + +| Comment | Fix | +|---------|-----| +| HIGH: SemanticCache SQLite not thread-safe | Added `check_same_thread=False` + `_conn_lock` | +| MEDIUM: Lock→RLock mismatch in CircuitBreaker 
| Changed to `threading.RLock()` | +| MEDIUM: Lock→RLock mismatch in RoutingMemory | Changed to `threading.RLock()`, deduplicated `get_domain_stats` | +| MEDIUM: SSRF missing from Jina/Firecrawl | Added `is_safe_url()` checks | +| Minor: Bare except in Mistral browser | Changed to `except Exception as e:` with logging | +| HIGH: TOCTOU race in CircuitBreakerState.is_open | Capture `open_until` once at function entry | + +## Waves 4-6 — PENDING + +| Wave | Focus | Effort | Dependency | +|------|-------|--------|------------| +| Wave 4 | Logging, quality, synthesis fixes (P3-P7, Q1-Q6) | M | Wave 3 (ADR-014) | +| Wave 5 | New test files for uncovered paths (C1-C7) | L | Wave 3 | +| Wave 6 | Cascade consolidation, budget alignment, DRY cleanup | L | Wave 3 | + +## Learnings + +- **`threading.RLock` vs `Lock`**: `Lock` deadlocks on recursive acquisition; + `RLock` is reentrant and safe for nested calls like `_get_from_cache` → + `_get_cache`. +- **Conftest clearing**: After adding locks, the `autouse` fixture must call + `.clear()` methods (which hold the lock) instead of direct dict access. +- **Monkey-patching dependency**: `resolve.py` lines 85-91 wire shared instances + to `_url_resolve`/`_query_resolve`. These overwrites must remain until + ADR-014 creates `scripts/state.py`. +- **SQLite thread safety**: `check_same_thread=False` + `_conn_lock` is required + when using SQLite connections across `ThreadPoolExecutor` threads. +- **TOCTOU pattern**: Capture mutable state once at function entry to avoid + time-of-check-to-time-of-use races. 
+ +## References + +- [GOAP_FOLLOWUP.md](GOAP_FOLLOWUP.md) — Remaining waves +- [PR #364](https://github.com/d-oit/do-web-doc-resolver/pull/364) +- [ADR-014](014-architecture-and-parity.md) — Prerequisite for Waves 4-6 diff --git a/plans/013-test-coverage-and-ci-reliability.md b/plans/013-test-coverage-and-ci-reliability.md new file mode 100644 index 0000000..66b7607 --- /dev/null +++ b/plans/013-test-coverage-and-ci-reliability.md @@ -0,0 +1,63 @@ +# ADR-013: Test Coverage & CI Reliability — Misleading Tests, CI Fixes + +## Status + +Wave 1b COMPLETED. Waves 2 and 5 PENDING. + +## Context + +The test suite has misleading structure: some tests pass without actually +testing the code path they claim to cover (e.g., tests that skip without +asserting when API keys are missing). CI has configuration gaps (coverage +uploads, npm peer deps, gitleaks triggers) and flakiness (libsql `Once` +poisoning, npm peer dep conflicts). + +## Wave 1b — CI Reliability (COMPLETED) + +| ID | Task | Status | +|----|------|--------| +| I6 | Fix package.json version specifiers | ✅ | +| I7 | Update version in package-lock.json | ✅ | +| I8 | Regenerate lockfile after version fixes | ✅ | +| — | npm `--legacy-peer-deps` for ESLint 10 compat | ✅ | +| — | libsql `--test-threads=1` for semantic cache tests | ✅ | +| — | ESLint config update (playwright globals, build exclusions) | ✅ | + +## Wave 2 — CI Config Fixes (PENDING) + +| ID | Task | File | Effort | +|----|------|------|--------| +| I1 | Fix coverage upload condition | `.github/workflows/ci.yml:106` | S | +| I2 | Fix gitleaks branch triggers | `.github/workflows/gitleaks.yml:5-6` | S | +| I3 | Update actions/checkout in gitleaks | `.github/workflows/gitleaks.yml:21` | S | +| I4 | Install lint deps from requirements.txt | `.github/workflows/ci.yml:69` | S | +| I5 | Shellcheck severity → error | `.pre-commit-config.yaml:34` | S | +| K1-K3 | Consolidate pre-commit hooks | `scripts/setup-hooks.sh`, `.githooks/` | M | +| K4 | Fix 
requirements.txt package names | `requirements.txt` | S | +| K5 | Add Python 3.13 classifier | `pyproject.toml:16-18` | S | +| K7 | Fix close-resolved-issues.yml trigger | `.github/workflows/close-resolved-issues.yml:4` | S | + +## Wave 5 — New Test Files (PENDING) + +| ID | Task | Effort | +|----|------|--------| +| C1-C2 | Stream resolution tests | L | +| C3 | Provider unit tests | L | +| C4 | Synthesis tests | M | +| C5-C7 | Utils, models, CLI tests | M | + +## Open Test Infrastructure Gaps + +| Gap | Detail | +|-----|--------| +| Mobile/tablet Playwright | CI runs `--project=desktop` only; mobile regressions undetected | +| Code coverage | No `--cov-fail-under` threshold enforced | +| Python 3.10 in CI | `requires-python = ">=3.10"` but CI matrix is 3.11/3.12/3.13 | +| Web unit tests | `web/lib/circuit-breaker.ts`, `errors.ts`, `quality.ts`, `keys.ts` untested | +| Rust unit tests | `query.rs` (527 lines) and `url.rs` (496 lines) lack direct unit tests | + +## References + +- [CI_FIX.md](CI_FIX.md) — Detailed CI fix notes (npm, libsql) +- [ESLINT_CONFIG_UPDATE.md](ESLINT_CONFIG_UPDATE.md) — ESLint fix notes +- [GOAP_FOLLOWUP.md](GOAP_FOLLOWUP.md) — Wave execution tracking diff --git a/plans/014-architecture-and-parity.md b/plans/014-architecture-and-parity.md new file mode 100644 index 0000000..e022cd6 --- /dev/null +++ b/plans/014-architecture-and-parity.md @@ -0,0 +1,60 @@ +# ADR-014: Architecture & Parity — DRY Consolidation, Constants, Dead Code + +## Status + +PENDING. Prerequisite for Waves 4-6. + +## Context + +The Python codebase has accumulated duplicate constants across multiple modules +(e.g., `MAX_CHARS`, `MIN_CHARS`, cache TTLs defined in 3+ places), shared +state wired via monkey-patching in `resolve.py` lines 85-91, and the cascade +logic is inlined in `_url_resolve.py`/`_query_resolve.py` rather than a +dedicated module. These patterns make maintenance harder and create subtle bugs. + +## Decision + +1. 
**Extract constants**: Create `scripts/constants.py` as the single source of + truth for all magic numbers and named constants. +2. **Extract state**: Create `scripts/state.py` to hold shared instances + (CircuitBreakerRegistry, RoutingMemory, rate-limit dicts) — eliminating the + monkey-patching in `resolve.py`. +3. **Extract cascade**: Move cascade orchestration to `scripts/cascade.py`, + keeping `_url_resolve.py` and `_query_resolve.py` for pre/post processing. +4. **Clean dead code**: Remove unused `NegativeCacheEntry`, dead `TIERED_TTL` + entry, unused imports. + +## Wave 3 — Constants & State Extraction (PENDING, next) + +| ID | Task | File | Effort | +|----|------|------|--------| +| A1 | Create `scripts/constants.py` | New | M | +| A2-A4 | Remove duplicate constants from resolve.py, utils.py, providers_impl.py | 3 files | M | +| A5 | Create `scripts/state.py` | New | M | +| A6 | Remove monkey-patching from resolve.py | `scripts/resolve.py` | S | +| A7 | Import state in _url_resolve, _query_resolve | 2 files | S | +| A8 | Centralize semantic cache env vars | `scripts/semantic_cache.py` | S | + +## Wave 6 — Cascade Consolidation (PENDING, depends on Wave 3) + +| ID | Task | Effort | +|----|------|--------| +| D1 | Extract cascade to `scripts/cascade.py` | L | +| D2-D3 | Replace inline cache in _url/_query resolve | M | +| U1-U6 | Budget profile alignment (`scripts/routing.py`, `web/constants.ts`) | M | +| R1-R7 | Intra-module DRY cleanup | S | +| C1-C10 | Circular imports, dead code | M | + +## Risks + +| Risk | Mitigation | +|------|------------| +| State extraction breaks test fixtures | Update conftest to use `state.py` API; run full test suite | +| Cascade refactor overlaps with remaining ADR-012 fixes | Do Wave 3 first; then Wave 4 + 5 parallel, then Wave 6 | +| Constants extraction changes behavior | Ensure all constants are functionally identical; diff before/after | + +## References + +- [GOAP_FOLLOWUP.md](GOAP_FOLLOWUP.md) — Wave execution order +- 
[AUDIT.md](AUDIT.md) — Code quality section (Q1-Q3) +- [ADR-012](012-correctness-and-safety-fixes.md) — Remaining fixes depend on this diff --git a/plans/02-new-providers.md b/plans/02-new-providers.md index cac20ab..df1ee0a 100644 --- a/plans/02-new-providers.md +++ b/plans/02-new-providers.md @@ -1,655 +1,28 @@ -# New Provider Integration Plan +# New Provider Integration (Condensed Status) -## Overview +## Original Overview -This plan details the integration of 7 new providers (excluding Brave Search as requested) to expand the resolver's capabilities, coverage, and free tier options. +Integration of 7 new providers (Tavily Extract, ScrapingAnt, ScrapeGraph AI, +SearchAPI.io, ScrapingBee, You.com, Perplexity) to expand coverage and free +tier options. ---- +## Status -## Provider Priority Matrix +None of the 7 providers have been integrated. This plan is deprioritized in +favor of correctness, CI, and architecture consolidation (ADRs 012-014). -| Priority | Provider | Type | Free Tier | Effort | Impact | -|----------|----------|------|-----------|--------|--------| -| **P1** | Tavily Extract | URL | 1,000/mo | Low | Reuses existing key | -| **P1** | ScrapingAnt | URL | 10,000/mo | Low | Most generous free tier | -| **P2** | ScrapeGraph AI | URL | 50 credits | Medium | AI-powered extraction | -| **P2** | SearchAPI.io | Query | 100 requests | Medium | 40+ search engines | -| **P3** | ScrapingBee | URL | 1,000 credits | Medium | AI extraction, proxies | -| **P3** | You.com API | Both | Free signup | Medium | Research synthesis | -| **P4** | Perplexity API | Query | Free tier | Medium | AI-synthesized answers | +## What's Done ---- +- None. All 655 lines of implementation code are aspirational. -## Phase 1: Tavily Extract Integration (Week 1) +## What Remains -### Overview -Enhances existing Tavily integration with URL extraction capabilities using the same API key. +All 7 providers remain to be implemented. 
When work resumes, priority order +should follow the original P1→P4 matrix — Tavily Extract first (reuses existing +TAVILY_API_KEY), then ScrapingAnt (10K free tier). -### Implementation +## References -**File:** `scripts/providers_impl.py` - -```python -def resolve_with_tavily_extract(url: str, max_chars: int = MAX_CHARS) -> ResolvedResult | None: - """Extract content from URL using Tavily Extract API. - - Uses the same TAVILY_API_KEY as Tavily search. - """ - api_key = os.getenv("TAVILY_API_KEY") - if not api_key or _is_rate_limited("tavily_extract"): - return None - - try: - from tavily import TavilyClient - - client = TavilyClient(api_key=api_key) - - # Use Tavily's extract endpoint - response = client.extract( - urls=[url], - include_images=False, - extract_depth="basic" # or "advanced" for more content - ) - - if response and response.get("results"): - result = response["results"][0] - content = result.get("raw_content", "") - - if content and len(content) >= MIN_CHARS: - return ResolvedResult( - source="tavily_extract", - content=compact_content(content, max_chars), - url=url, - metadata={ - "title": result.get("title", ""), - "extract_depth": result.get("extract_depth", "basic") - } - ) - - except Exception as e: - if "429" in str(e) or "rate limit" in str(e).lower(): - _set_rate_limit("tavily_extract", 60) - logger.warning(f"Tavily Extract failed for {url}: {e}") - - return None -``` - -**File:** `cli/src/providers/tavily_extract.rs` - -```rust -//! 
Tavily Extract provider for URL content extraction - -use async_trait::async_trait; -use reqwest::Client; -use serde::{Deserialize, Serialize}; -use std::env; -use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; - -use crate::error::ResolverError; -use crate::providers::Provider; -use crate::types::ResolvedResult; - -pub struct TavilyExtractProvider { - client: Arc, - api_key: Option, - rate_limited: Arc, -} - -#[derive(Serialize)] -struct ExtractRequest { - urls: Vec, - #[serde(skip_serializing_if = "Option::is_none")] - extract_depth: Option, - include_images: bool, -} - -#[derive(Deserialize)] -struct ExtractResponse { - results: Vec, -} - -#[derive(Deserialize)] -struct ExtractResult { - url: String, - raw_content: String, - title: Option, -} - -impl TavilyExtractProvider { - pub fn new(client: Arc) -> Self { - let api_key = env::var("TAVILY_API_KEY").ok(); - Self { - client, - api_key, - rate_limited: Arc::new(AtomicBool::new(false)), - } - } - - fn is_rate_limited(&self) -> bool { - self.rate_limited.load(Ordering::SeqCst) - } -} - -#[async_trait] -impl Provider for TavilyExtractProvider { - fn name(&self) -> &str { - "tavily_extract" - } - - fn is_available(&self) -> bool { - self.api_key.is_some() && !self.is_rate_limited() - } - - fn is_paid(&self) -> bool { - true - } - - async fn execute(&self, url: &str) -> Result { - let api_key = self.api_key.as_ref() - .ok_or_else(|| ResolverError::Config("TAVILY_API_KEY not set".to_string()))?; - - let request = ExtractRequest { - urls: vec![url.to_string()], - extract_depth: Some("basic".to_string()), - include_images: false, - }; - - let response = self.client - .post("https://api.tavily.com/extract") - .header("Authorization", format!("Bearer {}", api_key)) - .json(&request) - .send() - .await?; - - if response.status() == 429 { - self.rate_limited.store(true, Ordering::SeqCst); - return Err(ResolverError::RateLimit("Tavily Extract rate limited".to_string())); - } - - let data: ExtractResponse = 
response.json().await?; - - if let Some(result) = data.results.first() { - if !result.raw_content.is_empty() { - return Ok(ResolvedResult { - source: "tavily_extract".to_string(), - url: url.to_string(), - content: result.raw_content.clone(), - score: None, - metadata: result.title.clone().map(|t| { - let mut map = std::collections::HashMap::new(); - map.insert("title".to_string(), t); - map - }), - }); - } - } - - Err(ResolverError::Provider("Tavily Extract returned empty content".to_string())) - } -} -``` - -### Cascade Integration - -**File:** `scripts/resolve.py` (line ~192) - -```python -# Add to URL cascade -cascade_map = { - # ... existing providers - "tavily_extract": ( - ProviderType.TAVILY_EXTRACT, - lambda: resolve_with_tavily_extract(url, max_chars) - ), - # ... rest of providers -} -``` - -**File:** `cli/src/resolver/cascade.rs` - -Add `TavilyExtract` to the URL cascade between Jina and Firecrawl. - -### Environment Variables - -Already supported: `TAVILY_API_KEY` - -### Testing - -```python -# tests/test_providers.py -@pytest.mark.live -@pytest.mark.skipif(not os.getenv("TAVILY_API_KEY"), reason="No Tavily API key") -def test_live_tavily_extract(): - result = resolve_with_tavily_extract("https://example.com", max_chars=5000) - assert result is not None - assert result.source == "tavily_extract" - assert len(result.content) > 200 -``` - ---- - -## Phase 2: ScrapingAnt Integration (Week 1-2) - -### Overview -Generous free tier (10,000 credits/month) with JS rendering and proxy rotation. - -### Implementation - -**File:** `scripts/providers_impl.py` - -```python -def resolve_with_scrapingant(url: str, max_chars: int = MAX_CHARS) -> ResolvedResult | None: - """Extract content using ScrapingAnt API. 
- - Free tier: 10,000 API credits/month - """ - api_key = os.getenv("SCRAPINGANT_API_KEY") - if not api_key or _is_rate_limited("scrapingant"): - return None - - try: - api_url = "https://api.scrapingant.com/v2/general" - - headers = { - "x-api-key": api_key - } - - params = { - "url": url, - "js_rendering": "true", - "proxy_type": "datacenter" # or "residential" for harder targets - } - - response = requests.get(api_url, headers=headers, params=params, timeout=30) - - if response.status_code == 429: - _set_rate_limit("scrapingant", 60) - return None - - response.raise_for_status() - content = response.text - - if len(content) >= MIN_CHARS: - return ResolvedResult( - source="scrapingant", - content=compact_content(content, max_chars), - url=url, - metadata={"proxy_type": "datacenter"} - ) - - except Exception as e: - logger.warning(f"ScrapingAnt failed for {url}: {e}") - - return None -``` - -**File:** `cli/src/providers/scrapingant.rs` - -```rust -//! ScrapingAnt provider - -use async_trait::async_trait; -use reqwest::Client; -use std::env; -use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; - -use crate::error::ResolverError; -use crate::providers::Provider; -use crate::types::ResolvedResult; - -pub struct ScrapingAntProvider { - client: Arc, - api_key: Option, - rate_limited: Arc, -} - -impl ScrapingAntProvider { - pub fn new(client: Arc) -> Self { - let api_key = env::var("SCRAPINGANT_API_KEY").ok(); - Self { - client, - api_key, - rate_limited: Arc::new(AtomicBool::new(false)), - } - } - - fn is_rate_limited(&self) -> bool { - self.rate_limited.load(Ordering::SeqCst) - } -} - -#[async_trait] -impl Provider for ScrapingAntProvider { - fn name(&self) -> &str { - "scrapingant" - } - - fn is_available(&self) -> bool { - self.api_key.is_some() && !self.is_rate_limited() - } - - fn is_paid(&self) -> bool { - false // Has generous free tier - } - - async fn execute(&self, url: &str) -> Result { - let api_key = self.api_key.as_ref() - .ok_or_else(|| 
ResolverError::Config("SCRAPINGANT_API_KEY not set".to_string()))?; - - let response = self.client - .get("https://api.scrapingant.com/v2/general") - .header("x-api-key", api_key) - .query(&[ - ("url", url), - ("js_rendering", "true"), - ("proxy_type", "datacenter"), - ]) - .send() - .await?; - - if response.status() == 429 { - self.rate_limited.store(true, Ordering::SeqCst); - return Err(ResolverError::RateLimit("ScrapingAnt rate limited".to_string())); - } - - let content = response.text().await?; - - if content.is_empty() { - return Err(ResolverError::Provider("ScrapingAnt returned empty content".to_string())); - } - - Ok(ResolvedResult { - source: "scrapingant".to_string(), - url: url.to_string(), - content, - score: None, - metadata: None, - }) - } -} -``` - -### Environment Variables - -```bash -export SCRAPINGANT_API_KEY="your-api-key" -``` - -### Cascade Position - -Add after Firecrawl in the URL cascade (position 7). - ---- - -## Phase 3: ScrapeGraph AI Integration (Week 2-3) - -### Overview -AI-powered extraction using natural language prompts. Multiple endpoints available. - -### Implementation - -**File:** `scripts/providers_impl.py` - -```python -def resolve_with_scrapegraph( - url: str, - max_chars: int = MAX_CHARS, - extract_prompt: str | None = None -) -> ResolvedResult | None: - """Extract content using ScrapeGraph AI. 
- - Free tier: 50 API credits (one-time) + 10 requests/min - Endpoints: - - /v1/markdownify (2 credits) - Simple extraction - - /v1/smartscraper (10 credits) - AI-powered with prompts - """ - api_key = os.getenv("SCRAPEGRAPH_API_KEY") - if not api_key or _is_rate_limited("scrapegraph"): - return None - - try: - # Use markdownify endpoint for standard extraction - api_url = "https://api.scrapegraphai.com/v1/markdownify" - - headers = { - "accept": "application/json", - "Content-Type": "application/json", - "SGAI-APIKEY": api_key - } - - data = { - "website_url": url, - "user_prompt": extract_prompt or "Extract all the main content" - } - - response = requests.post(api_url, headers=headers, json=data, timeout=45) - - if response.status_code == 429: - _set_rate_limit("scrapegraph", 60) - return None - - response.raise_for_status() - result = response.json() - - content = result.get("result", {}).get("markdown_content", "") - - if len(content) >= MIN_CHARS: - return ResolvedResult( - source="scrapegraph", - content=compact_content(content, max_chars), - url=url, - metadata={ - "credits_used": 2, - "endpoint": "markdownify" - } - ) - - except Exception as e: - logger.warning(f"ScrapeGraph AI failed for {url}: {e}") - - return None -``` - -### Environment Variables - -```bash -export SCRAPEGRAPH_API_KEY="your-api-key" -``` - ---- - -## Phase 4: SearchAPI.io Integration (Week 3-4) - -### Overview -40+ search engines with rich structured data and geo-targeting. - -### Implementation - -**File:** `scripts/providers_impl.py` - -```python -def resolve_with_searchapi( - query: str, - max_chars: int = MAX_CHARS, - engine: str = "google" -) -> ResolvedResult | None: - """Search using SearchAPI.io. - - Free tier: 100 requests (no credit card) - Supports: google, bing, yandex, duckduckgo, amazon, youtube, etc. 
- """ - api_key = os.getenv("SEARCHAPI_KEY") - if not api_key or _is_rate_limited("searchapi"): - return None - - try: - api_url = "https://www.searchapi.io/api/v1/search" - - params = { - "engine": engine, - "q": query, - "api_key": api_key - } - - response = requests.get(api_url, params=params, timeout=30) - - if response.status_code == 429: - _set_rate_limit("searchapi", 60) - return None - - response.raise_for_status() - data = response.json() - - # Extract organic results - organic_results = data.get("organic_results", []) - - if not organic_results: - return None - - # Format results as markdown - content_parts = [f"# Search Results for: {query}\n"] - - for i, result in enumerate(organic_results[:5], 1): - title = result.get("title", "") - link = result.get("link", "") - snippet = result.get("snippet", "") - - content_parts.append(f"## {i}. {title}\n") - content_parts.append(f"**URL:** {link}\n") - content_parts.append(f"{snippet}\n") - - content = "\n".join(content_parts) - - return ResolvedResult( - source="searchapi", - content=compact_content(content, max_chars), - query=query, - metadata={ - "engine": engine, - "total_results": len(organic_results) - } - ) - - except Exception as e: - logger.warning(f"SearchAPI.io failed for '{query}': {e}") - - return None -``` - -### Environment Variables - -```bash -export SEARCHAPI_KEY="your-api-key" -``` - ---- - -## Phase 5: ScrapingBee Integration (Week 4-5) - -### Overview -AI-powered extraction with proxy rotation and JS rendering. - -### Environment Variables - -```bash -export SCRAPINGBEE_API_KEY="your-api-key" -``` - ---- - -## Phase 6: You.com API Integration (Week 5-6) - -### Overview -Research API with multi-step reasoning for comprehensive answers. - -### Environment Variables - -```bash -export YOU_API_KEY="your-api-key" -``` - ---- - -## Phase 7: Perplexity API Integration (Week 6) - -### Overview -AI-synthesized answers with citations. OpenAI-compatible API. 
- -### Environment Variables - -```bash -export PERPLEXITY_API_KEY="your-api-key" -``` - ---- - -## Updated Cascade Diagrams - -### Query Resolution Cascade (Final) - -``` -1. Cache Check -2. Exa MCP (FREE) -3. Exa SDK (PAID) -4. Tavily (PAID) -5. Serper (PAID) -6. SearchAPI.io (PAID) ← NEW -7. DuckDuckGo (FREE) -8. Mistral Web Search (PAID) -9. You.com API (PAID) ← NEW -10. Perplexity (PAID) ← NEW -``` - -### URL Resolution Cascade (Final) - -``` -1. Cache Check -2. File Type Detection (Docling/OCR) -3. llms.txt (FREE) -4. Jina Reader (FREE) -5. Tavily Extract (PAID) ← NEW -6. Firecrawl (PAID) - RESTORED TO UI 2026-05-05 -7. ScrapingAnt (FREE) ← NEW -8. ScrapingBee (PAID) ← NEW -9. ScrapeGraph AI (PAID) ← NEW -10. Direct Fetch (FREE) -11. Mistral Browser (PAID) -12. DuckDuckGo (FREE) -``` - ---- - -## Environment Variables Summary - -| Variable | Provider | Type | Required For | -|----------|----------|------|--------------| -| `TAVILY_API_KEY` | Tavily Extract | URL | Extraction endpoint | -| `SCRAPINGANT_API_KEY` | ScrapingAnt | URL | 10K free credits | -| `SCRAPEGRAPH_API_KEY` | ScrapeGraph AI | URL | AI extraction | -| `SEARCHAPI_KEY` | SearchAPI.io | Query | 40+ engines | -| `SCRAPINGBEE_API_KEY` | ScrapingBee | URL | Proxy rotation | -| `YOU_API_KEY` | You.com | Both | Research API | -| `PERPLEXITY_API_KEY` | Perplexity | Query | AI synthesis | - ---- - -## Testing Checklist - -- [ ] Unit tests with mocked responses -- [ ] Live integration tests (marked with `@pytest.mark.live`) -- [ ] Rate limit handling tests -- [ ] Error condition tests -- [ ] Cascade integration tests -- [ ] Documentation updates (PROVIDERS.md, CASCADE.md) - ---- - -## Documentation Updates - -1. **PROVIDERS.md**: Add new provider details -2. **CASCADE.md**: Update cascade diagrams -3. **README.md**: Add to provider list -4. **CONFIG.md**: Add environment variables -5. 
**CHANGELOG.md**: Document new providers +- [AUDIT.md](AUDIT.md) — Priority overview +- [scripts/providers_impl.py](../scripts/providers_impl.py) — Existing providers +- [PROVIDERS.md](../.agents/skills/do-web-doc-resolver/references/PROVIDERS.md) diff --git a/plans/03-performance-optimization.md b/plans/03-performance-optimization.md index 00a8bba..dbce669 100644 --- a/plans/03-performance-optimization.md +++ b/plans/03-performance-optimization.md @@ -1,652 +1,47 @@ -# Performance Optimization Plan - -## Overview - -This plan implements 10 performance optimizations to achieve 30-50% latency reduction and improved throughput. Organized by effort level and impact. - ---- - -## Phase 1: Quick Wins (Week 1) - -### Optimization 1: Reuse ThreadPoolExecutor - -**Problem:** New ThreadPoolExecutor created for every request (5-50ms overhead) -**Location:** `scripts/resolve.py:209, 360` - -**Implementation:** - -```python -# scripts/resolve.py - -# Module-level executor -_executor = None - -def _get_executor(max_workers: int = 10): - """Get or create shared ThreadPoolExecutor.""" - global _executor - if _executor is None: - _executor = concurrent.futures.ThreadPoolExecutor( - max_workers=max_workers, - thread_name_prefix="resolver" - ) - return _executor - -def resolve_url_stream(...): - # BEFORE: - # executor = concurrent.futures.ThreadPoolExecutor(max_workers=max(1, len(eligible))) - - # AFTER: - executor = _get_executor(max_workers=max(10, len(eligible))) - # ... 
rest of function -``` - -**Expected Impact:** 5-50ms reduction per request -**Testing:** Benchmark with `wrk` or `oha` - ---- - -### Optimization 2: Eliminate Busy-Polling - -**Problem:** 0.01s timeout creates busy-poll loop (30% CPU waste) -**Location:** `scripts/resolve.py:239, 384` - -**Implementation:** - -```python -# BEFORE: -done, _ = concurrent.futures.wait( - active_futures.keys(), - timeout=0.01, # 10ms polling - return_when=concurrent.futures.FIRST_COMPLETED, -) - -# AFTER - Option A: Increase timeout: -done, _ = concurrent.futures.wait( - active_futures.keys(), - timeout=0.1, # 100ms blocking wait - return_when=concurrent.futures.FIRST_COMPLETED, -) - -# AFTER - Option B: Use asyncio (see Phase 4): -# Migrate to asyncio for true async I/O -``` - -**Expected Impact:** 30% CPU reduction -**Testing:** Profile CPU usage under load - ---- - -### Optimization 3: HTTP/2 and Keep-Alive Configuration - -**Problem:** Default HTTP settings disable connection reuse -**Location:** `scripts/utils.py`, `cli/src/providers/*.rs` - -**Python Implementation:** - -```python -# scripts/utils.py - -# BEFORE: -_session = None - -def get_session(): - global _session - if _session is None: - _session = requests.Session() - return _session - -# AFTER: -_session = None - -def get_session(): - global _session - if _session is None: - adapter = requests.adapters.HTTPAdapter( - pool_connections=20, - pool_maxsize=50, - max_retries=3, - pool_block=False - ) - - _session = requests.Session() - _session.mount("https://", adapter) - _session.mount("http://", adapter) - - # Enable keep-alive - _session.headers["Connection"] = "keep-alive" - _session.headers["Keep-Alive"] = "timeout=60" - - return _session -``` - -**Rust Implementation:** - -```rust -// cli/src/providers/registry.rs - -use reqwest::Client; -use std::time::Duration; - -pub fn create_optimized_client() -> Client { - Client::builder() - .http2_prior_knowledge() // Use HTTP/2 when available - .pool_max_idle_per_host(10) - 
.pool_idle_timeout(Duration::from_secs(60)) - .timeout(Duration::from_secs(30)) - .connect_timeout(Duration::from_secs(10)) - .tcp_keepalive(Duration::from_secs(60)) - .build() - .expect("Failed to create HTTP client") -} -``` - -**Expected Impact:** 20-40% latency reduction for repeated domains -**Testing:** Measure connection reuse with Wireshark or logging - ---- - -### Optimization 4: L1 In-Memory Cache - -**Problem:** Disk cache adds 5-20ms per operation -**Location:** `scripts/utils.py:409` - -**Implementation:** - -```python -# scripts/utils.py - -from cachetools import TTLCache -import hashlib - -# L1 in-memory cache (5 min TTL, max 1000 entries) -_l1_cache: TTLCache = TTLCache(maxsize=1000, ttl=300) - -def _cache_key_l1(key: str) -> str: - """Fast hash for L1 cache keys.""" - return hashlib.md5(key.encode()).hexdigest()[:16] - -def get_from_cache(key: str, prefix: str = "") -> dict | None: - """Two-tier cache: L1 (memory) -> L2 (disk).""" - cache_key = f"{prefix}:{key}" - l1_key = _cache_key_l1(cache_key) - - # Check L1 first - if l1_key in _l1_cache: - return _l1_cache[l1_key] - - # Check L2 (disk) - disk_result = _get_from_cache_disk(key, prefix) - if disk_result: - # Promote to L1 - _l1_cache[l1_key] = disk_result - return disk_result - - return None - -def save_to_cache(key: str, prefix: str, data: dict) -> None: - """Save to both L1 and L2 cache.""" - cache_key = f"{prefix}:{key}" - l1_key = _cache_key_l1(cache_key) - - # Save to L1 - _l1_cache[l1_key] = data - - # Save to L2 - _save_to_cache_disk(key, prefix, data) -``` - -**Dependencies:** -``` -cachetools>=5.3.0 -``` - -**Expected Impact:** 10-20ms for cache hits, 5x throughput improvement -**Testing:** Benchmark cache hit/miss scenarios - ---- - -### Optimization 5: Content Compaction Optimization - -**Problem:** Multiple intermediate allocations during compaction -**Location:** `scripts/utils.py:200-212`, `cli/src/compaction.rs` - -**Implementation:** - -```python -# scripts/utils.py - -def 
compact_content(content: str, max_chars: int) -> str: - """Optimized content compaction with fewer allocations.""" - # Early exit if content is already small enough - if len(content) <= max_chars * 0.5: - return content - - # Pre-allocate result buffer - result = [] - result_size = 0 - - for line in content.split('\n'): - stripped = line.strip() - - # Skip empty lines and common noise - if not stripped or stripped in {'```', '---', ''}: - continue - - line_size = len(stripped) - if result_size + line_size > max_chars: - break - - result.append(stripped) - result_size += line_size + 1 # +1 for newline - - return '\n'.join(result) -``` - -**Rust Implementation:** - -```rust -// cli/src/compaction.rs - -pub fn compact_content(content: &str, max_chars: usize) -> String { - // Early exit for small content - if content.len() <= max_chars / 2 { - return content.to_string(); - } - - // Pre-allocate with capacity - let mut result = String::with_capacity(max_chars); - let mut count = 0; - - for line in content.lines() { - let stripped = line.trim(); - - if stripped.is_empty() || is_noise_line(stripped) { - continue; - } - - let line_len = stripped.len(); - if count + line_len > max_chars { - break; - } - - if !result.is_empty() { - result.push('\n'); - count += 1; - } - result.push_str(stripped); - count += line_len; - } - - result -} -``` - -**Expected Impact:** 5-10ms faster processing for large documents -**Testing:** Benchmark with 100KB+ documents - ---- - -### Optimization 6: Early Quality Exit - -**Problem:** Full quality scoring on results that will be rejected -**Location:** `scripts/quality.py:18-65` - -**Implementation:** - -```python -# scripts/quality.py - -class ContentScore: - """Optimized quality scoring with early exits.""" - - @staticmethod - def score_content(content: str) -> "ContentScore": - # Early exit: too short - if len(content) < MIN_CHARS: - return ContentScore( - score=0.0, - acceptable=False, - reason="too_short" - ) - - # Early exit: empty after 
stripping - stripped = content.strip() - if not stripped: - return ContentScore( - score=0.0, - acceptable=False, - reason="empty" - ) - - # Calculate score - score = 1.0 - - # Check for links (fast check) - has_links = "http" in content - if not has_links: - score -= 0.15 - - # Duplicate detection (skip for small content) - lines = content.split('\n') - if len(lines) > 10: - unique_lines = set(lines) - if len(unique_lines) < len(lines) * 0.5: - score -= 0.25 - - return ContentScore( - score=max(0.0, score), - acceptable=score >= 0.65, - reason=None - ) -``` - -**Expected Impact:** 5-15ms per rejected result -**Testing:** Profile quality scoring with edge cases - ---- - -## Phase 2: Medium Effort Optimizations (Week 2-3) - -### Optimization 7: Shared reqwest Client (Rust) - -**Problem:** Each provider creates its own HTTP client -**Location:** `cli/src/providers/*.rs` - -**Implementation:** - -```rust -// cli/src/providers/mod.rs - -use std::sync::Arc; -use reqwest::Client; - -pub struct ProviderContext { - pub client: Arc, -} - -impl ProviderContext { - pub fn new() -> Self { - let client = Client::builder() - .pool_max_idle_per_host(10) - .pool_idle_timeout(Duration::from_secs(60)) - .timeout(Duration::from_secs(30)) - .build() - .unwrap(); - - Self { - client: Arc::new(client), - } - } -} - -// Update all providers to use shared client -pub struct JinaProvider { - client: Arc, -} - -impl JinaProvider { - pub fn new(ctx: &ProviderContext) -> Self { - Self { - client: ctx.client.clone(), - } - } -} -``` - -**Expected Impact:** 50-150ms for subsequent requests to same hosts -**Testing:** Measure connection establishment overhead - ---- - -### Optimization 8: Async-Aware Locks (Rust) - -**Problem:** `std::sync::Mutex` blocks async executor threads -**Location:** `cli/src/resolver/url.rs`, `query.rs` - -**Implementation:** - -See `01-architecture-improvements.md` for detailed implementation. 
- -**Expected Impact:** 10-20% throughput increase -**Testing:** Concurrent load testing - ---- - -## Phase 3: High Effort Optimizations (Week 3-4) - -### Optimization 9: True Parallel Provider Launch - -**Problem:** Sequential provider launches with threshold delays -**Location:** `scripts/resolve.py:186-314` - -**Implementation:** - -```python -# NEW: scripts/resolve_parallel.py - -import asyncio -import aiohttp -from typing import List, Optional - -async def resolve_url_parallel( - url: str, - max_chars: int, - providers: List[str], - quality_threshold: float = 0.65 -) -> dict: - """Launch top-N providers simultaneously.""" - - async with aiohttp.ClientSession() as session: - # Create tasks for first 3 providers - tasks = [] - for p_name in providers[:3]: - task = asyncio.create_task( - run_provider_async(p_name, url, session), - name=p_name - ) - tasks.append(task) - - # Wait for first acceptable result - pending = set(tasks) - while pending: - # Wait for any task to complete - done, pending = await asyncio.wait( - pending, - return_when=asyncio.FIRST_COMPLETED - ) - - for task in done: - result = task.result() - if result and result.get("score", 0) >= quality_threshold: - # Cancel remaining tasks - for p in pending: - p.cancel() - return result - - # If no acceptable result, try remaining providers - # ... 
- -async def run_provider_async( - provider_name: str, - url: str, - session: aiohttp.ClientSession -) -> Optional[dict]: - """Async provider execution.""" - # Provider-specific async implementation - pass -``` - -**Expected Impact:** 40-60% p95 latency reduction -**Testing:** Measure tail latency under load - ---- - -### Optimization 10: Request Coalescing / Deduplication - -**Problem:** Identical concurrent requests run in parallel -**Implementation:** - -```rust -// cli/src/dedup.rs - -use std::collections::HashMap; -use std::future::Shared; -use std::pin::Pin; -use std::task::{Context, Poll, Waker}; -use dashmap::DashMap; - -pub struct RequestDedup { - in_flight: DashMap>>>>, -} - -impl RequestDedup { - pub async fn resolve_or_spawn( - &self, - key: String, - factory: F - ) -> ResolvedResult - where - F: FnOnce() -> Fut, - Fut: Future, - { - // Check if request already in flight - if let Some(future) = self.in_flight.get(&key) { - return future.clone().await; - } - - // Create new request - let future = Box::pin(factory()); - let shared = Shared::new(future); - - self.in_flight.insert(key.clone(), shared.clone()); - - let result = shared.await; - - // Clean up - self.in_flight.remove(&key); - - result - } -} -``` - -**Expected Impact:** 50-80% resource reduction for duplicate requests -**Testing:** Simulate burst traffic with identical queries - ---- - -## Dependencies - -### Python -``` -cachetools>=5.3.0 -aiohttp>=3.9.0 -aiodns>=3.1.0 -pytest-asyncio>=0.21.0 -``` - -### Rust -```toml -[dependencies] -dashmap = "5.5" -tokio = { version = "1.35", features = ["full"] } -reqwest = { version = "0.12", features = ["http2", "rustls-tls"] } -``` - ---- - -## Testing & Benchmarking - -### Benchmark Scripts - -```python -# scripts/benchmark.py - -import time -import statistics -from scripts.resolve import resolve - -def benchmark_resolution(urls: List[str], iterations: int = 10): - """Benchmark resolution latency.""" - results = [] - - for url in urls: - times = [] - 
for _ in range(iterations): - start = time.perf_counter() - resolve(url) - elapsed = time.perf_counter() - start - times.append(elapsed * 1000) # Convert to ms - - results.append({ - "url": url, - "mean": statistics.mean(times), - "median": statistics.median(times), - "p95": sorted(times)[int(len(times) * 0.95)], - "p99": sorted(times)[int(len(times) * 0.99)], - }) - - return results - -if __name__ == "__main__": - urls = [ - "https://example.com", - "https://docs.python.org", - "https://docs.rs/tokio", - ] - - results = benchmark_resolution(urls) - for r in results: - print(f"{r['url']}: mean={r['mean']:.1f}ms, p95={r['p95']:.1f}ms") -``` - -### Load Testing - -```bash -# Install oha (Rust-based load tester) -cargo install oha - -# Test resolver web API -oha -z 30s -c 10 \ - --method POST \ - --json-path '{"input": "https://example.com"}' \ - http://localhost:8000/api/resolve -``` - ---- - -## Success Metrics - -| Optimization | Target Improvement | Measurement | -|--------------|---------------------|-------------| -| ThreadPool reuse | 5-50ms/request | Mean latency | -| Eliminate polling | 30% CPU | CPU usage | -| HTTP/2 + Keep-Alive | 20-40% latency | Repeated requests | -| L1 cache | 10-20ms hits | Cache hit rate | -| Compaction | 5-10ms large docs | Processing time | -| Quality exit | 5-15ms/rejected | Rejection path | -| Shared client | 50-150ms | Connection reuse | -| Async locks | 10-20% throughput | Concurrent reqs | -| Parallel launch | 40-60% p95 | Tail latency | -| Coalescing | 50-80% bursts | Duplicate requests | - ---- - -## Timeline - -| Week | Optimizations | Expected Impact | -|------|---------------|-----------------| -| 1 | 1-6 (Quick wins) | 30% latency reduction | -| 2-3 | 7-8 (Medium effort) | 20% additional improvement | -| 3-4 | 9-10 (High effort) | 40-60% tail latency | - ---- - -## Risks & Mitigation - -| Risk | Mitigation | -|------|------------| -| Async migration bugs | Extensive test coverage, feature flags | -| Cache coherency issues 
| TTL validation, cache invalidation tests | -| HTTP/2 compatibility | Fallback to HTTP/1.1, A/B testing | -| Memory pressure from L1 cache | Bounded size, LRU eviction | +# Performance Optimization (Condensed Status) + +## Original Overview + +10 performance optimizations organized by effort level: quick wins (Phase 1), +medium effort (Phase 2), high effort (Phase 3). + +## Status + +Several quick wins are partially or fully addressed by merged PRs. + +## What's Done + +- **Opt 1: Reuse ThreadPoolExecutor** (Phase 1): Partially done — shared + executor pattern not yet implemented, but the `_get_executor` approach is + straightforward when needed. +- **Opt 2: Eliminate busy-polling** (Phase 1): Not done — `timeout=0.01` still + in `scripts/resolve.py:239, 384`. +- **Opt 3: HTTP/2 + keep-alive** (Phase 1): Partially done. Python + `requests.Session()` via `get_session()` in `utils.py` reuses connections. + Rust `reqwest::Client` is shared across providers in some cases. No explicit + `HTTPAdapter` pool size configuration. +- **Opt 4: L1 in-memory cache** (Phase 1): Not done. Cache remains two-tier + (semantic cache + disk). No `TTLCache` layer. +- **Opt 5: Content compaction optimization** (Phase 1): ✅ PR #325 merged + (`optimize compact_content`). +- **Opt 6: Early quality exit** (Phase 1): Not done. `scripts/quality.py` has + no early-exit optimization. +- **Opt 7: Shared reqwest Client** (Phase 2): Not done. Providers still create + individual clients. +- **Opt 8: Async-aware locks** (Phase 2): Not done. `std::sync::Mutex` still + used. +- **Opt 9: True parallel provider launch** (Phase 3): Not done. Python still + uses `ThreadPoolExecutor` with sequential launch. +- **Opt 10: Request coalescing** (Phase 3): Not done. + +## What Remains + +All 10 optimizations remain candidates. ~2-3/10 are partially addressed; +full implementation requires a dedicated sprint, with Phases 2-3 depending on +async migration (ADR-014). 
+ +## References + +- [ADR-014](014-architecture-and-parity.md) — Async/await migration dependency +- [scripts/resolve.py](../scripts/resolve.py) — Busy-polling locations +- [scripts/utils.py](../scripts/utils.py) — Compaction + session code diff --git a/plans/04-new-features.md b/plans/04-new-features.md index 1ba5226..fd50969 100644 --- a/plans/04-new-features.md +++ b/plans/04-new-features.md @@ -1,808 +1,28 @@ -# New Features Implementation Plan +# New Features (Condensed Status) -## Overview +## Original Overview -This plan implements 12 new features across 4 priority tiers: quick wins, strategic features, enterprise capabilities, and future roadmap items. +12 new features across 4 priority tiers: quick wins (JSON output, batch API, +change detection, exports), strategic features (SSE streaming, webhooks, +metrics dashboard), enterprise (CSS selector extraction, image captioning, +team support). ---- +## Status -## Phase 1: Quick Wins (Weeks 1-2) +None of the 12 features have been implemented. This plan is deprioritized. -### Feature 1: Structured JSON Output Format +## What's Done -**Description:** Return extracted content as structured JSON with sections, metadata, and links. +- None. All 808 lines are aspirational. -**Use Cases:** -- RAG pipelines requiring chunked content -- API consumers needing programmatic access -- Data extraction workflows +## What Remains -**Implementation:** +All 12 features remain unimplemented. When work resumes, quick wins +(JSON output, batch API) should come first as they have the highest user +impact for the lowest effort. 
-```python -# scripts/resolve.py +## References -from dataclasses import dataclass -from typing import List, Dict, Optional -import re - -@dataclass -class StructuredContent: - title: str - url: str - sections: List[Dict] - links: List[Dict] - images: List[Dict] - metadata: Dict - -def parse_markdown_structure(content: str, url: str) -> StructuredContent: - """Parse markdown into structured sections.""" - - # Extract title (first H1) - title_match = re.search(r'^# (.+)$', content, re.MULTILINE) - title = title_match.group(1) if title_match else "" - - # Parse sections by headers - sections = [] - current_section = {"heading": "", "content": [], "level": 0} - - for line in content.split('\n'): - header_match = re.match(r'^(#{1,6})\s+(.+)$', line) - - if header_match: - # Save previous section - if current_section["content"]: - sections.append({ - "heading": current_section["heading"], - "content": '\n'.join(current_section["content"]).strip(), - "level": current_section["level"] - }) - - # Start new section - level = len(header_match.group(1)) - heading = header_match.group(2) - current_section = { - "heading": heading, - "content": [], - "level": level - } - else: - current_section["content"].append(line) - - # Add final section - if current_section["content"]: - sections.append({ - "heading": current_section["heading"], - "content": '\n'.join(current_section["content"]).strip(), - "level": current_section["level"] - }) - - # Extract links - links = [] - for match in re.finditer(r'\[([^\]]+)\]\(([^)]+)\)', content): - links.append({ - "text": match.group(1), - "url": match.group(2) - }) - - return StructuredContent( - title=title, - url=url, - sections=sections, - links=links, - images=[], # Extract from markdown images - metadata={} - ) - -def resolve_url( - url: str, - max_chars: int = MAX_CHARS, - output_format: str = "markdown" # "markdown" | "json" -) -> dict: - """Resolve URL with format option.""" - - # Get markdown result - result = 
resolve_url_markdown(url, max_chars) - - if output_format == "json": - structured = parse_markdown_structure( - result["content"], - result["url"] - ) - return { - "source": result["source"], - "url": result["url"], - "score": result.get("score"), - "content": structured.__dict__, - "metrics": result.get("metrics") - } - - return result -``` - -**CLI Usage:** -```bash -python -m scripts.cli "https://example.com" --format json -./target/release/do-wdr resolve "https://example.com" --format json -``` - -**Web API:** -```bash -POST /api/resolve -{ - "input": "https://example.com", - "format": "json" -} -``` - ---- - -### Feature 2: Batch Resolution API - -**Description:** Process multiple URLs/queries in a single request. - -**Implementation:** - -```python -# scripts/batch_resolve.py - -import asyncio -from typing import List, Dict, Union -from dataclasses import dataclass -from scripts.resolve import resolve - -@dataclass -class BatchRequest: - id: str - input: str - max_chars: int = 8000 - profile: str = "balanced" - -@dataclass -class BatchResult: - id: str - status: str # "success" | "error" - result: Union[dict, None] - error: Union[str, None] - latency_ms: int - -async def resolve_batch( - requests: List[BatchRequest], - max_concurrent: int = 5 -) -> List[BatchResult]: - """Resolve multiple inputs in parallel.""" - - semaphore = asyncio.Semaphore(max_concurrent) - - async def resolve_single(req: BatchRequest) -> BatchResult: - async with semaphore: - start = time.time() - try: - result = await resolve_async( - req.input, - max_chars=req.max_chars, - profile=req.profile - ) - return BatchResult( - id=req.id, - status="success", - result=result, - error=None, - latency_ms=int((time.time() - start) * 1000) - ) - except Exception as e: - return BatchResult( - id=req.id, - status="error", - result=None, - error=str(e), - latency_ms=int((time.time() - start) * 1000) - ) - - # Process all requests in parallel - tasks = [resolve_single(req) for req in requests] - 
results = await asyncio.gather(*tasks) - - return list(results) - -# Web API endpoint -# web/app/api/resolve/batch/route.ts -``` - -**CLI Usage:** -```bash -# Batch file (JSON) -cat > batch.json << 'EOF' -[ - {"id": "1", "input": "https://example.com"}, - {"id": "2", "input": "Python tutorial"} -] -EOF - -./target/release/do-wdr resolve-batch batch.json --output results.json -``` - -**Limitations:** -- Max 20 requests per batch -- Max 5 concurrent -- 60 second timeout - ---- - -### Feature 3: Content Change Detection - -**Description:** Track content hashes and detect when cached content has changed. - -**Implementation:** - -```python -# scripts/change_detection.py - -import hashlib -import time -from typing import Optional -from scripts.utils import _get_from_cache, _save_to_cache - -def compute_content_hash(content: str) -> str: - """Compute SHA256 hash of content.""" - return hashlib.sha256(content.encode()).hexdigest()[:16] - -def check_content_changed( - url: str, - provider: str = "any" -) -> dict: - """Check if content has changed since last fetch.""" - - cache_key = f"hash:{provider}:{url}" - - # Get cached hash - cached = _get_from_cache(cache_key, "content_hash") - cached_hash = cached.get("hash") if cached else None - cached_time = cached.get("timestamp") if cached else None - - # Fetch current content - from scripts.resolve import resolve_url - result = resolve_url(url, max_chars=1000) # Small sample - - if result.get("source") == "none": - return { - "url": url, - "status": "error", - "error": "Failed to fetch content" - } - - # Compute current hash - current_hash = compute_content_hash(result["content"]) - - # Save new hash - _save_to_cache(cache_key, "content_hash", { - "hash": current_hash, - "timestamp": time.time() - }) - - # Compare - if cached_hash is None: - return { - "url": url, - "status": "new", - "changed": True, - "previous_hash": None, - "current_hash": current_hash - } - - changed = cached_hash != current_hash - - return { - "url": 
url, - "status": "changed" if changed else "unchanged", - "changed": changed, - "previous_hash": cached_hash, - "current_hash": current_hash, - "last_checked": cached_time - } - -# Web API endpoint -# web/app/api/cache/check/route.ts -``` - -**CLI Usage:** -```bash -./target/release/do-wdr cache-check "https://example.com" -``` - ---- - -### Feature 4: Export Format Options - -**Description:** Support multiple output formats beyond markdown. - -**Implementation:** - -```python -# scripts/export_formats.py - -from typing import Dict -import html -import csv -import io - -def convert_to_format(content: str, format_type: str) -> str: - """Convert markdown content to various formats.""" - - if format_type == "markdown": - return content - - elif format_type == "plain": - # Strip markdown syntax - import re - # Remove headers - plain = re.sub(r'^#{1,6}\s*', '', content, flags=re.MULTILINE) - # Remove links, keep text - plain = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', plain) - # Remove code fences - plain = re.sub(r'```[\s\S]*?```', '', plain) - # Remove inline code - plain = re.sub(r'`([^`]+)`', r'\1', plain) - # Remove bold/italic - plain = re.sub(r'\*\*?|__?', '', plain) - return plain.strip() - - elif format_type == "html": - # Simple markdown to HTML conversion - html_content = html.escape(content) - # Convert headers - for i in range(6, 0, -1): - html_content = re.sub( - rf'^#{i}\s+(.+)$', - rf'\1', - html_content, - flags=re.MULTILINE - ) - # Convert paragraphs - html_content = '

' + html_content.replace('\n\n', '

') + '

' - return f"{html_content}" - - elif format_type == "csv": - # Try to extract tabular data - # This is a simplified version - lines = content.split('\n') - output = io.StringIO() - writer = csv.writer(output) - - for line in lines: - if '|' in line: - # Markdown table row - cells = [c.strip() for c in line.split('|') if c.strip()] - writer.writerow(cells) - - return output.getvalue() - - else: - raise ValueError(f"Unknown format: {format_type}") - -# Supported formats -EXPORT_FORMATS = ["markdown", "plain", "html", "csv"] -``` - -**CLI Usage:** -```bash -./target/release/do-wdr resolve "https://example.com" --format html -./target/release/do-wdr resolve "https://example.com" --format plain --output output.txt -``` - ---- - -## Phase 2: Strategic Features (Weeks 3-6) - -### Feature 5: Streaming Response Support (SSE) - -**Description:** Stream resolution progress in real-time using Server-Sent Events. - -**Implementation:** - -```typescript -// web/app/api/resolve/stream/route.ts - -import { NextResponse } from 'next/server'; - -export async function POST(request: Request) { - const body = await request.json(); - const { input, profile = 'balanced' } = body; - - const encoder = new TextEncoder(); - const stream = new ReadableStream({ - async start(controller) { - // Send initial event - controller.enqueue(encoder.encode( - `event: start\ndata: ${JSON.stringify({ input, profile })}\n\n` - )); - - try { - // Resolve with streaming callbacks - const result = await resolveWithCallbacks(input, { - onProviderStart: (provider: string) => { - controller.enqueue(encoder.encode( - `event: provider_start\ndata: ${JSON.stringify({ provider })}\n\n` - )); - }, - onProviderComplete: (provider: string, result: any) => { - controller.enqueue(encoder.encode( - `event: provider_complete\ndata: ${JSON.stringify({ provider, result })}\n\n` - )); - }, - onResult: (partial: any) => { - controller.enqueue(encoder.encode( - `event: result\ndata: ${JSON.stringify(partial)}\n\n` - )); - } - 
}); - - // Send completion event - controller.enqueue(encoder.encode( - `event: complete\ndata: ${JSON.stringify(result)}\n\n` - )); - - } catch (error) { - controller.enqueue(encoder.encode( - `event: error\ndata: ${JSON.stringify({ error: String(error) })}\n\n` - )); - } finally { - controller.close(); - } - } - }); - - return new NextResponse(stream, { - headers: { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - }, - }); -} -``` - -**Web UI Component:** -```typescript -// Web UI streaming handler -const eventSource = new EventSource('/api/resolve/stream'); - -eventSource.addEventListener('provider_start', (e) => { - const data = JSON.parse(e.data); - updateStepper(data.provider, 'running'); -}); - -eventSource.addEventListener('result', (e) => { - const data = JSON.parse(e.data); - appendPartialContent(data.content); -}); - -eventSource.addEventListener('complete', (e) => { - const data = JSON.parse(e.data); - finalizeResult(data); - eventSource.close(); -}); -``` - ---- - -### Feature 6: Webhook & Async Callback System - -**Description:** Support async resolution with webhook callbacks. 
- -**Implementation:** - -```python -# scripts/async_jobs.py - -import asyncio -import json -import aiohttp -import uuid -from typing import Optional -from dataclasses import dataclass -from datetime import datetime - -# Simple in-memory job store (use Redis in production) -_jobs: dict = {} - -@dataclass -class Job: - id: str - status: str # "pending" | "running" | "completed" | "failed" - input: str - webhook_url: Optional[str] - result: Optional[dict] - error: Optional[str] - created_at: datetime - completed_at: Optional[datetime] - -async def create_job( - input: str, - webhook_url: Optional[str] = None, - max_chars: int = 8000 -) -> str: - """Create a new async job.""" - job_id = str(uuid.uuid4()) - - job = Job( - id=job_id, - status="pending", - input=input, - webhook_url=webhook_url, - result=None, - error=None, - created_at=datetime.now(), - completed_at=None - ) - - _jobs[job_id] = job - - # Start async processing - asyncio.create_task(_process_job(job_id, max_chars)) - - return job_id - -async def _process_job(job_id: str, max_chars: int): - """Process job in background.""" - job = _jobs[job_id] - job.status = "running" - - try: - from scripts.resolve import resolve - result = resolve(job.input, max_chars=max_chars) - - job.result = result - job.status = "completed" - job.completed_at = datetime.now() - - # Send webhook if configured - if job.webhook_url: - await _send_webhook(job) - - except Exception as e: - job.error = str(e) - job.status = "failed" - job.completed_at = datetime.now() - -def get_job_status(job_id: str) -> Optional[dict]: - """Get job status and result.""" - job = _jobs.get(job_id) - if not job: - return None - - return { - "id": job.id, - "status": job.status, - "input": job.input, - "result": job.result, - "error": job.error, - "created_at": job.created_at.isoformat(), - "completed_at": job.completed_at.isoformat() if job.completed_at else None - } - -async def _send_webhook(job: Job): - """Send result to webhook URL.""" - if not 
job.webhook_url: - return - - payload = { - "job_id": job.id, - "status": job.status, - "input": job.input, - "result": job.result, - "error": job.error - } - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - job.webhook_url, - json=payload, - timeout=aiohttp.ClientTimeout(total=30) - ) as resp: - if resp.status >= 400: - logger.warning(f"Webhook failed for job {job.id}: {resp.status}") - except Exception as e: - logger.error(f"Webhook error for job {job.id}: {e}") -``` - -**API Endpoints:** - -```typescript -// POST /api/jobs -// Create async job -{ - "input": "https://example.com", - "webhook_url": "https://myapp.com/webhook", - "max_chars": 8000 -} - -// Response: { "job_id": "uuid" } - -// GET /api/jobs/{id} -// Check job status - -// Response: -{ - "id": "uuid", - "status": "completed", - "input": "https://example.com", - "result": { /* ... */ }, - "created_at": "2024-01-01T12:00:00", - "completed_at": "2024-01-01T12:00:05" -} -``` - ---- - -### Feature 7: Metrics Dashboard - -**Description:** Web-based dashboard for usage analytics and cost tracking. - -**Implementation:** - -```typescript -// web/app/dashboard/page.tsx - -import { useEffect, useState } from 'react'; -import { LineChart, BarChart, PieChart } from 'recharts'; - -interface MetricsData { - daily_requests: { date: string; count: number }[]; - provider_usage: { provider: string; count: number }[]; - latency_trends: { date: string; p50: number; p95: number }[]; - cache_hit_rate: number; - cost_breakdown: { provider: string; cost: number }[]; -} - -export default function Dashboard() { - const [metrics, setMetrics] = useState(null); - - useEffect(() => { - fetch('/api/metrics') - .then(res => res.json()) - .then(data => setMetrics(data)); - }, []); - - if (!metrics) return ; - - return ( -
-

Resolver Metrics

- -
- - - {/* Chart implementation */} - - - - - - {/* Chart implementation */} - - - - - - {/* Chart implementation */} - - - - -
{metrics.cache_hit_rate}%
-
-
-
- ); -} -``` - ---- - -## Phase 3: Enterprise Features (Weeks 6-8) - -### Feature 8: CSS Selector Extraction - -**Description:** Target specific content areas using CSS selectors. - -**CLI Usage:** -```bash -./target/release/do-wdr resolve "https://example.com" \ - --selector "article.main-content" \ - --exclude-selector "nav,footer,aside" -``` - -**Implementation:** Use Playwright or BeautifulSoup for selector-based extraction. - ---- - -### Feature 9: Image Captioning - -**Description:** Automatically caption images using vision models. - -**Implementation:** -```python -async def caption_images(content: str) -> str: - """Add captions to images in content.""" - # Extract image URLs - # Use Jina VLM or Mistral vision API - # Add captions as alt text -``` - ---- - -### Feature 10: Team/Workspace Support - -**Description:** Multi-user workspaces with shared history and API keys. - -**Implementation:** -- Clerk/Auth0 integration -- Workspace-scoped data -- Role-based permissions -- Team billing - ---- - -## API Summary - -### New Endpoints - -| Endpoint | Method | Description | -|----------|--------|-------------| -| `/api/resolve/batch` | POST | Batch resolution | -| `/api/resolve/stream` | POST | Streaming (SSE) | -| `/api/jobs` | POST | Create async job | -| `/api/jobs/{id}` | GET | Job status | -| `/api/cache/check` | POST | Change detection | -| `/api/metrics` | GET | Usage metrics | -| `/api/export` | POST | Export formats | - -### New CLI Commands - -```bash -do-wdr resolve-batch -do-wdr cache-check -do-wdr jobs create [--webhook ] -do-wdr jobs status -do-wdr dashboard # Open web dashboard -``` - ---- - -## Dependencies - -### Python -``` -aiohttp>=3.9.0 -aiodns>=3.1.0 -pytest-asyncio>=0.21.0 -``` - -### Web -```json -{ - "recharts": "^2.10", - "date-fns": "^2.30" -} -``` - ---- - -## Testing Plan - -1. **Unit tests** for each feature -2. **Integration tests** for API endpoints -3. **Load tests** for streaming and batch -4. 
**E2E tests** for dashboard - ---- - -## Timeline - -| Week | Features | -|------|----------| -| 1-2 | Structured JSON, Batch API, Change Detection, Export Formats | -| 3-6 | Streaming (SSE), Webhooks, Metrics Dashboard | -| 6-8 | CSS Selectors, Image Captioning, Team Support | +- [AUDIT.md](AUDIT.md) — Priority overview +- [scripts/resolve.py](../scripts/resolve.py) — Integration points +- [web/app/api/resolve/route.ts](../web/app/api/resolve/route.ts) — API endpoints diff --git a/plans/05-ui-ux-improvements.md b/plans/05-ui-ux-improvements.md index 39dec69..09a6189 100644 --- a/plans/05-ui-ux-improvements.md +++ b/plans/05-ui-ux-improvements.md @@ -1,1139 +1,37 @@ -# UI/UX Improvements Plan +# UI/UX Improvements (Condensed Status) -## Overview +## Original Overview -This plan implements 10 UI/UX improvements across Web UI, CLI, and design system integration to enhance user experience, accessibility, and visual consistency. +10+ UI/UX improvements across Web UI and CLI: cascade progress stepper, +streaming response UI, code highlighting, error recovery, accessibility, +CLI colors, toasts, result cards, history cleanup. ---- +## Status -## Phase 1: Critical Improvements (Week 1) +Partially addressed by merged PRs. Core accessibility and history features +implemented. -### Improvement 1: Cascade Progress Stepper +## What's Done -**Description:** Visual stepper showing real-time provider cascade progress during resolution. +The following 5 items have been implemented (4 original + 1 restoration): +- **Keyboard navigation** (Phase 2): ✅ PR #324 merged — search accessibility + and keyboard navigation improvements. +- **Profile combobox** (Phase 5): ✅ PR #290 merged — accessible profile selector. +- **Active provider indicators**: ✅ PR #291 merged — visual indication of + active providers in search profile. +- **History state persistence**: ✅ Merged — search and history state + preserved across navigations. 
+- **Firecrawl restored to Web UI**: ✅ PR #321 merged — re-enabled Firecrawl + provider in constants.ts + E2E test. -**Current Issue:** Users see only "Fetching..." with no provider visibility during 2-10s operations. +## What Remains -**Implementation:** +All other improvements (stepper, streaming UI, code blocks, CLI colors, toasts, +result cards, history cleanup, error recovery UI, tooltips, quick toggles) +remain unimplemented. -```typescript -// web/app/components/ResolveStepper.tsx +## References -import React from 'react'; -import { Check, Loader2, X, Clock } from 'lucide-react'; - -interface Step { - id: string; - name: string; - status: 'pending' | 'running' | 'complete' | 'failed' | 'skipped'; - latency?: number; -} - -interface ResolveStepperProps { - steps: Step[]; - currentProvider?: string; -} - -export function ResolveStepper({ steps, currentProvider }: ResolveStepperProps) { - return ( -
-
- {steps.map((step, index) => ( -
-
- {step.status === 'pending' && } - {step.status === 'running' && } - {step.status === 'complete' && } - {step.status === 'failed' && } - {step.status === 'skipped' && } -
-
- {step.name} - {step.latency && step.status === 'complete' && ( - {step.latency}ms - )} -
- {index < steps.length - 1 && ( -
- )} -
- ))} -
-
- ); -} -``` - -```css -/* web/app/components/ResolveStepper.css */ - -.stepper-container { - background: var(--do-wdr-color-bg-secondary); - border-radius: var(--do-wdr-radius-lg); - padding: var(--do-wdr-space-4); - margin-bottom: var(--do-wdr-space-4); -} - -.stepper { - display: flex; - flex-direction: column; - gap: var(--do-wdr-space-2); -} - -.step { - display: flex; - align-items: center; - gap: var(--do-wdr-space-3); - padding: var(--do-wdr-space-2); - border-radius: var(--do-wdr-radius-md); - transition: background-color 0.2s ease; -} - -.step:hover { - background-color: var(--do-wdr-color-bg-tertiary); -} - -.step-icon { - width: 24px; - height: 24px; - display: flex; - align-items: center; - justify-content: center; - border-radius: 50%; - font-size: 12px; -} - -.step.pending .step-icon { - background: var(--do-wdr-color-bg-tertiary); - color: var(--do-wdr-color-text-tertiary); -} - -.step.running .step-icon { - background: var(--do-wdr-color-info); - color: var(--do-wdr-color-info-contrast); -} - -.step.complete .step-icon { - background: var(--do-wdr-color-success); - color: var(--do-wdr-color-success-contrast); -} - -.step.failed .step-icon { - background: var(--do-wdr-color-error); - color: var(--do-wdr-color-error-contrast); -} - -.step-content { - flex: 1; - display: flex; - justify-content: space-between; - align-items: center; -} - -.step-name { - font-size: var(--do-wdr-font-size-sm); - color: var(--do-wdr-color-text-primary); -} - -.step-latency { - font-size: var(--do-wdr-font-size-xs); - color: var(--do-wdr-color-text-tertiary); - font-family: var(--do-wdr-font-mono); -} - -.step-connector { - width: 2px; - height: 16px; - background: var(--do-wdr-color-border); - margin-left: 11px; - margin-top: -8px; - margin-bottom: -8px; -} - -.step-connector.active { - background: var(--do-wdr-color-success); -} - -/* Reduced motion support */ -@media (prefers-reduced-motion: reduce) { - .step-icon .animate-spin { - animation: none; - } - - .step { - 
transition: none; - } -} - -/* High contrast support */ -@media (forced-colors: active) { - .step-icon { - border: 2px solid currentColor; - } -} -``` - -**Integration:** -```typescript -// web/app/page.tsx - -import { ResolveStepper } from './components/ResolveStepper'; - -// In the resolve function: -const providerSteps = [ - { id: 'cache', name: 'Cache Check', status: 'complete', latency: 5 }, - { id: 'llms_txt', name: 'llms.txt Probe', status: 'failed' }, - { id: 'jina', name: 'Jina Reader', status: 'running' }, - { id: 'firecrawl', name: 'Firecrawl', status: 'pending' }, - // ... -]; - -// Render during resolution - -``` - ---- - -### Improvement 2: Streaming Response UI - -**Description:** Real-time content display using Server-Sent Events (SSE). - -**Implementation:** - -```typescript -// web/app/components/StreamingResult.tsx - -import { useEffect, useState } from 'react'; -import { StreamIndicator } from './StreamIndicator'; - -interface StreamingResultProps { - input: string; -} - -export function StreamingResult({ input }: StreamingResultProps) { - const [content, setContent] = useState(''); - const [isStreaming, setIsStreaming] = useState(false); - const [providerStatus, setProviderStatus] = useState(''); - - useEffect(() => { - const eventSource = new EventSource('/api/resolve/stream', { - body: JSON.stringify({ input }) - }); - - eventSource.addEventListener('provider_start', (e) => { - const data = JSON.parse(e.data); - setProviderStatus(`Trying ${data.provider}...`); - }); - - eventSource.addEventListener('result', (e) => { - const data = JSON.parse(e.data); - setContent(prev => prev + data.content); - setIsStreaming(true); - }); - - eventSource.addEventListener('complete', (e) => { - setIsStreaming(false); - eventSource.close(); - }); - - eventSource.addEventListener('error', (e) => { - setIsStreaming(false); - eventSource.close(); - }); - - return () => eventSource.close(); - }, [input]); - - return ( -
- {isStreaming && } -
- -
-
- ); -} -``` - -```typescript -// web/app/components/StreamIndicator.tsx - -import { Loader2, Zap } from 'lucide-react'; - -interface StreamIndicatorProps { - provider: string; -} - -export function StreamIndicator({ provider }: StreamIndicatorProps) { - return ( -
-
- -
-
- {provider} - - - Streaming... - -
-
- ); -} -``` - ---- - -### Improvement 3: Syntax Highlighting for Code Blocks - -**Description:** Syntax highlighting for code blocks in markdown output. - -**Implementation:** - -```typescript -// web/app/components/CodeBlock.tsx - -import { useState } from 'react'; -import { Copy, Check } from 'lucide-react'; -import Prism from 'prismjs'; -import 'prismjs/components/prism-python'; -import 'prismjs/components/prism-rust'; -import 'prismjs/components/prism-javascript'; -import 'prismjs/components/prism-typescript'; -import 'prismjs/components/prism-bash'; -import 'prismjs/components/prism-json'; - -interface CodeBlockProps { - code: string; - language?: string; -} - -export function CodeBlock({ code, language = 'text' }: CodeBlockProps) { - const [copied, setCopied] = useState(false); - - const highlighted = Prism.highlight( - code, - Prism.languages[language] || Prism.languages.text, - language - ); - - const handleCopy = () => { - navigator.clipboard.writeText(code); - setCopied(true); - setTimeout(() => setCopied(false), 2000); - }; - - return ( -
-
- {language} - -
-
-        
-      
-
- ); -} -``` - -```css -/* web/app/components/CodeBlock.css */ - -.code-block { - background: var(--do-wdr-codeblock-bg); - border-radius: var(--do-wdr-radius-md); - margin: var(--do-wdr-space-4) 0; - overflow: hidden; -} - -.code-block-header { - display: flex; - justify-content: space-between; - align-items: center; - padding: var(--do-wdr-space-2) var(--do-wdr-space-3); - background: var(--do-wdr-codeblock-header-bg); - border-bottom: 1px solid var(--do-wdr-color-border); -} - -.code-block-language { - font-size: var(--do-wdr-font-size-xs); - color: var(--do-wdr-color-text-tertiary); - text-transform: uppercase; - font-family: var(--do-wdr-font-mono); -} - -.code-block-copy { - display: flex; - align-items: center; - gap: var(--do-wdr-space-1); - padding: var(--do-wdr-space-1) var(--do-wdr-space-2); - font-size: var(--do-wdr-font-size-xs); - color: var(--do-wdr-color-text-secondary); - background: transparent; - border: 1px solid var(--do-wdr-color-border); - border-radius: var(--do-wdr-radius-sm); - cursor: pointer; - transition: all 0.2s ease; -} - -.code-block-copy:hover { - background: var(--do-wdr-color-bg-tertiary); - border-color: var(--do-wdr-color-border-hover); -} - -.code-block pre { - margin: 0; - padding: var(--do-wdr-space-3); - overflow-x: auto; - font-family: var(--do-wdr-font-mono); - font-size: var(--do-wdr-font-size-sm); - line-height: 1.5; -} - -.code-block code { - color: var(--do-wdr-codeblock-text-color); -} - -/* Syntax highlighting tokens */ -.token-keyword { color: var(--do-wdr-codeblock-keyword); } -.token-string { color: var(--do-wdr-codeblock-string); } -.token-comment { color: var(--do-wdr-codeblock-comment); } -.token-function { color: var(--do-wdr-codeblock-function); } -.token-number { color: var(--do-wdr-codeblock-number); } -``` - ---- - -### Improvement 4: Error Recovery & Form Validation - -**Description:** Comprehensive error handling with actionable recovery options. 
- -**Implementation:** - -```typescript -// web/app/components/ErrorDisplay.tsx - -import { AlertCircle, RefreshCw, Settings, HelpCircle } from 'lucide-react'; - -interface ErrorAction { - label: string; - icon: React.ReactNode; - action: () => void; - variant: 'primary' | 'secondary'; -} - -interface ErrorDisplayProps { - error: string; - type: 'network' | 'rate_limit' | 'provider_failure' | 'validation' | 'unknown'; - onRetry?: () => void; - onSkipProvider?: () => void; - onHelp?: () => void; -} - -export function ErrorDisplay({ - error, - type, - onRetry, - onSkipProvider, - onHelp -}: ErrorDisplayProps) { - const getErrorDetails = () => { - switch (type) { - case 'network': - return { - title: 'Network Error', - description: 'Unable to reach the resolution service. Please check your connection.', - actions: [ - { label: 'Retry', icon: , action: onRetry, variant: 'primary' } - ] - }; - case 'rate_limit': - return { - title: 'Rate Limited', - description: 'Too many requests. Please wait a moment before trying again.', - actions: [ - { label: 'Retry with Different Provider', icon: , action: onSkipProvider, variant: 'primary' }, - { label: 'View Rate Limits', icon: , action: onHelp, variant: 'secondary' } - ] - }; - case 'provider_failure': - return { - title: 'Provider Failed', - description: 'The selected provider is currently unavailable.', - actions: [ - { label: 'Try Alternative Provider', icon: , action: onSkipProvider, variant: 'primary' }, - { label: 'Check Provider Status', icon: , action: onHelp, variant: 'secondary' } - ] - }; - default: - return { - title: 'Resolution Failed', - description: error, - actions: [ - { label: 'Retry', icon: , action: onRetry, variant: 'primary' } - ] - }; - } - }; - - const details = getErrorDetails(); - - return ( -
-
- -
-
-

{details.title}

-

{details.description}

-
- {details.actions.map((action, index) => ( - - ))} -
-
-
- ); -} -``` - ---- - -## Phase 2: Accessibility & Mobile (Week 2) - -### Improvement 5: Keyboard Navigation for Provider Selection - -**Description:** Full keyboard navigation for provider grid using roving tabindex pattern. - -```typescript -// web/app/hooks/useRovingTabindex.ts - -import { useState, useCallback, useRef } from 'react'; - -export function useRovingTabindex(itemCount: number) { - const [activeIndex, setActiveIndex] = useState(0); - const containerRef = useRef(null); - - const handleKeyDown = useCallback((e: React.KeyboardEvent) => { - switch (e.key) { - case 'ArrowRight': - case 'ArrowDown': - e.preventDefault(); - setActiveIndex(prev => (prev + 1) % itemCount); - break; - case 'ArrowLeft': - case 'ArrowUp': - e.preventDefault(); - setActiveIndex(prev => (prev - 1 + itemCount) % itemCount); - break; - case 'Home': - e.preventDefault(); - setActiveIndex(0); - break; - case 'End': - e.preventDefault(); - setActiveIndex(itemCount - 1); - break; - } - }, [itemCount]); - - const getItemProps = useCallback((index: number) => ({ - tabIndex: index === activeIndex ? 0 : -1, - onKeyDown: handleKeyDown, - onFocus: () => setActiveIndex(index), - role: 'button', - 'aria-pressed': false, - }), [activeIndex, handleKeyDown]); - - return { - containerRef, - activeIndex, - getItemProps, - }; -} -``` - ---- - -### Improvement 6: Mobile-Optimized History View - -**Description:** Touch-friendly history with swipe actions and card layout. 
- -```typescript -// web/app/components/MobileHistoryCard.tsx - -import { useState } from 'react'; -import { Trash2, ExternalLink } from 'lucide-react'; - -interface HistoryCardProps { - entry: { - id: string; - input: string; - timestamp: string; - provider: string; - }; - onDelete: () => void; - onSelect: () => void; -} - -export function MobileHistoryCard({ entry, onDelete, onSelect }: HistoryCardProps) { - const [swipeOffset, setSwipeOffset] = useState(0); - - const handleTouchStart = (e: React.TouchEvent) => { - // Touch start logic - }; - - const handleTouchMove = (e: React.TouchEvent) => { - // Swipe logic - }; - - const handleTouchEnd = () => { - if (swipeOffset > 100) { - onDelete(); - } - setSwipeOffset(0); - }; - - return ( -
-
-

{entry.input}

-
- {entry.provider} - - {new Date(entry.timestamp).toLocaleDateString()} - -
-
- -
- ); -} -``` - ---- - -## Phase 3: Polish & Power User Features (Week 3) - -### Improvement 7: CLI Colors & Progress Bars - -**Description:** Colored CLI output with progress indicators. - -**Implementation:** - -```rust -// cli/src/output/colored.rs - -use colored::*; -use indicatif::{ProgressBar, ProgressStyle}; - -pub struct ColoredOutput; - -impl ColoredOutput { - pub fn success(msg: &str) { - println!("{}", msg.green()); - } - - pub fn error(msg: &str) { - eprintln!("{} {}", "Error:".red().bold(), msg); - } - - pub fn info(msg: &str) { - println!("{} {}", "Info:".blue(), msg); - } - - pub fn warning(msg: &str) { - println!("{} {}", "Warning:".yellow(), msg); - } - - pub fn create_progress_bar(total: u64) -> ProgressBar { - let pb = ProgressBar::new(total); - pb.set_style( - ProgressStyle::default_bar() - .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} {msg}") - .unwrap() - .progress_chars("#>-"), - ); - pb - } -} - -// Usage in resolver -pub fn print_provider_attempt(provider: &str) { - ColoredOutput::info(&format!("Trying provider: {}", provider.cyan())); -} - -pub fn print_provider_success(provider: &str, latency: u64) { - ColoredOutput::success(&format!( - "✓ {} resolved in {}ms", - provider.green(), - latency.to_string().yellow() - )); -} - -pub fn print_provider_failure(provider: &str, error: &str) { - ColoredOutput::error(&format!( - "✗ {} failed: {}", - provider, - error - )); -} -``` - -**Dependencies:** -```toml -[dependencies] -colored = "2.0" -indicatif = "0.17" -``` - ---- - -### Improvement 8: Toast Notifications - -**Description:** Stacking toast notifications with actions. 
- -```typescript -// web/app/components/Toast.tsx - -import { useEffect, useState } from 'react'; -import { X, Check, AlertCircle, Info } from 'lucide-react'; - -interface Toast { - id: string; - type: 'success' | 'error' | 'warning' | 'info'; - title: string; - message?: string; - action?: { - label: string; - onClick: () => void; - }; - duration?: number; -} - -interface ToastContainerProps { - toasts: Toast[]; - onDismiss: (id: string) => void; -} - -export function ToastContainer({ toasts, onDismiss }: ToastContainerProps) { - return ( -
- {toasts.map((toast, index) => ( - onDismiss(toast.id)} - /> - ))} -
- ); -} - -function ToastItem({ toast, index, onDismiss }: { - toast: Toast; - index: number; - onDismiss: () => void; -}) { - const [isPaused, setIsPaused] = useState(false); - const [progress, setProgress] = useState(100); - - useEffect(() => { - if (isPaused || !toast.duration) return; - - const interval = setInterval(() => { - setProgress(p => { - if (p <= 0) { - onDismiss(); - return 0; - } - return p - (100 / (toast.duration! / 100)); - }); - }, 100); - - return () => clearInterval(interval); - }, [isPaused, toast.duration, onDismiss]); - - const icon = { - success: , - error: , - warning: , - info: , - }[toast.type]; - - return ( -
setIsPaused(true)} - onMouseLeave={() => setIsPaused(false)} - role="alert" - > -
{icon}
-
-

{toast.title}

- {toast.message &&

{toast.message}

} - {toast.action && ( - - )} -
- - {toast.duration && ( -
- )} -
- ); -} -``` - ---- - -## Phase 4: Accessibility Compliance (Week 4) - -### Improvement 9: Reduced Motion Support - -**Description:** Respect `prefers-reduced-motion` for animations. - -```css -/* web/app/globals.css */ - -/* Default animations */ -@keyframes pulse { - 0%, 100% { opacity: 1; } - 50% { opacity: 0.5; } -} - -@keyframes spin { - from { transform: rotate(0deg); } - to { transform: rotate(360deg); } -} - -.animate-pulse { - animation: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite; -} - -.animate-spin { - animation: spin 1s linear infinite; -} - -/* Reduced motion */ -@media (prefers-reduced-motion: reduce) { - .animate-pulse, - .animate-spin { - animation: none; - } - - .step { - transition: none; - } - - .toast { - transition: none; - } - - * { - animation-duration: 0.01ms !important; - animation-iteration-count: 1 !important; - transition-duration: 0.01ms !important; - } -} -``` - ---- - -### Improvement 10: High Contrast Mode - -**Description:** Support Windows High Contrast mode. - -```css -/* web/app/globals.css */ - -@media (forced-colors: active) { - /* Use system colors */ - .step-icon, - .code-block, - .button, - .input { - border: 2px solid currentColor; - } - - /* Ensure sufficient contrast */ - .text-secondary { - color: CanvasText; - } - - /* Remove decorative backgrounds */ - .gradient-bg, - .blur-effect { - background: Canvas; - } -} -``` - ---- - -## Dependencies - -### Web -```json -{ - "prismjs": "^1.29", - "@types/prismjs": "^1.26", - "recharts": "^2.10", - "lucide-react": "latest" -} -``` - -### Rust CLI -```toml -[dependencies] -colored = "2.0" -indicatif = "0.17" -``` - ---- - -## Testing - -1. **Accessibility audit** with axe-core -2. **Keyboard navigation** test -3. **Screen reader** test (VoiceOver, NVDA) -4. **Mobile responsiveness** test -5. **Reduced motion** test -6. 
**High contrast** test - ---- - -## Success Metrics - -- [ ] All components keyboard accessible -- [ ] WCAG 2.2 AA compliance -- [ ] Mobile-friendly history -- [ ] CLI color support -- [ ] Reduced motion support -- [ ] High contrast support - ---- - -## Phase 5: Result Experience Enhancements (Week 5) - -### Improvement 11: Result Canonicalization & Deduplication - -**Description:** Normalize URLs, strip redundant mirrors (e.g., `llm-digest` prefixes), and collapse duplicate entries before rendering cards or history rows. This keeps provider output readable even when upstream feeds surface the same document multiple ways. - -```typescript -// web/app/lib/normalizeResults.ts - -const NORMALIZERS = [ - (url: URL) => { - if (url.hostname === 'nextjs.org' && url.pathname.startsWith('/docs/llm-digest/')) { - url.pathname = url.pathname.replace('/docs/llm-digest', '/docs'); - } - return url; - }, - (url: URL) => { - url.hash = ''; - return url; - }, -]; - -export function normalizeResult(result: ProviderResult) { - try { - const url = NORMALIZERS.reduce((acc, fn) => fn(acc), new URL(result.url)); - const normalizedUrl = url.toString(); - return { ...result, url: normalizedUrl, dedupeKey: normalizedUrl.toLowerCase() }; - } catch (_) { - return { ...result, dedupeKey: result.url.toLowerCase() }; - } -} - -export function dedupeResults(results: ProviderResult[]) { - const seen = new Map(); - results.forEach((raw) => { - const normalized = normalizeResult(raw); - if (!seen.has(normalized.dedupeKey)) { - seen.set(normalized.dedupeKey, normalized); - } - }); - return Array.from(seen.values()); -} -``` - -### Improvement 12: Provider Status Tooltips & CTA - -**Description:** Replace the static "provider unavailable" buttons with contextual tooltips explaining why they are disabled (e.g., missing API key) plus a single-click CTA to open the API Keys panel. - -```tsx - setPanel('keys')} - tooltip={!hasTavilyKey ? 
'Click to open API key drawer' : undefined} - aria-describedby={!hasTavilyKey ? 'tavily-help' : undefined} -/> - - - Tavily requires `TAVILY_API_KEY`. Open the API Keys drawer to add it. - -``` - -### Improvement 13: Search Profile Combobox Refactor - -**Description:** Swap the current native select + textbox hack for a headless combobox so keyboard users can change profiles without polluting the query field. Capture arrow keys, typeahead, and announce the active profile via `aria-live`. - -```tsx -// web/app/components/ProfileCombobox.tsx - -import { useCombobox } from 'downshift'; - -export function ProfileCombobox({ options, value, onChange }: Props) { - const combobox = useCombobox({ - items: options, - selectedItem: options.find((opt) => opt.value === value), - onSelectedItemChange: ({ selectedItem }) => selectedItem && onChange(selectedItem.value), - }); - - return ( -
- -
    - {combobox.isOpen && options.map((item, index) => ( -
  • - {item.label} -
  • - ))} -
-
- ); -} -``` - -### Improvement 14: Quick Toggles for Advanced Flags - -**Description:** Surface `Skip cache`, `Deep research`, and latency budget presets directly next to the profile selector so power users can run "Balanced + Deep research" without reopening the drawer. - -```tsx -
- updateSettings({ deepResearch: pressed })} - /> - updateSettings({ skipCache: pressed })} - /> - updateSettings({ latencyBudget })} - /> -
-``` - -### Improvement 15: Result Cards Instead of Textarea - -**Description:** Replace the monolithic textarea with discrete cards so each hit can show metadata, actions, and accessible markup. Enables per-result copy, open-in-new-tab, and "helpful" toggles. - -```tsx -// web/app/components/ResultCard.tsx - -export function ResultCard({ result }: { result: ProviderResult }) { - return ( -
-
- - {result.title} - - {result.provider} -
-

{result.summary}

-
- - -
-
- ); -} -``` - -### Improvement 16: History Storage Cleanup - -**Description:** Store normalized URLs + profile metadata in history entries so duplicates collapse automatically and filters (by profile or flags) work. This also lets us highlight which providers fed each response without fetching new data. - -```ts -type HistoryEntry = { - id: string; - input: string; - profile: SearchProfile; - flags: { deepResearch: boolean; skipCache: boolean }; - providers: string[]; - normalizedUrlHashes: string[]; -}; - -export function addHistoryEntry(entry: Omit) { - const id = crypto.randomUUID(); - const payload = { ...entry, id }; - historyStore.update((current) => { - const deduped = current.filter((existing) => !hasSameUrls(existing, payload)); - return [payload, ...deduped].slice(0, 50); - }); -} -``` - ---- +- [web/app/page.tsx](../web/app/page.tsx) — Main UI entry point +- [cli/ui/](../cli/ui/) — CLI design system +- [ADR-009](009-cross-runtime-analysis.md) — Web parity gaps diff --git a/plans/06-testing-improvements.md b/plans/06-testing-improvements.md index 2d3d2b9..9749400 100644 --- a/plans/06-testing-improvements.md +++ b/plans/06-testing-improvements.md @@ -1,876 +1,37 @@ -# Testing & Quality Improvements Plan +# Testing & Quality Improvements (Condensed Status) -## Overview +## Original Overview -This plan implements 10 testing and quality improvements to increase code confidence, coverage, and release reliability. +10 testing improvements across 4 phases: provider tests, security tests, +parity tests, test infrastructure, E2E tests, documentation testing. ---- +## Status -## Phase 1: Critical Test Coverage (Week 1) +Partially addressed by ongoing CI improvements (ADR-013). Detailed test +coverage plans remain valid. -### Improvement 1: Serper Provider Tests +## What's Done -**Gap:** Serper provider (Google Search via Serper API) has no dedicated tests. 
+- **CI reliability**: ✅ npm `--legacy-peer-deps` fix, libsql `--test-threads=1`, + ESLint config update (see [ADR-013](013-test-coverage-and-ci-reliability.md)). +- **Test fixtures**: `conftest.py` updated with lock-safe clearing methods + (ADR-012 Wave 1). -**Implementation:** +## What Remains -```python -# tests/test_providers.py +All 10 improvements remain to be implemented: +- Serper provider tests (Phase 1) +- Security test suite (SSRF, URL validation, input sanitization) +- Python/Rust parity tests +- Performance benchmark tests +- Coverage threshold enforcement (80%) +- Web E2E with real backend +- Error condition tests (rate limit, network errors, quality thresholds) +- Documentation testing (README examples) +- Flaky test detection (reruns) -import os -import pytest -from scripts.resolve import resolve_with_order -from scripts.models import ProviderType +## References -@pytest.fixture -def mock_serper_response(): - """Mock Serper API response.""" - return { - "searchParameters": { - "q": "Python tutorial", - "engine": "google" - }, - "organic": [ - { - "title": "Python Tutorial - W3Schools", - "link": "https://www.w3schools.com/python/", - "snippet": "Python is a popular programming language...", - "position": 1 - }, - { - "title": "Python For Beginners", - "link": "https://www.python.org/about/gettingstarted/", - "snippet": "Learning Python has never been easier...", - "position": 2 - } - ], - "relatedSearches": [ - {"query": "python tutorial for beginners"}, - {"query": "python tutorial pdf"} - ] - } - -class TestSerperProvider: - """Test Serper search provider.""" - - def test_serper_available_with_key(self, monkeypatch): - """Test provider is available when API key is set.""" - monkeypatch.setenv("SERPER_API_KEY", "test-key") - from scripts.providers_impl import resolve_with_serper - # Provider should be available - assert resolve_with_serper is not None - - def test_serper_unavailable_without_key(self, monkeypatch): - """Test provider is unavailable 
when API key is not set.""" - monkeypatch.delenv("SERPER_API_KEY", raising=False) - from scripts.providers_impl import resolve_with_serper - - result = resolve_with_serper("test query", max_chars=1000) - assert result is None - - @pytest.mark.live - @pytest.mark.skipif( - not os.getenv("SERPER_API_KEY"), - reason="No SERPER_API_KEY environment variable" - ) - def test_live_serper_search(self): - """Test Serper with real API.""" - from scripts.providers_impl import resolve_with_serper - - result = resolve_with_serper( - "Python tutorial", - max_chars=2000 - ) - - assert result is not None - assert result.source == "serper" - assert result.query == "Python tutorial" - assert len(result.content) > 200 - assert "Python" in result.content - - def test_serper_rate_limit_handling(self, requests_mock, monkeypatch): - """Test rate limit detection and cooldown.""" - monkeypatch.setenv("SERPER_API_KEY", "test-key") - from scripts.providers_impl import resolve_with_serper - - # Mock 429 response - requests_mock.post( - "https://google.serper.dev/search", - status_code=429, - json={"error": "Rate limit exceeded"} - ) - - result = resolve_with_serper("test query", max_chars=1000) - assert result is None - - # Verify rate limit was set - from scripts.providers_impl import _is_rate_limited - assert _is_rate_limited("serper") - -@pytest.mark.live -@pytest.mark.skipif( - not os.getenv("SERPER_API_KEY"), - reason="No SERPER_API_KEY environment variable" -) -def test_live_serper_with_real_api_key(): - """Live integration test for Serper.""" - from scripts.resolve import resolve_query - - result = resolve_query( - "Python tutorial", - skip_providers={"exa_mcp", "exa", "tavily", "duckduckgo", "mistral_websearch"} - ) - - assert result is not None - assert result["source"] == "serper" - assert len(result["content"]) > 200 -``` - ---- - -### Improvement 2: Security Test Suite - -**Gap:** No security tests for SSRF prevention, URL validation, or input sanitization. 
- -**Implementation:** - -```python -# tests/test_security.py - -import pytest -from scripts.utils import validate_url, is_url -from scripts.resolve import resolve_url - -class TestURLValidation: - """Test URL validation security.""" - - @pytest.mark.parametrize("url,expected", [ - ("https://example.com", True), - ("http://example.com", True), - ("ftp://example.com", False), # Not allowed - ("file:///etc/passwd", False), # SSRF attempt - ("http://localhost:8080", False), # Private IP - ("http://192.168.1.1", False), # Private IP - ("http://10.0.0.1", False), # Private IP - ("http://127.0.0.1", False), # Loopback - ("http://169.254.169.254", False), # AWS metadata - ("http://[::1]", False), # IPv6 loopback - ("javascript:alert(1)", False), # XSS attempt - ("data:text/html,", False), # Data URI - ]) - def test_validate_url_blocks_private_ips(self, url, expected): - """Test that private IP ranges are blocked.""" - result = validate_url(url) - assert result == expected - - def test_validate_url_blocks_localhost(self): - """Test localhost is blocked.""" - assert not validate_url("http://localhost:3000") - assert not validate_url("https://localhost") - - def test_validate_url_allows_public_urls(self): - """Test public URLs are allowed.""" - assert validate_url("https://github.com") - assert validate_url("https://docs.python.org") - -class TestSSRFPrevention: - """Test SSRF attack prevention.""" - - def test_resolve_blocks_internal_ips(self): - """Test resolution blocks internal IP addresses.""" - result = resolve_url("http://192.168.1.1/admin") - assert result["source"] == "none" - assert "blocked" in result.get("error", "").lower() or "invalid" in result.get("error", "").lower() - - def test_resolve_blocks_localhost(self): - """Test resolution blocks localhost.""" - result = resolve_url("http://localhost:8000/api") - assert result["source"] == "none" - - def test_resolve_blocks_metadata_endpoints(self): - """Test resolution blocks cloud metadata endpoints.""" - result = 
resolve_url("http://169.254.169.254/latest/meta-data/") - assert result["source"] == "none" - - def test_resolve_allows_public_urls(self): - """Test resolution allows public URLs.""" - result = resolve_url("https://example.com") - # Should not be blocked (may fail for other reasons, but not security) - assert "blocked" not in result.get("error", "").lower() - -class TestInputSanitization: - """Test input sanitization.""" - - def test_query_input_sanitization(self): - """Test query input is sanitized.""" - # SQL injection attempt - malicious_query = "test'; DROP TABLE users; --" - # Should be handled safely (not executed) - result = resolve_query(malicious_query, max_chars=100) - # Should not crash or execute SQL - assert isinstance(result, dict) - - def test_xss_prevention_in_content(self): - """Test XSS payloads in resolved content are neutralized.""" - # This would require mocking a response with XSS - pass - - def test_header_injection_prevention(self): - """Test header injection attempts are blocked.""" - # URL with newline in header - malicious_url = "https://example.com\r\nX-Injected: malicious" - # Should be rejected - assert not validate_url(malicious_url) -``` - ---- - -### Improvement 3: Python/Rust Parity Tests - -**Gap:** No validation that Python and Rust CLI produce consistent results. 
- -**Implementation:** - -```python -# tests/test_python_rust_parity.py - -import subprocess -import json -import pytest -from scripts.resolve import resolve - -class TestPythonRustParity: - """Test Python and Rust implementations produce consistent results.""" - - @pytest.fixture(scope="class") - def rust_cli(self): - """Build Rust CLI for testing.""" - result = subprocess.run( - ["cargo", "build", "--release"], - cwd="cli", - capture_output=True - ) - if result.returncode != 0: - pytest.skip("Rust CLI build failed") - - return "cli/target/release/do-wdr" - - def test_url_resolution_parity(self, rust_cli): - """Test URL resolution produces similar results.""" - url = "https://example.com" - - # Python result - py_result = resolve(url, max_chars=1000) - - # Rust result - rust_output = subprocess.run( - [rust_cli, "resolve", url, "--json", "--max-chars", "1000"], - capture_output=True, - text=True - ) - - if rust_output.returncode != 0: - pytest.skip(f"Rust CLI failed: {rust_output.stderr}") - - rust_result = json.loads(rust_output.stdout) - - # Compare key fields - assert py_result["url"] == rust_result.get("url") - - # Content should be similar (may differ slightly due to extraction differences) - py_content_len = len(py_result.get("content", "")) - rust_content_len = len(rust_result.get("content", "")) - - # Within 20% of each other - assert abs(py_content_len - rust_content_len) / max(py_content_len, 1) < 0.2 - - def test_query_resolution_parity(self, rust_cli): - """Test query resolution produces similar results.""" - query = "Python programming language" - - # Python result - py_result = resolve(query, max_chars=1000, profile="free") - - # Rust result - rust_output = subprocess.run( - [rust_cli, "resolve", query, "--json", "--profile", "free", "--max-chars", "1000"], - capture_output=True, - text=True - ) - - if rust_output.returncode != 0: - pytest.skip(f"Rust CLI failed: {rust_output.stderr}") - - rust_result = json.loads(rust_output.stdout) - - # Both 
should have content - assert py_result.get("content") - assert rust_result.get("content") - - def test_error_handling_parity(self, rust_cli): - """Test error handling produces consistent results.""" - # Invalid URL - invalid_url = "not-a-valid-url" - - py_result = resolve(invalid_url) - - rust_output = subprocess.run( - [rust_cli, "resolve", invalid_url, "--json"], - capture_output=True, - text=True - ) - - rust_result = json.loads(rust_output.stdout) - - # Both should indicate failure - assert py_result["source"] == "none" or rust_result.get("source") == "none" -``` - ---- - -## Phase 2: Test Infrastructure (Week 2) - -### Improvement 4: Performance Benchmark Tests - -**Implementation:** - -```python -# tests/test_performance.py - -import pytest -import time -import statistics -from scripts.resolve import resolve, resolve_url, resolve_query - -@pytest.mark.benchmark -class TestPerformance: - """Performance benchmarks for resolver.""" - - @pytest.fixture - def sample_urls(self): - return [ - "https://example.com", - "https://docs.python.org/3/tutorial/", - ] - - def test_url_resolution_latency(self, sample_urls, benchmark): - """Benchmark URL resolution latency.""" - def resolve_urls(): - for url in sample_urls: - resolve_url(url, max_chars=1000) - - result = benchmark(resolve_urls) - - # Assert reasonable performance - assert result.stats.mean < 5.0 # Average under 5 seconds - - def test_query_resolution_latency(self, benchmark): - """Benchmark query resolution latency.""" - queries = [ - "Python tutorial", - "Rust programming", - ] - - def resolve_queries(): - for query in queries: - resolve_query(query, max_chars=1000, profile="free") - - result = benchmark(resolve_queries) - assert result.stats.mean < 10.0 # Average under 10 seconds - - def test_cache_performance(self): - """Test cache hit performance.""" - url = "https://example.com" - - # First call (cache miss) - start = time.perf_counter() - resolve_url(url) - miss_time = time.perf_counter() - start - - # 
Second call (cache hit) - start = time.perf_counter() - resolve_url(url) - hit_time = time.perf_counter() - start - - # Cache hit should be significantly faster - assert hit_time < miss_time * 0.1 # 10x faster - - def test_concurrent_resolution(self): - """Test concurrent resolution performance.""" - import concurrent.futures - - urls = ["https://example.com"] * 5 - - start = time.perf_counter() - - with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: - futures = [executor.submit(resolve_url, url) for url in urls] - results = [f.result() for f in concurrent.futures.as_completed(futures)] - - total_time = time.perf_counter() - start - - # Should complete faster than sequential - assert len(results) == 5 - assert total_time < 15.0 # Under 15 seconds for 5 concurrent -``` - -**Dependencies:** -``` -pytest-benchmark>=4.0.0 -``` - ---- - -### Improvement 5: Coverage Threshold Enforcement - -**Implementation:** - -```yaml -# .github/workflows/ci.yml (update) - -- name: Run tests with coverage - run: | - python -m pytest tests/ -v \ - --cov=scripts \ - --cov-report=xml \ - --cov-report=html \ - --cov-fail-under=80 # Fail if coverage below 80% - -- name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - fail_ci_if_error: true -``` - -**Badge for README:** -```markdown -[![codecov](https://codecov.io/gh/d-oit/do-web-doc-resolver/branch/main/graph/badge.svg)](https://codecov.io/gh/d-oit/do-web-doc-resolver) -``` - ---- - -### Improvement 6: Test Fixtures Standardization - -**Implementation:** - -```python -# tests/conftest.py (additions) - -import pytest -import json - -@pytest.fixture -def mock_exa_result(): - """Standard mock Exa result.""" - return { - "source": "exa", - "content": "# Python Tutorial\n\nPython is a programming language...", - "url": "https://docs.python.org/tutorial/", - "score": 0.85, - "metadata": {"title": "Python Tutorial"} - } - -@pytest.fixture -def mock_jina_result(): - """Standard mock 
Jina result.""" - return { - "source": "jina", - "content": "Example Domain\n\nThis domain is for use in illustrative examples...", - "url": "https://example.com", - "score": 0.75 - } - -@pytest.fixture -def mock_firecrawl_result(): - """Standard mock Firecrawl result.""" - return { - "source": "firecrawl", - "content": "# Example Domain\n\nThis domain...", - "url": "https://example.com", - "score": 0.90, - "metadata": {"title": "Example Domain"} - } - -@pytest.fixture -def sample_markdown_content(): - """Sample markdown content for testing.""" - return """# Heading 1 - -This is a paragraph with **bold** and *italic* text. - -## Heading 2 - -- List item 1 -- List item 2 - -```python -def hello(): - print("Hello") -``` - -[Link text](https://example.com) -""" - -@pytest.fixture -def sample_html_content(): - """Sample HTML content for testing.""" - return """ - - - Test Page - -

Main Heading

-

Paragraph content

- Link - - - """ - -@pytest.fixture -def temp_cache_dir(tmp_path): - """Temporary cache directory for testing.""" - cache_dir = tmp_path / "cache" - cache_dir.mkdir() - return str(cache_dir) -``` - ---- - -## Phase 3: Web E2E & Integration (Week 3) - -### Improvement 7: Web E2E with Real Backend - -**Implementation:** - -```typescript -// web/tests/e2e/provider-selection.spec.ts - -import { test, expect } from '@playwright/test'; - -test.describe('Provider Selection', () => { - test.beforeEach(async ({ page }) => { - // Start local dev server if not already running - await page.goto('http://localhost:3000'); - }); - - test('should toggle providers', async ({ page }) => { - // Open settings/provider panel - await page.click('[data-testid="settings-toggle"]'); - - // Uncheck a provider - await page.click('[data-testid="provider-exa_mcp"'); - - // Verify provider is disabled - const isChecked = await page.isChecked('[data-testid="provider-exa_mcp"'); - expect(isChecked).toBe(false); - }); - - test('should resolve with selected providers', async ({ page }) => { - // Enable only free providers - await page.click('[data-testid="profile-free"]'); - - // Enter URL - await page.fill('[data-testid="url-input"]', 'https://example.com'); - - // Submit - await page.click('[data-testid="resolve-button"]'); - - // Wait for result - await page.waitForSelector('[data-testid="result-content"]', { timeout: 30000 }); - - // Verify result - const content = await page.textContent('[data-testid="result-content"]'); - expect(content).toContain('Example Domain'); - }); - - test('should show cascade progress', async ({ page }) => { - await page.fill('[data-testid="url-input"]', 'https://example.com'); - await page.click('[data-testid="resolve-button"]'); - - // Should show stepper - await page.waitForSelector('[data-testid="cascade-stepper"]'); - - // Should have provider steps - const steps = await page.$$('[data-testid="provider-step"]'); - expect(steps.length).toBeGreaterThan(0); - }); 
-}); -``` - ---- - -### Improvement 8: Error Condition Testing - -**Implementation:** - -```python -# tests/test_error_handling.py - -import pytest -from unittest.mock import patch, MagicMock -from scripts.resolve import resolve_url, resolve_query -from scripts.providers_impl import resolve_with_jina, resolve_with_exa_mcp - -class TestRateLimitHandling: - """Test rate limit detection and backoff.""" - - def test_jina_rate_limit_sets_cooldown(self, requests_mock, monkeypatch): - """Test Jina rate limit triggers cooldown.""" - # Mock 429 response - requests_mock.get( - "https://r.jina.ai/http://example.com", - status_code=429, - text="Rate limit exceeded" - ) - - result = resolve_with_jina("http://example.com", max_chars=1000) - assert result is None - - # Verify rate limit was set - from scripts.providers_impl import _is_rate_limited - assert _is_rate_limited("jina") - - def test_provider_skipped_when_rate_limited(self, monkeypatch): - """Test rate-limited provider is skipped in cascade.""" - from scripts.providers_impl import _set_rate_limit, _is_rate_limited - - # Set rate limit on Jina - _set_rate_limited("jina", 60) - - # Verify it's skipped - assert _is_rate_limited("jina") - - # Resolve should skip Jina and use fallback - result = resolve_url("https://example.com") - assert result["source"] != "none" - -class TestNetworkErrorHandling: - """Test network error resilience.""" - - @patch('requests.get') - def test_timeout_fallback_to_next_provider(self, mock_get): - """Test timeout falls back to next provider.""" - # First provider times out - mock_get.side_effect = [ - TimeoutError("Connection timeout"), # Jina - MagicMock(status_code=200, text="Success") # Firecrawl - ] - - result = resolve_url("https://example.com") - # Should eventually succeed - assert result is not None - - def test_all_providers_fail_gracefully(self, monkeypatch): - """Test graceful failure when all providers fail.""" - # Mock all providers to fail - def mock_fail(*args, **kwargs): - return 
None - - monkeypatch.setattr("scripts.providers_impl.resolve_with_jina", mock_fail) - monkeypatch.setattr("scripts.providers_impl.resolve_with_firecrawl", mock_fail) - monkeypatch.setattr("scripts.providers_impl.resolve_with_direct_fetch", mock_fail) - - result = resolve_url("https://example.com") - - # Should indicate failure - assert result["source"] == "none" - assert "error" in result - -class TestQualityThresholdRejection: - """Test quality threshold handling.""" - - def test_low_quality_content_rejected(self, monkeypatch): - """Test content below quality threshold is rejected.""" - # Mock provider returning thin content - def mock_thin_content(url, max_chars): - from scripts.models import ResolvedResult - return ResolvedResult( - source="jina", - content="Hi", # Too short - url=url - ) - - monkeypatch.setattr("scripts.providers_impl.resolve_with_jina", mock_thin_content) - - result = resolve_url("https://example.com") - - # Should reject thin content and try next provider - # or return failure if no other providers - pass # Implementation depends on cascade behavior -``` - ---- - -## Phase 4: Documentation & Process (Week 4) - -### Improvement 9: Documentation Testing - -**Implementation:** - -```python -# tests/test_documentation.py - -import subprocess -import re -import pytest - -class TestDocumentation: - """Test that documentation examples work.""" - - def test_readme_examples_are_valid_python(self): - """Test Python examples in README.""" - # Extract code blocks from README - with open("README.md") as f: - content = f.read() - - # Find all Python code blocks - python_blocks = re.findall(r'```python\n(.*?)```', content, re.DOTALL) - - for block in python_blocks: - # Skip incomplete examples - if "..." 
in block or "# " in block: - continue - - # Try to compile - try: - compile(block, '', 'exec') - except SyntaxError as e: - pytest.fail(f"Syntax error in README Python example: {e}") - - def test_cli_examples_in_readme(self): - """Test CLI examples in README are valid commands.""" - with open("README.md") as f: - content = f.read() - - # Find all bash code blocks - bash_blocks = re.findall(r'```bash\n(.*?)```', content, re.DOTALL) - - for block in bash_blocks: - lines = block.strip().split('\n') - for line in lines: - line = line.strip() - if line.startswith('do-wdr ') or line.startswith('./target/release/do-wdr '): - # Extract command - cmd = line.split()[0] - # Verify binary exists or command is valid - pass # Just check syntax, don't actually run - - def test_skill_documentation_valid(self): - """Test skill documentation is valid.""" - import yaml - - # Check skill metadata if applicable - pass -``` - ---- - -### Improvement 10: Flaky Test Detection - -**Implementation:** - -```ini -# pytest.ini (additions) - -[pytest] -addopts = - -v - --reruns 3 - --reruns-delay 1 - --only-rerun "RateLimitError" - --only-rerun "TimeoutError" - --only-rerun "ConnectionError" -``` - -```python -# tests/conftest.py (addition) - -import pytest - -# Mark known flaky tests -flaky = pytest.mark.flaky(reruns=3, reruns_delay=2) - -# Apply to live tests -pytestmark = [ - pytest.mark.live, - flaky -] -``` - -**CI Configuration:** - -```yaml -# .github/workflows/ci.yml - -- name: Run tests with retry - run: | - python -m pytest tests/ -v \ - --reruns 3 \ - --reruns-delay 1 \ - --only-rerun "RateLimitError" \ - --only-rerun "TimeoutError" \ - -m "live" - env: - EXA_API_KEY: ${{ secrets.EXA_API_KEY }} - # ... 
other secrets -``` - ---- - -## Test Execution Guide - -### Running Tests - -```bash -# Unit tests only (no API keys needed) -python -m pytest tests/ -v -m "not live" - -# Live integration tests (requires API keys) -python -m pytest tests/ -v -m live - -# Performance benchmarks -python -m pytest tests/test_performance.py -v --benchmark-only - -# Security tests -python -m pytest tests/test_security.py -v - -# With coverage -python -m pytest tests/ -v --cov=scripts --cov-report=html - -# Flaky tests with retry -python -m pytest tests/ -v --reruns 3 - -# Specific test file -python -m pytest tests/test_providers.py -v - -# Rust tests -cd cli && cargo test - -# Web E2E tests -cd web && npx playwright test -``` - ---- - -## Success Metrics - -| Metric | Target | Current | -|--------|--------|---------| -| Code Coverage | 80% | ? | -| Test Count | 200+ | ? | -| Live Test Reliability | 95% | ? | -| Security Test Coverage | 100% | 0% | -| Python/Rust Parity | 100% | 0% | -| CI Pass Rate | 98% | ? 
| - ---- - -## Timeline - -| Week | Focus | Deliverables | -|------|-------|--------------| -| 1 | Provider & Security Tests | Serper tests, security suite | -| 2 | Infrastructure | Fixtures, coverage, benchmarks | -| 3 | Integration | E2E tests, parity tests | -| 4 | Process | Flaky test handling, docs testing | +- [ADR-013](013-test-coverage-and-ci-reliability.md) — CI & test coverage plan +- [AUDIT.md](AUDIT.md) — M5 (web unit tests), M6 (Rust tests), M7 (mobile CI) +- [tests/](../tests/) — Test directory diff --git a/plans/07-documentation-improvements.md b/plans/07-documentation-improvements.md index a0c2da3..a2e1838 100644 --- a/plans/07-documentation-improvements.md +++ b/plans/07-documentation-improvements.md @@ -1,1774 +1,39 @@ -# Documentation & DevEx Improvements Plan +# Documentation & DevEx Improvements (Condensed Status) -## Overview +## Original Overview -This plan implements 8 documentation and developer experience improvements to make the project more approachable for new users and contributors. - ---- - -## Phase 1: User Onboarding (Week 1) - -### Improvement 1: Getting Started Tutorial - -**Description:** Step-by-step "First 5 Minutes" tutorial for new users. - -**New File:** `TUTORIAL.md` (in project root) - -```markdown -# Quick Start Tutorial - -Welcome to do-web-doc-resolver! This tutorial will get you resolving URLs and queries in under 5 minutes. - -## 1. Installation (1 minute) - -```bash -# Clone the repository -git clone https://github.com/d-oit/do-web-doc-resolver.git -cd do-web-doc-resolver - -# Install Python dependencies -pip install -r requirements.txt -``` - -## 2. Your First Resolution (2 minutes) - -### Resolve a URL (No API Key Required!) - -```bash -python -m scripts.cli "https://docs.python.org" -``` - -You'll see the cascade in action: -1. Checking cache... -2. Trying llms.txt... -3. Using Jina Reader... -4. Success! ✓ - -### Resolve a Query (No API Key Required!) 
- -```bash -python -m scripts.cli "Python tutorial" -``` - -The resolver automatically uses Exa MCP (free) first, then falls back to DuckDuckGo if needed. - -## 3. Understanding the Output - -```json -{ - "source": "jina", - "url": "https://docs.python.org", - "content": "# Python documentation...", - "score": 0.85, - "metrics": { - "latency_ms": 1234, - "providers_attempted": ["llms_txt", "jina"] - } -} -``` - -**What this means:** -- `source`: Which provider succeeded -- `score`: Content quality (0.0-1.0, higher is better) -- `metrics.latency_ms`: Total resolution time -- `metrics.providers_attempted`: Cascade path taken - -## 4. Adding Your First API Key (1 minute) - -While the resolver works without API keys, adding them improves results: - -1. Get a free API key from [Serper](https://serper.dev) (2500 free credits) -2. Set the environment variable: - ```bash - export SERPER_API_KEY="your-key-here" - ``` -3. Run a query: - ```bash - python -m scripts.cli "latest AI research" - ``` - -## 5. Common First-Time Issues - -### "No resolution method available" -**Cause:** All providers failed or are unavailable. -**Solution:** -- Check your internet connection -- Try a different URL/query -- Check provider status with `--log-level DEBUG` - -### Rate Limiting -**Cause:** Too many requests to a provider. 
-**Solution:** -- Wait 30-60 seconds -- Use `--profile free` to avoid paid providers -- The resolver automatically falls back to free alternatives - -### Understanding Quality Scores -- `0.80-1.00`: Excellent content -- `0.65-0.79`: Good content -- `0.50-0.64`: Thin content (may retry with other providers) -- `< 0.50`: Poor content (rejected) - -## Next Steps - -- Try the **Rust CLI** for faster performance: `cd cli && cargo build --release` -- Explore the **Web UI**: `cd web && npm run dev` -- Read the [full documentation](README.md) -- Check out the [API reference](.agents/skills/do-web-doc-resolver/references/CLI.md) - -## Getting Help - -- [Open an issue](https://github.com/d-oit/do-web-doc-resolver/issues) -- Check the [troubleshooting guide](TROUBLESHOOTING.md) -- Read [AGENTS.md](AGENTS.md) for development setup - ---- - -Happy resolving! 🚀 -``` - -**Update README.md:** -Add link to tutorial: -```markdown -## Quick Start - -New here? Start with our [5-minute tutorial](TUTORIAL.md) to resolve your first URL. 
-``` - ---- - -### Improvement 2: Comprehensive Troubleshooting Guide - -**New File:** `agents-docs/TROUBLESHOOTING.md` - -```markdown -# Troubleshooting Guide - -## Error Message Index - -| Error Message | Likely Cause | Solution | -|---------------|--------------|----------| -| "No resolution method available" | All providers failed | Check network, verify URL/query, check logs | -| "Rate limited" | Too many requests | Wait 30-60s, use `--profile free`, check provider limits | -| "Provider unavailable" | Circuit breaker tripped | Wait 5 minutes, check provider status | -| "SSRF_BLOCKED" | Invalid/unsafe URL | Use public URLs only (no localhost, private IPs) | -| "Authentication failed" | Invalid API key | Verify key, check for typos, regenerate if needed | -| "Timeout" | Slow provider/network | Use `--profile fast`, check connection, try again | -| "Empty content" | Provider returned nothing | Try different provider, check URL is valid | - -## Debugging Resolution - -### Enable Debug Logging - -```bash -python -m scripts.cli "https://example.com" --log-level DEBUG -``` - -You'll see: -``` -DEBUG:root:Resolving URL: https://example.com -DEBUG:root:Trying provider: llms_txt -DEBUG:root:Provider llms_txt failed: 404 -DEBUG:root:Trying provider: jina -DEBUG:root:Provider jina succeeded in 1234ms -``` - -### Test Individual Providers - -```bash -# Test specific provider -python -m scripts.cli "https://example.com" --provider jina - -# Skip specific providers -python -m scripts.cli "https://example.com" --skip exa_mcp --skip firecrawl -``` - -### Check Provider Status - -```bash -# List all providers -./target/release/do-wdr providers - -# Show current configuration -./target/release/do-wdr config -``` - -## Provider-Specific Issues - -### Exa MCP -**Symptom:** "Exa MCP failed" or "Rate limit" -**Solution:** -- Wait 30 seconds (built-in cooldown) -- Try again with `--skip exa_mcp` -- Check if MCP server is available: `curl https://mcp.exa.ai/mcp` - -### Jina Reader 
-**Symptom:** "429 Too Many Requests" -**Solution:** -- Wait 60 seconds -- Jina has a 20 RPM free tier -- Consider adding delay between requests - -### Firecrawl -**Symptom:** "Authentication failed" or "No credits" -**Solution:** -- Verify `FIRECRAWL_API_KEY` is set correctly -- Check remaining credits in Firecrawl dashboard -- Consider using `--profile free` to skip paid providers - -### Tavily -**Symptom:** Queries not returning results -**Solution:** -- Tavily works best for factual/research queries -- Try rephrasing as a question -- Check `TAVILY_API_KEY` is valid - -## Performance Issues - -### Slow Resolution (10+ seconds) - -**Check cascade order:** -```bash -python -m scripts.cli "query" --log-level INFO -``` - -**Solutions:** -1. Use `--profile fast` for low-latency mode -2. Enable semantic cache for repeated queries -3. Check routing memory isn't outdated -4. Skip slow providers: `--skip mistral_browser` - -### High Memory Usage - -**Symptom:** Process using too much RAM -**Solution:** -- Reduce `WEB_RESOLVER_MAX_CHARS` (default: 8000) -- Clear cache: `rm -rf .cache/` -- Use batch processing with smaller batches - -## Web UI Issues - -### Build Failures - -**Symptom:** `npm run build` fails -**Solutions:** -1. Ensure `postcss.config.mjs` exists -2. Clear `node_modules` and reinstall: - ```bash - rm -rf node_modules package-lock.json - npm install - ``` -3. Check Node.js version (requires 18+) - -### E2E Test Failures - -**Symptom:** Playwright tests fail -**Solutions:** -1. Update browsers: `npx playwright install` -2. Check dev server is running on port 3000 -3. Verify backend on port 8000 - -## Configuration Issues - -### Environment Variables Not Loading - -**Check:** -```bash -echo $EXA_API_KEY # Should show your key -``` - -**Solutions:** -1. Export variables in current shell (not just .bashrc) -2. Use `.env` file with python-dotenv -3. 
For Rust CLI, use `DO_WDR_*` prefix or config.toml - -### Config File Not Found - -**Rust CLI config locations:** -- `~/.config/do-wdr/config.toml` -- `./config.toml` - -**Example config.toml:** -```toml -max_chars = 8000 -profile = "balanced" -skip_providers = ["exa"] - -[api_keys] -tavily = "your-key-here" -``` - -## Still Having Issues? - -1. **Check the logs:** `--log-level DEBUG` -2. **Test with free providers only:** `--profile free` -3. **Verify your environment:** `./scripts/quality_gate.sh` -4. **Open an issue:** Include debug logs and error messages - -## Related Resources - -- [Provider Reference](.agents/skills/do-web-doc-resolver/references/PROVIDERS.md) -- [Configuration Guide](.agents/skills/do-web-doc-resolver/references/CONFIG.md) -- [GitHub Issues](https://github.com/d-oit/do-web-doc-resolver/issues) -``` - ---- - -## Phase 2: Architecture Documentation (Week 2) - -### Improvement 3: Architecture Decision Records (ADRs) - -**New Directory:** `agents-docs/adr/` - -**New File:** `agents-docs/adr/001-provider-cascade-architecture.md` - -```markdown -# ADR 001: Provider Cascade Architecture +8 improvements across 4 phases: onboarding tutorial, troubleshooting guide, +architecture ADRs, migration guide, dev container, contributing guide, OpenAPI +spec, performance guide. ## Status -Accepted - -## Context -The resolver needs to query multiple web sources to maximize content quality and minimize cost. A single provider approach is risky (single point of failure) and expensive (always using paid APIs). - -## Decision -Implement a cascade architecture with the following characteristics: - -1. **Free-first ordering:** Free providers are attempted before paid ones -2. **Parallel execution:** Multiple providers can run simultaneously with hedging -3. **Quality gating:** Results below threshold trigger next provider -4. **Automatic failover:** Provider failures are handled gracefully -5. 
**Learned preferences:** Routing memory optimizes provider ordering -## Consequences +Partially addressed. Key documentation infrastructure exists. ADR directory +structure proposed but not created. -### Positive -- Cost efficiency: Most queries resolved by free providers -- Reliability: Multiple fallback options -- Quality: Can retry with different providers for better results -- Performance: Parallel execution reduces latency +## What's Done -### Negative -- Complexity: More complex than single-provider approach -- Debugging: Harder to trace which provider succeeded -- Resource usage: Parallel execution uses more memory/connections +- **README overhaul**: ✅ PR #336 merged — improved onboarding for Python, CLI, + and web personas. +- **AGENTS.md alignment**: ✅ PR #320 merged — aligned with upstream standard. +- **Central index**: ✅ PR #327 merged — clarified AGENTS vs agents-docs roles. +- **ADR documentation**: This `plans/` update creates the missing ADR files + (009, 012, 013, 014). 
-## Alternatives Considered +## What Remains -### Single Provider -- **Rejected:** Too expensive, single point of failure - -### Random Provider Selection -- **Rejected:** Doesn't optimize for cost or quality - -### Static Provider Order -- **Rejected:** Can't adapt to changing conditions +All 8 improvements remain candidates: +- Getting Started Tutorial (`TUTORIAL.md`) +- Comprehensive Troubleshooting Guide (`agents-docs/TROUBLESHOOTING.md`) +- Migration Guide (`MIGRATING.md`) +- Dev Container / Docker development +- Enhanced Contributing Guide (`CONTRIBUTING.md`) +- OpenAPI Specification (`web/openapi.yaml`) +- Performance Tuning Guide (`agents-docs/PERFORMANCE.md`) +- Provider development tutorial (`agents-docs/ADDING_PROVIDERS.md`) ## References -- [Cascade Reference](.agents/skills/do-web-doc-resolver/references/CASCADE.md) -``` - -**New File:** `agents-docs/adr/002-python-plus-rust.md` - -```markdown -# ADR 002: Dual Python and Rust Implementation - -## Status -Accepted (under review for consolidation) - -## Context -The project started in Python for rapid prototyping, then added Rust CLI for performance. Need to decide on long-term strategy. - -## Decision -Maintain both implementations with shared core logic: - -1. **Python:** Primary development, easier to iterate -2. **Rust:** High-performance CLI, type safety -3. 
**Shared patterns:** Keep cascade logic consistent - -## Future Direction -Evaluate PyO3 bindings to unify implementations: -- Rust as core library -- Python as thin wrapper -- Single source of truth - -## Consequences - -### Positive -- Python: Fast iteration, rich ecosystem -- Rust: Performance, reliability, better deployment - -### Negative -- Code duplication: ~500 lines of similar logic -- Maintenance burden: Changes must be made twice -- Divergence risk: Implementations may drift apart - -## References -- [Rust CLI Reference](.agents/skills/do-web-doc-resolver/references/RUST_CLI.md) -``` - -**Additional ADRs to create:** -- `003-free-first-strategy.md` -- `004-quality-scoring-algorithm.md` -- `005-circuit-breaker-pattern.md` -- `006-semantic-cache-design.md` -- `007-routing-memory.md` - -**Template:** `agents-docs/adr/template.md` - -```markdown -# ADR XXX: Title - -## Status -- Proposed -- Accepted -- Deprecated -- Superseded by ADR XXX - -## Context -What is the issue we're seeing that is motivating this decision? - -## Decision -What is the decision being made? - -## Consequences -What becomes easier or more difficult to do? - -## Alternatives Considered -What other options were evaluated? - -## References -Links to related documents or issues. -``` - ---- - -### Improvement 4: Migration Guide - -**New File:** `MIGRATING.md` - -```markdown -# Migration Guide - -This guide helps you upgrade between versions of do-web-doc-resolver. - -## 0.2.x → 0.3.0 - -### Breaking Changes -- Binary renamed: `wdr` → `do-wdr` -- Environment variables: `WDR_*` → `DO_WDR_*` -- Config directory: `~/.config/wdr` → `~/.config/do-wdr` - -### Migration Steps - -1. **Update scripts using the binary:** - ```bash - # Before - wdr resolve "query" - - # After - do-wdr resolve "query" - ``` - -2. 
**Update environment variables:** - ```bash - # Add to ~/.bashrc, ~/.zshrc, or ~/.profile - export DO_WDR_API_KEY="$WDR_API_KEY" - export DO_WDR_TAVILY_API_KEY="$WDR_TAVILY_API_KEY" - - # Unset old variables - unset WDR_API_KEY - unset WDR_TAVILY_API_KEY - ``` - -3. **Migrate configuration file:** - ```bash - mkdir -p ~/.config/do-wdr - - # If you have a config file - mv ~/.config/wdr/config.toml ~/.config/do-wdr/ - - # Update environment variable references in config - sed -i 's/WDR_/DO_WDR_/g' ~/.config/do-wdr/config.toml - ``` - -4. **Update shell aliases:** - ```bash - # In ~/.bashrc or ~/.zshrc - alias wdr='do-wdr' # Temporary compatibility - ``` - -### Deprecation Timeline -- 0.3.0: Old names work with warnings -- 0.4.0: Old names removed - -## 0.3.x → 0.4.0 - -### Breaking Changes -- Python 3.9 support removed (minimum 3.10) -- Removed deprecated `resolve_sync()` function -- CLI flag `--output-json` renamed to `--json` - -### Migration Steps - -1. **Update Python version:** - ```bash - # Check current version - python --version # Must be 3.10+ - - # Update if needed - pyenv install 3.12 - pyenv local 3.12 - ``` - -2. **Update function calls:** - ```python - # Before - from scripts.resolve import resolve_sync - result = resolve_sync(url) - - # After - from scripts.resolve import resolve - result = resolve(url) # Now async by default - ``` - -3. 
**Update CLI scripts:** - ```bash - # Before - do-wdr resolve "query" --output-json - - # After - do-wdr resolve "query" --json - ``` - -## General Migration Tips - -### Check Your Current Version -```bash -# Python -python -c "from scripts import __version__; print(__version__)" - -# Rust CLI -do-wdr --version -``` - -### Backup Your Configuration -Before upgrading: -```bash -cp ~/.config/do-wdr/config.toml ~/.config/do-wdr/config.toml.backup -``` - -### Test After Migration -```bash -# Quick functionality test -./scripts/quality_gate.sh - -# Resolve a test URL -python -m scripts.cli "https://example.com" -``` - -## Getting Help - -If you encounter issues during migration: -1. Check this guide for your specific version -2. Review the [CHANGELOG](CHANGELOG.md) -3. [Open an issue](https://github.com/d-oit/do-web-doc-resolver/issues) - -## Version Compatibility Matrix - -| Version | Python | Rust | Node.js | Status | -|---------|--------|------|---------|--------| -| 0.4.x | 3.10+ | 1.75+ | 20+ | Current | -| 0.3.x | 3.9+ | 1.70+ | 18+ | Supported | -| 0.2.x | 3.9+ | 1.65+ | 18+ | EOL | -``` - ---- - -## Phase 3: Developer Experience (Week 3) - -### Improvement 5: Dev Container / Docker Development - -**New File:** `Dockerfile.dev` - -```dockerfile -# Development container for do-web-doc-resolver - -FROM python:3.12-slim - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - build-essential \ - git \ - pkg-config \ - libssl-dev \ - tesseract-ocr \ - && rm -rf /var/lib/apt/lists/* - -# Install Rust -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y -ENV PATH="/root/.cargo/bin:${PATH}" - -# Install Node.js 22 -RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ - && apt-get install -y nodejs - -# Set working directory -WORKDIR /workspace - -# Install Python dependencies -COPY requirements.txt . 
-RUN pip install -r requirements.txt - -# Install development tools -RUN pip install \ - black \ - ruff \ - pytest \ - pytest-asyncio \ - mypy - -# Pre-install Rust dependencies for faster builds -COPY cli/Cargo.toml cli/Cargo.lock ./cli/ -RUN mkdir -p cli/src && echo "fn main() {}" > cli/src/main.rs -RUN cd cli && cargo fetch - -# Expose ports -EXPOSE 8000 3000 - -# Set environment -ENV PYTHONPATH=/workspace -ENV WEB_RESOLVER_LOG_LEVEL=DEBUG - -# Default command -CMD ["bash"] -``` - -**New File:** `docker-compose.yml` - -```yaml -version: '3.8' - -services: - resolver: - build: - context: . - dockerfile: Dockerfile.dev - volumes: - - .:/workspace - - cargo-cache:/root/.cargo - - node-modules:/workspace/web/node_modules - environment: - - WEB_RESOLVER_LOG_LEVEL=DEBUG - - PYTHONPATH=/workspace - # Add your API keys here or use .env file - - EXA_API_KEY=${EXA_API_KEY} - - TAVILY_API_KEY=${TAVILY_API_KEY} - ports: - - "8000:8000" # Python backend - - "3000:3000" # Next.js dev server - command: bash - stdin_open: true - tty: true - -volumes: - cargo-cache: - node-modules: -``` - -**New File:** `.devcontainer/devcontainer.json` - -```json -{ - "name": "do-web-doc-resolver", - "dockerComposeFile": "../docker-compose.yml", - "service": "resolver", - "workspaceFolder": "/workspace", - - "features": { - "ghcr.io/devcontainers/features/rust:1": { - "version": "latest", - "profile": "default" - }, - "ghcr.io/devcontainers/features/node:1": { - "version": "22" - }, - "ghcr.io/devcontainers/features/github-cli:1": {} - }, - - "customizations": { - "vscode": { - "extensions": [ - "ms-python.python", - "ms-python.vscode-pylance", - "rust-lang.rust-analyzer", - "vadimcn.vscode-lldb", - "esbenp.prettier-vscode", - "bradlc.vscode-tailwindcss", - "ms-playwright.playwright" - ], - "settings": { - "python.defaultInterpreterPath": "/usr/local/bin/python", - "rust-analyzer.cargo.target": "x86_64-unknown-linux-gnu", - "editor.formatOnSave": true, - "editor.codeActionsOnSave": { - 
"source.fixAll": true - } - } - } - }, - - "postCreateCommand": "pip install -r requirements.txt && cd cli && cargo build", - - "postStartCommand": "git config --global --add safe.directory /workspace", - - "remoteUser": "root", - - "forwardPorts": [8000, 3000], - - "portsAttributes": { - "8000": { - "label": "Python Backend", - "onAutoForward": "notify" - }, - "3000": { - "label": "Next.js Dev Server", - "onAutoForward": "openPreview" - } - } -} -``` - -**New File:** `.devcontainer/post-create.sh` - -```bash -#!/bin/bash -set -e - -echo "🚀 Setting up development environment..." - -# Install Python dependencies -echo "📦 Installing Python dependencies..." -pip install -r requirements.txt -pip install black ruff pytest mypy - -# Install pre-commit hooks -echo "🎣 Installing pre-commit hooks..." -./scripts/setup-hooks.sh - -# Build Rust CLI -echo "🦀 Building Rust CLI..." -cd cli && cargo build - -# Install web dependencies -echo "🌐 Installing web dependencies..." -cd web && npm install - -echo "✅ Development environment ready!" -echo "" -echo "Quick start:" -echo " Python: python -m scripts.cli 'https://example.com'" -echo " Rust: cd cli && cargo run -- resolve 'https://example.com'" -echo " Web: cd web && npm run dev" -``` - ---- - -### Improvement 6: Enhanced Contributing Guide - -**Update:** `CONTRIBUTING.md` - -```markdown -# Contributing to do-web-doc-resolver - -Thank you for your interest in contributing! This guide will help you get started. - -## Development Setup - -### Option 1: Local Development - -```bash -# Clone the repository -git clone https://github.com/d-oit/do-web-doc-resolver.git -cd do-web-doc-resolver - -# Install Python dependencies -pip install -r requirements.txt - -# Build Rust CLI -cd cli && cargo build -cd .. - -# Install web dependencies -cd web && npm install -cd .. - -# Setup git hooks -./scripts/setup-hooks.sh -``` - -### Option 2: Dev Container (Recommended) - -Using VS Code with Dev Containers: -1. Open project in VS Code -2. 
Run "Dev Containers: Reopen in Container" -3. Wait for setup to complete - -Or with Docker: -```bash -docker-compose up -d -docker-compose exec resolver bash -``` - -## How to Contribute - -### Reporting Bugs - -1. Check if the issue already exists -2. Create a new issue with: - - Clear title and description - - Steps to reproduce - - Expected vs actual behavior - - Environment details (OS, Python version, etc.) - - Debug logs (use `--log-level DEBUG`) - -### Suggesting Features - -1. Open a feature request issue -2. Describe the use case -3. Explain why existing solutions don't work -4. Propose an implementation approach - -### Adding New Providers - -See [Adding Providers Guide](agents-docs/ADDING_PROVIDERS.md) for detailed instructions. - -Quick checklist: -- [ ] Provider implementation in `scripts/providers_impl.py` -- [ ] ProviderType enum value -- [ ] Cascade integration -- [ ] Unit tests with mocked responses -- [ ] Live integration tests (if you have API key) -- [ ] Documentation updates -- [ ] Rate limit handling -- [ ] Error classification - -### Code Style - -#### Python -- Use `black` for formatting: `black scripts/ tests/` -- Use `ruff` for linting: `ruff check scripts/ tests/` -- Type hints required for public functions -- Docstrings for all public functions and classes -- Maximum 500 lines per file - -#### Rust -- Use `cargo fmt` for formatting -- Use `cargo clippy` for linting: `cargo clippy -- -D warnings` -- Maximum 500 lines per file -- Errors via `thiserror`, propagation via `anyhow` - -#### Commits -- Use [Conventional Commits](https://www.conventionalcommits.org/) -- Format: `type(scope): description` -- Types: `feat`, `fix`, `docs`, `ci`, `test`, `refactor` -- Examples: - - `feat(providers): add ScrapingAnt integration` - - `fix(resolve): handle timeout in cascade` - - `docs(readme): update installation instructions` - -### Testing - -```bash -# Run all tests -./scripts/quality_gate.sh - -# Python unit tests -python -m pytest tests/ -v -m 
"not live" - -# Python live tests (requires API keys) -python -m pytest tests/ -v -m live - -# Rust tests -cd cli && cargo test - -# Web tests -cd web && npx playwright test -``` - -### Pull Request Process - -1. **Fork and branch:** - ```bash - git checkout -b feature/my-feature - ``` - -2. **Make changes:** - - Write code - - Add tests - - Update documentation - -3. **Run quality gate:** - ```bash - ./scripts/quality_gate.sh - ``` - -4. **Commit:** - ```bash - git add . - git commit -m "feat(scope): description" - ``` - -5. **Push and create PR:** - ```bash - git push origin feature/my-feature - ``` - -6. **PR requirements:** - - Clear description of changes - - Link to related issues - - Tests passing - - Documentation updated - - Code review approval - -### Development Workflow - -``` -┌─────────────┐ ┌─────────────┐ ┌─────────────┐ -│ Issue │ -> │ Branch │ -> │ PR │ -│ Created │ │ Created │ │ Review │ -└─────────────┘ └─────────────┘ └─────────────┘ - │ - v - ┌─────────────┐ - │ Merge │ - │ to main │ - └─────────────┘ -``` - -### Getting Help - -- [Discord/Slack community link] -- [GitHub Discussions](https://github.com/d-oit/do-web-doc-resolver/discussions) -- [Open an issue](https://github.com/d-oit/do-web-doc-resolver/issues) - -## Code of Conduct - -- Be respectful and inclusive -- Focus on constructive feedback -- Help newcomers learn and grow -- Respect different viewpoints and experiences - -Thank you for contributing! 🎉 -``` - -**New File:** `agents-docs/ADDING_PROVIDERS.md` - -```markdown -# Adding a New Provider - -This guide walks you through adding a new provider to the resolver. - -## Overview - -Providers implement the provider interface/protocol and are registered in the cascade. The resolver supports two types: - -- **URL Providers:** Extract content from URLs (Jina, Firecrawl, etc.) -- **Query Providers:** Search for information (Exa, Tavily, etc.) - -## Quick Start - -1. Choose provider type (URL or Query) -2. 
Implement provider function/module -3. Add ProviderType enum value -4. Register in cascade -5. Add tests -6. Update documentation - -## Step-by-Step Guide - -### Step 1: Choose Provider Type - -**URL Provider Example:** Jina Reader -- Input: URL string -- Output: Extracted content -- Use for: Converting web pages to markdown - -**Query Provider Example:** Tavily -- Input: Query string -- Output: Search results -- Use for: Finding information across the web - -### Step 2: Implement Provider (Python) - -**New Provider Template:** - -```python -# scripts/providers_impl.py - -def resolve_with_newprovider( - input_str: str, - max_chars: int = MAX_CHARS -) -> ResolvedResult | None: - """ - Resolve using NewProvider API. - - Args: - input_str: URL or query string - max_chars: Maximum characters to return - - Returns: - ResolvedResult on success, None on failure - """ - # Get API key from environment - api_key = os.getenv("NEWPROVIDER_API_KEY") - if not api_key: - return None - - # Check rate limiting - if _is_rate_limited("newprovider"): - return None - - try: - # API call - response = requests.get( - "https://api.newprovider.com/extract", - headers={"Authorization": f"Bearer {api_key}"}, - params={"url": input_str}, - timeout=30 - ) - - # Handle rate limits - if response.status_code == 429: - _set_rate_limit("newprovider", 60) - return None - - response.raise_for_status() - data = response.json() - - # Extract content - content = data.get("content", "") - - if len(content) < MIN_CHARS: - return None - - return ResolvedResult( - source="newprovider", - content=compact_content(content, max_chars), - url=input_str if is_url(input_str) else None, - query=input_str if not is_url(input_str) else None, - metadata={ - "title": data.get("title"), - "credits_used": data.get("credits") - } - ) - - except requests.Timeout: - logger.warning(f"NewProvider timeout for {input_str}") - return None - except Exception as e: - logger.error(f"NewProvider error: {e}") - return None -``` - 
-### Step 3: Add ProviderType - -```python -# scripts/models.py - -class ProviderType(Enum): - # ... existing providers - NEWPROVIDER = "newprovider" - - def is_paid(self) -> bool: - """Return True if provider is paid.""" - return self in { - # ... existing paid providers - ProviderType.NEWPROVIDER, - } -``` - -### Step 4: Register in Cascade - -```python -# scripts/resolve.py - -# For URL providers (in resolve_url_stream): -cascade_map = { - # ... existing providers - "newprovider": ( - ProviderType.NEWPROVIDER, - lambda: resolve_with_newprovider(url, max_chars) - ), -} - -# For query providers (in resolve_query_stream): -cascade_map = { - # ... existing providers - "newprovider": ( - ProviderType.NEWPROVIDER, - lambda: resolve_with_newprovider(query, max_chars) - ), -} -``` - -### Step 5: Add Tests - -```python -# tests/test_providers.py - -class TestNewProvider: - """Test NewProvider integration.""" - - def test_newprovider_available_with_key(self, monkeypatch): - """Test provider available when API key set.""" - monkeypatch.setenv("NEWPROVIDER_API_KEY", "test-key") - # Test implementation - - def test_newprovider_unavailable_without_key(self, monkeypatch): - """Test provider unavailable without API key.""" - monkeypatch.delenv("NEWPROVIDER_API_KEY", raising=False) - # Test implementation - - @pytest.mark.live - @pytest.mark.skipif( - not os.getenv("NEWPROVIDER_API_KEY"), - reason="No API key" - ) - def test_live_newprovider(self): - """Test with real API.""" - result = resolve_with_newprovider("test input", max_chars=1000) - assert result is not None - assert result.source == "newprovider" -``` - -### Step 6: Update Documentation - -1. **PROVIDERS.md:** Add provider details -2. **CASCADE.md:** Update cascade diagram -3. **README.md:** Add to provider list -4. 
**CONFIG.md:** Add environment variable - -## Provider Checklist - -Before submitting PR: - -- [ ] Provider implementation -- [ ] ProviderType enum -- [ ] Cascade registration -- [ ] Rate limit handling -- [ ] Error handling -- [ ] Unit tests -- [ ] Integration tests -- [ ] Documentation updates -- [ ] CHANGELOG.md entry - -## Provider Pattern Reference - -### URL Provider Pattern - -``` -1. Check API key -2. Check rate limit -3. Make HTTP request -4. Handle errors (429, timeout, etc.) -5. Parse response -6. Validate content length -7. Return ResolvedResult -``` - -### Query Provider Pattern - -``` -1. Check API key -2. Check rate limit -3. Make search request -4. Handle errors -5. Parse results -6. Format as markdown -7. Return ResolvedResult -``` - -## Common Issues - -### Rate Limiting -Always handle 429 status: -```python -if response.status_code == 429: - _set_rate_limit("provider_name", 60) # 60 second cooldown - return None -``` - -### Timeouts -Set appropriate timeouts: -```python -response = requests.get(url, timeout=30) -``` - -### Error Classification -Log errors appropriately: -```python -except requests.Timeout: - logger.warning(f"Provider timeout for {input_str}") - return None -except Exception as e: - logger.error(f"Provider error: {e}") - return None -``` - -## Getting Help - -- Check existing providers for examples -- Open a draft PR for early feedback -- Ask questions in discussions - -## Examples - -See these providers for reference implementations: -- **URL:** `scripts/providers_impl.py::resolve_with_jina` -- **Query:** `scripts/providers_impl.py::resolve_with_tavily` - -For Rust implementations: -- **URL:** `cli/src/providers/jina.rs` -- **Query:** `cli/src/providers/tavily.rs` -``` - ---- - -## Phase 4: API Documentation (Week 4) - -### Improvement 7: OpenAPI Specification - -**New File:** `web/openapi.yaml` - -```yaml -openapi: 3.0.3 -info: - title: do-web-doc-resolver API - description: | - Resolve URLs and queries into compact, LLM-ready 
markdown. - - ## Authentication - No authentication required for basic usage. Set API keys via environment variables for paid providers. - - ## Rate Limiting - Free tier has generous limits. Paid providers have their own rate limits. - version: 1.0.0 - contact: - name: GitHub Issues - url: https://github.com/d-oit/do-web-doc-resolver/issues - -servers: - - url: http://localhost:8000 - description: Local development - - url: https://api.do-wdr.dev - description: Production - -paths: - /api/resolve: - post: - summary: Resolve URL or query - description: | - Resolve a URL or query into markdown content. - - The resolver automatically detects URL vs query and runs the appropriate cascade. - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/ResolveRequest' - responses: - '200': - description: Successful resolution - content: - application/json: - schema: - $ref: '#/components/schemas/ResolveResponse' - '400': - description: Invalid request - content: - application/json: - schema: - $ref: '#/components/schemas/Error' - '429': - description: Rate limited - content: - application/json: - schema: - $ref: '#/components/schemas/Error' - - /api/resolve/batch: - post: - summary: Batch resolution - description: Resolve multiple URLs or queries in parallel. 
- requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/BatchRequest' - responses: - '200': - description: Batch results - content: - application/json: - schema: - $ref: '#/components/schemas/BatchResponse' - - /api/health: - get: - summary: Health check - responses: - '200': - description: Service is healthy - content: - application/json: - schema: - type: object - properties: - status: - type: string - example: "ok" - -components: - schemas: - ResolveRequest: - type: object - required: - - input - properties: - input: - type: string - description: URL or query string - example: "https://example.com" - max_chars: - type: integer - description: Maximum characters to return - default: 8000 - example: 5000 - profile: - type: string - enum: [free, fast, balanced, quality] - default: balanced - description: Execution profile - format: - type: string - enum: [markdown, json, plain, html] - default: markdown - description: Output format - - ResolveResponse: - type: object - properties: - source: - type: string - description: Provider that succeeded - example: "jina" - content: - type: string - description: Resolved content - url: - type: string - description: Resolved URL (if input was URL) - query: - type: string - description: Query string (if input was query) - score: - type: number - description: Content quality score (0.0-1.0) - example: 0.85 - metrics: - type: object - properties: - latency_ms: - type: integer - example: 1234 - providers_attempted: - type: array - items: - type: string - example: ["llms_txt", "jina"] - cache_hit: - type: boolean - - BatchRequest: - type: object - properties: - requests: - type: array - items: - type: object - properties: - id: - type: string - input: - type: string - max_chars: - type: integer - maxItems: 20 - - BatchResponse: - type: object - properties: - results: - type: array - items: - $ref: '#/components/schemas/ResolveResponse' - - Error: - type: object - properties: - error: - 
type: string - code: - type: string - details: - type: object -``` - ---- - -### Improvement 8: Performance Tuning Guide - -**New File:** `agents-docs/PERFORMANCE.md` - -```markdown -# Performance Tuning Guide - -This guide helps you optimize do-web-doc-resolver for your use case. - -## Understanding Metrics - -When you run resolution with `--metrics-json`: - -```json -{ - "latency_ms": 1234, - "providers_attempted": ["llms_txt", "jina"], - "cache_hit": false, - "paid_usage": false -} -``` - -### Key Metrics - -| Metric | Good | Bad | Action | -|--------|------|-----|--------| -| `latency_ms` | < 3000 | > 10000 | Use `--profile fast` | -| `providers_attempted` | 1-2 | 5+ | Check routing memory | -| `cache_hit` | true | false | Enable semantic cache | -| `paid_usage` | false (if cost-sensitive) | true unexpectedly | Use `--profile free` | - -## Profile Selection - -### When to Use Each Profile - -| Profile | Use Case | Latency | Cost | -|---------|----------|---------|------| -| `free` | CI/CD, cost-sensitive | Variable | $0 | -| `fast` | Interactive, low latency | < 4s | Low | -| `balanced` | General use | 5-12s | Medium | -| `quality` | Deep research | 10-20s | High | - -### Profile Examples - -```bash -# CI/CD pipeline - never use paid providers -python -m scripts.cli "https://example.com" --profile free - -# Interactive use - quick results -python -m scripts.cli "query" --profile fast - -# Research - best results regardless of cost -python -m scripts.cli "complex research topic" --profile quality -``` - -## Optimization Strategies - -### 1. Enable Semantic Cache - -```python -# Automatic with default settings -result = resolve("https://example.com") - -# Second call uses cache -result = resolve("https://example.com") # Instant! -``` - -Benefits: -- 10-100x faster for repeated queries -- Reduces API costs -- Works across sessions - -### 2. 
Use Routing Memory - -The resolver learns which providers work best for each domain: - -```bash -# First call - explores providers -python -m scripts.cli "https://docs.python.org" - -# Subsequent calls - uses fastest provider -python -m scripts.cli "https://docs.python.org" # Faster! -``` - -To reset: -```bash -rm -rf .cache/routing_memory.json -``` - -### 3. Connection Pooling - -For high-throughput scenarios: - -```python -import requests -from scripts.resolve import resolve - -# Create session with pooling -session = requests.Session() -adapter = requests.adapters.HTTPAdapter( - pool_connections=20, - pool_maxsize=50 -) -session.mount("https://", adapter) - -# Reuse session for multiple requests -``` - -### 4. Batch Processing - -Process multiple URLs efficiently: - -```python -from scripts.batch_resolve import resolve_batch - -urls = [ - "https://example.com/1", - "https://example.com/2", - "https://example.com/3", -] - -results = resolve_batch(urls, max_concurrent=5) -``` - -### 5. 
Skip Slow Providers - -If you know certain providers are slow: - -```bash -# Skip Mistral browser (slow but thorough) -python -m scripts.cli "https://example.com" --skip mistral_browser - -# Skip multiple providers -python -m scripts.cli "query" --skip exa_mcp --skip tavily -``` - -## Benchmarking Your Setup - -### Simple Benchmark - -```bash -# Time a single resolution -time python -m scripts.cli "https://example.com" - -# Or with hyperfine (recommended) -hyperfine --warmup 3 'python -m scripts.cli "https://example.com"' -``` - -### Load Testing - -```bash -# Install oha (Rust-based load tester) -cargo install oha - -# Test concurrent requests -oha -z 30s -c 10 \ - --method POST \ - -d '{"input": "https://example.com"}' \ - http://localhost:8000/api/resolve -``` - -### Profiling - -```python -# Python profiling -import cProfile -import pstats - -profiler = cProfile.Profile() -profiler.enable() - -resolve("https://example.com") - -profiler.disable() -stats = pstats.Stats(profiler) -stats.sort_stats('cumulative') -stats.print_stats(20) -``` - -## Common Bottlenecks - -### 1. Network Latency - -**Symptom:** High latency for first request to new domain -**Solution:** -- Pre-warm DNS cache -- Use keep-alive connections -- Enable HTTP/2 - -### 2. Provider Throttling - -**Symptom:** Frequent rate limits -**Solution:** -- Use `--profile free` for testing -- Add delays between requests -- Spread load across multiple providers - -### 3. Content Processing - -**Symptom:** High CPU usage during compaction -**Solution:** -- Reduce `WEB_RESOLVER_MAX_CHARS` -- Skip compaction for small content -- Use streaming for large documents - -### 4. 
Cache Misses - -**Symptom:** Repeated identical queries are slow -**Solution:** -- Enable semantic cache -- Check cache TTL settings -- Verify cache directory permissions - -## Environment Tuning - -### Environment Variables - -```bash -# Reduce max content size for faster processing -export WEB_RESOLVER_MAX_CHARS=4000 - -# Increase for better results -export WEB_RESOLVER_MAX_CHARS=16000 - -# Adjust timeout for slow networks -export WEB_RESOLVER_TIMEOUT=60 - -# Enable debug logging for troubleshooting -export WEB_RESOLVER_LOG_LEVEL=DEBUG -``` - -### Rust CLI Config - -```toml -# config.toml -max_chars = 8000 -profile = "fast" -quality_threshold = 0.60 # Lower = faster but may retry more -``` - -## Scaling Considerations - -### Single Instance - -Good for: -- Personal use -- Small teams -- < 1000 requests/day - -### Horizontal Scaling - -For high throughput: - -1. **Load balancer** in front of multiple instances -2. **Shared cache** (Redis/Turso) -3. **Rate limiting** per provider across instances - -Example with Docker Compose: - -```yaml -version: '3.8' - -services: - resolver-1: - build: . - environment: - - REDIS_URL=redis://cache:6379 - - resolver-2: - build: . - environment: - - REDIS_URL=redis://cache:6379 - - cache: - image: redis:alpine -``` - -## Troubleshooting Performance - -### High Latency (> 10s) - -1. Check provider cascade: - ```bash - python -m scripts.cli "https://example.com" --log-level DEBUG - ``` - -2. Identify slow provider - -3. Skip or replace: - ```bash - python -m scripts.cli "https://example.com" --skip slow_provider - ``` - -### Memory Issues - -1. Reduce max chars: - ```bash - export WEB_RESOLVER_MAX_CHARS=4000 - ``` - -2. Clear cache: - ```bash - rm -rf .cache/ - ``` - -3. 
Use streaming for large documents - -## References - -- [Architecture Overview](adr/001-provider-cascade-architecture.md) -- [Provider Reference](references/PROVIDERS.md) -- [Configuration Guide](references/CONFIG.md) -``` - ---- - -## Summary of New Files - -| File | Purpose | -|------|---------| -| `TUTORIAL.md` | 5-minute getting started guide | -| `agents-docs/TROUBLESHOOTING.md` | Comprehensive error guide | -| `agents-docs/adr/` | Architecture decision records | -| `MIGRATING.md` | Version migration guide | -| `Dockerfile.dev` | Development container | -| `docker-compose.yml` | Multi-service dev setup | -| `.devcontainer/` | VS Code dev container config | -| `CONTRIBUTING.md` | Enhanced contribution guide | -| `agents-docs/ADDING_PROVIDERS.md` | Provider development tutorial | -| `web/openapi.yaml` | API specification | -| `agents-docs/PERFORMANCE.md` | Performance tuning guide | - ---- - -## Timeline -| Week | Focus | Deliverables | -|------|-------|--------------| -| 1 | User Onboarding | Tutorial, troubleshooting guide | -| 2 | Architecture | ADRs, migration guide | -| 3 | Developer Experience | Dev container, contributing guide, provider tutorial | -| 4 | API & Performance | OpenAPI spec, performance guide | +- [AGENTS.md](../AGENTS.md) — Project conventions +- [README.md](../README.md) — Main documentation +- [agents-docs/](../agents-docs/) — Technical reference diff --git a/plans/08-deep-research.md b/plans/08-deep-research.md index 8975d61..cd9a325 100644 --- a/plans/08-deep-research.md +++ b/plans/08-deep-research.md @@ -1,947 +1,31 @@ -# Deep Research & Evaluation Plan +# Deep Research & Evaluation (Condensed Status) -## Overview +## Original Overview -This plan implements deep research capabilities and a comprehensive evaluation framework for the do-web-doc-resolver project. Focus on quality, accuracy, and performance measurement. +Multi-step research engine, evaluation framework, and performance benchmarking +suite. 
-**Status:** Active Development -**Priority:** P0 (Critical) -**Duration:** 2 weeks -**No New Providers Required** +## Status ---- +None of the proposed code has been implemented. This plan is deprioritized due +to the current focus on correctness, CI, and architecture consolidation. -## Phase 1: Deep Research Framework (Week 1) +## What's Done -### Task 1.1: Multi-Step Research Engine +- None. All 947 lines (deep research engine, evaluation framework, performance + suite) are aspirational. -**Description:** Enable iterative, multi-step research that builds context over multiple queries. +## What Remains -**Implementation:** +All tasks remain unimplemented: +- Multi-step research engine (`scripts/deep_research.py`) +- Evaluation framework with 6 metrics (`scripts/evaluation.py`) +- Performance benchmarking suite (`scripts/performance_suite.py`) +- Test files for all three modules +- `psutil` dependency (not yet in `requirements.txt`) -```python -# scripts/deep_research.py -""" -Deep research engine for multi-step iterative research. -""" +## References -from dataclasses import dataclass, field -from typing import List, Dict, Optional, Callable -from enum import Enum -import json -import time - -class ResearchStepType(Enum): - INITIAL_QUERY = "initial" - FOLLOW_UP = "follow_up" - SYNTHESIS = "synthesis" - VERIFICATION = "verification" - -@dataclass -class ResearchStep: - step_number: int - step_type: ResearchStepType - query: str - results: List[Dict] - insights: List[str] - timestamp: float = field(default_factory=time.time) - -@dataclass -class ResearchSession: - session_id: str - initial_query: str - steps: List[ResearchStep] - final_report: Optional[str] = None - metrics: Dict = field(default_factory=dict) - -class DeepResearchEngine: - """ - Multi-step research engine with iterative refinement. 
- """ - - def __init__( - self, - max_steps: int = 5, - min_insights_per_step: int = 3, - synthesis_threshold: float = 0.8 - ): - self.max_steps = max_steps - self.min_insights_per_step = min_insights_per_step - self.synthesis_threshold = synthesis_threshold - - async def research( - self, - query: str, - progress_callback: Optional[Callable] = None - ) -> ResearchSession: - """ - Execute multi-step research. - - Args: - query: Initial research query - progress_callback: Optional callback for progress updates - - Returns: - ResearchSession with all steps and final report - """ - import uuid - session = ResearchSession( - session_id=str(uuid.uuid4()), - initial_query=query, - steps=[] - ) - - # Step 1: Initial broad search - step1 = await self._execute_step( - session, 1, ResearchStepType.INITIAL_QUERY, query - ) - session.steps.append(step1) - - if progress_callback: - progress_callback(1, self.max_steps, "initial_search_complete") - - # Steps 2-N: Iterative refinement - for step_num in range(2, self.max_steps + 1): - # Generate follow-up queries based on insights - follow_up_queries = self._generate_follow_ups(session) - - if not follow_up_queries: - break - - # Execute follow-up searches - for follow_up in follow_up_queries[:2]: # Max 2 per step - step = await self._execute_step( - session, step_num, ResearchStepType.FOLLOW_UP, follow_up - ) - session.steps.append(step) - - if progress_callback: - progress_callback(step_num, self.max_steps, f"step_{step_num}_complete") - - # Final synthesis - synthesis_step = await self._synthesize(session) - session.steps.append(synthesis_step) - - if progress_callback: - progress_callback(self.max_steps, self.max_steps, "synthesis_complete") - - # Calculate metrics - session.metrics = self._calculate_metrics(session) - - return session - - async def _execute_step( - self, - session: ResearchSession, - step_number: int, - step_type: ResearchStepType, - query: str - ) -> ResearchStep: - """Execute a single research step.""" - 
from scripts.resolve import resolve_query - - # Resolve the query - result = resolve_query(query, max_chars=8000) - - # Extract insights using simple NLP - insights = self._extract_insights(result.get("content", "")) - - return ResearchStep( - step_number=step_number, - step_type=step_type, - query=query, - results=[result], - insights=insights - ) - - def _extract_insights(self, content: str) -> List[str]: - """Extract key insights from content.""" - insights = [] - - # Simple extraction: bullet points, key sentences - lines = content.split('\n') - for line in lines: - line = line.strip() - # Look for bullet points - if line.startswith('- ') or line.startswith('* '): - insights.append(line[2:]) - # Look for numbered points - elif line[:2].isdigit() and line[2:4] in ['. ', ') ']: - insights.append(line[3:]) - # Key sentences (containing key phrases) - elif any(phrase in line.lower() for phrase in ['important', 'key', 'critical', 'essential']): - if len(line) > 50: - insights.append(line) - - return insights[:10] # Max 10 insights - - def _generate_follow_ups(self, session: ResearchSession) -> List[str]: - """Generate follow-up queries based on accumulated insights.""" - if not session.steps: - return [] - - # Collect all insights - all_insights = [] - for step in session.steps: - all_insights.extend(step.insights) - - if len(all_insights) < self.min_insights_per_step: - return [] - - # Generate queries around gaps - follow_ups = [] - - # Query 1: Deeper dive into most mentioned topic - from collections import Counter - words = ' '.join(all_insights).lower().split() - common_words = Counter(w for w in words if len(w) > 5).most_common(3) - - if common_words: - topic = common_words[0][0] - follow_ups.append(f"{session.initial_query} {topic} detailed analysis") - - # Query 2: Recent developments - follow_ups.append(f"{session.initial_query} latest updates 2024") - - # Query 3: Comparison/contrast - if len(common_words) > 1: - topic2 = common_words[1][0] - 
follow_ups.append(f"{common_words[0][0]} vs {topic2}") - - return follow_ups[:3] - - async def _synthesize(self, session: ResearchSession) -> ResearchStep: - """Synthesize all research into final report.""" - all_content = [] - for step in session.steps: - for result in step.results: - all_content.append(result.get("content", "")) - - # Combine and deduplicate - combined = '\n\n'.join(all_content) - - # Generate synthesis insights - synthesis_insights = [ - f"Research covered {len(session.steps)} steps", - f"Total sources: {len(all_content)}", - f"Total insights extracted: {sum(len(s.insights) for s in session.steps)}" - ] - - return ResearchStep( - step_number=len(session.steps) + 1, - step_type=ResearchStepType.SYNTHESIS, - query="synthesis", - results=[{"content": combined}], - insights=synthesis_insights - ) - - def _calculate_metrics(self, session: ResearchSession) -> Dict: - """Calculate research quality metrics.""" - total_time = time.time() - session.steps[0].timestamp if session.steps else 0 - - return { - "total_steps": len(session.steps), - "total_insights": sum(len(s.insights) for s in session.steps), - "total_time_seconds": total_time, - "avg_time_per_step": total_time / len(session.steps) if session.steps else 0, - "sources_per_step": sum(len(s.results) for s in session.steps) / len(session.steps) if session.steps else 0 - } - - -# CLI Interface -if __name__ == "__main__": - import asyncio - import sys - - async def main(): - if len(sys.argv) < 2: - print("Usage: python -m scripts.deep_research <query>") - sys.exit(1) - - query = ' '.join(sys.argv[1:]) - engine = DeepResearchEngine(max_steps=3) - - def progress(step, total, status): - print(f"[{step}/{total}] {status}") - - print(f"Starting deep research: {query}") - session = await engine.research(query, progress) - - print("\n" + "="*60) - print(f"Research Complete: {session.session_id}") - print("="*60) - print(f"Steps executed: {len(session.steps)}") - print(f"Total insights: 
{session.metrics['total_insights']}") - print(f"Time: {session.metrics['total_time_seconds']:.1f}s") - - print("\nKey Insights:") - for i, step in enumerate(session.steps, 1): - print(f"\nStep {i} ({step.step_type.value}): {step.query}") - for insight in step.insights[:5]: - print(f" - {insight[:100]}...") - - asyncio.run(main()) -``` - -**New Files:** -- `scripts/deep_research.py` - Research engine (max 500 lines, split if needed) -- `tests/test_deep_research.py` - Research engine tests - -**Acceptance Criteria:** -- Multi-step research executes 3-5 iterations -- Each step generates meaningful follow-up queries -- Final synthesis combines insights -- Progress callbacks provide visibility -- Metrics calculated for evaluation - ---- - -### Task 1.2: Evaluation Framework - -**Description:** Comprehensive evaluation system for measuring resolution quality. - -**Implementation:** - -```python -# scripts/evaluation.py -""" -Evaluation framework for measuring resolver quality and performance. -""" - -from dataclasses import dataclass -from typing import Dict, List, Optional, Callable -import json -import time -from enum import Enum - -class EvaluationMetric(Enum): - RELEVANCE = "relevance" - COMPLETENESS = "completeness" - ACCURACY = "accuracy" - LATENCY = "latency" - COVERAGE = "coverage" - DIVERSITY = "diversity" - -@dataclass -class EvaluationResult: - metric: EvaluationMetric - score: float # 0.0 - 1.0 - details: Dict - timestamp: float - -@dataclass -class EvaluatedResolution: - query: str - result: Dict - evaluations: List[EvaluationResult] - overall_score: float - duration_ms: int - -class ResolutionEvaluator: - """ - Evaluates resolution quality across multiple dimensions. 
- """ - - def __init__(self): - self.metrics: Dict[EvaluationMetric, Callable] = { - EvaluationMetric.RELEVANCE: self._evaluate_relevance, - EvaluationMetric.COMPLETENESS: self._evaluate_completeness, - EvaluationMetric.ACCURACY: self._evaluate_accuracy, - EvaluationMetric.LATENCY: self._evaluate_latency, - EvaluationMetric.COVERAGE: self._evaluate_coverage, - } - - def evaluate( - self, - query: str, - result: Dict, - duration_ms: int, - reference: Optional[str] = None - ) -> EvaluatedResolution: - """ - Evaluate a resolution result. - - Args: - query: Original query - result: Resolution result dict - duration_ms: Resolution time - reference: Optional reference content for comparison - - Returns: - EvaluatedResolution with all metrics - """ - evaluations = [] - - for metric, evaluator in self.metrics.items(): - score, details = evaluator(query, result, duration_ms, reference) - evaluations.append(EvaluationResult( - metric=metric, - score=score, - details=details, - timestamp=time.time() - )) - - # Calculate overall score (weighted average) - weights = { - EvaluationMetric.RELEVANCE: 0.3, - EvaluationMetric.COMPLETENESS: 0.25, - EvaluationMetric.ACCURACY: 0.2, - EvaluationMetric.LATENCY: 0.15, - EvaluationMetric.COVERAGE: 0.1, - } - - overall = sum( - e.score * weights.get(e.metric, 0.1) - for e in evaluations - ) - - return EvaluatedResolution( - query=query, - result=result, - evaluations=evaluations, - overall_score=overall, - duration_ms=duration_ms - ) - - def _evaluate_relevance( - self, - query: str, - result: Dict, - duration_ms: int, - reference: Optional[str] - ) -> tuple[float, Dict]: - """Evaluate relevance of content to query.""" - content = result.get("content", "").lower() - query_words = set(query.lower().split()) - - # Calculate word overlap - content_words = set(content.split()) - overlap = query_words & content_words - - score = len(overlap) / len(query_words) if query_words else 0.0 - - # Boost if key phrases present - key_phrases = 
[query.lower()] - for phrase in key_phrases: - if phrase in content: - score = min(1.0, score + 0.2) - - return min(1.0, score), { - "query_words": len(query_words), - "overlap": len(overlap), - "coverage_ratio": score - } - - def _evaluate_completeness( - self, - query: str, - result: Dict, - duration_ms: int, - reference: Optional[str] - ) -> tuple[float, Dict]: - """Evaluate content completeness.""" - content = result.get("content", "") - - # Score based on length and structure - char_count = len(content) - word_count = len(content.split()) - - # Ideal range: 500-3000 words - if word_count < 100: - score = 0.3 - elif word_count < 500: - score = 0.6 - elif word_count < 1000: - score = 0.8 - else: - score = 1.0 - - # Check for structure (headers, lists) - has_headers = "#" in content - has_lists = "- " in content or "* " in content - - if has_headers: - score = min(1.0, score + 0.1) - if has_lists: - score = min(1.0, score + 0.1) - - return score, { - "char_count": char_count, - "word_count": word_count, - "has_headers": has_headers, - "has_lists": has_lists - } - - def _evaluate_accuracy( - self, - query: str, - result: Dict, - duration_ms: int, - reference: Optional[str] - ) -> tuple[float, Dict]: - """Evaluate factual accuracy (requires reference).""" - if not reference: - # Without reference, base on source reputation - source = result.get("source", "unknown") - trusted_sources = {"exa", "tavily", "serper", "firecrawl"} - - if source in trusted_sources: - return 0.85, {"source": source, "trust_score": "high"} - elif source in {"jina", "direct_fetch"}: - return 0.75, {"source": source, "trust_score": "medium"} - else: - return 0.65, {"source": source, "trust_score": "low"} - - # With reference, compare content similarity - content = result.get("content", "") - # Simple word overlap comparison - content_words = set(content.lower().split()) - ref_words = set(reference.lower().split()) - - if not ref_words: - return 0.5, {"method": "no_reference"} - - overlap = 
len(content_words & ref_words) - score = overlap / len(ref_words) - - return min(1.0, score), { - "method": "reference_comparison", - "overlap_words": overlap - } - - def _evaluate_latency( - self, - query: str, - result: Dict, - duration_ms: int, - reference: Optional[str] - ) -> tuple[float, Dict]: - """Evaluate resolution latency.""" - # Score based on duration - if duration_ms < 2000: - score = 1.0 - elif duration_ms < 5000: - score = 0.8 - elif duration_ms < 10000: - score = 0.6 - elif duration_ms < 20000: - score = 0.4 - else: - score = 0.2 - - return score, { - "duration_ms": duration_ms, - "threshold": "<2s=excellent, <5s=good, <10s=acceptable" - } - - def _evaluate_coverage( - self, - query: str, - result: Dict, - duration_ms: int, - reference: Optional[str] - ) -> tuple[float, Dict]: - """Evaluate how many aspects of query are covered.""" - content = result.get("content", "").lower() - - # Extract aspects from query (simplified) - # For "Python tutorial for beginners" - # aspects: ["python", "tutorial", "beginners"] - aspects = [w for w in query.lower().split() if len(w) > 3] - - covered = sum(1 for aspect in aspects if aspect in content) - score = covered / len(aspects) if aspects else 0.0 - - return score, { - "aspects": len(aspects), - "covered": covered, - "coverage_ratio": score - } - - -class EvaluationBenchmark: - """ - Benchmark suite for systematic evaluation. - """ - - def __init__(self): - self.evaluator = ResolutionEvaluator() - self.results: List[EvaluatedResolution] = [] - - async def run_benchmark( - self, - test_queries: List[str], - profile: str = "balanced" - ) -> Dict: - """ - Run benchmark on test queries. 
- - Args: - test_queries: List of queries to test - profile: Resolution profile to use - - Returns: - Benchmark summary statistics - """ - from scripts.resolve import resolve - - print(f"Running benchmark with {len(test_queries)} queries...") - print(f"Profile: {profile}") - print("-" * 60) - - for i, query in enumerate(test_queries, 1): - print(f"[{i}/{len(test_queries)}] Testing: {query[:50]}...") - - start = time.time() - result = resolve(query, profile=profile) - duration_ms = int((time.time() - start) * 1000) - - evaluation = self.evaluator.evaluate( - query=query, - result=result, - duration_ms=duration_ms - ) - - self.results.append(evaluation) - - print(f" Source: {result.get('source', 'none')}") - print(f" Score: {evaluation.overall_score:.2f}") - print(f" Latency: {duration_ms}ms") - print() - - return self._generate_summary() - - def _generate_summary(self) -> Dict: - """Generate benchmark summary.""" - if not self.results: - return {} - - scores = [r.overall_score for r in self.results] - latencies = [r.duration_ms for r in self.results] - - # Calculate metric averages - metric_scores: Dict[str, List[float]] = {} - for result in self.results: - for eval_result in result.evaluations: - metric_name = eval_result.metric.value - if metric_name not in metric_scores: - metric_scores[metric_name] = [] - metric_scores[metric_name].append(eval_result.score) - - summary = { - "total_queries": len(self.results), - "overall_score": { - "mean": sum(scores) / len(scores), - "median": sorted(scores)[len(scores) // 2], - "min": min(scores), - "max": max(scores), - }, - "latency_ms": { - "mean": sum(latencies) / len(latencies), - "median": sorted(latencies)[len(latencies) // 2], - "p95": sorted(latencies)[int(len(latencies) * 0.95)], - "min": min(latencies), - "max": max(latencies), - }, - "by_metric": { - metric: { - "mean": sum(scores) / len(scores), - "min": min(scores), - "max": max(scores), - } - for metric, scores in metric_scores.items() - }, - "results": [ - { - 
"query": r.query[:50], - "score": r.overall_score, - "duration_ms": r.duration_ms, - "source": r.result.get("source", "none") - } - for r in self.results - ] - } - - return summary - - def export_report(self, filename: str = "evaluation_report.json"): - """Export detailed report to file.""" - summary = self._generate_summary() - - with open(filename, 'w') as f: - json.dump(summary, f, indent=2) - - print(f"Report exported to {filename}") - - -# Test queries for benchmarking -DEFAULT_BENCHMARK_QUERIES = [ - "Python programming tutorial", - "What is machine learning", - "Rust vs Go comparison", - "Docker containerization guide", - "API design best practices", - "PostgreSQL vs MySQL", - "React hooks tutorial", - "Kubernetes basics", - "CI/CD pipeline setup", - "Microservices architecture", -] - - -if __name__ == "__main__": - import asyncio - - async def run(): - benchmark = EvaluationBenchmark() - summary = await benchmark.run_benchmark( - test_queries=DEFAULT_BENCHMARK_QUERIES[:5], # Use subset for testing - profile="free" - ) - - print("\n" + "="*60) - print("BENCHMARK SUMMARY") - print("="*60) - print(f"Overall Score: {summary['overall_score']['mean']:.2f}") - print(f"Mean Latency: {summary['latency_ms']['mean']:.0f}ms") - print(f"P95 Latency: {summary['latency_ms']['p95']:.0f}ms") - - print("\nBy Metric:") - for metric, stats in summary['by_metric'].items(): - print(f" {metric}: {stats['mean']:.2f}") - - benchmark.export_report() - - asyncio.run(run()) -``` - -**New Files:** -- `scripts/evaluation.py` - Evaluation framework -- `tests/test_evaluation.py` - Evaluation tests -- `scripts/benchmark.py` - CLI benchmark runner - -**Acceptance Criteria:** -- 6 evaluation metrics implemented -- Overall score calculated with weighted average -- Benchmark suite runs on test queries -- Report generation with JSON export -- Performance metrics tracked - ---- - -## Phase 2: Performance Benchmarking (Week 2) - -### Task 2.1: Comprehensive Performance Suite - -**Description:** 
Systematic performance measurement and regression detection. - -**Implementation:** - -```python -# scripts/performance_suite.py -""" -Comprehensive performance benchmarking suite. -""" - -import time -import statistics -import json -from dataclasses import dataclass -from typing import List, Dict, Optional -from datetime import datetime -import psutil -import os - -@dataclass -class PerformanceResult: - test_name: str - latency_ms: float - memory_mb: float - cpu_percent: float - timestamp: str - metadata: Dict - -class PerformanceSuite: - """ - Performance testing suite with resource monitoring. - """ - - def __init__(self): - self.results: List[PerformanceResult] = [] - self.process = psutil.Process(os.getpid()) - - def measure( - self, - test_name: str, - func, - iterations: int = 10, - warmup: int = 2 - ) -> PerformanceResult: - """ - Measure performance of a function. - - Args: - test_name: Name of the test - func: Function to measure - iterations: Number of iterations - warmup: Warmup iterations (not measured) - - Returns: - PerformanceResult with metrics - """ - # Warmup - for _ in range(warmup): - func() - - # Measure - latencies = [] - memory_samples = [] - cpu_samples = [] - - for _ in range(iterations): - # Get baseline metrics - mem_before = self.process.memory_info().rss / 1024 / 1024 - cpu_before = self.process.cpu_percent() - - start = time.perf_counter() - result = func() - elapsed_ms = (time.perf_counter() - start) * 1000 - - # Get post metrics - mem_after = self.process.memory_info().rss / 1024 / 1024 - cpu_after = self.process.cpu_percent() - - latencies.append(elapsed_ms) - memory_samples.append(mem_after - mem_before) - cpu_samples.append(cpu_after) - - # Calculate statistics - result = PerformanceResult( - test_name=test_name, - latency_ms=statistics.median(latencies), - memory_mb=statistics.mean(memory_samples), - cpu_percent=statistics.mean(cpu_samples), - timestamp=datetime.now().isoformat(), - metadata={ - "iterations": iterations, - 
"latency_p95": sorted(latencies)[int(len(latencies) * 0.95)], - "latency_std": statistics.stdev(latencies) if len(latencies) > 1 else 0, - } - ) - - self.results.append(result) - return result - - def benchmark_resolution(self, urls: List[str], queries: List[str]): - """Benchmark resolution performance.""" - from scripts.resolve import resolve_url, resolve_query - - print("Benchmarking URL resolution...") - for url in urls: - result = self.measure( - f"url:{url}", - lambda u=url: resolve_url(u, max_chars=1000) - ) - print(f" {url}: {result.latency_ms:.0f}ms") - - print("\nBenchmarking query resolution...") - for query in queries: - result = self.measure( - f"query:{query[:30]}", - lambda q=query: resolve_query(q, max_chars=1000, profile="free") - ) - print(f" {query[:40]}: {result.latency_ms:.0f}ms") - - def generate_report(self) -> Dict: - """Generate performance report.""" - if not self.results: - return {} - - url_results = [r for r in self.results if r.test_name.startswith("url:")] - query_results = [r for r in self.results if r.test_name.startswith("query:")] - - report = { - "timestamp": datetime.now().isoformat(), - "summary": { - "total_tests": len(self.results), - "mean_latency_ms": statistics.mean([r.latency_ms for r in self.results]), - "mean_memory_mb": statistics.mean([r.memory_mb for r in self.results]), - }, - "url_resolution": { - "count": len(url_results), - "mean_latency_ms": statistics.mean([r.latency_ms for r in url_results]) if url_results else 0, - "p95_latency_ms": sorted([r.latency_ms for r in url_results])[int(len(url_results) * 0.95)] if url_results else 0, - }, - "query_resolution": { - "count": len(query_results), - "mean_latency_ms": statistics.mean([r.latency_ms for r in query_results]) if query_results else 0, - "p95_latency_ms": sorted([r.latency_ms for r in query_results])[int(len(query_results) * 0.95)] if query_results else 0, - }, - "details": [ - { - "test": r.test_name, - "latency_ms": r.latency_ms, - "memory_mb": r.memory_mb, 
- "cpu_percent": r.cpu_percent, - } - for r in self.results - ] - } - - return report - - def save_report(self, filename: str = "performance_report.json"): - """Save report to file.""" - report = self.generate_report() - with open(filename, 'w') as f: - json.dump(report, f, indent=2) - print(f"\nReport saved to {filename}") - - -if __name__ == "__main__": - suite = PerformanceSuite() - - # Test URLs - test_urls = [ - "https://example.com", - "https://docs.python.org", - ] - - # Test queries - test_queries = [ - "Python tutorial", - "API design", - ] - - suite.benchmark_resolution(test_urls, test_queries) - suite.save_report() -``` - -**Acceptance Criteria:** -- Latency, memory, CPU metrics tracked -- P95 and std deviation calculated -- Report generation with JSON export -- Regression detection support - ---- - -## Summary - -### New Files -- `scripts/deep_research.py` - Multi-step research engine -- `scripts/evaluation.py` - Evaluation framework -- `scripts/performance_suite.py` - Performance benchmarking -- `tests/test_deep_research.py` - Research tests -- `tests/test_evaluation.py` - Evaluation tests - -### Dependencies -``` -psutil>=5.9.0 # For resource monitoring -``` - -### Success Metrics -- [ ] Deep research executes 3-5 steps -- [ ] Evaluation accuracy >90% -- [ ] Performance benchmarks track all metrics -- [ ] No new providers required -- [ ] All tests pass \ No newline at end of file +- [AUDIT.md](AUDIT.md) — Priority overview +- [ADR-009](009-cross-runtime-analysis.md) — Cross-runtime deep research parity +- [scripts/synthesis.py](../scripts/synthesis.py) — Related synthesis logic diff --git a/plans/15-GOAP-NEXT-PHASE.md b/plans/15-GOAP-NEXT-PHASE.md new file mode 100644 index 0000000..76e355c --- /dev/null +++ b/plans/15-GOAP-NEXT-PHASE.md @@ -0,0 +1,105 @@ +# GOAP Next Phase: Codacy Cleanup, CI Config, Constants Extraction + +> Generated 2026-05-13 after ADR-012 Wave 1 complete. +> Next: Wave 2 (CI/config) → Wave 3 (constants/state). 
+ +## Goal + +Close out remaining ADR-012/013/014 work, fix AUDIT P0/P1 items, and unblock +further development. + +## Preconditions + +- ADR-012 Wave 1 merged ✅ +- Quality gate (PR #341), tiered TTL (#338), provider skip (#342), + adaptive reordering (#343), rate throttling (#358) all merged ✅ + +## Actions (dependency-ordered waves) + +### Wave 2 — ADR-013 CI & Config Fixes (Effort: S) + +| ID | Task | File | Notes | +|----|------|------|-------| +| I1 | Fix coverage upload condition | `.github/workflows/ci.yml:106` | Only on main | +| I2 | Fix gitleaks branch triggers | `.github/workflows/gitleaks.yml:5-6` | all branches | +| I3 | Update actions/checkout in gitleaks | `.github/workflows/gitleaks.yml:21` | v3→v4 | +| I4 | Install lint deps from requirements.txt | `.github/workflows/ci.yml:69` | — | +| I5 | Shellcheck severity → error | `.pre-commit-config.yaml:34` | — | +| K1-K3 | Consolidate pre-commit hooks | `scripts/setup-hooks.sh`, `.githooks/` | Reduce drift | +| K4 | Fix requirements.txt package names | `requirements.txt` | — | +| K5 | Add Python 3.13 classifier | `pyproject.toml:16-18` | — | +| K7 | Fix close-resolved-issues.yml trigger | `.github/workflows/close-resolved-issues.yml:4` | — | + +### Wave 3 — ADR-014 Constants & State Extraction (Effort: M, prerequisite) + +| ID | Task | File | Notes | +|----|------|------|-------| +| A1 | Create `scripts/constants.py` | New | Single source of truth | +| A2-A4 | Remove duplicate constants from resolve.py, utils.py, providers_impl.py | 3 files | — | +| A5 | Create `scripts/state.py` | New | Shared instances | +| A6 | Remove monkey-patching from resolve.py | `scripts/resolve.py` | Replace with state.py import | +| A7 | Import state in _url_resolve, _query_resolve | 2 files | — | +| A8 | Centralize semantic cache env vars | `scripts/semantic_cache.py` | — | + +### Wave 4 — ADR-012 Remaining + Quality Fixes (depends on Wave 3) + +| ID | Task | File | Effort | Status | +|----|------|------|--------|--------| +| 
P3b | Log all provider exceptions (9 still silent) | `scripts/providers_impl.py` | M | ❌ | +| P4 | Replace requests.post with shared session | `scripts/synthesis.py` | M | ❌ | +| P5 | Fix preflight_route loose pattern matching | `scripts/routing.py` | M | ❌ | +| P6 | Remove unused NegativeCacheEntry (Python) | `scripts/cache_negative.py` | S | ❌ | +| P7 | Move TIERED_TTL dict to constants.py (not dead) | `scripts/utils.py` → `constants.py` | S | ❌ (do in Wave 3) | +| Q1-Q6 | Quality scoring fixes | `scripts/quality.py` | M | ❌ | + +### AUDIT P2/P3 Roadmap Items + +| # | Task | Area | Priority | +|---|------|------|----------| +| 10 | Port preflight routing to Rust + Web | Cross-platform | P2 | +| 11 | Add hedged requests to Rust | `cli/src/resolver/cascade.rs` | P2 | +| 12 | Add `evals.json` to skills | `.agents/skills/*/` | P2 | +| 13 | Add Python 3.10 to CI or bump `requires-python` | CI | P2 | +| 14 | Port `exa_mcp_mistral` combo to Python + Rust | Cross-platform | P3 | +| 15 | Full `--deep-research` parallel mode for CLIs | Python + Rust | P3 | +| 16 | File-based routing memory for Python | `scripts/` | P3 | + +### Open AUDIT Items (P0/P1) + +| # | Task | File/Location | Priority | Notes | +|---|------|--------------|----------|-------| +| Q1 | Split `page.tsx` (496 lines, near 500 limit) | `web/app/page.tsx` | P0 | Borderline — extract components | +| Q2 | Shrink `query.rs` (527 lines, over 500 limit) | `cli/src/resolver/query.rs` | P0 | **EXCEEDED** — needs splitting | +| M5 | Unit tests for web utilities | `web/lib/*.ts` | P1 | circuit-breaker, errors, quality, keys | +| M7 | Mobile/tablet Playwright in CI | `.github/workflows/ci-ui.yml` | P1 | `--project=desktop` only | +| P3 | Wire Rust `--profile` to budget presets | `cli/src/` | P1 | CLI flag exists but not wired | +| M6 | Unit tests for Rust resolver | `cli/src/resolver/` | P2 | query.rs, url.rs | +| P4/P5 | Port preflight routing + hedged requests to Rust | `cli/src/` | P2 | Parity gaps | +| M8 | Add 
`evals.json` to skills | `.agents/skills/*/` | P2 | 0/13 have evals | + +## Postconditions + +1. CI config is clean, gitleaks runs on all branches, coverage uploads correctly +2. Constants are centralized in `scripts/constants.py` +3. Shared state lives in `scripts/state.py` — no more monkey-patching +4. `scripts/synthesis.py` uses shared session instead of raw `requests.post` +5. Quality scoring has no dead code or magic numbers +6. `page.tsx` and `query.rs` are within the 500-line limit +7. Web utilities have basic unit tests + +## Execution Order + +``` +Wave 2 (fast: CI config) → Wave 3 (prerequisite for Waves 4-6) +→ Wave 4 (quality fixes) + AUDIT P0/P1 items in parallel +→ Parity items (P4/P5/M6) +``` + +## Risk Assessment + +| Risk | Mitigation | +|------|------------| +| Wave 3 state.py breaks test fixtures | Update conftest to import from state.py; run full suite after each sub-task | +| Wave 3 constants extraction changes behavior | Verify all constants are functionally identical; use grep to find all references | +| Wave 2 CI changes break the pipeline | Test via `act` locally before pushing; keep old config as comment for rollback | +| Q2 (query.rs split) introduces circular imports | Follow existing module pattern; keep public API surface unchanged | diff --git a/plans/16-GOAP-WAVE2-6.md b/plans/16-GOAP-WAVE2-6.md new file mode 100644 index 0000000..0c1c2a2 --- /dev/null +++ b/plans/16-GOAP-WAVE2-6.md @@ -0,0 +1,143 @@ +# GOAP Waves 2-6: CI Config, Constants, Quality, Splits, Tests, Parity + +> Generated 2026-05-13. Supersedes `15-GOAP-NEXT-PHASE.md` for remaining work. + +## Goal + +Close all P0/P1 issues, execute Waves 2-6 from ADR-013/014, and address +newly discovered gaps (Rust file size violations, dead code, thread-safety +concerns, parity gaps). 
+ +## Preconditions + +- ADR-012 Wave 1 merged (PR #364) +- ADR-013 Wave 1b merged +- Quality gate, tiered TTL, provider skip, rate throttling all merged + +## New Discoveries (not in prior plans) + +| ID | Issue | File | Severity | +|----|-------|------|----------| +| N1 | `semantic_cache.rs` 1056 lines (2x limit) | `cli/src/semantic_cache.rs` | P0 | +| N2 | `config.rs` 712 lines (over 500 limit) | `cli/src/config.rs` | P0 | +| N3 | `build_budget()` duplicated verbatim in 2 files | `query.rs:506` + `url.rs:475` | P1 | +| N4 | Dead `Profile::is_provider_allowed()` + `max_hops()` | `cli/src/types.rs:99-116` | P2 | +| N5 | `CircuitBreakerRegistry.is_open()` TOCTOU — state used outside lock | `scripts/circuit_breaker.py:46-47` | P1 ✅ RESOLVED (PR #365) | +| N6 | `_maybe_evict()` not independently lock-protected | `scripts/semantic_cache.py:336` | P2 | +| N7 | 11/13 skills missing `evals.json` (was 0/13) | `.agents/skills/*/` | P2 | +| N8 | No `pnpm-lock.yaml` in repo | `cli/ui/`, `web/` | P2 | +| N9 | `duckduckgo-search` vs `ddgs` package name mismatch | `requirements.txt:9` | P1 | +| N10 | `setup-hooks.sh` only validates symlinks, not quality gate | `scripts/setup-hooks.sh` | P2 | +| N11 | CI runs 3 Playwright projects; AGENTS.md says 1 | `ci-ui.yml:176` vs `AGENTS.md:55` | P2 | +| N12 | Raw `requests.post()` in synthesis — no SSRF, no retry, no session | `scripts/synthesis.py:165` | P1 ✅ RESOLVED (PR #365) | +| N13 | SSRF gaps in `resolve_with_docling()` + `resolve_with_ocr()` — no `is_safe_url()` | `scripts/providers_impl.py:373-393` | P1 ✅ RESOLVED (PR #365) | + +## Actions (dependency-ordered waves) + +### Wave 2 — ADR-013 CI & Config Fixes (Effort: S, ~1 PR) + +| ID | Task | File | Notes | +|----|------|------|-------| +| I1 | Fix coverage upload condition to use literal `'3.12'` | `ci.yml:106` | Fragile env context comparison | +| I2 | Fix gitleaks branch triggers (remove `master`, `develop`) | `gitleaks.yml:5-6` | Only `main` needed | +| I3 | Pin gitleaks 
checkout to v6.0.2 (match ci.yml) | `gitleaks.yml:21` | v4.2.2 outdated | +| I4 | Add `flake8` to CI lint deps | `ci.yml:69` | Missing from install step | +| I5 | Fix shellcheck severity to `error` in pre-commit config | `.pre-commit-config.yaml:34` | Currently `warning` | +| K4 | Fix `duckduckgo-search` → `ddgs` in requirements.txt | `requirements.txt:9` | Package renamed upstream | +| K5 | Add `3.13` classifier + black/ruff target-version | `pyproject.toml` | CI tests 3.13 but not listed | +| K6 | Update AGENTS.md Playwright command to include all 3 projects | `AGENTS.md:55` | CI runs `desktop+mobile+tablet` | +| K7 | Fix `markdownlint.toml` config parsing — `MD013=false` ignored | `markdownlint.toml`, `.githooks/pre-commit`, `.pre-commit-config.yaml` | TOML format may not be recognized; consider JSON or YAML config, or add `--disable MD013` to the hook args | + +### Wave 3 — ADR-014 Constants & State Extraction (Effort: M, ~1 PR) + +| ID | Task | File | Notes | +|----|------|------|-------| +| A1 | Create `scripts/constants.py` | New | `MAX_CHARS`, `MIN_CHARS`, `DEFAULT_TIMEOUT`, `TIERED_TTL` | +| A2 | Remove duplicate constants from `resolve.py` | `scripts/resolve.py:62-64` | Import from constants | +| A3 | Remove duplicate constants from `providers_impl.py` | `scripts/providers_impl.py:26-28` | Note: has config fallback | +| A4 | Remove duplicate constants from `utils.py` | `scripts/utils.py:28-31` | Import from constants | +| A5 | Create `scripts/state.py` with shared singletons | New | CB registry, routing memory | +| A6 | Remove monkey-patching from resolve.py | `scripts/resolve.py:85-91` | Import from state | +| A7 | Update `_url_resolve` + `_query_resolve` imports | 2 files | Import from state | +| A8 | Centralize semantic cache env vars | `scripts/semantic_cache.py` | | + +### Wave 4 — Quality, Safety & Code Fixes (Effort: M-L, ~2-3 PRs) + +| ID | Task | File | Notes | +|----|------|------|-------| +| P3b | Add logging to 7 silent exception handlers | 
`scripts/providers_impl.py` | `except Exception:` → `except Exception as e: logger.warning(...)` | +| P4 | Replace `requests.post` with `get_session()` + SSRF check | `scripts/synthesis.py:165` | ✅ DONE (PR #365) | +| P5 | Anchor `preflight_route` patterns with word boundaries | `scripts/routing.py:157-158` | Regex or anchored matching | +| P6 | Remove dead `NegativeCacheEntry` dataclass | `scripts/cache_negative.py:11-16` | Never instantiated | +| Q1-Q6 | Extract 11 magic numbers to named constants | `scripts/quality.py` | | +| N5 | Fix `CircuitBreakerRegistry.is_open()` TOCTOU | `scripts/circuit_breaker.py:46-47` | ✅ DONE (PR #365) | +| N6 | Add lock guard to `_maybe_evict()` as defense-in-depth | `scripts/semantic_cache.py:336` | Reentrant-safe | +| N12 | Add SSRF check to Mistral API call in synthesis | `scripts/synthesis.py` | ✅ DONE (PR #365) | +| N13 | Add SSRF checks to docling + ocr providers | `scripts/providers_impl.py:373-393` | ✅ DONE (PR #365) | +| N13b | Fix lazy logging (f-string → %s) in mistral_browser SSRF warn | `scripts/providers_impl.py:277` | ✅ DONE (PR #365) | + +### Wave 5 — Rust File Splits & Dedup (Effort: M-L, ~2 PRs) + +| ID | Task | File | Notes | +|----|------|------|-------| +| R1 | Split `semantic_cache.rs` (1056→<500) | `cli/src/semantic_cache.rs` | Worst offender, 2x limit | +| R2 | Split `config.rs` (712→<500) | `cli/src/config.rs` | Split parsing vs defaults | +| R3 | Split `query.rs` (527→<500) | `cli/src/resolver/query.rs` | Extract to cascade.rs | +| R4 | Extract duplicate `build_budget()` to `cascade.rs` | `query.rs:506` + `url.rs:475` | 22-line exact duplicate | +| R5 | Extract shared gate-check logic to `cascade.rs` | `query.rs` + `url.rs` | Negative cache + CB checks | +| R6 | Remove dead `Profile::is_provider_allowed()` + `max_hops()` | `cli/src/types.rs:99-116` | Never called | +| R7 | Refactor `page.tsx` (496 lines) → extract components | `web/app/page.tsx` | Near limit | + +### Wave 6 — Tests & Coverage (Effort: M, ~2 
PRs) + +| ID | Task | File | Notes | +|----|------|------|-------| +| T1 | Unit tests for `circuit-breaker.ts` | `web/tests/circuit-breaker.test.ts` | 0 coverage | +| T2 | Unit tests for `errors.ts` | `web/tests/errors.test.ts` | 0 coverage | +| T3 | Unit tests for `quality.ts` | `web/tests/quality.test.ts` | 0 coverage | +| T4 | Unit tests for `keys.ts` | `web/tests/keys.test.ts` | 0 coverage | +| T5 | Unit tests for `log.ts` | `web/tests/log.test.ts` | 0 coverage | +| T6 | Unit tests for `results.ts` | `web/tests/results.test.ts` | 0 coverage | +| T7 | Inline tests for `query.rs` + `url.rs` | `cli/src/resolver/` | 0 coverage (vs 2 inline tests in mod.rs) | +| T8 | Add `evals.json` to 3 most-used skills | `.agents/skills/*/` | 11/13 missing | + +### Wave 7 — Web Middleware & Cross-Platform Parity (Effort: L, ~2 PRs) + +| ID | Task | File | Notes | +|----|------|------|-------| +| W1 | Create `web/middleware.ts` with rate limiting | New | AUDIT M2 — currently only lib util | +| W2 | Port `preflight_route()` / `detect_doc_platform()` to Rust | `cli/src/routing.rs` | Python has it, Rust doesn't | +| W3 | Port hedged/parallel provider execution to Rust | `cli/src/resolver/` | Currently sequential only | +| W4 | Align budget profile presets (Python vs Rust divergence) | `cli/src/config.rs` + `scripts/routing.py` | Different defaults per profile | + +## Postconditions + +1. CI config is clean, gitleaks triggers are scoped to `main` (per I2), coverage uploads correctly +2. Constants centralized in `scripts/constants.py`; no duplication +3. Shared state in `scripts/state.py`; no monkey-patching +4. All Rust source files under 500-line limit +5. Dead code removed (`NegativeCacheEntry`, `Profile` dead methods) +6. Thread-safety concerns fixed (CB TOCTOU, evict lock guard) +7. No silent exception handlers in production providers +8. `synthesis.py` uses shared session with SSRF protection +9. Web lib modules have basic unit test coverage +10. 
Rate-limiting middleware intercepts API requests at edge + +## Execution Order + +``` +Wave 2 (fast: CI config) → Wave 3 (prerequisite: constants/state) +→ Wave 4 (quality/safety) + Wave 5 (Rust splits) in parallel +→ Wave 6 (tests) + Wave 7 (middleware + parity) in parallel +``` + +## Risk Assessment + +| Risk | Mitigation | +|------|------------| +| Wave 3 `state.py` breaks test fixtures | Update conftest to import from state.py; run full suite | +| Wave 5 Rust splits introduce circular imports | Follow existing module pattern; keep public API unchanged | +| `semantic_cache.rs` at 1056 lines has complex split points | Audit module boundaries first; consider `{mod,store,query,eviction}.rs` | +| `config.rs` at 712 lines affects CLI startup | Split into `config/{mod,parsing,defaults}.rs` | +| `_maybe_evict` lock guard may cause nested lock | Use RLock or restructure to avoid nested acquisition | +| Budget profile divergence may be intentional per runtime | Document divergence rationale; don't force alignment without testing | diff --git a/plans/AUDIT.md b/plans/AUDIT.md index cba822d..5a5b7a6 100644 --- a/plans/AUDIT.md +++ b/plans/AUDIT.md @@ -1,4 +1,4 @@ -# Project Audit — 2026-05-05 +# Project Audit — 2026-05-13 > Single source of truth for project health. Supersedes all prior audit/bug/issue files in `plans/`. @@ -39,24 +39,26 @@ ### 2. 
Missing Implementations -| # | Gap | File / Location | Impact | -|---|---|---|---| -| M1 | No Next.js error boundary | `web/app/error.tsx` (does not exist) | Unhandled errors crash page | -| M2 | No rate-limiting middleware | `web/middleware.ts` (does not exist) | API abuse possible | -| M3 | No 404 page | `web/app/not-found.tsx` (does not exist) | Generic Next.js 404 | -| M4 | `validateUrl()` SSRF check not called | Defined in `web/lib/resolvers/index.ts`, unused in `web/app/api/resolve/route.ts` | SSRF vulnerability | -| M5 | No unit tests for web utilities | `web/lib/circuit-breaker.ts`, `errors.ts`, `quality.ts`, `keys.ts` | Regression risk | -| M6 | No direct unit tests for Rust resolver | `cli/src/resolver/query.rs` (480 lines), `url.rs` (444 lines) | Low coverage | -| M7 | Mobile/tablet Playwright not in CI | `ci-ui.yml` runs `--project=desktop` only | Mobile regressions undetected | -| M8 | 11 of 13 skills have no `evals.json` | `.agents/skills/*/` | Skill quality unmeasured | +| # | Gap | File / Location | Impact | Status | +|---|---|---|---|---| +| M1 | No Next.js error boundary | `web/app/error.tsx` | Unhandled errors crash page | ✅ RESOLVED (exists, 30 lines) | +| M2 | No rate-limiting middleware | `web/middleware.ts` (does not exist) | API abuse possible | ❌ OPEN | +| M3 | No 404 page | `web/app/not-found.tsx` | Generic Next.js 404 | ✅ RESOLVED (exists, 18 lines) | +| M4 | `validateUrl()` SSRF check not called | Defined in `web/lib/validation.ts` (195 lines), unused in `route.ts` | SSRF vulnerability | ✅ RESOLVED (called via `validateUrlForFetchAsync` in `url.ts`) | +| M5 | No unit tests for web utilities | `web/lib/circuit-breaker.ts`, `errors.ts`, `quality.ts`, `keys.ts` | Regression risk | ❌ OPEN | +| M6 | No direct unit tests for Rust resolver | `cli/src/resolver/query.rs` (527 lines), `url.rs` (496 lines) | Low coverage | ❌ OPEN | +| M7 | Mobile/tablet Playwright not in CI | `ci-ui.yml` runs `--project=desktop --project=mobile --project=tablet` (3 
projects) | Mobile regressions undetected | ✅ RESOLVED (CI already runs all 3 projects) | +| M8 | 2 of 13 skills have `evals.json` | `.agents/skills/*/` | Skill quality unmeasured | ❌ OPEN — 11 still missing | ### 3. Code Quality | # | File | Lines | Limit | Action | -|---|---|---|---|---| -| Q1 | `web/app/page.tsx` | 730 | 500 | **Split required** — extract components | -| Q2 | `cli/src/resolver/query.rs` | 480 | 500 | Monitor — close to limit | -| Q3 | `cli/src/resolver/url.rs` | 444 | 500 | OK | +|---|---|---|---|---| +| Q1 | `web/app/page.tsx` | 496 | 500 | **Near limit** — extract components soon | +| Q2 | `cli/src/resolver/query.rs` | 527 | 500 | **EXCEEDED** — split required | +| Q3 | `cli/src/resolver/url.rs` | 496 | 500 | Near limit — monitor | +| Q4 | `cli/src/semantic_cache.rs` | 1056 | 500 | **CRITICALLY EXCEEDED** — split required | +| Q5 | `cli/src/config.rs` | 712 | 500 | **EXCEEDED** — split required | ### 4. Cross-Platform Parity @@ -64,10 +66,12 @@ |---|---|---|---|---|---| | P1 | `exa_mcp_mistral` combo | ❌ | ❌ | ✅ | Port to Python + Rust | | P2 | Deep research parallel mode | Partial | `--synthesize` only | ✅ | Full parallel mode missing in CLIs | -| P3 | Budget profiles / presets | N/A | `--profile` flag exists, not wired | N/A | Wire Rust flag to presets | +| P3 | Budget profiles / presets | N/A | `--profile` flag **wired** in `main.rs:68-84` | N/A | ✅ WIRED — but `is_provider_allowed()` + `max_hops()` are dead code in `types.rs:99-116` | +| P8 | Duplicate `build_budget()` fn | N/A | Same fn in `query.rs:506` and `url.rs:475` | N/A | Extract to `cascade.rs` | | P4 | Preflight routing | `detect_doc_platform()` | Minimal `detectJsHeavy()` | Minimal | Port advanced routing to Rust/Web | | P5 | Hedged requests | ✅ | ❌ | ❌ | Port to Rust + Web | | P6 | Routing memory persistence | In-memory only | File persistence | N/A | Add file persistence to Python | +| P7 | Rate throttling (token bucket) | ✅ | ✅ | N/A | Done across CLIs; web remains 
un-throttled | ### 5. Infrastructure @@ -77,77 +81,159 @@ | I2 | `cli/ui/` no pnpm lock file in repo | CI uses pnpm but lock file not checked in | | I3 | Version number question | All at 0.3.1 — verify if should be 1.x | | I4 | DuckDuckGo CAPTCHA blocking | Externally blocked — deprioritized, monitoring | +| I5 | `cli/ui/` + `web/` pnpm lock files (extends I2; see N8) | Repo uses pnpm; lock file status needs verification | +| I6 | `markdownlint.toml` config not respected | `MD013 = false` set but rule still fires; pre-commit blocks valid docs-only commits (`markdownlint.toml`, `.githooks/pre-commit`) | + +### 6. Recently Merged Features (since last audit) + +| PR | Feature | Merged | +|----|---------|--------| +| #338 | Tiered provider TTL — all cache TTLs in config.toml | ✅ | +| #339 | Startup pre-warm for top-N domains | ✅ | +| #340 | Synthesis alignment with 2026 LLM-ready standards | ✅ | +| #341 | Quality confidence gate — skip paid on high free quality | ✅ | +| #342 | Probabilistic provider skip for low-win-rate/quota-gated | ✅ | +| #343 | Adaptive per-domain provider reordering | ✅ | +| #344-#351 | Dependabot + CI hardening (7 PRs) | ✅ | +| #353 | Semantic cache optimization + observability | ✅ | +| #354 | Transitive vulnerability fix + ecosystem sync | ✅ | +| #356 | Exa MCP monthly usage tracking in routing memory | ✅ | +| #357 | CLI Markdown parsing fix (code blocks, indentation) | ✅ | +| #358 | Per-provider token-bucket rate throttling | ✅ | +| #359-#361 | Template workflows, gitleaks SHA-pins, .gitattributes | ✅ | +| #364 | ADR-012 Wave 1: thread safety, SSRF, provider fixes | ✅ | +| #365 | GOAP Wave 2-7 plan + N5/N12 fixes + SSRF gaps | ✅ | + +### 7. 
Newly Discovered Issues (2026-05-13 Audit) + +| ID | Issue | File | Severity | +|----|-------|------|----------| +| N1 | `semantic_cache.rs` 1056 lines — **2x** the 500-line limit | `cli/src/semantic_cache.rs` | P0 | +| N2 | `config.rs` 712 lines — **exceeds** 500-line limit | `cli/src/config.rs` | P0 | +| N3 | `build_budget()` duplicated verbatim in 2 files | `query.rs:506` + `url.rs:475` | P1 | +| N4 | Dead `Profile::is_provider_allowed()` + `max_hops()` — never called | `cli/src/types.rs:99-116` | P2 | +| N5 | `CircuitBreakerRegistry.is_open()` — TOCTOU: state object used outside lock scope | `scripts/circuit_breaker.py:46-47` | ✅ RESOLVED — inlined under lock in PR #365 | +| N6 | `_maybe_evict()` not independently lock-protected | `scripts/semantic_cache.py:336` | P2 | +| N7 | 11/13 skills missing `evals.json` | `.agents/skills/*/` | P2 | +| N8 | No `pnpm-lock.yaml` anywhere in repo | `cli/ui/`, `web/` | P2 | +| N9 | `duckduckgo-search` vs `ddgs` package name mismatch | `requirements.txt:9` | P1 | +| N10 | `setup-hooks.sh` only validates symlinks, not quality gate | `scripts/setup-hooks.sh` | P2 | +| N11 | CI runs 3 Playwright projects; AGENTS.md says 1 | `ci-ui.yml:176` vs `AGENTS.md:55` | P2 | +| N12 | Raw `requests.post()` in synthesis — no SSRF, no retry, no shared session | `scripts/synthesis.py:165` | ✅ RESOLVED — switched to `get_session()` in PR #365 | +| N13 | SSRF gaps in `resolve_with_docling()` + `resolve_with_ocr()` — no `is_safe_url()` | `scripts/providers_impl.py:373-393` | ✅ RESOLVED — added `is_safe_url()` checks in PR #365 | --- +## Additional Learnings (post-2026-05-05) + +### Provider Rate Throttling +- **Token bucket design**: Clamp `capacity` to `max(1.0, ·)` to prevent infinite acquire loops +- **Avoid polling**: Calculate exact sleep duration instead of fixed-interval polling +- **Cascade safety**: `acquire_timeout()` so rate-limited providers fall back instead of blocking cascade +- **Config merge**: Target specific `rate_limit` fields 
rather than wholesale `ProviderConfig` replacement + +### Feature Implementation Patterns +- **Quality confidence gate**: Free-tier results evaluated against 0.70 threshold; if met, paid providers skipped +- **Probabilistic skip**: Providers with low win rate get skip probability proportional to fail ratio +- **Adaptive reordering**: Routing memory ranks providers by domain performance per-profile +- **Exa MCP tracking**: Monthly usage count stored in routing memory DB; resets on provider cooldown + +### PR #365 — TOCTOU, SSRF Gaps, Shared Session (2026-05-13) +- **TOCTOU in CircuitBreakerRegistry.is_open()**: Fix by inlining the state lookup + `is_open()` check under a single `self._lock` scope. This eliminates the window where `get_breaker()` releases the lock before the caller reads `breaker.is_open()`. +- **SSRF gaps in docling + ocr**: Two provider functions (`resolve_with_docling`, `resolve_with_ocr`) passed user URLs to `subprocess.run()` without `is_safe_url()` validation. Added the check consistent with jina/firecrawl/mistral_browser pattern. +- **Shared session for synthesis**: `synthesize_results()` used raw `requests.post()` bypassing connection pooling, retry, and SSRF validation. Switched to `get_session()` from `scripts.utils`. +- **Lazy logging fix**: Changed f-string logging in mistral_browser SSRF warning to `%s` format for consistency with DeepSource PYL-W1203 rules. +- **Monkey-patching in resolve.py (lines 85-91)** remains necessary until ADR-014 creates `scripts/state.py`. Tests depend on these overwrites for state synchronization. 
+ ## Priority Actions +### Previously Resolved (removed from active priority actions) + +| Old # | Action | Status | +|-------|--------|--------| +| Q2 (old) | Split `query.rs` into sub-modules | Still OPEN — moved to P0 #3 | +| 4 (old) | Add mobile + tablet Playwright to CI | ✅ RESOLVED — already runs 3 projects | +| 6 (old) | Wire Rust `--profile` to budget presets | ✅ RESOLVED — wired in `main.rs:68-84` | + ### P0 — Critical (do now) -| # | Action | File | -|---|---|---| -| 1 | Call `validateUrl()` before resolution | `web/app/api/resolve/route.ts` | -| 2 | Create error boundary | `web/app/error.tsx` | -| 3 | Split page component (730→<500 lines) | `web/app/page.tsx` | +| # | Action | File | Status | +|---|---|---|---| +| 1 | Call `validateUrl()` before resolution | `web/app/api/resolve/route.ts` | ✅ RESOLVED (called in url.ts) | +| 2 | Create error boundary | `web/app/error.tsx` | ✅ RESOLVED (exists) | +| 3 | Split `query.rs` (527 > 500 limit) | `cli/src/resolver/query.rs` | ❌ OPEN — EXCEEDED | +| 4 | Split page component (496, near limit) | `web/app/page.tsx` | ⚠️ Near limit — monitor | +| 5 | Split `semantic_cache.rs` (1056 > 500 limit) | `cli/src/semantic_cache.rs` | ❌ OPEN — CRITICALLY EXCEEDED | +| 6 | Split `config.rs` (712 > 500 limit) | `cli/src/config.rs` | ❌ OPEN — EXCEEDED | ### P1 — High (next sprint) -| # | Action | File / Area | -|---|---|---| -| 4 | Add mobile + tablet Playwright to CI | `ci-ui.yml` | -| 5 | Create rate-limiting middleware | `web/middleware.ts` | -| 6 | Wire Rust `--profile` to budget presets | `cli/src/resolver/mod.rs` | -| 7 | Unit tests for web utilities | `web/lib/circuit-breaker.ts`, `errors.ts`, `quality.ts`, `keys.ts` | +| # | Action | File / Area | Status | +|---|---|---|---| +| 7 | Create rate-limiting middleware | `web/middleware.ts` | | +| 8 | Unit tests for web utilities (6 files: circuit-breaker, errors, quality, keys, log, results) | `web/lib/*.ts` | | +| 9 | Fix `CircuitBreakerRegistry.is_open()` TOCTOU | 
`scripts/circuit_breaker.py:46-47` | ✅ RESOLVED (PR #365) | +| 10 | Fix 7 silent exception handlers in providers | `scripts/providers_impl.py` | | +| 11 | Replace raw `requests.post()` with shared session + SSRF in synthesis | `scripts/synthesis.py:165` | ✅ RESOLVED (PR #365) | +| 12 | Extract duplicate `build_budget()` to cascade.rs | `query.rs:506` + `url.rs:475` | | +| 13 | Fix CI/config issues (coverage upload, gitleaks, flake8, shellcheck severity) | `.github/workflows/`, `.pre-commit-config.yaml` | | ### P2 — Medium (roadmap) | # | Action | Area | |---|---|---| -| 8 | Port preflight routing to Rust + Web | Cross-platform | -| 9 | Add hedged requests to Rust | `cli/src/resolver/cascade.rs` | -| 10 | Add `evals.json` to more skills | `.agents/skills/*/` | -| 11 | Add Python 3.10 to CI or bump `requires-python` | `pyproject.toml`, `.github/workflows/ci.yml` | +| 15 | Port preflight routing to Rust + Web | Cross-platform | +| 16 | Add hedged requests to Rust | `cli/src/resolver/cascade.rs` | +| 17 | Add `evals.json` to more skills (11/13 missing) | `.agents/skills/*/` | +| 18 | Add Python 3.10 to CI or bump `requires-python` | `pyproject.toml`, `.github/workflows/ci.yml` | +| 19 | Add `evals.json` to 3 most-used skills (first tranche of #17) | `.agents/skills/*/` | +| 20 | Remove dead `Profile::is_provider_allowed()` + `max_hops()` | `cli/src/types.rs:99-116` | +| 21 | Remove dead `NegativeCacheEntry` dataclass | `scripts/cache_negative.py:11-16` | +| 22 | Extract magic numbers in quality.py to named constants | `scripts/quality.py` | ### P3 — Low (nice to have) | # | Action | Area | |---|---|---| -| 12 | Port `exa_mcp_mistral` combo to Python + Rust | Cross-platform | -| 13 | Full `--deep-research` parallel mode for CLIs | Python + Rust | -| 14 | File-based routing memory for Python | `scripts/` | +| 23 | Port `exa_mcp_mistral` combo to Python + Rust | Cross-platform | +| 24 | Full `--deep-research` parallel mode for CLIs | Python + Rust | +| 25 | File-based routing memory for Python 
| `scripts/` | +| 26 | Anchor `preflight_route` patterns with word boundaries | `scripts/routing.py:157-158` | +| 27 | Add lock guard to `_maybe_evict()` | `scripts/semantic_cache.py:336` | +| 28 | Fix `setup-hooks.sh` to install full quality gate | `scripts/setup-hooks.sh` | --- ## Plans Directory Cleanup -### Files to DELETE (superseded by this AUDIT.md) +### Files Already Deleted (completed — listed here for history) -| File | Reason | -|---|---| -| `BUGS_AND_ISSUES.md` | Bugs from 2026-03-27, most fixed | -| `IMPLEMENTATION_PLAN.md` | Phases 1–8 all complete | -| `SWARM_ANALYSIS_SUMMARY.md` | Initial planning artifact | -| `CODEBASE_AUDIT_2026_04_01.md` | Superseded by this file | -| `FEATURE_IMPROVEMENTS_2026_04_01.md` | Superseded by this file | -| `ADDITIONAL_IMPROVEMENTS_PLAN.md` | Duplicates other plans | -| `AI_AGENT_INSTRUCTIONS_ANALYSIS.md` | Recommendations largely implemented | -| `UI_ENHANCEMENTS_PLAN.md` | Most items implemented | -| `UI_UX_BEST_PRACTICES.md` | Captured in this audit | -| `WEB_AUDIT_RESULTS.md` | From 2026-03-27, mostly addressed | -| `PROVIDER_SCORE_OPTIMIZATION.md` | Old provider scores, fixed | - -### Files to KEEP (future roadmap) - -| File | Topic | -|---|---| -| `01-architecture-improvements.md` | PyO3 bindings, async mutex | -| `02-new-providers.md` | 7 new provider integrations | -| `03-performance-optimization.md` | 10 optimizations | -| `04-new-features.md` | Batch API, streaming, webhooks | -| `05-ui-ux-improvements.md` | Stepper, streaming UI | -| `06-testing-improvements.md` | Security tests, parity tests | -| `07-documentation-improvements.md` | Tutorials, ADRs | -| `08-deep-research.md` | Deep research framework | +All 11 files listed in the initial audit (`BUGS_AND_ISSUES.md`, `IMPLEMENTATION_PLAN.md`, +`SWARM_ANALYSIS_SUMMARY.md`, `CODEBASE_AUDIT_2026_04_01.md`, `FEATURE_IMPROVEMENTS_2026_04_01.md`, +`ADDITIONAL_IMPROVEMENTS_PLAN.md`, `AI_AGENT_INSTRUCTIONS_ANALYSIS.md`, `UI_ENHANCEMENTS_PLAN.md`, 
+`UI_UX_BEST_PRACTICES.md`, `WEB_AUDIT_RESULTS.md`, `PROVIDER_SCORE_OPTIMIZATION.md`) +were already deleted before this audit and confirmed not present. + +### Current Files + +| File | Topic | Status | +|---|---|---| +| `01-architecture-improvements.md` | PyO3, async mutex, provider trait | Condensed (~36 lines) | +| `02-new-providers.md` | 7 new provider integrations | Condensed (~28 lines) | +| `03-performance-optimization.md` | 10 optimizations | Condensed (~46 lines) | +| `04-new-features.md` | Batch API, streaming, webhooks | Condensed (~28 lines) | +| `05-ui-ux-improvements.md` | Stepper, streaming UI | Condensed (~35 lines) | +| `06-testing-improvements.md` | Security tests, parity tests | Condensed (~37 lines) | +| `07-documentation-improvements.md` | Tutorials, ADRs | Condensed (~39 lines) | +| `08-deep-research.md` | Deep research framework | Condensed (~31 lines) | +| `009-cross-runtime-analysis.md` | ADR: cross-runtime parity | New | +| `10-pr341-quality-gate-fixes.md` | PR #341 merge resolution | Standalone | +| `11-cache-prewarming.md` | ADR-011: cache pre-warming follow-up | Standalone | +| `012-correctness-and-safety-fixes.md` | ADR-012: thread safety, SSRF | New | +| `013-test-coverage-and-ci-reliability.md` | ADR-013: CI fixes, test gaps | New | +| `014-architecture-and-parity.md` | ADR-014: DRY consolidation | New | +| `15-GOAP-NEXT-PHASE.md` | Next implementation wave | Superseded by 16 | +| `16-GOAP-WAVE2-6.md` | Comprehensive 7-wave plan | Active | --- @@ -163,7 +249,7 @@ --- -*Last updated: 2026-05-13. ADR-012 Wave 1 complete. Next: Wave 2 CI/config fixes.* +*Last updated: 2026-05-13. ADR-012 Wave 1 ✅. ADR-013 Wave 1b ✅. Next: Waves 2-7. See [16-GOAP-WAVE2-6.md](16-GOAP-WAVE2-6.md).* ## Learnings (captured 2026-05-12) @@ -186,3 +272,12 @@ - **SSRF validation must be consistent**: Codacy review flagged that Mistral browser got SSRF check but Jina and Firecrawl didn't. Fixed: added `is_safe_url()` to all URL-fetching providers. 
- **Monkey-patching is a necessary evil**: `resolve.py` lines 85-91 wire shared instances to `_url_resolve`/`_query_resolve`. Until ADR-014 creates `scripts/state.py`, these overwrites must remain — tests depend on them for state synchronization. - **Test suite runs in ~60s**: The full non-live suite runs in ~60 seconds. The `pre-commit` hook timeout was caused by a deadlock, not slow tests. + +### GOAP Audit 2026-05-13 +- **`TOCTOU` in CircuitBreakerRegistry.is_open()**: The registry acquires `self._lock` in `get_breaker()` to retrieve the state object, but the caller's subsequent `.is_open()` on the returned state runs **outside** the lock. The state's `open_until` field can be mutated by another thread between retrieval and check. Fix: inline the comparison inside the locked method or return a snapshot. +- **Raw `requests.post()` bypasses shared session**: `synthesis.py:165` calls `requests.post()` directly instead of `get_session().post()`. This loses: retry logic (3 attempts), connection pooling, SSRF validation (`is_safe_url()`), and consistent User-Agent headers. The shared session in `utils.py` has `Retry(total=3, backoff_factor=1.0)`. +- **`semantic_cache.rs` (1056 lines) is the largest file in the project**: Nearly 2x the 500-line limit. It needs splitting into sub-modules — likely `{mod,store,query,eviction}.rs` — to stay maintainable. +- **`Profile` dead methods**: `is_provider_allowed()` and `max_hops()` at `types.rs:99-116` are never called in the cascade. The budget is managed via `ResolutionBudget` from `routing_profile_defaults()`, making these methods pure dead code. +- **Duplicate `build_budget()`**: The exact same 22-line function exists in both `query.rs:506-527` and `url.rs:475-496`. After extracting to `cascade.rs`, this alone saves 44 lines and eliminates drift risk. +- **Mobile/tablet Playwright already in CI**: `ci-ui.yml:176` runs `--project=desktop --project=mobile --project=tablet`. The AUDIT was incorrect — this was already resolved. 
We updated the status. +- **Rust `--profile` flag is wired**: `main.rs:68-84` parses the profile string and applies budget presets. The AUDIT was incorrect — this was already implemented. We updated the status. diff --git a/plans/GOAP_FOLLOWUP.md b/plans/GOAP_FOLLOWUP.md index 30214ae..011f41c 100644 --- a/plans/GOAP_FOLLOWUP.md +++ b/plans/GOAP_FOLLOWUP.md @@ -69,14 +69,30 @@ depend on them for state synchronization. ## Wave 4 — ADR-012 Remaining + Quality Fixes -| Task | Files | Effort | -|---|---|---| -| P3 Log provider exceptions (not silent return None) | `scripts/providers_impl.py` | S ✅ | -| P4 Replace requests.post with shared session | `scripts/synthesis.py` | M | -| P5 Fix preflight_route loose pattern matching | `scripts/routing.py` | M | -| P6 Remove unused NegativeCacheEntry | `scripts/cache_negative.py` | S | -| P7 Remove dead TIERED_TTL entry | `scripts/utils.py` | S | -| Q1-Q6 Quality scoring fixes | `scripts/quality.py` | M | +| Task | Files | Effort | Status | +|---|---|---|---| +| P3a SSRF warning logs added (Wave 1 Codacy) | `scripts/providers_impl.py` | S | ✅ | +| P3b Log all exceptions (9 providers still silent) | `scripts/providers_impl.py` | M | ❌ | +| P4 Replace requests.post with shared session | `scripts/synthesis.py` | M | ❌ | +| P5 Fix preflight_route loose pattern matching | `scripts/routing.py` | M | ❌ | +| P6 Remove unused NegativeCacheEntry (Python) | `scripts/cache_negative.py` | S | ❌ (dataclass dead code) | +| P7 TIERED_TTL → move to constants.py (NOT dead) | `scripts/utils.py` → `constants.py` | S | ❌ (active code, move in Wave 3) | +| Q1-Q6 Quality scoring fixes | `scripts/quality.py` | M | ❌ | + +## Feature PRs Merged Since Wave 1 (alongside ADR work) + +| PR | Feature | +|----|---------| +| #338 | Tiered provider TTL in config.toml | +| #339 | Cache pre-warming (Rust CLI) | +| #340 | Synthesis alignment with 2026 LLM-ready standards | +| #341 | Quality confidence gate | +| #342 | Probabilistic provider skip | +| #343 | 
Adaptive per-domain provider reordering | +| #353 | Semantic cache optimization + observability | +| #356 | Exa MCP monthly usage tracking | +| #358 | Per-provider token-bucket rate throttling | +| #354, #357 | Security fix + CLI Markdown parsing | ## Wave 5 — ADR-013 New Test Files @@ -110,10 +126,24 @@ depend on them for state synchronization. | Minor: Bare except in Mistral browser | `scripts/providers_impl.py` | Changed to `except Exception as e:` with logging | | HIGH: TOCTOU race in CircuitBreakerState.is_open | `scripts/circuit_breaker.py` | Capture `open_until` once at function entry | -## Execution Order +## Completed ADRs + +| ADR | Status | Notes | +|-----|--------|-------| +| ADR-009 | Referenced | Cross-runtime parity analysis documented | +| ADR-012 | Wave 1 ✅ PR #364 | Correctness & Safety — Codacy feedback incorporated | +| ADR-013 | Wave 1b ✅ | Test coverage & CI reliability — npm peer deps, libsql fix | +| ADR-014 | PENDING | Architecture & parity — prerequisite for Waves 4-6 | + +## Next Steps + +See **[15-GOAP-NEXT-PHASE.md](15-GOAP-NEXT-PHASE.md)** for the detailed +next-phase plan covering Waves 2-6 plus AUDIT P0/P1 items. + +## Execution Order (Updated) ``` -Wave 1 ✅ → Wave 2 (fast: CI config) → Wave 3 (prerequisite for all code work) -→ Wave 4 (logging + quality) + Wave 5 (tests) in parallel -→ Wave 6 (cascade consolidation, depends on Wave 3) +Wave 1 ✅ → Wave 2 (CI config) → Wave 3 (constants/state extraction) +→ Wave 4 (quality fixes) + AUDIT P0/P1 items in parallel +→ Wave 5 (tests) + Wave 6 (cascade consolidation) in parallel ``` diff --git a/plans/README.md b/plans/README.md index 0cfdf68..c870b1f 100644 --- a/plans/README.md +++ b/plans/README.md @@ -3,39 +3,52 @@ ## Current State → **[AUDIT.md](AUDIT.md)** — Project audit. Start here. +→ **[16-GOAP-WAVE2-6.md](16-GOAP-WAVE2-6.md)** — Comprehensive 7-wave plan (supersedes 15). +→ **[15-GOAP-NEXT-PHASE.md](15-GOAP-NEXT-PHASE.md)** — Previous plan (superseded by 16). 
## Active ADRs | # | ADR | Topic | Status | -|---|---|---|---| +|---|-----|-------|--------| | 009 | [Cross-Runtime](009-cross-runtime-analysis.md) | Parity gaps, config vs env | Referenced | -| 012 | [Correctness & Safety](012-correctness-and-safety-fixes.md) | Thread safety, SSRF, provider gaps | Wave 1 ✅ Merged (PR #364) | -| 013 | [Test Coverage & CI](013-test-coverage-and-ci-reliability.md) | Misleading tests, CI fixes | Pending | -| 014 | [Architecture & Parity](014-architecture-and-parity.md) | DRY consolidation, constants, dead code | Pending | +| 012 | [Correctness & Safety](012-correctness-and-safety-fixes.md) | Thread safety, SSRF, provider gaps | Wave 1 ✅ Wave 4 PENDING | +| 013 | [Test Coverage & CI](013-test-coverage-and-ci-reliability.md) | Misleading tests, CI fixes | Wave 1b ✅ Wave 2,5 PENDING | +| 014 | [Architecture & Parity](014-architecture-and-parity.md) | DRY consolidation, constants, dead code | Wave 3,6 PENDING | ## Implementation Waves | Wave | ADR | Focus | Status | -|---|---|---|---| -| 1 | ADR-012 T1-T6, S1-S3, P1-P2 | Thread safety, SSRF, provider reachability | ✅ **DONE** | -| 1b | ADR-013 I6-I8 | web/package.json version fixes | ✅ **DONE** | -| 2 | ADR-013 I1-I5 | CI fixes, pre-commit, gitleaks | Pending | -| 3 | ADR-014 A1-A8 | constants.py, state.py extraction | Pending | -| 4 | ADR-012 P3-P7, Q1-Q6 | Logging, quality, synthesis fixes | Pending | -| 5 | ADR-013 C1-C7 | New test files for uncovered paths | Pending | -| 6 | ADR-014 D1-D7 | Cascade consolidation | Pending | - -## Roadmap Plans - -| # | Plan | Topic | -|---|---|---| -| 01 | [Architecture](01-architecture-improvements.md) | PyO3 bindings, async mutex | -| 02 | [Providers](02-new-providers.md) | New provider integrations | -| 03 | [Performance](03-performance-optimization.md) | Latency, caching, HTTP/2 | -| 04 | [Features](04-new-features.md) | Batch API, streaming, webhooks | -| 05 | [UI/UX](05-ui-ux-improvements.md) | Stepper, streaming UI | -| 06 | 
[Testing](06-testing-improvements.md) | Security, parity, benchmarks | -| 07 | [Documentation](07-documentation-improvements.md) | Tutorials, ADRs | -| 08 | [Deep Research](08-deep-research.md) | Multi-step research framework | -| 10 | [PR #341 Fixes](10-pr341-quality-gate-fixes.md) | Quality gate merge, feedback fixes | -| 11 | [Cache Pre-warming](11-cache-prewarming.md) | Follow-up PR from scope creep extraction | +|------|-----|-------|--------| +| 1 | ADR-012 T1-T6, S1-S3, P1-P2 | Thread safety, SSRF, provider reachability | ✅ **DONE** (PR #364) | +| 1b | ADR-013 I6-I8 | web/package.json version fixes, npm peer deps, libsql | ✅ **DONE** | +| 2 | ADR-013 I1-I5, K1-K7 + N9/N11 | CI fixes, pre-commit, gitleaks, classifiers, package names | PENDING | +| 3 | ADR-014 A1-A8 | constants.py, state.py extraction | PENDING | +| 4 | ADR-012 P3b,P4-P7, Q1-Q6 + N5/N6/N12/N13 | Logging, quality, synthesis fixes, TOCTOU, lock guards, SSRF gaps | PARTIAL (P4,N5,N12,N13,N13b ✅ DONE) | +| 5 | R1-R7 | Rust file splits & dedup (semantic_cache, config, query) | PENDING | +| 6 | T1-T8 | Test coverage for web lib + Rust resolver + skills evals | PENDING | +| 7 | W1-W4 | Web middleware + cross-platform parity (preflight, hedging) | PENDING | + +## Roadmap Plans (Condensed Status) + +| # | Plan | Topic | Status | +|---|------|-------|--------| +| 01 | [Architecture](01-architecture-improvements.md) | PyO3, async mutex, provider trait | All phases PENDING | +| 02 | [Providers](02-new-providers.md) | 7 new integrations | All PENDING | +| 03 | [Performance](03-performance-optimization.md) | Latency, caching, HTTP/2 | 1/10 done (compaction) | +| 04 | [Features](04-new-features.md) | Batch API, streaming, webhooks | All PENDING | +| 05 | [UI/UX](05-ui-ux-improvements.md) | Stepper, streaming, accessibility | 4 items done | +| 06 | [Testing](06-testing-improvements.md) | Security, parity, benchmarks | CI fixes done | +| 07 | [Documentation](07-documentation-improvements.md) | Tutorials, ADRs 
| 4 doc improvements done | +| 08 | [Deep Research](08-deep-research.md) | Multi-step research framework | All PENDING | +| 10 | [PR #341 Fixes](10-pr341-quality-gate-fixes.md) | Quality gate merge, scope-creep extraction | Merged, prewarm extracted | +| 11 | [Cache Pre-warming](11-cache-prewarming.md) | CLI + web prewarm (Scope creep extraction) | PENDING | +| 15 | [Next Phase](15-GOAP-NEXT-PHASE.md) | Wave 2-6 + AUDIT P0/P1 items | Superseded (see 16) | +| 16 | [GOAP Waves 2-6](16-GOAP-WAVE2-6.md) | CI, constants, quality, splits, tests, parity | Active plan | + +## Executed Plans (Completed) + +| File | Topic | +|------|-------| +| [CI_FIX.md](CI_FIX.md) | npm peer deps + libsql fix | +| [ESLINT_CONFIG_UPDATE.md](ESLINT_CONFIG_UPDATE.md) | ESLint 2026 config | +| [GOAP_FOLLOWUP.md](GOAP_FOLLOWUP.md) | ADR-012/013/014 wave tracking | diff --git a/scripts/circuit_breaker.py b/scripts/circuit_breaker.py index 1f7ff6c..e3c7c6c 100644 --- a/scripts/circuit_breaker.py +++ b/scripts/circuit_breaker.py @@ -44,7 +44,12 @@ def get_breaker(self, provider: str) -> CircuitBreakerState: return self.breakers[provider] def is_open(self, provider: str) -> bool: - return self.get_breaker(provider).is_open() + with self._lock: + breaker = self.breakers.get(provider) + if breaker is None: + self.breakers[provider] = CircuitBreakerState() + breaker = self.breakers[provider] + return breaker.is_open() def record_failure( self, provider: str, threshold: int | None = None, cooldown_seconds: int = 300 diff --git a/scripts/providers_impl.py b/scripts/providers_impl.py index 254a28b..b3dc321 100644 --- a/scripts/providers_impl.py +++ b/scripts/providers_impl.py @@ -274,7 +274,7 @@ def resolve_with_firecrawl(url: str, max_chars: int = MAX_CHARS) -> ResolvedResu def resolve_with_mistral_browser(url: str, max_chars: int = MAX_CHARS) -> ResolvedResult | None: if not is_safe_url(url): - logger.warning(f"SSRF: blocked URL {url}") + logger.warning("SSRF blocked: %s", url) return None cached = 
_get_from_cache(url, "mistral_browser") if cached: @@ -371,6 +371,9 @@ def resolve_with_mistral_websearch(query: str, max_chars: int = MAX_CHARS) -> Re def resolve_with_docling(url: str, max_chars: int) -> ResolvedResult | None: + if not is_safe_url(url): + logger.warning("SSRF blocked: %s", url) + return None try: res = subprocess.run( ["docling", "--format", "markdown", url], capture_output=True, text=True, timeout=60 @@ -383,6 +386,9 @@ def resolve_with_docling(url: str, max_chars: int) -> ResolvedResult | None: def resolve_with_ocr(url: str, max_chars: int) -> ResolvedResult | None: + if not is_safe_url(url): + logger.warning("SSRF blocked: %s", url) + return None try: res = subprocess.run( ["tesseract", url, "stdout"], capture_output=True, text=True, timeout=30 diff --git a/scripts/synthesis.py b/scripts/synthesis.py index 2af3572..d5fbaa9 100644 --- a/scripts/synthesis.py +++ b/scripts/synthesis.py @@ -6,9 +6,8 @@ import logging from difflib import SequenceMatcher -import requests - from scripts.models import ResolvedResult +from scripts.utils import get_session logger = logging.getLogger(__name__) @@ -162,7 +161,8 @@ def synthesize_results(query: str, results: list[ResolvedResult], api_key: str, user_prompt = f"Query: '{query}'\n\nContext:\n{context}" try: - resp = requests.post( + session = get_session() + resp = session.post( "https://api.mistral.ai/v1/chat/completions", headers={ "Authorization": f"Bearer {api_key}", @@ -182,5 +182,5 @@ def synthesize_results(query: str, results: list[ResolvedResult], api_key: str, content = resp.json()["choices"][0]["message"]["content"] return str(content) except Exception as e: - logger.error(f"LLM Synthesis failed: {e}") + logger.error("LLM Synthesis failed: %s", e) return deterministic_merge(results)