From 01b2b41ff827fc25951e377032694fcac02eca78 Mon Sep 17 00:00:00 2001 From: Rayan Salhab Date: Sun, 29 Mar 2026 03:23:14 +0000 Subject: [PATCH] fix(cli): use custom polling for --wait without --progress flag Fixes #3210 The --wait flag without --progress was using app.crawl(), which relies on the SDK's built-in polling that doesn't work with self-hosted instances. This change makes both --wait and --wait --progress use the same custom polling loop that works correctly with both cloud and self-hosted APIs. Changes: - Always use custom polling loop for --wait mode - Only show progress output when --progress flag is specified - Fixes indefinite hang on self-hosted instances --- src/commands/crawl.ts | 71 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/src/commands/crawl.ts b/src/commands/crawl.ts index ab00bea..a465baf 100644 --- a/src/commands/crawl.ts +++ b/src/commands/crawl.ts @@ -108,58 +108,55 @@ export async function executeCrawl( crawlOptions.timeout = timeout * 1000; // Convert to milliseconds } - // Show progress if requested - use custom polling for better UX - if (options.progress) { - // Start crawl first - const response = await app.startCrawl(urlOrJobId, crawlOptions); - const jobId = response.id; + // Use custom polling loop for all wait modes (works with both cloud and self-hosted) + // Start crawl first + const response = await app.startCrawl(urlOrJobId, crawlOptions); + const jobId = response.id; - process.stderr.write(`Crawling ${urlOrJobId}...\n`); - process.stderr.write(`Job ID: ${jobId}\n`); + process.stderr.write(`Crawling ${urlOrJobId}...\n`); + process.stderr.write(`Job ID: ${jobId}\n`); - // Poll for status with progress updates - const pollMs = crawlOptions.pollInterval || 5000; - const startTime = Date.now(); - const timeoutMs = timeout ? 
timeout * 1000 : undefined; + // Poll for status with progress updates + const pollMs = crawlOptions.pollInterval || 5000; + const startTime = Date.now(); + const timeoutMs = timeout ? timeout * 1000 : undefined; - while (true) { - await new Promise((resolve) => setTimeout(resolve, pollMs)); + while (true) { + await new Promise((resolve) => setTimeout(resolve, pollMs)); - const status = await app.getCrawlStatus(jobId); + const status = await app.getCrawlStatus(jobId); - // Show progress + // Show progress if requested + if (options.progress) { process.stderr.write( `\rProgress: ${status.completed}/${status.total} pages (${status.status})` ); + } - if ( - status.status === 'completed' || - status.status === 'failed' || - status.status === 'cancelled' - ) { + if ( + status.status === 'completed' || + status.status === 'failed' || + status.status === 'cancelled' + ) { + if (options.progress) { process.stderr.write('\n'); - return { - success: true, - data: status, - }; } + return { + success: true, + data: status, + }; + } - // Check timeout - if (timeoutMs && Date.now() - startTime > timeoutMs) { + // Check timeout + if (timeoutMs && Date.now() - startTime > timeoutMs) { + if (options.progress) { process.stderr.write('\n'); - return { - success: false, - error: `Timeout after ${timeout} seconds. Crawl still in progress.`, - }; } + return { + success: false, + error: `Timeout after ${timeout} seconds. Crawl still in progress.`, + }; } - } else { - // Use SDK's built-in polling (no progress display) - const crawlJob = await app.crawl(urlOrJobId, crawlOptions); - return { - success: true, - data: crawlJob, - }; } }