Skip to content

Commit 3ff8e33

Browse files
committed
fix(context-dev): wire includeFrames, split crawl/extract maxPages, derive screenshot MIME
Addresses review feedback:
- includeFrames is now a block subblock + param for scrape_markdown/scrape_html
- crawl and extract use separate Max Pages fields (crawl 1-500, extract 1-50) so a crawl value can no longer be forwarded to extract beyond its limit
- screenshot file MIME type and extension are derived from the returned URL instead of being hardcoded to PNG
1 parent c8ad62a commit 3ff8e33

2 files changed

Lines changed: 55 additions & 6 deletions

File tree

apps/sim/blocks/blocks/context_dev.ts

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,13 @@ Do not include any explanations, markdown formatting, or other text outside the
238238
type: 'switch',
239239
condition: { field: 'operation', value: ['scrape_markdown', 'crawl'] },
240240
},
241+
{
242+
id: 'includeFrames',
243+
title: 'Include Frames',
244+
type: 'switch',
245+
mode: 'advanced',
246+
condition: { field: 'operation', value: ['scrape_markdown', 'scrape_html'] },
247+
},
241248
{
242249
id: 'fullScreenshot',
243250
title: 'Full Page Screenshot',
@@ -328,9 +335,17 @@ Do not include any explanations, markdown formatting, or other text outside the
328335
id: 'maxPages',
329336
title: 'Max Pages',
330337
type: 'short-input',
331-
placeholder: 'crawl: 100, extract: 5',
338+
placeholder: '100',
332339
mode: 'advanced',
333-
condition: { field: 'operation', value: ['crawl', 'extract'] },
340+
condition: { field: 'operation', value: 'crawl' },
341+
},
342+
{
343+
id: 'extractMaxPages',
344+
title: 'Max Pages',
345+
type: 'short-input',
346+
placeholder: '5',
347+
mode: 'advanced',
348+
condition: { field: 'operation', value: 'extract' },
334349
},
335350
{
336351
id: 'maxDepth',
@@ -560,13 +575,15 @@ Do not include any explanations, markdown formatting, or other text outside the
560575
setBool('useMainContentOnly')
561576
setBool('includeLinks')
562577
setBool('includeImages')
578+
setBool('includeFrames')
563579
setNumber('maxAgeMs')
564580
setNumber('waitForMs')
565581
setNumber('timeoutMS')
566582
break
567583
case 'scrape_html':
568584
setString('url')
569585
setBool('useMainContentOnly')
586+
setBool('includeFrames')
570587
setNumber('maxAgeMs')
571588
setNumber('waitForMs')
572589
setNumber('timeoutMS')
@@ -638,7 +655,7 @@ Do not include any explanations, markdown formatting, or other text outside the
638655
setString('instructions')
639656
setBool('factCheck')
640657
setBool('followSubdomains')
641-
setNumber('maxPages')
658+
setNumber('extractMaxPages', 'maxPages')
642659
setNumber('maxDepth')
643660
setNumber('maxAgeMs')
644661
setNumber('stopAfterMs')
@@ -755,6 +772,7 @@ Do not include any explanations, markdown formatting, or other text outside the
755772
useMainContentOnly: { type: 'boolean', description: 'Return only main content' },
756773
includeLinks: { type: 'boolean', description: 'Preserve hyperlinks' },
757774
includeImages: { type: 'boolean', description: 'Include image references' },
775+
includeFrames: { type: 'boolean', description: 'Render iframe contents inline' },
758776
fullScreenshot: { type: 'boolean', description: 'Capture the full page' },
759777
handleCookiePopup: { type: 'boolean', description: 'Dismiss cookie banners' },
760778
markdownEnabled: { type: 'boolean', description: 'Scrape search results to markdown' },
@@ -766,7 +784,11 @@ Do not include any explanations, markdown formatting, or other text outside the
766784
queryFanout: { type: 'boolean', description: 'Expand query into variants' },
767785
factCheck: { type: 'boolean', description: 'Ground extracted values in page facts' },
768786
followSubdomains: { type: 'boolean', description: 'Follow subdomain links' },
769-
maxPages: { type: 'number', description: 'Maximum pages to process' },
787+
maxPages: { type: 'number', description: 'Maximum pages to crawl (1-500)' },
788+
extractMaxPages: {
789+
type: 'number',
790+
description: 'Maximum pages to analyze for extraction (1-50)',
791+
},
770792
maxDepth: { type: 'number', description: 'Maximum link depth' },
771793
maxProducts: { type: 'number', description: 'Maximum products to extract' },
772794
urlRegex: { type: 'string', description: 'Regex to filter URLs' },

apps/sim/tools/context_dev/screenshot.ts

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,32 @@ import {
1212
} from '@/tools/context_dev/utils'
1313
import type { ToolConfig, ToolFileData } from '@/tools/types'
1414

15+
/**
 * Maps a lowercase image file extension to its MIME type.
 *
 * This is a plain object, so it must only be queried with an own-property
 * check: inherited keys such as `constructor` or `__proto__` are not entries.
 */
const IMAGE_MIME_BY_EXTENSION: Record<string, string> = {
  png: 'image/png',
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  webp: 'image/webp',
  gif: 'image/gif',
  avif: 'image/avif',
}

/**
 * Derives the file extension and MIME type for a stored screenshot from its URL,
 * falling back to PNG when the URL has no recognizable image extension.
 *
 * @param url - Screenshot URL; only its path is inspected, so query strings and
 *   fragments are ignored.
 * @returns The lowercase extension (without a leading dot) and its MIME type.
 */
function screenshotFileMeta(url: string): { extension: string; mimeType: string } {
  try {
    // Text after the last '.' in the path; when the path has no dot this is the
    // whole path, which matches no entry and falls through to the default.
    const ext = new URL(url).pathname.split('.').pop()?.toLowerCase() ?? ''
    // Own-property check so a path ending in `.constructor` or `.__proto__`
    // cannot resolve to an inherited Object.prototype member.
    const mimeType = Object.prototype.hasOwnProperty.call(IMAGE_MIME_BY_EXTENSION, ext)
      ? IMAGE_MIME_BY_EXTENSION[ext]
      : undefined
    if (mimeType) {
      return { extension: ext, mimeType }
    }
  } catch {
    // Unparseable URL (including the empty string) — fall back to the default below.
  }
  return { extension: 'png', mimeType: 'image/png' }
}
40+
1541
export const contextDevScreenshotTool: ToolConfig<
1642
ContextDevScreenshotParams,
1743
ContextDevScreenshotResponse
@@ -100,10 +126,11 @@ export const contextDevScreenshotTool: ToolConfig<
100126
const screenshotUrl: string = data.screenshot ?? ''
101127
const domain: string | null = data.domain ?? null
102128

129+
const { extension, mimeType } = screenshotFileMeta(screenshotUrl)
103130
const file: ToolFileData | undefined = screenshotUrl
104131
? {
105-
name: `${domain ?? 'screenshot'}.png`,
106-
mimeType: 'image/png',
132+
name: `${domain ?? 'screenshot'}.${extension}`,
133+
mimeType,
107134
url: screenshotUrl,
108135
}
109136
: undefined

0 commit comments

Comments
 (0)