diff --git a/apps/docs/components/icons.tsx b/apps/docs/components/icons.tsx index 8ea1529b1e8..dc3c91c5bef 100644 --- a/apps/docs/components/icons.tsx +++ b/apps/docs/components/icons.tsx @@ -2053,6 +2053,17 @@ export function ConfluenceIcon(props: SVGProps) { ) } +export function ContextDevIcon(props: SVGProps) { + return ( + + + + ) +} + export function ConvexIcon(props: SVGProps) { return ( = { codepipeline: CodePipelineIcon, confluence: ConfluenceIcon, confluence_v2: ConfluenceIcon, + context_dev: ContextDevIcon, convex: ConvexIcon, crowdstrike: CrowdStrikeIcon, cursor: CursorIcon, diff --git a/apps/docs/content/docs/en/integrations/context_dev.mdx b/apps/docs/content/docs/en/integrations/context_dev.mdx new file mode 100644 index 00000000000..1da78894369 --- /dev/null +++ b/apps/docs/content/docs/en/integrations/context_dev.mdx @@ -0,0 +1,678 @@ +--- +title: Context.dev +description: Scrape, crawl, search, extract, and enrich web and brand data +--- + +import { BlockInfoCard } from "@/components/ui/block-info-card" + + + +## Usage Instructions + +Integrate Context.dev into the workflow. Scrape pages to markdown or HTML, capture screenshots, list images, crawl entire sites, map sitemaps, search the web, extract structured data and products, pull design systems, classify industries, and retrieve brand assets by domain, name, email, ticker, or transaction — all from one API. + + + +## Actions + +### `context_dev_scrape_markdown` + +Scrape any URL and return clean, LLM-ready markdown content. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `url` | string | Yes | The full URL to scrape \(must include http:// or https://\) | +| `useMainContentOnly` | boolean | No | Return only main content, excluding headers, footers, and navigation | +| `includeLinks` | boolean | No | Preserve hyperlinks in the markdown output \(default: true\) | +| `includeImages` | boolean | No | Include image references in the markdown output \(default: false\) | +| `includeFrames` | boolean | No | Render iframe contents inline \(default: false\) | +| `maxAgeMs` | number | No | Cache duration in milliseconds \(0-2592000000, default: 86400000\) | +| `waitForMs` | number | No | Browser wait time after page load in milliseconds \(0-30000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `markdown` | string | Page content as clean markdown | +| `url` | string | The scraped URL | + +### `context_dev_scrape_html` + +Scrape any URL and return the raw HTML content of the page. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `url` | string | Yes | The full URL to scrape \(must include http:// or https://\) | +| `useMainContentOnly` | boolean | No | Return only main content, excluding headers, footers, and navigation | +| `includeFrames` | boolean | No | Render iframe contents inline into the returned HTML \(default: false\) | +| `maxAgeMs` | number | No | Cache duration in milliseconds \(0-2592000000, default: 86400000\) | +| `waitForMs` | number | No | Browser wait time after page load in milliseconds \(0-30000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `html` | string | Raw HTML content of the page | +| `url` | string | The scraped URL | +| `type` | string | Detected content type \(html, xml, json, text, csv, markdown, svg, pdf\) | + +### `context_dev_scrape_images` + +Discover every image asset on a page, with optional dimension and type enrichment. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `url` | string | Yes | The full URL to scrape images from \(must include http:// or https://\) | +| `maxAgeMs` | number | No | Cache duration in milliseconds \(0-2592000000, default: 86400000\) | +| `waitForMs` | number | No | Browser wait time after page load in milliseconds \(0-30000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `enrichResolution` | boolean | No | Measure image dimensions \(enables 5-credit enrichment\) | +| `enrichHostedUrl` | boolean | No | Host images on a CDN and return their URL and MIME type \(enables enrichment\) | +| `enrichClassification` | boolean | No | Classify each image by visual asset type \(enables enrichment\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `success` | boolean | Whether the scrape succeeded | +| `images` | array | Discovered image assets with source, element, type, and optional enrichment | +| ↳ `src` | string | Image source URL or data | +| ↳ `element` | string | Source element \(img, svg, link, source, video, css, object, meta, background\) | +| ↳ `type` | string | Image representation \(url, html, base64\) | +| ↳ `alt` | string | Alt text | +| ↳ `enrichment` | json | Optional enrichment \(width, height, mimetype, url, type\) when requested | +| `url` | string | The scraped URL | + +### `context_dev_screenshot` + +Capture a screenshot of any web page and store it as a downloadable image file. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `url` | string | Yes | The full URL to capture \(must include http:// or https://\) | +| `fullScreenshot` | boolean | No | Capture the full scrollable page instead of just the viewport \(default: false\) | +| `handleCookiePopup` | boolean | No | Attempt to dismiss cookie banners before capturing \(default: false\) | +| `viewportWidth` | number | No | Viewport width in pixels \(240-7680, default: 1920\) | +| `viewportHeight` | number | No | Viewport height in pixels \(240-4320, default: 1080\) | +| `maxAgeMs` | number | No | Cache duration in milliseconds \(0-2592000000, default: 86400000\) | +| `waitForMs` | number | No | Post-load delay before capturing in milliseconds \(0-30000, default: 3000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `file` | file | Stored screenshot image file | +| `screenshotUrl` | string | Public URL of the captured screenshot | +| `screenshotType` | string | Screenshot type \(viewport or fullPage\) | +| `domain` | string | Domain that was captured | +| `width` | number | Screenshot width in pixels | +| `height` | number | Screenshot height in pixels | + +### `context_dev_crawl` + +Crawl an entire website and return each discovered page as clean markdown. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `url` | string | Yes | The starting URL to crawl \(must include http:// or https://\) | +| `maxPages` | number | No | Maximum number of pages to crawl \(1-500, default: 100\) | +| `maxDepth` | number | No | Maximum link depth from the starting URL \(0 = start page only\) | +| `urlRegex` | string | No | Regex pattern to filter which URLs are crawled | +| `includeLinks` | boolean | No | Preserve hyperlinks in the markdown output \(default: true\) | +| `includeImages` | boolean | No | Include image references in the markdown output \(default: false\) | +| `useMainContentOnly` | boolean | No | Strip headers, footers, and sidebars from each page \(default: false\) | +| `followSubdomains` | boolean | No | Follow links to subdomains of the starting domain \(default: false\) | +| `maxAgeMs` | number | No | Cache duration in milliseconds \(0-2592000000, default: 86400000\) | +| `waitForMs` | number | No | Browser wait time after page load in milliseconds \(0-30000\) | +| `stopAfterMs` | number | No | Soft crawl time budget in milliseconds \(10000-110000, default: 80000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `results` | array | Crawled pages with markdown content and per-page metadata | +| ↳ `markdown` | string | Page content as markdown | +| ↳ `metadata` | json | Page metadata \(url, title, crawlDepth, statusCode\) | +| `metadata` | object | Crawl summary \(numUrls, maxCrawlDepth, numSucceeded, numFailed, numSkipped\) | + +### `context_dev_map` + +Build a sitemap of a domain and return every discovered page URL. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `domain` | string | Yes | The domain to build a sitemap for \(e.g., "example.com"\) | +| `maxLinks` | number | No | Maximum number of URLs to return \(1-100000, default: 10000\) | +| `urlRegex` | string | No | RE2-compatible regex to filter URLs \(max 256 chars\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `domain` | string | The domain that was mapped | +| `urls` | array | All page URLs discovered from the sitemap | +| `meta` | object | Sitemap discovery stats \(sitemapsDiscovered, sitemapsFetched, errors\) | + +### `context_dev_search` + +Search the web with natural language and optionally scrape results to markdown. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `query` | string | Yes | The natural language search query \(1-500 characters\) | +| `includeDomains` | array | No | Only return results from these domains | +| `excludeDomains` | array | No | Exclude results from these domains | +| `freshness` | string | No | Recency filter \(last_24_hours, last_week, last_month, last_year\) | +| `queryFanout` | boolean | No | Expand the query into parallel variants for broader coverage | +| `markdownEnabled` | boolean | No | Scrape each result page to markdown \(default: false\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `results` | array | Search results with url, title, description, relevance, and optional markdown | +| ↳ `url` | string | Result page URL | +| ↳ `title` | string | Result page title | +| ↳ `description` | string | 
Result snippet/description | +| ↳ `relevance` | string | Relevance rating \(high, medium, low\) | +| ↳ `markdown` | json | Scraped markdown for the result \(when markdown scraping is enabled\) | +| `query` | string | The query that was searched | + +### `context_dev_extract` + +Crawl a website and extract structured data matching a provided JSON schema. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `url` | string | Yes | The starting website URL \(must include http:// or https://\) | +| `schema` | json | Yes | JSON Schema describing the structure of the data to extract | +| `instructions` | string | No | Optional extraction guidance for link prioritization \(max 2000 chars\) | +| `factCheck` | boolean | No | Require extracted values to be grounded in page facts \(default: false\) | +| `followSubdomains` | boolean | No | Follow links on subdomains of the starting domain \(default: false\) | +| `maxPages` | number | No | Maximum number of pages to analyze \(1-50, default: 5\) | +| `maxDepth` | number | No | Maximum link depth from the starting URL | +| `maxAgeMs` | number | No | Cache duration in milliseconds \(0-2592000000, default: 604800000\) | +| `stopAfterMs` | number | No | Soft crawl time budget in milliseconds \(10000-110000, default: 80000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Extraction status | +| `url` | string | The starting URL that was crawled | +| `urlsAnalyzed` | array | URLs that were analyzed during extraction | +| `data` | json | Structured data matching the requested schema | +| `metadata` | object | Crawl summary \(numUrls, maxCrawlDepth, numSucceeded, numFailed, numSkipped\) | + +### `context_dev_extract_product` + +Detect and extract structured product details from a 
single product page URL. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `url` | string | Yes | The product page URL \(must include http:// or https://\) | +| `maxAgeMs` | number | No | Cache duration in milliseconds \(0-2592000000, default: 604800000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `isProductPage` | boolean | Whether the URL is a product page | +| `platform` | string | Detected platform \(amazon, tiktok_shop, etsy, generic\) | +| `product` | object | Extracted product details | +| ↳ `name` | string | Product name | +| ↳ `description` | string | Product description | +| ↳ `price` | number | Product price | +| ↳ `currency` | string | Price currency | +| ↳ `billing_frequency` | string | Billing frequency \(monthly, yearly, one_time, usage_based\) | +| ↳ `pricing_model` | string | Pricing model \(per_seat, flat, tiered, freemium, custom\) | +| ↳ `url` | string | Product URL | +| ↳ `category` | string | Product category | +| ↳ `features` | json | Product features | +| ↳ `target_audience` | json | Target audience | +| ↳ `tags` | json | Product tags | +| ↳ `image_url` | string | Primary product image URL | +| ↳ `images` | json | Product image URLs | +| ↳ `sku` | string | Product SKU | + +### `context_dev_extract_products` + +Extract the product catalog from a brand's domain. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `domain` | string | Yes | The domain to extract products from \(e.g., "example.com"\) | +| `maxProducts` | number | No | Maximum number of products to extract \(1-12\) | +| `maxAgeMs` | number | No | Cache duration in milliseconds \(0-2592000000, default: 604800000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds 
\(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `products` | array | Extracted products with pricing, features, and metadata | +| ↳ `name` | string | Product name | +| ↳ `description` | string | Product description | +| ↳ `price` | number | Product price | +| ↳ `currency` | string | Price currency | +| ↳ `billing_frequency` | string | Billing frequency \(monthly, yearly, one_time, usage_based\) | +| ↳ `pricing_model` | string | Pricing model \(per_seat, flat, tiered, freemium, custom\) | +| ↳ `url` | string | Product URL | +| ↳ `category` | string | Product category | +| ↳ `features` | json | Product features | +| ↳ `target_audience` | json | Target audience | +| ↳ `tags` | json | Product tags | +| ↳ `image_url` | string | Primary product image URL | +| ↳ `images` | json | Product image URLs | +| ↳ `sku` | string | Product SKU | + +### `context_dev_scrape_fonts` + +Extract the font families, usage stats, and font files used by a domain. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `domain` | string | Yes | The domain to extract fonts from \(e.g., "example.com"\) | +| `maxAgeMs` | number | No | Cache max age in milliseconds \(86400000-31536000000, default: 7776000000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Extraction status | +| `domain` | string | The domain that was analyzed | +| `fonts` | array | Fonts with usage statistics and fallbacks | +| ↳ `font` | string | Font family name | +| ↳ `uses` | json | Where the font is used | +| ↳ `fallbacks` | json | Fallback font families | +| ↳ `num_elements` | number | Number of elements using the font | +| ↳ `num_words` | number | Number of words rendered in the font | +| ↳ `percent_words` | number | Percent of words using the font | +| ↳ `percent_elements` | number | Percent of elements using the font | +| `fontLinks` | json | Font family download links keyed by font name \(type, files, category\) | + +### `context_dev_scrape_styleguide` + +Extract a domain's design system: colors, typography, spacing, shadows, and components. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `domain` | string | Yes | The domain to extract the styleguide from \(e.g., "example.com"\) | +| `maxAgeMs` | number | No | Cache max age in milliseconds \(86400000-31536000000, default: 7776000000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Extraction status | +| `domain` | string | The domain that was analyzed | +| `styleguide` | json | Design system: mode, colors, typography, elementSpacing, shadows, fontLinks, 
components | + +### `context_dev_classify_naics` + +Classify a brand into NAICS industry codes from its domain or company name. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `input` | string | Yes | Brand domain or company name to classify \(e.g., "stripe.com" or "Stripe"\) | +| `minResults` | number | No | Minimum number of codes to return \(1-10, default: 1\) | +| `maxResults` | number | No | Maximum number of codes to return \(1-10, default: 5\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Classification status | +| `domain` | string | Resolved domain | +| `type` | string | Input type that was resolved | +| `codes` | array | Matched NAICS codes with name and confidence | +| ↳ `code` | string | Industry code | +| ↳ `name` | string | Industry name | +| ↳ `confidence` | string | Match confidence \(high, medium, low\) | + +### `context_dev_classify_sic` + +Classify a brand into SIC industry codes from its domain or company name. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `input` | string | Yes | Brand domain or company name to classify \(e.g., "stripe.com" or "Stripe"\) | +| `type` | string | No | SIC taxonomy version: "original_sic" \(default\) or "latest_sec" | +| `minResults` | number | No | Minimum number of codes to return \(1-10, default: 1\) | +| `maxResults` | number | No | Maximum number of codes to return \(1-10, default: 5\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Classification status | +| `domain` | string | Resolved domain | +| `type` | string | Input type that was resolved | +| `classification` | string | SIC taxonomy version used \(original_sic or latest_sec\) | +| `codes` | array | Matched SIC codes with name, confidence, and group metadata | +| ↳ `code` | string | Industry code | +| ↳ `name` | string | Industry name | +| ↳ `confidence` | string | Match confidence \(high, medium, low\) | +| ↳ `majorGroup` | string | Major group code \(original_sic only\) | +| ↳ `majorGroupName` | string | Major group name \(original_sic only\) | +| ↳ `office` | string | SEC office \(latest_sec only\) | + +### `context_dev_get_brand` + +Retrieve brand data for a domain: logos, colors, backdrops, socials, address, and industry. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `domain` | string | Yes | The domain to retrieve brand data for \(e.g., "airbnb.com"\) | +| `forceLanguage` | string | No | Override the detected language with a supported language code | +| `maxSpeed` | boolean | No | Skip time-consuming operations for a faster response \(default: false\) | +| `maxAgeMs` | number | No | Cache max age in milliseconds \(86400000-31536000000, default: 7776000000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Retrieval status | +| `brand` | object | Brand data object | +| ↳ `domain` | string | Brand domain | +| ↳ `title` | string | Brand title | +| ↳ `description` | string | Brand description | +| ↳ `slogan` | string | Brand slogan | +| ↳ `colors` | json | Brand colors \(hex and name\) | +| ↳ `logos` | json | Brand logos with mode, colors, resolution, and type | +| ↳ `backdrops` | json | Brand backdrop images | +| ↳ `socials` | json | Social media profiles \(type and url\) | +| ↳ `address` | json | Brand address | +| ↳ `stock` | json | Stock info \(ticker and exchange\) | +| ↳ `is_nsfw` | boolean | Whether the brand contains adult content | +| ↳ `email` | string | Brand contact email | +| ↳ `phone` | string | Brand contact phone | +| ↳ `industries` | json | Industry taxonomy \(eic industry/subindustry pairs\) | +| ↳ `links` | json | Key brand links \(careers, privacy, terms, blog, pricing\) | +| ↳ `primary_language` | string | Primary language of the brand site | + +### `context_dev_get_brand_by_name` + +Retrieve brand data by company name: logos, colors, socials, address, and industry. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `name` | string | Yes | Company name to retrieve brand data for \(3-30 chars, e.g., "Apple Inc"\) | +| `countryGl` | string | No | ISO 2-letter country code to prioritize \(e.g., "us"\) | +| `forceLanguage` | string | No | Override the detected language with a supported language code | +| `maxSpeed` | boolean | No | Skip time-consuming operations for a faster response \(default: false\) | +| `maxAgeMs` | number | No | Cache max age in milliseconds \(86400000-31536000000, default: 7776000000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Retrieval status | +| `brand` | object | Brand data object | +| ↳ `domain` | string | Brand domain | +| ↳ `title` | string | Brand title | +| ↳ `description` | string | Brand description | +| ↳ `slogan` | string | Brand slogan | +| ↳ `colors` | json | Brand colors \(hex and name\) | +| ↳ `logos` | json | Brand logos with mode, colors, resolution, and type | +| ↳ `backdrops` | json | Brand backdrop images | +| ↳ `socials` | json | Social media profiles \(type and url\) | +| ↳ `address` | json | Brand address | +| ↳ `stock` | json | Stock info \(ticker and exchange\) | +| ↳ `is_nsfw` | boolean | Whether the brand contains adult content | +| ↳ `email` | string | Brand contact email | +| ↳ `phone` | string | Brand contact phone | +| ↳ `industries` | json | Industry taxonomy \(eic industry/subindustry pairs\) | +| ↳ `links` | json | Key brand links \(careers, privacy, terms, blog, pricing\) | +| ↳ `primary_language` | string | Primary language of the brand site | + +### `context_dev_get_brand_by_email` + +Retrieve brand data from a work email address. Free/disposable emails are rejected (422). 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `email` | string | Yes | Work email address; the domain is extracted \(free providers are rejected\) | +| `forceLanguage` | string | No | Override the detected language with a supported language code | +| `maxSpeed` | boolean | No | Skip time-consuming operations for a faster response \(default: false\) | +| `maxAgeMs` | number | No | Cache max age in milliseconds \(86400000-31536000000, default: 7776000000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Retrieval status | +| `brand` | object | Brand data object | +| ↳ `domain` | string | Brand domain | +| ↳ `title` | string | Brand title | +| ↳ `description` | string | Brand description | +| ↳ `slogan` | string | Brand slogan | +| ↳ `colors` | json | Brand colors \(hex and name\) | +| ↳ `logos` | json | Brand logos with mode, colors, resolution, and type | +| ↳ `backdrops` | json | Brand backdrop images | +| ↳ `socials` | json | Social media profiles \(type and url\) | +| ↳ `address` | json | Brand address | +| ↳ `stock` | json | Stock info \(ticker and exchange\) | +| ↳ `is_nsfw` | boolean | Whether the brand contains adult content | +| ↳ `email` | string | Brand contact email | +| ↳ `phone` | string | Brand contact phone | +| ↳ `industries` | json | Industry taxonomy \(eic industry/subindustry pairs\) | +| ↳ `links` | json | Key brand links \(careers, privacy, terms, blog, pricing\) | +| ↳ `primary_language` | string | Primary language of the brand site | + +### `context_dev_get_brand_by_ticker` + +Retrieve brand data for a public company by its stock ticker symbol. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `ticker` | string | Yes | Stock ticker symbol \(e.g., "AAPL", "GOOGL", "BRK.A"\) | +| `tickerExchange` | string | No | Exchange code for the ticker \(e.g., "NASDAQ", "NYSE", "LSE"\). Default: NASDAQ | +| `forceLanguage` | string | No | Override the detected language with a supported language code | +| `maxSpeed` | boolean | No | Skip time-consuming operations for a faster response \(default: false\) | +| `maxAgeMs` | number | No | Cache max age in milliseconds \(86400000-31536000000, default: 7776000000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Retrieval status | +| `brand` | object | Brand data object | +| ↳ `domain` | string | Brand domain | +| ↳ `title` | string | Brand title | +| ↳ `description` | string | Brand description | +| ↳ `slogan` | string | Brand slogan | +| ↳ `colors` | json | Brand colors \(hex and name\) | +| ↳ `logos` | json | Brand logos with mode, colors, resolution, and type | +| ↳ `backdrops` | json | Brand backdrop images | +| ↳ `socials` | json | Social media profiles \(type and url\) | +| ↳ `address` | json | Brand address | +| ↳ `stock` | json | Stock info \(ticker and exchange\) | +| ↳ `is_nsfw` | boolean | Whether the brand contains adult content | +| ↳ `email` | string | Brand contact email | +| ↳ `phone` | string | Brand contact phone | +| ↳ `industries` | json | Industry taxonomy \(eic industry/subindustry pairs\) | +| ↳ `links` | json | Key brand links \(careers, privacy, terms, blog, pricing\) | +| ↳ `primary_language` | string | Primary language of the brand site | + +### `context_dev_get_brand_simplified` + +Retrieve essential brand data for a domain: title, colors, logos, and backdrops. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `domain` | string | Yes | The domain to retrieve simplified brand data for \(e.g., "airbnb.com"\) | +| `maxAgeMs` | number | No | Cache max age in milliseconds \(86400000-31536000000, default: 7776000000\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Retrieval status | +| `brand` | object | Simplified brand data \(domain, title, colors, logos, backdrops\) | +| ↳ `domain` | string | Brand domain | +| ↳ `title` | string | Brand title | +| ↳ `colors` | json | Brand colors \(hex and name\) | +| ↳ `logos` | json | Brand logos with mode, colors, resolution, and type | +| ↳ `backdrops` | json | Brand backdrop images | + +### `context_dev_identify_transaction` + +Identify the brand behind a raw bank/card transaction descriptor and return its brand data. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `transactionInfo` | string | Yes | The raw transaction descriptor or identifier to resolve to a brand | +| `countryGl` | string | No | ISO 2-letter country code from the transaction \(e.g., "us", "gb"\) | +| `city` | string | No | City name to prioritize in the search | +| `mcc` | string | No | Merchant Category Code for the business category | +| `phone` | number | No | Phone number from the transaction for verification | +| `highConfidenceOnly` | boolean | No | Enforce additional verification steps for higher confidence \(default: false\) | +| `forceLanguage` | string | No | Override the detected language with a supported language code | +| `maxSpeed` | boolean | No | Skip time-consuming operations for a faster response \(default: false\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Identification status | +| `brand` | object | Brand data for the identified merchant | +| ↳ `domain` | string | Brand domain | +| ↳ `title` | string | Brand title | +| ↳ `description` | string | Brand description | +| ↳ `slogan` | string | Brand slogan | +| ↳ `colors` | json | Brand colors \(hex and name\) | +| ↳ `logos` | json | Brand logos with mode, colors, resolution, and type | +| ↳ `backdrops` | json | Brand backdrop images | +| ↳ `socials` | json | Social media profiles \(type and url\) | +| ↳ `address` | json | Brand address | +| ↳ `stock` | json | Stock info \(ticker and exchange\) | +| ↳ `is_nsfw` | boolean | Whether the brand contains adult content | +| ↳ `email` | string | Brand contact email | +| ↳ `phone` | string | Brand contact phone | +| ↳ `industries` | json | Industry taxonomy \(eic industry/subindustry pairs\) | +| ↳ `links` | json | Key brand links 
\(careers, privacy, terms, blog, pricing\) | +| ↳ `primary_language` | string | Primary language of the brand site | + +### `context_dev_prefetch_domain` + +Queue a domain for brand-data prefetching to reduce latency on later requests (subscribers; 0 credits). + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `domain` | string | Yes | The domain to prefetch brand data for \(e.g., "example.com"\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Prefetch status | +| `message` | string | Human-readable prefetch result message | +| `domain` | string | The domain queued for prefetching | + +### `context_dev_prefetch_by_email` + +Queue an email's domain for brand-data prefetching to reduce latency on later requests. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `email` | string | Yes | Work email address whose domain should be prefetched \(free providers rejected\) | +| `timeoutMS` | number | No | Request timeout in milliseconds \(1000-300000\) | +| `apiKey` | string | Yes | Context.dev API key | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `status` | string | Prefetch status | +| `message` | string | Human-readable prefetch result message | +| `domain` | string | The domain queued for prefetching | + + diff --git a/apps/docs/content/docs/en/integrations/meta.json b/apps/docs/content/docs/en/integrations/meta.json index e67c39642b8..0ee2e26089d 100644 --- a/apps/docs/content/docs/en/integrations/meta.json +++ b/apps/docs/content/docs/en/integrations/meta.json @@ -35,6 +35,7 @@ "cloudwatch", "codepipeline", "confluence", + "context_dev", "convex", "crowdstrike", "cursor", diff --git a/apps/sim/blocks/blocks/context_dev.ts b/apps/sim/blocks/blocks/context_dev.ts new file mode 
100644 index 00000000000..26298c41ef8 --- /dev/null +++ b/apps/sim/blocks/blocks/context_dev.ts @@ -0,0 +1,940 @@ +import { ContextDevIcon } from '@/components/icons' +import type { BlockConfig, BlockMeta } from '@/blocks/types' +import { AuthMode, IntegrationType } from '@/blocks/types' +import type { ContextDevScrapeMarkdownResponse } from '@/tools/context_dev/types' + +/** Operations whose primary input is a full page URL. */ +const URL_OPS = [ + 'scrape_markdown', + 'scrape_html', + 'scrape_images', + 'screenshot', + 'crawl', + 'extract', + 'extract_product', +] +/** Operations whose primary input is a bare domain. */ +const DOMAIN_OPS = [ + 'map', + 'get_brand', + 'get_brand_simplified', + 'extract_products', + 'scrape_fonts', + 'scrape_styleguide', + 'prefetch_domain', +] +/** Classification operations keyed on a domain-or-name input. */ +const CLASSIFY_OPS = ['classify_naics', 'classify_sic'] +/** Brand operations that accept language/speed tuning. */ +const BRAND_LANG_OPS = [ + 'get_brand', + 'get_brand_by_name', + 'get_brand_by_email', + 'get_brand_by_ticker', + 'identify_transaction', +] +/** Operations that accept a cache max-age. */ +const MAX_AGE_OPS = [ + 'scrape_markdown', + 'scrape_html', + 'scrape_images', + 'screenshot', + 'crawl', + 'extract', + 'extract_product', + 'extract_products', + 'scrape_fonts', + 'scrape_styleguide', + 'get_brand', + 'get_brand_by_name', + 'get_brand_by_email', + 'get_brand_by_ticker', + 'get_brand_simplified', +] +/** Operations that accept a post-load browser wait. */ +const WAIT_FOR_OPS = ['scrape_markdown', 'scrape_html', 'scrape_images', 'screenshot', 'crawl'] + +/** + * Coerces a value that may be a number or numeric string into a number, or undefined. + */ +function toNumber(value: unknown): number | undefined { + if (value === undefined || value === null || value === '') return undefined + const parsed = Number(value) + return Number.isNaN(parsed) ? 
undefined : parsed +} + +/** + * Parses a value that may already be an array or a JSON-encoded array string. + */ +function toStringArray(value: unknown): string[] | undefined { + if (Array.isArray(value)) return value.filter((v): v is string => typeof v === 'string') + if (typeof value === 'string' && value.trim() !== '') { + try { + const parsed = JSON.parse(value) + if (Array.isArray(parsed)) return parsed.filter((v): v is string => typeof v === 'string') + } catch { + return value + .split(',') + .map((v) => v.trim()) + .filter(Boolean) + } + } + return undefined +} + +export const ContextDevBlock: BlockConfig = { + type: 'context_dev', + name: 'Context.dev', + description: 'Scrape, crawl, search, extract, and enrich web and brand data', + authMode: AuthMode.ApiKey, + longDescription: + 'Integrate Context.dev into the workflow. Scrape pages to markdown or HTML, capture screenshots, list images, crawl entire sites, map sitemaps, search the web, extract structured data and products, pull design systems, classify industries, and retrieve brand assets by domain, name, email, ticker, or transaction — all from one API.', + docsLink: 'https://docs.sim.ai/integrations/context_dev', + category: 'tools', + integrationType: IntegrationType.Search, + bgColor: '#ffffff', + icon: ContextDevIcon, + subBlocks: [ + { + id: 'operation', + title: 'Operation', + type: 'dropdown', + options: [ + { label: 'Scrape Markdown', id: 'scrape_markdown' }, + { label: 'Scrape HTML', id: 'scrape_html' }, + { label: 'Scrape Images', id: 'scrape_images' }, + { label: 'Screenshot', id: 'screenshot' }, + { label: 'Crawl Website', id: 'crawl' }, + { label: 'Map Sitemap', id: 'map' }, + { label: 'Web Search', id: 'search' }, + { label: 'Extract Structured Data', id: 'extract' }, + { label: 'Extract Product', id: 'extract_product' }, + { label: 'Extract Products', id: 'extract_products' }, + { label: 'Scrape Fonts', id: 'scrape_fonts' }, + { label: 'Scrape Styleguide', id: 'scrape_styleguide' }, + { 
label: 'Classify NAICS', id: 'classify_naics' }, + { label: 'Classify SIC', id: 'classify_sic' }, + { label: 'Get Brand by Domain', id: 'get_brand' }, + { label: 'Get Brand by Name', id: 'get_brand_by_name' }, + { label: 'Get Brand by Email', id: 'get_brand_by_email' }, + { label: 'Get Brand by Ticker', id: 'get_brand_by_ticker' }, + { label: 'Get Brand (Simplified)', id: 'get_brand_simplified' }, + { label: 'Identify Transaction', id: 'identify_transaction' }, + { label: 'Prefetch Domain', id: 'prefetch_domain' }, + { label: 'Prefetch by Email', id: 'prefetch_by_email' }, + ], + value: () => 'scrape_markdown', + }, + { + id: 'url', + title: 'Website URL', + type: 'short-input', + placeholder: 'https://example.com', + condition: { field: 'operation', value: URL_OPS }, + required: { field: 'operation', value: URL_OPS }, + }, + { + id: 'domain', + title: 'Domain', + type: 'short-input', + placeholder: 'example.com', + condition: { field: 'operation', value: DOMAIN_OPS }, + required: { field: 'operation', value: DOMAIN_OPS }, + }, + { + id: 'input', + title: 'Domain or Company Name', + type: 'short-input', + placeholder: 'example.com or Company Name', + condition: { field: 'operation', value: CLASSIFY_OPS }, + required: { field: 'operation', value: CLASSIFY_OPS }, + }, + { + id: 'query', + title: 'Search Query', + type: 'short-input', + placeholder: 'Enter your search query', + condition: { field: 'operation', value: 'search' }, + required: { field: 'operation', value: 'search' }, + }, + { + id: 'name', + title: 'Company Name', + type: 'short-input', + placeholder: 'Apple Inc', + condition: { field: 'operation', value: 'get_brand_by_name' }, + required: { field: 'operation', value: 'get_brand_by_name' }, + }, + { + id: 'email', + title: 'Work Email', + type: 'short-input', + placeholder: 'name@company.com', + condition: { field: 'operation', value: ['get_brand_by_email', 'prefetch_by_email'] }, + required: { field: 'operation', value: ['get_brand_by_email', 
'prefetch_by_email'] }, + }, + { + id: 'ticker', + title: 'Stock Ticker', + type: 'short-input', + placeholder: 'AAPL', + condition: { field: 'operation', value: 'get_brand_by_ticker' }, + required: { field: 'operation', value: 'get_brand_by_ticker' }, + }, + { + id: 'transactionInfo', + title: 'Transaction Descriptor', + type: 'short-input', + placeholder: 'SQ *COFFEE SHOP 1234', + condition: { field: 'operation', value: 'identify_transaction' }, + required: { field: 'operation', value: 'identify_transaction' }, + }, + { + id: 'schema', + title: 'Extraction Schema', + type: 'code', + language: 'json', + placeholder: 'Enter a JSON schema describing the data to extract...', + condition: { field: 'operation', value: 'extract' }, + required: { field: 'operation', value: 'extract' }, + wandConfig: { + enabled: true, + maintainHistory: true, + prompt: `You are an expert at writing JSON Schemas for structured web data extraction. +Generate ONLY the JSON schema based on the user's request. +The output MUST be a single, valid JSON object, starting with { and ending with }. +Use standard JSON Schema properties (type, description, properties, items, required). 
+ +Current schema: {context} + +Do not include any explanations, markdown formatting, or other text outside the JSON object.`, + placeholder: 'Describe the data structure you want to extract...', + generationType: 'json-schema', + }, + }, + { + id: 'instructions', + title: 'Instructions', + type: 'long-input', + placeholder: 'Optional guidance for which links to prioritize', + mode: 'advanced', + condition: { field: 'operation', value: 'extract' }, + }, + { + id: 'useMainContentOnly', + title: 'Only Main Content', + type: 'switch', + condition: { field: 'operation', value: ['scrape_markdown', 'scrape_html', 'crawl'] }, + }, + { + id: 'includeLinks', + title: 'Include Links', + type: 'switch', + condition: { field: 'operation', value: ['scrape_markdown', 'crawl'] }, + }, + { + id: 'includeImages', + title: 'Include Images', + type: 'switch', + condition: { field: 'operation', value: ['scrape_markdown', 'crawl'] }, + }, + { + id: 'includeFrames', + title: 'Include Frames', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: ['scrape_markdown', 'scrape_html'] }, + }, + { + id: 'fullScreenshot', + title: 'Full Page Screenshot', + type: 'switch', + condition: { field: 'operation', value: 'screenshot' }, + }, + { + id: 'handleCookiePopup', + title: 'Dismiss Cookie Popups', + type: 'switch', + condition: { field: 'operation', value: 'screenshot' }, + }, + { + id: 'markdownEnabled', + title: 'Scrape Results to Markdown', + type: 'switch', + condition: { field: 'operation', value: 'search' }, + }, + { + id: 'tickerExchange', + title: 'Exchange', + type: 'short-input', + placeholder: 'NASDAQ', + condition: { field: 'operation', value: 'get_brand_by_ticker' }, + }, + { + id: 'sicType', + title: 'SIC Taxonomy', + type: 'dropdown', + options: [ + { label: 'Original SIC', id: 'original_sic' }, + { label: 'Latest SEC', id: 'latest_sec' }, + ], + value: () => 'original_sic', + condition: { field: 'operation', value: 'classify_sic' }, + }, + { + id: 
'freshness', + title: 'Freshness', + type: 'dropdown', + options: [ + { label: 'Last 24 Hours', id: 'last_24_hours' }, + { label: 'Last Week', id: 'last_week' }, + { label: 'Last Month', id: 'last_month' }, + { label: 'Last Year', id: 'last_year' }, + ], + mode: 'advanced', + condition: { field: 'operation', value: 'search' }, + }, + { + id: 'includeDomains', + title: 'Include Domains', + type: 'long-input', + placeholder: '["example.com", "docs.example.com"]', + mode: 'advanced', + condition: { field: 'operation', value: 'search' }, + }, + { + id: 'excludeDomains', + title: 'Exclude Domains', + type: 'long-input', + placeholder: '["spam.com"]', + mode: 'advanced', + condition: { field: 'operation', value: 'search' }, + }, + { + id: 'queryFanout', + title: 'Query Fan-out', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: 'search' }, + }, + { + id: 'factCheck', + title: 'Fact Check', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: 'extract' }, + }, + { + id: 'followSubdomains', + title: 'Follow Subdomains', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: ['crawl', 'extract'] }, + }, + { + id: 'maxPages', + title: 'Max Pages', + type: 'short-input', + placeholder: '100', + mode: 'advanced', + condition: { field: 'operation', value: 'crawl' }, + }, + { + id: 'extractMaxPages', + title: 'Max Pages', + type: 'short-input', + placeholder: '5', + mode: 'advanced', + condition: { field: 'operation', value: 'extract' }, + }, + { + id: 'maxDepth', + title: 'Max Depth', + type: 'short-input', + placeholder: 'Maximum link depth', + mode: 'advanced', + condition: { field: 'operation', value: ['crawl', 'extract'] }, + }, + { + id: 'maxProducts', + title: 'Max Products', + type: 'short-input', + placeholder: '12', + mode: 'advanced', + condition: { field: 'operation', value: 'extract_products' }, + }, + { + id: 'urlRegex', + title: 'URL Regex', + type: 'short-input', + placeholder: 
'Regex to filter URLs', + mode: 'advanced', + condition: { field: 'operation', value: ['crawl', 'map'] }, + }, + { + id: 'maxLinks', + title: 'Max Links', + type: 'short-input', + placeholder: '10000', + mode: 'advanced', + condition: { field: 'operation', value: 'map' }, + }, + { + id: 'viewportWidth', + title: 'Viewport Width', + type: 'short-input', + placeholder: '1920', + mode: 'advanced', + condition: { field: 'operation', value: 'screenshot' }, + }, + { + id: 'viewportHeight', + title: 'Viewport Height', + type: 'short-input', + placeholder: '1080', + mode: 'advanced', + condition: { field: 'operation', value: 'screenshot' }, + }, + { + id: 'enrichResolution', + title: 'Enrich: Resolution', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: 'scrape_images' }, + }, + { + id: 'enrichHostedUrl', + title: 'Enrich: Hosted URL', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: 'scrape_images' }, + }, + { + id: 'enrichClassification', + title: 'Enrich: Classification', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: 'scrape_images' }, + }, + { + id: 'minResults', + title: 'Min Results', + type: 'short-input', + placeholder: '1', + mode: 'advanced', + condition: { field: 'operation', value: CLASSIFY_OPS }, + }, + { + id: 'maxResults', + title: 'Max Results', + type: 'short-input', + placeholder: '5', + mode: 'advanced', + condition: { field: 'operation', value: CLASSIFY_OPS }, + }, + { + id: 'countryGl', + title: 'Country Code', + type: 'short-input', + placeholder: 'us', + mode: 'advanced', + condition: { field: 'operation', value: ['get_brand_by_name', 'identify_transaction'] }, + }, + { + id: 'city', + title: 'City', + type: 'short-input', + placeholder: 'San Francisco', + mode: 'advanced', + condition: { field: 'operation', value: 'identify_transaction' }, + }, + { + id: 'mcc', + title: 'Merchant Category Code', + type: 'short-input', + placeholder: '5812', + mode: 
'advanced', + condition: { field: 'operation', value: 'identify_transaction' }, + }, + { + id: 'phone', + title: 'Phone', + type: 'short-input', + placeholder: '14155551234', + mode: 'advanced', + condition: { field: 'operation', value: 'identify_transaction' }, + }, + { + id: 'highConfidenceOnly', + title: 'High Confidence Only', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: 'identify_transaction' }, + }, + { + id: 'forceLanguage', + title: 'Force Language', + type: 'short-input', + placeholder: 'e.g., en, es, fr', + mode: 'advanced', + condition: { field: 'operation', value: BRAND_LANG_OPS }, + }, + { + id: 'maxSpeed', + title: 'Max Speed', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: BRAND_LANG_OPS }, + }, + { + id: 'waitForMs', + title: 'Wait For (ms)', + type: 'short-input', + placeholder: '0', + mode: 'advanced', + condition: { field: 'operation', value: WAIT_FOR_OPS }, + }, + { + id: 'stopAfterMs', + title: 'Stop After (ms)', + type: 'short-input', + placeholder: '80000', + mode: 'advanced', + condition: { field: 'operation', value: ['crawl', 'extract'] }, + }, + { + id: 'maxAgeMs', + title: 'Cache Max Age (ms)', + type: 'short-input', + placeholder: '86400000', + mode: 'advanced', + condition: { field: 'operation', value: MAX_AGE_OPS }, + }, + { + id: 'timeoutMS', + title: 'Timeout (ms)', + type: 'short-input', + placeholder: '60000', + mode: 'advanced', + }, + { + id: 'apiKey', + title: 'API Key', + type: 'short-input', + placeholder: 'Enter your Context.dev API key', + password: true, + required: true, + }, + ], + tools: { + access: [ + 'context_dev_scrape_markdown', + 'context_dev_scrape_html', + 'context_dev_scrape_images', + 'context_dev_screenshot', + 'context_dev_crawl', + 'context_dev_map', + 'context_dev_search', + 'context_dev_extract', + 'context_dev_extract_product', + 'context_dev_extract_products', + 'context_dev_scrape_fonts', + 'context_dev_scrape_styleguide', + 
'context_dev_classify_naics', + 'context_dev_classify_sic', + 'context_dev_get_brand', + 'context_dev_get_brand_by_name', + 'context_dev_get_brand_by_email', + 'context_dev_get_brand_by_ticker', + 'context_dev_get_brand_simplified', + 'context_dev_identify_transaction', + 'context_dev_prefetch_domain', + 'context_dev_prefetch_by_email', + ], + config: { + tool: (params) => + params.operation ? `context_dev_${params.operation}` : 'context_dev_scrape_markdown', + params: (params) => { + const { operation, apiKey } = params + const result: Record = { apiKey } + + const setBool = (key: string) => { + if (params[key] != null) result[key] = params[key] + } + const setNumber = (key: string, target = key) => { + const n = toNumber(params[key]) + if (n !== undefined) result[target] = n + } + const setString = (key: string, target = key) => { + if (params[key]) result[target] = params[key] + } + + switch (operation) { + case 'scrape_markdown': + setString('url') + setBool('useMainContentOnly') + setBool('includeLinks') + setBool('includeImages') + setBool('includeFrames') + setNumber('maxAgeMs') + setNumber('waitForMs') + setNumber('timeoutMS') + break + case 'scrape_html': + setString('url') + setBool('useMainContentOnly') + setBool('includeFrames') + setNumber('maxAgeMs') + setNumber('waitForMs') + setNumber('timeoutMS') + break + case 'scrape_images': + setString('url') + setNumber('maxAgeMs') + setNumber('waitForMs') + setNumber('timeoutMS') + setBool('enrichResolution') + setBool('enrichHostedUrl') + setBool('enrichClassification') + break + case 'screenshot': + setString('url') + setBool('fullScreenshot') + setBool('handleCookiePopup') + setNumber('viewportWidth') + setNumber('viewportHeight') + setNumber('maxAgeMs') + setNumber('waitForMs') + setNumber('timeoutMS') + break + case 'crawl': + setString('url') + setNumber('maxPages') + setNumber('maxDepth') + setString('urlRegex') + setBool('useMainContentOnly') + setBool('includeLinks') + setBool('includeImages') + 
setBool('followSubdomains') + setNumber('maxAgeMs') + setNumber('waitForMs') + setNumber('stopAfterMs') + setNumber('timeoutMS') + break + case 'map': + setString('domain') + setNumber('maxLinks') + setString('urlRegex') + setNumber('timeoutMS') + break + case 'search': { + setString('query') + const include = toStringArray(params.includeDomains) + if (include?.length) result.includeDomains = include + const exclude = toStringArray(params.excludeDomains) + if (exclude?.length) result.excludeDomains = exclude + setString('freshness') + setBool('queryFanout') + setBool('markdownEnabled') + setNumber('timeoutMS') + break + } + case 'extract': { + setString('url') + if (params.schema) { + if (typeof params.schema === 'object') { + result.schema = params.schema + } else if (typeof params.schema === 'string') { + try { + result.schema = JSON.parse(params.schema) + } catch { + throw new Error('Extraction schema must be valid JSON') + } + } + } + setString('instructions') + setBool('factCheck') + setBool('followSubdomains') + setNumber('extractMaxPages', 'maxPages') + setNumber('maxDepth') + setNumber('maxAgeMs') + setNumber('stopAfterMs') + setNumber('timeoutMS') + break + } + case 'extract_product': + setString('url') + setNumber('maxAgeMs') + setNumber('timeoutMS') + break + case 'extract_products': + setString('domain') + setNumber('maxProducts') + setNumber('maxAgeMs') + setNumber('timeoutMS') + break + case 'scrape_fonts': + setString('domain') + setNumber('maxAgeMs') + setNumber('timeoutMS') + break + case 'scrape_styleguide': + setString('domain') + setNumber('maxAgeMs') + setNumber('timeoutMS') + break + case 'classify_naics': + setString('input') + setNumber('minResults') + setNumber('maxResults') + setNumber('timeoutMS') + break + case 'classify_sic': + setString('input') + setString('sicType', 'type') + setNumber('minResults') + setNumber('maxResults') + setNumber('timeoutMS') + break + case 'get_brand': + setString('domain') + setString('forceLanguage') + 
setBool('maxSpeed') + setNumber('maxAgeMs') + setNumber('timeoutMS') + break + case 'get_brand_by_name': + setString('name') + setString('countryGl') + setString('forceLanguage') + setBool('maxSpeed') + setNumber('maxAgeMs') + setNumber('timeoutMS') + break + case 'get_brand_by_email': + setString('email') + setString('forceLanguage') + setBool('maxSpeed') + setNumber('maxAgeMs') + setNumber('timeoutMS') + break + case 'get_brand_by_ticker': + setString('ticker') + setString('tickerExchange') + setString('forceLanguage') + setBool('maxSpeed') + setNumber('maxAgeMs') + setNumber('timeoutMS') + break + case 'get_brand_simplified': + setString('domain') + setNumber('maxAgeMs') + setNumber('timeoutMS') + break + case 'identify_transaction': + setString('transactionInfo') + setString('countryGl') + setString('city') + setString('mcc') + setNumber('phone') + setBool('highConfidenceOnly') + setString('forceLanguage') + setBool('maxSpeed') + setNumber('timeoutMS') + break + case 'prefetch_domain': + setString('domain') + setNumber('timeoutMS') + break + case 'prefetch_by_email': + setString('email') + setNumber('timeoutMS') + break + } + + return result + }, + }, + }, + inputs: { + apiKey: { type: 'string', description: 'Context.dev API key' }, + operation: { type: 'string', description: 'Operation to perform' }, + url: { type: 'string', description: 'Target website or page URL' }, + domain: { type: 'string', description: 'Target domain' }, + input: { type: 'string', description: 'Domain or company name for classification' }, + query: { type: 'string', description: 'Web search query' }, + name: { type: 'string', description: 'Company name for brand lookup' }, + email: { type: 'string', description: 'Work email for brand lookup or prefetch' }, + ticker: { type: 'string', description: 'Stock ticker for brand lookup' }, + transactionInfo: { type: 'string', description: 'Transaction descriptor to identify' }, + schema: { type: 'json', description: 'JSON schema for structured 
extraction' }, + instructions: { type: 'string', description: 'Extraction guidance' }, + useMainContentOnly: { type: 'boolean', description: 'Return only main content' }, + includeLinks: { type: 'boolean', description: 'Preserve hyperlinks' }, + includeImages: { type: 'boolean', description: 'Include image references' }, + includeFrames: { type: 'boolean', description: 'Render iframe contents inline' }, + fullScreenshot: { type: 'boolean', description: 'Capture the full page' }, + handleCookiePopup: { type: 'boolean', description: 'Dismiss cookie banners' }, + markdownEnabled: { type: 'boolean', description: 'Scrape search results to markdown' }, + tickerExchange: { type: 'string', description: 'Stock exchange for the ticker' }, + sicType: { type: 'string', description: 'SIC taxonomy version' }, + freshness: { type: 'string', description: 'Search recency filter' }, + includeDomains: { type: 'json', description: 'Domains to allowlist in search' }, + excludeDomains: { type: 'json', description: 'Domains to blocklist in search' }, + queryFanout: { type: 'boolean', description: 'Expand query into variants' }, + factCheck: { type: 'boolean', description: 'Ground extracted values in page facts' }, + followSubdomains: { type: 'boolean', description: 'Follow subdomain links' }, + maxPages: { type: 'number', description: 'Maximum pages to crawl (1-500)' }, + extractMaxPages: { + type: 'number', + description: 'Maximum pages to analyze for extraction (1-50)', + }, + maxDepth: { type: 'number', description: 'Maximum link depth' }, + maxProducts: { type: 'number', description: 'Maximum products to extract' }, + urlRegex: { type: 'string', description: 'Regex to filter URLs' }, + maxLinks: { type: 'number', description: 'Maximum sitemap URLs' }, + viewportWidth: { type: 'number', description: 'Screenshot viewport width' }, + viewportHeight: { type: 'number', description: 'Screenshot viewport height' }, + enrichResolution: { type: 'boolean', description: 'Measure scraped image 
dimensions' }, + enrichHostedUrl: { type: 'boolean', description: 'Host scraped images and return URLs' }, + enrichClassification: { type: 'boolean', description: 'Classify scraped images by type' }, + minResults: { type: 'number', description: 'Minimum classification results' }, + maxResults: { type: 'number', description: 'Maximum classification results' }, + countryGl: { type: 'string', description: 'ISO country code hint' }, + city: { type: 'string', description: 'City hint for transaction lookup' }, + mcc: { type: 'string', description: 'Merchant category code' }, + phone: { type: 'number', description: 'Phone number from transaction' }, + highConfidenceOnly: { type: 'boolean', description: 'Require high-confidence match' }, + forceLanguage: { type: 'string', description: 'Override detected brand language' }, + maxSpeed: { type: 'boolean', description: 'Skip slow brand operations' }, + waitForMs: { type: 'number', description: 'Browser wait time in ms' }, + stopAfterMs: { type: 'number', description: 'Soft crawl time budget in ms' }, + maxAgeMs: { type: 'number', description: 'Cache max age in ms' }, + timeoutMS: { type: 'number', description: 'Request timeout in ms' }, + }, + outputs: { + markdown: { type: 'string', description: 'Scraped markdown content' }, + html: { type: 'string', description: 'Scraped raw HTML content' }, + type: { type: 'string', description: 'Detected content type or resolved input type' }, + url: { type: 'string', description: 'Resolved target URL' }, + file: { type: 'file', description: 'Stored screenshot image file' }, + screenshotUrl: { type: 'string', description: 'Public URL of the captured screenshot' }, + screenshotType: { type: 'string', description: 'Screenshot type (viewport or fullPage)' }, + domain: { type: 'string', description: 'Resolved domain' }, + width: { type: 'number', description: 'Screenshot width in pixels' }, + height: { type: 'number', description: 'Screenshot height in pixels' }, + success: { type: 'boolean', 
description: 'Whether the scrape succeeded' }, + images: { type: 'json', description: 'Discovered image assets' }, + results: { type: 'json', description: 'Crawl pages or search results' }, + metadata: { type: 'json', description: 'Crawl or extraction summary metadata' }, + urls: { type: 'json', description: 'Discovered sitemap URLs' }, + meta: { type: 'json', description: 'Sitemap discovery stats' }, + query: { type: 'string', description: 'The query that was searched' }, + status: { type: 'string', description: 'Operation status' }, + message: { type: 'string', description: 'Prefetch result message' }, + urlsAnalyzed: { type: 'json', description: 'URLs analyzed during extraction' }, + data: { type: 'json', description: 'Structured data extracted from the site' }, + isProductPage: { type: 'boolean', description: 'Whether the URL is a product page' }, + platform: { type: 'string', description: 'Detected commerce platform' }, + product: { type: 'json', description: 'Extracted single product details' }, + products: { type: 'json', description: 'Extracted product catalog' }, + fonts: { type: 'json', description: 'Fonts with usage statistics' }, + fontLinks: { type: 'json', description: 'Font family download links' }, + styleguide: { type: 'json', description: 'Design system (colors, typography, components)' }, + codes: { type: 'json', description: 'Matched industry classification codes' }, + classification: { type: 'string', description: 'SIC taxonomy version used' }, + brand: { type: 'json', description: 'Brand data (logos, colors, socials, industry)' }, + creditsConsumed: { type: 'number', description: 'Credits consumed by this request' }, + creditsRemaining: { type: 'number', description: 'Credits remaining on the API key' }, + }, +} + +export const ContextDevBlockMeta = { + tags: ['web-scraping', 'enrichment', 'automation'], + url: 'https://www.context.dev', + templates: [ + { + icon: ContextDevIcon, + title: 'Context.dev knowledge-base builder', + prompt: + 
'Build a workflow that maps a documentation site with Context.dev, crawls each page to clean markdown, chunks and embeds the content, and upserts it into a knowledge base for an answering agent.', + modules: ['knowledge-base', 'agent', 'workflows'], + category: 'engineering', + tags: ['research', 'sync'], + }, + { + icon: ContextDevIcon, + title: 'Context.dev competitor monitor', + prompt: + 'Build a scheduled workflow that scrapes competitor pricing and changelog pages to markdown with Context.dev weekly, diffs against the prior snapshot, logs changes to a table, and posts notable updates to Slack.', + modules: ['scheduled', 'tables', 'agent', 'workflows'], + category: 'marketing', + tags: ['marketing', 'monitoring'], + alsoIntegrations: ['slack'], + }, + { + icon: ContextDevIcon, + title: 'Context.dev lead enrichment', + prompt: + 'Create a workflow that takes a work email, uses Context.dev to retrieve brand data by email and classify the company into NAICS codes, and writes the enriched firmographics to a CRM record.', + modules: ['agent', 'tables', 'workflows'], + category: 'sales', + tags: ['enrichment', 'sales'], + }, + { + icon: ContextDevIcon, + title: 'Context.dev structured data extractor', + prompt: + 'Build a workflow that takes a website URL and a JSON schema, uses Context.dev Extract to pull structured fields across the site, and returns the validated records as JSON.', + modules: ['agent', 'workflows'], + category: 'operations', + tags: ['automation', 'research'], + }, + { + icon: ContextDevIcon, + title: 'Context.dev research brief', + prompt: + 'Create an agent that runs a Context.dev web search on a topic, scrapes the top results to markdown, and synthesizes a cited research brief saved as a file.', + modules: ['agent', 'files', 'workflows'], + category: 'productivity', + tags: ['research'], + }, + { + icon: ContextDevIcon, + title: 'Context.dev design-system extractor', + prompt: + 'Build a workflow that takes a domain, uses Context.dev to scrape 
its styleguide and fonts plus a homepage screenshot, and stores the design tokens and assets as files for a design handoff.', + modules: ['agent', 'files', 'workflows'], + category: 'engineering', + tags: ['design', 'research'], + }, + { + icon: ContextDevIcon, + title: 'Context.dev transaction enrichment', + prompt: + 'Create a workflow that takes raw bank transaction descriptors, uses Context.dev to identify the merchant brand behind each one, and appends the resolved company and logo to a table.', + modules: ['tables', 'agent', 'workflows'], + category: 'operations', + tags: ['enrichment', 'automation'], + }, + { + icon: ContextDevIcon, + title: 'Context.dev product catalog importer', + prompt: + "Build a workflow that takes a brand domain, uses Context.dev to extract the brand's product catalog with pricing and features, and writes each product as a row in a table.", + modules: ['tables', 'agent', 'workflows'], + category: 'operations', + tags: ['enrichment', 'automation'], + }, + { + icon: ContextDevIcon, + title: 'Context.dev site change watcher', + prompt: + 'Build a scheduled workflow that maps a site sitemap with Context.dev, scrapes new or changed pages to markdown, summarizes the differences, and emails a digest.', + modules: ['scheduled', 'agent', 'workflows'], + category: 'operations', + tags: ['monitoring', 'automation'], + alsoIntegrations: ['gmail'], + }, + ], +} as const satisfies BlockMeta diff --git a/apps/sim/blocks/registry.ts b/apps/sim/blocks/registry.ts index 2cacc4576c6..88f03250471 100644 --- a/apps/sim/blocks/registry.ts +++ b/apps/sim/blocks/registry.ts @@ -38,6 +38,7 @@ import { CloudWatchBlock, CloudWatchBlockMeta } from '@/blocks/blocks/cloudwatch import { CodePipelineBlock, CodePipelineBlockMeta } from '@/blocks/blocks/codepipeline' import { ConditionBlock } from '@/blocks/blocks/condition' import { ConfluenceBlock, ConfluenceBlockMeta, ConfluenceV2Block } from '@/blocks/blocks/confluence' +import { ContextDevBlock, 
ContextDevBlockMeta } from '@/blocks/blocks/context_dev' import { ConvexBlock, ConvexBlockMeta } from '@/blocks/blocks/convex' import { CredentialBlock } from '@/blocks/blocks/credential' import { CrowdStrikeBlock, CrowdStrikeBlockMeta } from '@/blocks/blocks/crowdstrike' @@ -368,6 +369,7 @@ const BLOCK_REGISTRY: Record = { condition: ConditionBlock, confluence: ConfluenceBlock, confluence_v2: ConfluenceV2Block, + context_dev: ContextDevBlock, convex: ConvexBlock, credential: CredentialBlock, crowdstrike: CrowdStrikeBlock, @@ -667,6 +669,7 @@ const BLOCK_META_REGISTRY: Record = { cloudwatch: CloudWatchBlockMeta, codepipeline: CodePipelineBlockMeta, confluence: ConfluenceBlockMeta, + context_dev: ContextDevBlockMeta, convex: ConvexBlockMeta, crowdstrike: CrowdStrikeBlockMeta, cursor: CursorBlockMeta, diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index 8ea1529b1e8..dc3c91c5bef 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -2053,6 +2053,17 @@ export function ConfluenceIcon(props: SVGProps) { ) } +export function ContextDevIcon(props: SVGProps) { + return ( + + + + ) +} + export function ConvexIcon(props: SVGProps) { return ( = { cloudwatch: CloudWatchIcon, codepipeline: CodePipelineIcon, confluence_v2: ConfluenceIcon, + context_dev: ContextDevIcon, convex: ConvexIcon, crowdstrike: CrowdStrikeIcon, cursor_v2: CursorIcon, diff --git a/apps/sim/lib/integrations/integrations.json b/apps/sim/lib/integrations/integrations.json index c846bcdaf35..219fed3b77c 100644 --- a/apps/sim/lib/integrations/integrations.json +++ b/apps/sim/lib/integrations/integrations.json @@ -1,5 +1,5 @@ { - "updatedAt": "2026-06-14", + "updatedAt": "2026-06-15", "integrations": [ { "type": "onepassword", @@ -3336,6 +3336,113 @@ "integrationType": "documents", "tags": ["knowledge-base", "content-management", "note-taking"] }, + { + "type": "context_dev", + "slug": "context-dev", + "name": "Context.dev", + "description": "Scrape, crawl, 
search, extract, and enrich web and brand data", + "longDescription": "Integrate Context.dev into the workflow. Scrape pages to markdown or HTML, capture screenshots, list images, crawl entire sites, map sitemaps, search the web, extract structured data and products, pull design systems, classify industries, and retrieve brand assets by domain, name, email, ticker, or transaction — all from one API.", + "bgColor": "#ffffff", + "iconName": "ContextDevIcon", + "docsUrl": "https://docs.sim.ai/integrations/context_dev", + "operations": [ + { + "name": "Scrape Markdown", + "description": "Scrape any URL and return clean, LLM-ready markdown content." + }, + { + "name": "Scrape HTML", + "description": "Scrape any URL and return the raw HTML content of the page." + }, + { + "name": "Scrape Images", + "description": "Discover every image asset on a page, with optional dimension and type enrichment." + }, + { + "name": "Screenshot", + "description": "Capture a screenshot of any web page and store it as a downloadable image file." + }, + { + "name": "Crawl Website", + "description": "Crawl an entire website and return each discovered page as clean markdown." + }, + { + "name": "Map Sitemap", + "description": "Build a sitemap of a domain and return every discovered page URL." + }, + { + "name": "Web Search", + "description": "Search the web with natural language and optionally scrape results to markdown." + }, + { + "name": "Extract Structured Data", + "description": "Crawl a website and extract structured data matching a provided JSON schema." + }, + { + "name": "Extract Product", + "description": "Detect and extract structured product details from a single product page URL." + }, + { + "name": "Extract Products", + "description": "Extract the product catalog from a brand's website by domain (beta)." + }, + { + "name": "Scrape Fonts", + "description": "Extract the font families, usage stats, and font files used by a domain."
+ }, + { + "name": "Scrape Styleguide", + "description": "Extract a domain" + }, + { + "name": "Classify NAICS", + "description": "Classify a brand into NAICS industry codes from its domain or company name." + }, + { + "name": "Classify SIC", + "description": "Classify a brand into SIC industry codes from its domain or company name." + }, + { + "name": "Get Brand by Domain", + "description": "Retrieve brand data for a domain: logos, colors, backdrops, socials, address, and industry." + }, + { + "name": "Get Brand by Name", + "description": "Retrieve brand data by company name: logos, colors, socials, address, and industry." + }, + { + "name": "Get Brand by Email", + "description": "Retrieve brand data from a work email address. Free/disposable emails are rejected (422)." + }, + { + "name": "Get Brand by Ticker", + "description": "Retrieve brand data for a public company by its stock ticker symbol." + }, + { + "name": "Get Brand (Simplified)", + "description": "Retrieve essential brand data for a domain: title, colors, logos, and backdrops." + }, + { + "name": "Identify Transaction", + "description": "Identify the brand behind a raw bank/card transaction descriptor and return its brand data." + }, + { + "name": "Prefetch Domain", + "description": "Queue a domain for brand-data prefetching to reduce latency on later requests (subscribers; 0 credits)." 
+ }, + { + "name": "Prefetch by Email", + "description": "Queue an email" + } + ], + "operationCount": 22, + "triggers": [], + "triggerCount": 0, + "authType": "api-key", + "category": "tools", + "integrationType": "search", + "tags": ["web-scraping", "enrichment", "automation"] + }, { "type": "convex", "slug": "convex", diff --git a/apps/sim/tools/context_dev/classify_naics.ts b/apps/sim/tools/context_dev/classify_naics.ts new file mode 100644 index 00000000000..1386b672487 --- /dev/null +++ b/apps/sim/tools/context_dev/classify_naics.ts @@ -0,0 +1,96 @@ +import type { + ContextDevClassifyNaicsParams, + ContextDevClassifyNaicsResponse, +} from '@/tools/context_dev/types' +import { CLASSIFICATION_CODE_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevClassifyNaicsTool: ToolConfig< + ContextDevClassifyNaicsParams, + ContextDevClassifyNaicsResponse +> = { + id: 'context_dev_classify_naics', + name: 'Context.dev Classify NAICS', + description: 'Classify a brand into NAICS industry codes from its domain or company name.', + version: '1.0.0', + + params: { + input: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Brand domain or company name to classify (e.g., "stripe.com" or "Stripe")', + }, + minResults: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Minimum number of codes to return (1-10, default: 1)', + }, + maxResults: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of codes to return (1-10, default: 5)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + 
required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/web/naics`) + appendParam(url.searchParams, 'input', params.input) + appendParam(url.searchParams, 'minResults', params.minResults) + appendParam(url.searchParams, 'maxResults', params.maxResults) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + status: data.status ?? '', + domain: data.domain ?? null, + type: data.type ?? null, + codes: data.codes ?? [], + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + status: { type: 'string', description: 'Classification status' }, + domain: { type: 'string', description: 'Resolved domain', optional: true }, + type: { type: 'string', description: 'Input type that was resolved', optional: true }, + codes: { + type: 'array', + description: 'Matched NAICS codes with name and confidence', + items: { type: 'object', properties: CLASSIFICATION_CODE_OUTPUT_PROPERTIES }, + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/classify_sic.ts b/apps/sim/tools/context_dev/classify_sic.ts new file mode 100644 index 00000000000..a45b9ebb5d6 --- /dev/null +++ b/apps/sim/tools/context_dev/classify_sic.ts @@ -0,0 +1,117 @@ +import type { + ContextDevClassifySicParams, + ContextDevClassifySicResponse, +} from '@/tools/context_dev/types' +import { CLASSIFICATION_CODE_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const 
contextDevClassifySicTool: ToolConfig< + ContextDevClassifySicParams, + ContextDevClassifySicResponse +> = { + id: 'context_dev_classify_sic', + name: 'Context.dev Classify SIC', + description: 'Classify a brand into SIC industry codes from its domain or company name.', + version: '1.0.0', + + params: { + input: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Brand domain or company name to classify (e.g., "stripe.com" or "Stripe")', + }, + type: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'SIC taxonomy version: "original_sic" (default) or "latest_sec"', + }, + minResults: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Minimum number of codes to return (1-10, default: 1)', + }, + maxResults: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of codes to return (1-10, default: 5)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/web/sic`) + appendParam(url.searchParams, 'input', params.input) + appendParam(url.searchParams, 'type', params.type) + appendParam(url.searchParams, 'minResults', params.minResults) + appendParam(url.searchParams, 'maxResults', params.maxResults) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + status: data.status ?? '', + domain: data.domain ?? null, + type: data.type ?? 
null, + classification: data.classification ?? null, + codes: data.codes ?? [], + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + status: { type: 'string', description: 'Classification status' }, + domain: { type: 'string', description: 'Resolved domain', optional: true }, + type: { type: 'string', description: 'Input type that was resolved', optional: true }, + classification: { + type: 'string', + description: 'SIC taxonomy version used (original_sic or latest_sec)', + optional: true, + }, + codes: { + type: 'array', + description: 'Matched SIC codes with name, confidence, and group metadata', + items: { + type: 'object', + properties: { + ...CLASSIFICATION_CODE_OUTPUT_PROPERTIES, + majorGroup: { type: 'string', description: 'Major group code (original_sic only)' }, + majorGroupName: { type: 'string', description: 'Major group name (original_sic only)' }, + office: { type: 'string', description: 'SEC office (latest_sec only)' }, + }, + }, + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/crawl.ts b/apps/sim/tools/context_dev/crawl.ts new file mode 100644 index 00000000000..6184899363b --- /dev/null +++ b/apps/sim/tools/context_dev/crawl.ts @@ -0,0 +1,144 @@ +import type { ContextDevCrawlParams, ContextDevCrawlResponse } from '@/tools/context_dev/types' +import { CRAWL_RESULT_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevJsonHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevCrawlTool: ToolConfig<ContextDevCrawlParams, ContextDevCrawlResponse> = { + id: 'context_dev_crawl', + name: 'Context.dev Crawl', + description: 'Crawl an entire website and return each discovered page as clean markdown.', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The starting URL to crawl (must include http:// or
https://)', + }, + maxPages: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of pages to crawl (1-500, default: 100)', + }, + maxDepth: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum link depth from the starting URL (0 = start page only)', + }, + urlRegex: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Regex pattern to filter which URLs are crawled', + }, + includeLinks: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Preserve hyperlinks in the markdown output (default: true)', + }, + includeImages: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Include image references in the markdown output (default: false)', + }, + useMainContentOnly: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Strip headers, footers, and sidebars from each page (default: false)', + }, + followSubdomains: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Follow links to subdomains of the starting domain (default: false)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache duration in milliseconds (0-2592000000, default: 86400000)', + }, + waitForMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Browser wait time after page load in milliseconds (0-30000)', + }, + stopAfterMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Soft crawl time budget in milliseconds (10000-110000, default: 80000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 
'POST', + url: () => `${CONTEXT_DEV_BASE_URL}/web/crawl`, + headers: (params) => contextDevJsonHeaders(params.apiKey), + body: (params) => { + const body: Record<string, unknown> = { url: params.url } + if (params.maxPages != null) body.maxPages = params.maxPages + if (params.maxDepth != null) body.maxDepth = params.maxDepth + if (params.urlRegex) body.urlRegex = params.urlRegex + if (params.includeLinks != null) body.includeLinks = params.includeLinks + if (params.includeImages != null) body.includeImages = params.includeImages + if (params.useMainContentOnly != null) body.useMainContentOnly = params.useMainContentOnly + if (params.followSubdomains != null) body.followSubdomains = params.followSubdomains + if (params.maxAgeMs != null) body.maxAgeMs = params.maxAgeMs + if (params.waitForMs != null) body.waitForMs = params.waitForMs + if (params.stopAfterMs != null) body.stopAfterMs = params.stopAfterMs + if (params.timeoutMS != null) body.timeoutMS = params.timeoutMS + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + results: data.results ?? [], + metadata: data.metadata ??
{}, + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + results: { + type: 'array', + description: 'Crawled pages with markdown content and per-page metadata', + items: { type: 'object', properties: CRAWL_RESULT_OUTPUT_PROPERTIES }, + }, + metadata: { + type: 'object', + description: 'Crawl summary (numUrls, maxCrawlDepth, numSucceeded, numFailed, numSkipped)', + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/extract.ts b/apps/sim/tools/context_dev/extract.ts new file mode 100644 index 00000000000..21cefdb1e5d --- /dev/null +++ b/apps/sim/tools/context_dev/extract.ts @@ -0,0 +1,135 @@ +import type { ContextDevExtractParams, ContextDevExtractResponse } from '@/tools/context_dev/types' +import { + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevJsonHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevExtractTool: ToolConfig<ContextDevExtractParams, ContextDevExtractResponse> = + { + id: 'context_dev_extract', + name: 'Context.dev Extract', + description: 'Crawl a website and extract structured data matching a provided JSON schema.', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The starting website URL (must include http:// or https://)', + }, + schema: { + type: 'json', + required: true, + visibility: 'user-or-llm', + description: 'JSON Schema describing the structure of the data to extract', + }, + instructions: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Optional extraction guidance for link prioritization (max 2000 chars)', + }, + factCheck: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Require extracted values to be grounded in page facts (default: false)', + }, + followSubdomains: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Follow links on
subdomains of the starting domain (default: false)', + }, + maxPages: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of pages to analyze (1-50, default: 5)', + }, + maxDepth: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum link depth from the starting URL', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache duration in milliseconds (0-2592000000, default: 604800000)', + }, + stopAfterMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Soft crawl time budget in milliseconds (10000-110000, default: 80000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'POST', + url: () => `${CONTEXT_DEV_BASE_URL}/web/extract`, + headers: (params) => contextDevJsonHeaders(params.apiKey), + body: (params) => { + const body: Record<string, unknown> = { url: params.url, schema: params.schema } + if (params.instructions) body.instructions = params.instructions + if (params.factCheck != null) body.factCheck = params.factCheck + if (params.followSubdomains != null) body.followSubdomains = params.followSubdomains + if (params.maxPages != null) body.maxPages = params.maxPages + if (params.maxDepth != null) body.maxDepth = params.maxDepth + if (params.maxAgeMs != null) body.maxAgeMs = params.maxAgeMs + if (params.stopAfterMs != null) body.stopAfterMs = params.stopAfterMs + if (params.timeoutMS != null) body.timeoutMS = params.timeoutMS + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + status: data.status ?? '', + url: data.url ??
'', + urlsAnalyzed: data.urls_analyzed ?? [], + data: data.data ?? {}, + metadata: data.metadata ?? {}, + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + status: { type: 'string', description: 'Extraction status' }, + url: { type: 'string', description: 'The starting URL that was crawled' }, + urlsAnalyzed: { + type: 'array', + description: 'URLs that were analyzed during extraction', + items: { type: 'string', description: 'Analyzed page URL' }, + }, + data: { type: 'json', description: 'Structured data matching the requested schema' }, + metadata: { + type: 'object', + description: 'Crawl summary (numUrls, maxCrawlDepth, numSucceeded, numFailed, numSkipped)', + }, + ...CREDIT_OUTPUTS, + }, + } diff --git a/apps/sim/tools/context_dev/extract_product.ts b/apps/sim/tools/context_dev/extract_product.ts new file mode 100644 index 00000000000..a70f39fa96f --- /dev/null +++ b/apps/sim/tools/context_dev/extract_product.ts @@ -0,0 +1,90 @@ +import type { + ContextDevExtractProductParams, + ContextDevExtractProductResponse, +} from '@/tools/context_dev/types' +import { PRODUCT_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevJsonHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevExtractProductTool: ToolConfig< + ContextDevExtractProductParams, + ContextDevExtractProductResponse +> = { + id: 'context_dev_extract_product', + name: 'Context.dev Extract Product', + description: 'Detect and extract structured product details from a single product page URL.', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The product page URL (must include http:// or https://)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache duration in 
milliseconds (0-2592000000, default: 604800000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'POST', + url: () => `${CONTEXT_DEV_BASE_URL}/brand/ai/product`, + headers: (params) => contextDevJsonHeaders(params.apiKey), + body: (params) => { + const body: Record<string, unknown> = { url: params.url } + if (params.maxAgeMs != null) body.maxAgeMs = params.maxAgeMs + if (params.timeoutMS != null) body.timeoutMS = params.timeoutMS + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + isProductPage: data.is_product_page ?? false, + platform: data.platform ?? null, + product: data.product ?? null, + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + isProductPage: { type: 'boolean', description: 'Whether the URL is a product page' }, + platform: { + type: 'string', + description: 'Detected platform (amazon, tiktok_shop, etsy, generic)', + optional: true, + }, + product: { + type: 'object', + description: 'Extracted product details', + properties: PRODUCT_OUTPUT_PROPERTIES, + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/extract_products.ts b/apps/sim/tools/context_dev/extract_products.ts new file mode 100644 index 00000000000..aa22102a446 --- /dev/null +++ b/apps/sim/tools/context_dev/extract_products.ts @@ -0,0 +1,89 @@ +import type { + ContextDevExtractProductsParams, + ContextDevExtractProductsResponse, +} from '@/tools/context_dev/types' +import { PRODUCT_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevJsonHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from
'@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevExtractProductsTool: ToolConfig< + ContextDevExtractProductsParams, + ContextDevExtractProductsResponse +> = { + id: 'context_dev_extract_products', + name: 'Context.dev Extract Products', + description: "Extract the product catalog from a brand's website by domain (beta).", + version: '1.0.0', + + params: { + domain: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The domain to extract products from (e.g., "example.com")', + }, + maxProducts: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of products to extract (1-12)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache duration in milliseconds (0-2592000000, default: 604800000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'POST', + url: () => `${CONTEXT_DEV_BASE_URL}/brand/ai/products`, + headers: (params) => contextDevJsonHeaders(params.apiKey), + body: (params) => { + const body: Record<string, unknown> = { domain: params.domain } + if (params.maxProducts != null) body.maxProducts = params.maxProducts + if (params.maxAgeMs != null) body.maxAgeMs = params.maxAgeMs + if (params.timeoutMS != null) body.timeoutMS = params.timeoutMS + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + products: data.products ??
[], + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + products: { + type: 'array', + description: 'Extracted products with pricing, features, and metadata', + items: { type: 'object', properties: PRODUCT_OUTPUT_PROPERTIES }, + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/get_brand.ts b/apps/sim/tools/context_dev/get_brand.ts new file mode 100644 index 00000000000..daffe65f08e --- /dev/null +++ b/apps/sim/tools/context_dev/get_brand.ts @@ -0,0 +1,88 @@ +import type { ContextDevBrandResponse, ContextDevGetBrandParams } from '@/tools/context_dev/types' +import { BRAND_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + parseContextDevResponse, + transformBrandResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevGetBrandTool: ToolConfig<ContextDevGetBrandParams, ContextDevBrandResponse> = + { + id: 'context_dev_get_brand', + name: 'Context.dev Get Brand', + description: + 'Retrieve brand data for a domain: logos, colors, backdrops, socials, address, and industry.', + version: '1.0.0', + + params: { + domain: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The domain to retrieve brand data for (e.g., "airbnb.com")', + }, + forceLanguage: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Override the detected language with a supported language code', + }, + maxSpeed: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Skip time-consuming operations for a faster response (default: false)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache max age in milliseconds (86400000-31536000000, default: 7776000000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', +
}, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/brand/retrieve`) + appendParam(url.searchParams, 'domain', params.domain) + appendParam(url.searchParams, 'force_language', params.forceLanguage) + appendParam(url.searchParams, 'maxSpeed', params.maxSpeed) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { success: true, output: transformBrandResponse(data) } + }, + + outputs: { + status: { type: 'string', description: 'Retrieval status' }, + brand: { + type: 'object', + description: 'Brand data object', + properties: BRAND_OUTPUT_PROPERTIES, + }, + ...CREDIT_OUTPUTS, + }, + } diff --git a/apps/sim/tools/context_dev/get_brand_by_email.ts b/apps/sim/tools/context_dev/get_brand_by_email.ts new file mode 100644 index 00000000000..e93fbacdfdf --- /dev/null +++ b/apps/sim/tools/context_dev/get_brand_by_email.ts @@ -0,0 +1,93 @@ +import type { + ContextDevBrandResponse, + ContextDevGetBrandByEmailParams, +} from '@/tools/context_dev/types' +import { BRAND_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + parseContextDevResponse, + transformBrandResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevGetBrandByEmailTool: ToolConfig< + ContextDevGetBrandByEmailParams, + ContextDevBrandResponse +> = { + id: 'context_dev_get_brand_by_email', + name: 'Context.dev Get Brand by Email', + description: + 'Retrieve brand data from a work email address. 
Free/disposable emails are rejected (422).', + version: '1.0.0', + + params: { + email: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Work email address; the domain is extracted (free providers are rejected)', + }, + forceLanguage: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Override the detected language with a supported language code', + }, + maxSpeed: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Skip time-consuming operations for a faster response (default: false)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache max age in milliseconds (86400000-31536000000, default: 7776000000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/brand/retrieve-by-email`) + appendParam(url.searchParams, 'email', params.email) + appendParam(url.searchParams, 'force_language', params.forceLanguage) + appendParam(url.searchParams, 'maxSpeed', params.maxSpeed) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { success: true, output: transformBrandResponse(data) } + }, + + outputs: { + status: { type: 'string', description: 'Retrieval status' }, + brand: { + type: 'object', + description: 'Brand data object', + properties: BRAND_OUTPUT_PROPERTIES, + }, + ...CREDIT_OUTPUTS, + }, +} diff --git 
a/apps/sim/tools/context_dev/get_brand_by_name.ts b/apps/sim/tools/context_dev/get_brand_by_name.ts new file mode 100644 index 00000000000..78f8970e49e --- /dev/null +++ b/apps/sim/tools/context_dev/get_brand_by_name.ts @@ -0,0 +1,100 @@ +import type { + ContextDevBrandResponse, + ContextDevGetBrandByNameParams, +} from '@/tools/context_dev/types' +import { BRAND_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + parseContextDevResponse, + transformBrandResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevGetBrandByNameTool: ToolConfig< + ContextDevGetBrandByNameParams, + ContextDevBrandResponse +> = { + id: 'context_dev_get_brand_by_name', + name: 'Context.dev Get Brand by Name', + description: + 'Retrieve brand data by company name: logos, colors, socials, address, and industry.', + version: '1.0.0', + + params: { + name: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Company name to retrieve brand data for (3-30 chars, e.g., "Apple Inc")', + }, + countryGl: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'ISO 2-letter country code to prioritize (e.g., "us")', + }, + forceLanguage: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Override the detected language with a supported language code', + }, + maxSpeed: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Skip time-consuming operations for a faster response (default: false)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache max age in milliseconds (86400000-31536000000, default: 7776000000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + 
type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/brand/retrieve-by-name`) + appendParam(url.searchParams, 'name', params.name) + appendParam(url.searchParams, 'country_gl', params.countryGl) + appendParam(url.searchParams, 'force_language', params.forceLanguage) + appendParam(url.searchParams, 'maxSpeed', params.maxSpeed) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { success: true, output: transformBrandResponse(data) } + }, + + outputs: { + status: { type: 'string', description: 'Retrieval status' }, + brand: { + type: 'object', + description: 'Brand data object', + properties: BRAND_OUTPUT_PROPERTIES, + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/get_brand_by_ticker.ts b/apps/sim/tools/context_dev/get_brand_by_ticker.ts new file mode 100644 index 00000000000..ff0d92b4018 --- /dev/null +++ b/apps/sim/tools/context_dev/get_brand_by_ticker.ts @@ -0,0 +1,99 @@ +import type { + ContextDevBrandResponse, + ContextDevGetBrandByTickerParams, +} from '@/tools/context_dev/types' +import { BRAND_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + parseContextDevResponse, + transformBrandResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevGetBrandByTickerTool: ToolConfig< + ContextDevGetBrandByTickerParams, + ContextDevBrandResponse +> = { + id: 'context_dev_get_brand_by_ticker', + name: 'Context.dev Get Brand by Ticker', + description: 'Retrieve 
brand data for a public company by its stock ticker symbol.', + version: '1.0.0', + + params: { + ticker: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Stock ticker symbol (e.g., "AAPL", "GOOGL", "BRK.A")', + }, + tickerExchange: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Exchange code for the ticker (e.g., "NASDAQ", "NYSE", "LSE"). Default: NASDAQ', + }, + forceLanguage: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Override the detected language with a supported language code', + }, + maxSpeed: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Skip time-consuming operations for a faster response (default: false)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache max age in milliseconds (86400000-31536000000, default: 7776000000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/brand/retrieve-by-ticker`) + appendParam(url.searchParams, 'ticker', params.ticker) + appendParam(url.searchParams, 'ticker_exchange', params.tickerExchange) + appendParam(url.searchParams, 'force_language', params.forceLanguage) + appendParam(url.searchParams, 'maxSpeed', params.maxSpeed) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { success: true, output: 
transformBrandResponse(data) } + }, + + outputs: { + status: { type: 'string', description: 'Retrieval status' }, + brand: { + type: 'object', + description: 'Brand data object', + properties: BRAND_OUTPUT_PROPERTIES, + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/get_brand_simplified.ts b/apps/sim/tools/context_dev/get_brand_simplified.ts new file mode 100644 index 00000000000..f120104139c --- /dev/null +++ b/apps/sim/tools/context_dev/get_brand_simplified.ts @@ -0,0 +1,78 @@ +import type { + ContextDevGetBrandSimplifiedParams, + ContextDevGetBrandSimplifiedResponse, +} from '@/tools/context_dev/types' +import { SIMPLIFIED_BRAND_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + parseContextDevResponse, + transformBrandResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevGetBrandSimplifiedTool: ToolConfig< + ContextDevGetBrandSimplifiedParams, + ContextDevGetBrandSimplifiedResponse +> = { + id: 'context_dev_get_brand_simplified', + name: 'Context.dev Get Brand (Simplified)', + description: 'Retrieve essential brand data for a domain: title, colors, logos, and backdrops.', + version: '1.0.0', + + params: { + domain: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The domain to retrieve simplified brand data for (e.g., "airbnb.com")', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache max age in milliseconds (86400000-31536000000, default: 7776000000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = 
new URL(`${CONTEXT_DEV_BASE_URL}/brand/retrieve-simplified`) + appendParam(url.searchParams, 'domain', params.domain) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { success: true, output: transformBrandResponse(data) } + }, + + outputs: { + status: { type: 'string', description: 'Retrieval status' }, + brand: { + type: 'object', + description: 'Simplified brand data (domain, title, colors, logos, backdrops)', + properties: SIMPLIFIED_BRAND_OUTPUT_PROPERTIES, + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/identify_transaction.ts b/apps/sim/tools/context_dev/identify_transaction.ts new file mode 100644 index 00000000000..47cba312dce --- /dev/null +++ b/apps/sim/tools/context_dev/identify_transaction.ts @@ -0,0 +1,121 @@ +import type { + ContextDevBrandResponse, + ContextDevIdentifyTransactionParams, +} from '@/tools/context_dev/types' +import { BRAND_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + parseContextDevResponse, + transformBrandResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevIdentifyTransactionTool: ToolConfig< + ContextDevIdentifyTransactionParams, + ContextDevBrandResponse +> = { + id: 'context_dev_identify_transaction', + name: 'Context.dev Identify Transaction', + description: + 'Identify the brand behind a raw bank/card transaction descriptor and return its brand data.', + version: '1.0.0', + + params: { + transactionInfo: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The raw transaction descriptor or identifier to resolve to a brand', + }, 
+ countryGl: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'ISO 2-letter country code from the transaction (e.g., "us", "gb")', + }, + city: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'City name to prioritize in the search', + }, + mcc: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Merchant Category Code for the business category', + }, + phone: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Phone number from the transaction for verification', + }, + highConfidenceOnly: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Enforce additional verification steps for higher confidence (default: false)', + }, + forceLanguage: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Override the detected language with a supported language code', + }, + maxSpeed: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Skip time-consuming operations for a faster response (default: false)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/brand/transaction_identifier`) + appendParam(url.searchParams, 'transaction_info', params.transactionInfo) + appendParam(url.searchParams, 'country_gl', params.countryGl) + appendParam(url.searchParams, 'city', params.city) + appendParam(url.searchParams, 'mcc', params.mcc) + appendParam(url.searchParams, 'phone', params.phone) + appendParam(url.searchParams, 'high_confidence_only', params.highConfidenceOnly) + appendParam(url.searchParams, 'force_language', params.forceLanguage) + 
appendParam(url.searchParams, 'maxSpeed', params.maxSpeed) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { success: true, output: transformBrandResponse(data) } + }, + + outputs: { + status: { type: 'string', description: 'Identification status' }, + brand: { + type: 'object', + description: 'Brand data for the identified merchant', + properties: BRAND_OUTPUT_PROPERTIES, + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/index.ts b/apps/sim/tools/context_dev/index.ts new file mode 100644 index 00000000000..9848723ebf3 --- /dev/null +++ b/apps/sim/tools/context_dev/index.ts @@ -0,0 +1,22 @@ +export { contextDevClassifyNaicsTool } from '@/tools/context_dev/classify_naics' +export { contextDevClassifySicTool } from '@/tools/context_dev/classify_sic' +export { contextDevCrawlTool } from '@/tools/context_dev/crawl' +export { contextDevExtractTool } from '@/tools/context_dev/extract' +export { contextDevExtractProductTool } from '@/tools/context_dev/extract_product' +export { contextDevExtractProductsTool } from '@/tools/context_dev/extract_products' +export { contextDevGetBrandTool } from '@/tools/context_dev/get_brand' +export { contextDevGetBrandByEmailTool } from '@/tools/context_dev/get_brand_by_email' +export { contextDevGetBrandByNameTool } from '@/tools/context_dev/get_brand_by_name' +export { contextDevGetBrandByTickerTool } from '@/tools/context_dev/get_brand_by_ticker' +export { contextDevGetBrandSimplifiedTool } from '@/tools/context_dev/get_brand_simplified' +export { contextDevIdentifyTransactionTool } from '@/tools/context_dev/identify_transaction' +export { contextDevMapTool } from '@/tools/context_dev/map' +export { contextDevPrefetchByEmailTool } from '@/tools/context_dev/prefetch_by_email' +export { 
contextDevPrefetchDomainTool } from '@/tools/context_dev/prefetch_domain' +export { contextDevScrapeFontsTool } from '@/tools/context_dev/scrape_fonts' +export { contextDevScrapeHtmlTool } from '@/tools/context_dev/scrape_html' +export { contextDevScrapeImagesTool } from '@/tools/context_dev/scrape_images' +export { contextDevScrapeMarkdownTool } from '@/tools/context_dev/scrape_markdown' +export { contextDevScrapeStyleguideTool } from '@/tools/context_dev/scrape_styleguide' +export { contextDevScreenshotTool } from '@/tools/context_dev/screenshot' +export { contextDevSearchTool } from '@/tools/context_dev/search' diff --git a/apps/sim/tools/context_dev/map.ts b/apps/sim/tools/context_dev/map.ts new file mode 100644 index 00000000000..c8491b02d5e --- /dev/null +++ b/apps/sim/tools/context_dev/map.ts @@ -0,0 +1,90 @@ +import type { ContextDevMapParams, ContextDevMapResponse } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevMapTool: ToolConfig<ContextDevMapParams, ContextDevMapResponse> = { + id: 'context_dev_map', + name: 'Context.dev Map', + description: 'Build a sitemap of a domain and return every discovered page URL.', + version: '1.0.0', + + params: { + domain: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The domain to build a sitemap for (e.g., "example.com")', + }, + maxLinks: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of URLs to return (1-100000, default: 10000)', + }, + urlRegex: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'RE2-compatible regex to filter URLs (max 256 chars)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: 
{ + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/web/scrape/sitemap`) + appendParam(url.searchParams, 'domain', params.domain) + appendParam(url.searchParams, 'maxLinks', params.maxLinks) + appendParam(url.searchParams, 'urlRegex', params.urlRegex) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + domain: data.domain ?? '', + urls: data.urls ?? [], + meta: data.meta ?? {}, + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + domain: { type: 'string', description: 'The domain that was mapped' }, + urls: { + type: 'array', + description: 'All page URLs discovered from the sitemap', + items: { type: 'string', description: 'Page URL' }, + }, + meta: { + type: 'object', + description: 'Sitemap discovery stats (sitemapsDiscovered, sitemapsFetched, errors)', + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/prefetch_by_email.ts b/apps/sim/tools/context_dev/prefetch_by_email.ts new file mode 100644 index 00000000000..af0ed013cd6 --- /dev/null +++ b/apps/sim/tools/context_dev/prefetch_by_email.ts @@ -0,0 +1,75 @@ +import type { + ContextDevPrefetchByEmailParams, + ContextDevPrefetchResponse, +} from '@/tools/context_dev/types' +import { + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevJsonHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevPrefetchByEmailTool: ToolConfig< + ContextDevPrefetchByEmailParams, + ContextDevPrefetchResponse +> = { + id: 'context_dev_prefetch_by_email', + name: 
'Context.dev Prefetch by Email', + description: + "Queue an email's domain for brand-data prefetching to reduce later latency (subscribers; 0 credits). Free/disposable emails are rejected.", + version: '1.0.0', + + params: { + email: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Work email address whose domain should be prefetched (free providers rejected)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'POST', + url: () => `${CONTEXT_DEV_BASE_URL}/brand/prefetch-by-email`, + headers: (params) => contextDevJsonHeaders(params.apiKey), + body: (params) => { + const body: Record<string, unknown> = { email: params.email } + if (params.timeoutMS != null) body.timeoutMS = params.timeoutMS + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + status: data.status ?? '', + message: data.message ?? '', + domain: data.domain ?? 
'', + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + status: { type: 'string', description: 'Prefetch status' }, + message: { type: 'string', description: 'Human-readable prefetch result message' }, + domain: { type: 'string', description: 'The domain queued for prefetching' }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/prefetch_domain.ts b/apps/sim/tools/context_dev/prefetch_domain.ts new file mode 100644 index 00000000000..77d33d08520 --- /dev/null +++ b/apps/sim/tools/context_dev/prefetch_domain.ts @@ -0,0 +1,75 @@ +import type { + ContextDevPrefetchDomainParams, + ContextDevPrefetchResponse, +} from '@/tools/context_dev/types' +import { + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevJsonHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevPrefetchDomainTool: ToolConfig< + ContextDevPrefetchDomainParams, + ContextDevPrefetchResponse +> = { + id: 'context_dev_prefetch_domain', + name: 'Context.dev Prefetch Domain', + description: + 'Queue a domain for brand-data prefetching to reduce latency on later requests (subscribers; 0 credits).', + version: '1.0.0', + + params: { + domain: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The domain to prefetch brand data for (e.g., "example.com")', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'POST', + url: () => `${CONTEXT_DEV_BASE_URL}/brand/prefetch`, + headers: (params) => contextDevJsonHeaders(params.apiKey), + body: (params) => { + const body: Record<string, unknown> = { domain: params.domain } + if (params.timeoutMS != null) body.timeoutMS = params.timeoutMS + return 
body + }, + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + status: data.status ?? '', + message: data.message ?? '', + domain: data.domain ?? '', + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + status: { type: 'string', description: 'Prefetch status' }, + message: { type: 'string', description: 'Human-readable prefetch result message' }, + domain: { type: 'string', description: 'The domain queued for prefetching' }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/scrape_fonts.ts b/apps/sim/tools/context_dev/scrape_fonts.ts new file mode 100644 index 00000000000..c64c676bf9e --- /dev/null +++ b/apps/sim/tools/context_dev/scrape_fonts.ts @@ -0,0 +1,92 @@ +import type { + ContextDevScrapeFontsParams, + ContextDevScrapeFontsResponse, +} from '@/tools/context_dev/types' +import { FONT_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevScrapeFontsTool: ToolConfig< + ContextDevScrapeFontsParams, + ContextDevScrapeFontsResponse +> = { + id: 'context_dev_scrape_fonts', + name: 'Context.dev Scrape Fonts', + description: 'Extract the font families, usage stats, and font files used by a domain.', + version: '1.0.0', + + params: { + domain: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The domain to extract fonts from (e.g., "example.com")', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache max age in milliseconds (86400000-31536000000, default: 7776000000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in 
milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/web/fonts`) + appendParam(url.searchParams, 'domain', params.domain) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + status: data.status ?? '', + domain: data.domain ?? '', + fonts: data.fonts ?? [], + fontLinks: data.fontLinks ?? {}, + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + status: { type: 'string', description: 'Extraction status' }, + domain: { type: 'string', description: 'The domain that was analyzed' }, + fonts: { + type: 'array', + description: 'Fonts with usage statistics and fallbacks', + items: { type: 'object', properties: FONT_OUTPUT_PROPERTIES }, + }, + fontLinks: { + type: 'json', + description: 'Font family download links keyed by font name (type, files, category)', + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/scrape_html.ts b/apps/sim/tools/context_dev/scrape_html.ts new file mode 100644 index 00000000000..b4cf5d42e2f --- /dev/null +++ b/apps/sim/tools/context_dev/scrape_html.ts @@ -0,0 +1,106 @@ +import type { + ContextDevScrapeHtmlParams, + ContextDevScrapeHtmlResponse, +} from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevScrapeHtmlTool: ToolConfig< + ContextDevScrapeHtmlParams, + 
ContextDevScrapeHtmlResponse +> = { + id: 'context_dev_scrape_html', + name: 'Context.dev Scrape HTML', + description: 'Scrape any URL and return the raw HTML content of the page.', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The full URL to scrape (must include http:// or https://)', + }, + useMainContentOnly: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Return only main content, excluding headers, footers, and navigation', + }, + includeFrames: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Render iframe contents inline into the returned HTML (default: false)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache duration in milliseconds (0-2592000000, default: 86400000)', + }, + waitForMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Browser wait time after page load in milliseconds (0-30000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/web/scrape/html`) + appendParam(url.searchParams, 'url', params.url) + appendParam(url.searchParams, 'useMainContentOnly', params.useMainContentOnly) + appendParam(url.searchParams, 'includeFrames', params.includeFrames) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'waitForMs', params.waitForMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const 
data = await parseContextDevResponse(response) + return { + success: true, + output: { + html: data.html ?? '', + url: data.url ?? '', + type: data.type ?? 'html', + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + html: { type: 'string', description: 'Raw HTML content of the page' }, + url: { type: 'string', description: 'The scraped URL' }, + type: { + type: 'string', + description: 'Detected content type (html, xml, json, text, csv, markdown, svg, pdf)', + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/scrape_images.ts b/apps/sim/tools/context_dev/scrape_images.ts new file mode 100644 index 00000000000..d2dfade8acb --- /dev/null +++ b/apps/sim/tools/context_dev/scrape_images.ts @@ -0,0 +1,115 @@ +import type { + ContextDevScrapeImagesParams, + ContextDevScrapeImagesResponse, +} from '@/tools/context_dev/types' +import { IMAGE_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevScrapeImagesTool: ToolConfig< + ContextDevScrapeImagesParams, + ContextDevScrapeImagesResponse +> = { + id: 'context_dev_scrape_images', + name: 'Context.dev Scrape Images', + description: 'Discover every image asset on a page, with optional dimension and type enrichment.', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The full URL to scrape images from (must include http:// or https://)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache duration in milliseconds (0-2592000000, default: 86400000)', + }, + waitForMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Browser wait time after page load in milliseconds (0-30000)', 
+ }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + enrichResolution: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Measure image dimensions (enables 5-credit enrichment)', + }, + enrichHostedUrl: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Host images on a CDN and return their URL and MIME type (enables enrichment)', + }, + enrichClassification: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Classify each image by visual asset type (enables enrichment)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/web/scrape/images`) + appendParam(url.searchParams, 'url', params.url) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'waitForMs', params.waitForMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + appendParam(url.searchParams, 'enrichment[resolution]', params.enrichResolution) + appendParam(url.searchParams, 'enrichment[hostedUrl]', params.enrichHostedUrl) + appendParam(url.searchParams, 'enrichment[classification]', params.enrichClassification) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + success: data.success ?? true, + images: data.images ?? [], + url: data.url ?? 
'', + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + success: { type: 'boolean', description: 'Whether the scrape succeeded' }, + images: { + type: 'array', + description: 'Discovered image assets with source, element, type, and optional enrichment', + items: { type: 'object', properties: IMAGE_OUTPUT_PROPERTIES }, + }, + url: { type: 'string', description: 'The scraped URL' }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/scrape_markdown.ts b/apps/sim/tools/context_dev/scrape_markdown.ts new file mode 100644 index 00000000000..8faed1c8a57 --- /dev/null +++ b/apps/sim/tools/context_dev/scrape_markdown.ts @@ -0,0 +1,115 @@ +import type { + ContextDevScrapeMarkdownParams, + ContextDevScrapeMarkdownResponse, +} from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevScrapeMarkdownTool: ToolConfig< + ContextDevScrapeMarkdownParams, + ContextDevScrapeMarkdownResponse +> = { + id: 'context_dev_scrape_markdown', + name: 'Context.dev Scrape Markdown', + description: 'Scrape any URL and return clean, LLM-ready markdown content.', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The full URL to scrape (must include http:// or https://)', + }, + useMainContentOnly: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Return only main content, excluding headers, footers, and navigation', + }, + includeLinks: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Preserve hyperlinks in the markdown output (default: true)', + }, + includeImages: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Include image references in the markdown 
output (default: false)', + }, + includeFrames: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Render iframe contents inline (default: false)', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache duration in milliseconds (0-2592000000, default: 86400000)', + }, + waitForMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Browser wait time after page load in milliseconds (0-30000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/web/scrape/markdown`) + appendParam(url.searchParams, 'url', params.url) + appendParam(url.searchParams, 'useMainContentOnly', params.useMainContentOnly) + appendParam(url.searchParams, 'includeLinks', params.includeLinks) + appendParam(url.searchParams, 'includeImages', params.includeImages) + appendParam(url.searchParams, 'includeFrames', params.includeFrames) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'waitForMs', params.waitForMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + markdown: data.markdown ?? '', + url: data.url ?? 
'', + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + markdown: { type: 'string', description: 'Page content as clean markdown' }, + url: { type: 'string', description: 'The scraped URL' }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/scrape_styleguide.ts b/apps/sim/tools/context_dev/scrape_styleguide.ts new file mode 100644 index 00000000000..3be34c35d44 --- /dev/null +++ b/apps/sim/tools/context_dev/scrape_styleguide.ts @@ -0,0 +1,87 @@ +import type { + ContextDevScrapeStyleguideParams, + ContextDevScrapeStyleguideResponse, +} from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevScrapeStyleguideTool: ToolConfig< + ContextDevScrapeStyleguideParams, + ContextDevScrapeStyleguideResponse +> = { + id: 'context_dev_scrape_styleguide', + name: 'Context.dev Scrape Styleguide', + description: + "Extract a domain's design system: colors, typography, spacing, shadows, and UI components.", + version: '1.0.0', + + params: { + domain: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The domain to extract the styleguide from (e.g., "example.com")', + }, + maxAgeMs: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Cache max age in milliseconds (86400000-31536000000, default: 7776000000)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/web/styleguide`) + appendParam(url.searchParams, 'domain', params.domain) + 
appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + status: data.status ?? '', + domain: data.domain ?? '', + styleguide: data.styleguide ?? null, + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + status: { type: 'string', description: 'Extraction status' }, + domain: { type: 'string', description: 'The domain that was analyzed' }, + styleguide: { + type: 'json', + description: + 'Design system: mode, colors, typography, elementSpacing, shadows, fontLinks, components', + }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/screenshot.ts b/apps/sim/tools/context_dev/screenshot.ts new file mode 100644 index 00000000000..130402a8023 --- /dev/null +++ b/apps/sim/tools/context_dev/screenshot.ts @@ -0,0 +1,165 @@ +import type { + ContextDevScreenshotParams, + ContextDevScreenshotResponse, +} from '@/tools/context_dev/types' +import { + appendParam, + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig, ToolFileData } from '@/tools/types' + +/** Maps a lowercase image file extension to its MIME type. */ +const IMAGE_MIME_BY_EXTENSION: Record = { + png: 'image/png', + jpg: 'image/jpeg', + jpeg: 'image/jpeg', + webp: 'image/webp', + gif: 'image/gif', + avif: 'image/avif', +} + +/** + * Derives the file extension and MIME type for a stored screenshot from its URL, + * falling back to PNG when the URL has no recognizable image extension. 
+ */
+function screenshotFileMeta(url: string): { extension: string; mimeType: string } {
+  try {
+    // `new URL` throws on input it cannot parse; the catch below routes that case to
+    // the PNG default.
+    const ext = new URL(url).pathname.split('.').pop()?.toLowerCase() ?? ''
+    // Own-property check: a bare `IMAGE_MIME_BY_EXTENSION[ext]` lookup also resolves
+    // members inherited from Object.prototype (a path ending in `.constructor` or
+    // `.__proto__`), which are truthy and would be returned as a non-string MIME type.
+    if (Object.prototype.hasOwnProperty.call(IMAGE_MIME_BY_EXTENSION, ext)) {
+      return { extension: ext, mimeType: IMAGE_MIME_BY_EXTENSION[ext] }
+    }
+  } catch {
+    // Unparseable URL — fall back to the default below.
+  }
+  // No recognized image extension: default to PNG.
+  return { extension: 'png', mimeType: 'image/png' }
+}
+
+export const contextDevScreenshotTool: ToolConfig<
+  ContextDevScreenshotParams,
+  ContextDevScreenshotResponse
+> = {
+  id: 'context_dev_screenshot',
+  name: 'Context.dev Screenshot',
+  description: 'Capture a screenshot of any web page and store it as a downloadable image file.',
+  version: '1.0.0',
+
+  params: {
+    url: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'The full URL to capture (must include http:// or https://)',
+    },
+    fullScreenshot: {
+      type: 'boolean',
+      required: false,
+      visibility: 'user-or-llm',
+      description: 'Capture the full scrollable page instead of just the viewport (default: false)',
+    },
+    handleCookiePopup: {
+      type: 'boolean',
+      required: false,
+      visibility: 'user-or-llm',
+      description: 'Attempt to dismiss cookie banners before capturing (default: false)',
+    },
+    viewportWidth: {
+      type: 'number',
+      required: false,
+      visibility: 'user-or-llm',
+      description: 'Viewport width in pixels (240-7680, default: 1920)',
+    },
+    viewportHeight: {
+      type: 'number',
+      required: false,
+      visibility: 'user-or-llm',
+      description: 'Viewport height in pixels (240-4320, default: 1080)',
+    },
+    maxAgeMs: {
+      type: 'number',
+      required: false,
+      visibility: 'user-or-llm',
+      description: 'Cache duration in milliseconds (0-2592000000, default: 86400000)',
+    },
+    waitForMs: {
+      type: 'number',
+      required: false,
+      visibility: 'user-or-llm',
+      description: 'Post-load delay before capturing in milliseconds (0-30000, default: 3000)',
+    },
+    timeoutMS: {
+      type: 'number',
+      required: false,
+      visibility:
'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const url = new URL(`${CONTEXT_DEV_BASE_URL}/web/screenshot`) + appendParam(url.searchParams, 'directUrl', params.url) + appendParam(url.searchParams, 'fullScreenshot', params.fullScreenshot) + appendParam(url.searchParams, 'handleCookiePopup', params.handleCookiePopup) + appendParam(url.searchParams, 'viewport[width]', params.viewportWidth) + appendParam(url.searchParams, 'viewport[height]', params.viewportHeight) + appendParam(url.searchParams, 'maxAgeMs', params.maxAgeMs) + appendParam(url.searchParams, 'waitForMs', params.waitForMs) + appendParam(url.searchParams, 'timeoutMS', params.timeoutMS) + return url.toString() + }, + headers: (params) => contextDevHeaders(params.apiKey), + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + const screenshotUrl: string = data.screenshot ?? '' + const domain: string | null = data.domain ?? null + + const { extension, mimeType } = screenshotFileMeta(screenshotUrl) + const file: ToolFileData | undefined = screenshotUrl + ? { + name: `${domain ?? 'screenshot'}.${extension}`, + mimeType, + url: screenshotUrl, + } + : undefined + + return { + success: true, + output: { + ...(file ? { file } : {}), + screenshotUrl, + screenshotType: data.screenshotType ?? null, + domain, + width: data.width ?? null, + height: data.height ?? 
null, + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + file: { type: 'file', description: 'Stored screenshot image file', optional: true }, + screenshotUrl: { type: 'string', description: 'Public URL of the captured screenshot' }, + screenshotType: { + type: 'string', + description: 'Screenshot type (viewport or fullPage)', + optional: true, + }, + domain: { type: 'string', description: 'Domain that was captured', optional: true }, + width: { type: 'number', description: 'Screenshot width in pixels', optional: true }, + height: { type: 'number', description: 'Screenshot height in pixels', optional: true }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/search.ts b/apps/sim/tools/context_dev/search.ts new file mode 100644 index 00000000000..2fd150d0b41 --- /dev/null +++ b/apps/sim/tools/context_dev/search.ts @@ -0,0 +1,108 @@ +import type { ContextDevSearchParams, ContextDevSearchResponse } from '@/tools/context_dev/types' +import { SEARCH_RESULT_OUTPUT_PROPERTIES } from '@/tools/context_dev/types' +import { + CONTEXT_DEV_BASE_URL, + CREDIT_OUTPUTS, + contextDevJsonHeaders, + extractCreditMetadata, + parseContextDevResponse, +} from '@/tools/context_dev/utils' +import type { ToolConfig } from '@/tools/types' + +export const contextDevSearchTool: ToolConfig = { + id: 'context_dev_search', + name: 'Context.dev Search', + description: 'Search the web with natural language and optionally scrape results to markdown.', + version: '1.0.0', + + params: { + query: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The natural language search query (1-500 characters)', + }, + includeDomains: { + type: 'array', + required: false, + visibility: 'user-or-llm', + description: 'Only return results from these domains', + }, + excludeDomains: { + type: 'array', + required: false, + visibility: 'user-or-llm', + description: 'Exclude results from these domains', + }, + freshness: { + type: 'string', + 
required: false, + visibility: 'user-or-llm', + description: 'Recency filter (last_24_hours, last_week, last_month, last_year)', + }, + queryFanout: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Expand the query into parallel variants for broader coverage', + }, + markdownEnabled: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Scrape each result page to markdown (default: false)', + }, + timeoutMS: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Request timeout in milliseconds (1000-300000)', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Context.dev API key', + }, + }, + + request: { + method: 'POST', + url: () => `${CONTEXT_DEV_BASE_URL}/web/search`, + headers: (params) => contextDevJsonHeaders(params.apiKey), + body: (params) => { + const body: Record = { query: params.query } + if (params.includeDomains?.length) body.includeDomains = params.includeDomains + if (params.excludeDomains?.length) body.excludeDomains = params.excludeDomains + if (params.freshness) body.freshness = params.freshness + if (params.queryFanout != null) body.queryFanout = params.queryFanout + if (params.markdownEnabled != null) { + body.markdownOptions = { enabled: params.markdownEnabled } + } + if (params.timeoutMS != null) body.timeoutMS = params.timeoutMS + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await parseContextDevResponse(response) + return { + success: true, + output: { + results: data.results ?? [], + query: data.query ?? 
'', + ...extractCreditMetadata(data.key_metadata), + }, + } + }, + + outputs: { + results: { + type: 'array', + description: 'Search results with url, title, description, relevance, and optional markdown', + items: { type: 'object', properties: SEARCH_RESULT_OUTPUT_PROPERTIES }, + }, + query: { type: 'string', description: 'The query that was searched' }, + ...CREDIT_OUTPUTS, + }, +} diff --git a/apps/sim/tools/context_dev/types.ts b/apps/sim/tools/context_dev/types.ts new file mode 100644 index 00000000000..63cde60c786 --- /dev/null +++ b/apps/sim/tools/context_dev/types.ts @@ -0,0 +1,464 @@ +import type { ToolFileData, ToolResponse } from '@/tools/types' + +/** Credit accounting fields surfaced on every Context.dev tool output. */ +interface CreditFields { + creditsConsumed: number | null + creditsRemaining: number | null +} + +export interface ContextDevScrapeMarkdownParams { + apiKey: string + url: string + useMainContentOnly?: boolean + includeLinks?: boolean + includeImages?: boolean + includeFrames?: boolean + maxAgeMs?: number + waitForMs?: number + timeoutMS?: number +} + +export interface ContextDevScrapeMarkdownResponse extends ToolResponse { + output: CreditFields & { + markdown: string + url: string + } +} + +export interface ContextDevScrapeHtmlParams { + apiKey: string + url: string + useMainContentOnly?: boolean + includeFrames?: boolean + maxAgeMs?: number + waitForMs?: number + timeoutMS?: number +} + +export interface ContextDevScrapeHtmlResponse extends ToolResponse { + output: CreditFields & { + html: string + url: string + type: string + } +} + +export interface ContextDevScreenshotParams { + apiKey: string + url: string + fullScreenshot?: boolean + handleCookiePopup?: boolean + viewportWidth?: number + viewportHeight?: number + maxAgeMs?: number + waitForMs?: number + timeoutMS?: number +} + +export interface ContextDevScreenshotResponse extends ToolResponse { + output: CreditFields & { + file?: ToolFileData + screenshotUrl: string + 
screenshotType: string | null + domain: string | null + width: number | null + height: number | null + } +} + +export interface ContextDevScrapeImagesParams { + apiKey: string + url: string + maxAgeMs?: number + waitForMs?: number + timeoutMS?: number + enrichResolution?: boolean + enrichHostedUrl?: boolean + enrichClassification?: boolean +} + +export interface ContextDevScrapeImagesResponse extends ToolResponse { + output: CreditFields & { + success: boolean + images: Array> + url: string + } +} + +export interface ContextDevCrawlParams { + apiKey: string + url: string + maxPages?: number + maxDepth?: number + urlRegex?: string + includeLinks?: boolean + includeImages?: boolean + useMainContentOnly?: boolean + followSubdomains?: boolean + maxAgeMs?: number + waitForMs?: number + stopAfterMs?: number + timeoutMS?: number +} + +export interface ContextDevCrawlResponse extends ToolResponse { + output: CreditFields & { + results: Array<{ + markdown: string + metadata: Record + }> + metadata: Record + } +} + +export interface ContextDevMapParams { + apiKey: string + domain: string + maxLinks?: number + urlRegex?: string + timeoutMS?: number +} + +export interface ContextDevMapResponse extends ToolResponse { + output: CreditFields & { + domain: string + urls: string[] + meta: Record + } +} + +export interface ContextDevSearchParams { + apiKey: string + query: string + includeDomains?: string[] + excludeDomains?: string[] + freshness?: string + queryFanout?: boolean + markdownEnabled?: boolean + timeoutMS?: number +} + +export interface ContextDevSearchResponse extends ToolResponse { + output: CreditFields & { + results: Array> + query: string + } +} + +export interface ContextDevExtractParams { + apiKey: string + url: string + schema: Record + instructions?: string + factCheck?: boolean + followSubdomains?: boolean + maxPages?: number + maxDepth?: number + maxAgeMs?: number + stopAfterMs?: number + timeoutMS?: number +} + +export interface ContextDevExtractResponse 
extends ToolResponse { + output: CreditFields & { + status: string + url: string + urlsAnalyzed: string[] + data: Record + metadata: Record + } +} + +export interface ContextDevExtractProductParams { + apiKey: string + url: string + maxAgeMs?: number + timeoutMS?: number +} + +export interface ContextDevExtractProductResponse extends ToolResponse { + output: CreditFields & { + isProductPage: boolean + platform: string | null + product: Record | null + } +} + +export interface ContextDevExtractProductsParams { + apiKey: string + domain: string + maxProducts?: number + maxAgeMs?: number + timeoutMS?: number +} + +export interface ContextDevExtractProductsResponse extends ToolResponse { + output: CreditFields & { + products: Array> + } +} + +export interface ContextDevScrapeFontsParams { + apiKey: string + domain: string + maxAgeMs?: number + timeoutMS?: number +} + +export interface ContextDevScrapeFontsResponse extends ToolResponse { + output: CreditFields & { + status: string + domain: string + fonts: Array> + fontLinks: Record + } +} + +export interface ContextDevScrapeStyleguideParams { + apiKey: string + domain: string + maxAgeMs?: number + timeoutMS?: number +} + +export interface ContextDevScrapeStyleguideResponse extends ToolResponse { + output: CreditFields & { + status: string + domain: string + styleguide: Record | null + } +} + +export interface ContextDevClassifyNaicsParams { + apiKey: string + input: string + minResults?: number + maxResults?: number + timeoutMS?: number +} + +export interface ContextDevClassifyNaicsResponse extends ToolResponse { + output: CreditFields & { + status: string + domain: string | null + type: string | null + codes: Array> + } +} + +export interface ContextDevClassifySicParams { + apiKey: string + input: string + type?: string + minResults?: number + maxResults?: number + timeoutMS?: number +} + +export interface ContextDevClassifySicResponse extends ToolResponse { + output: CreditFields & { + status: string + domain: string 
| null + type: string | null + classification: string | null + codes: Array> + } +} + +/** Shared response shape for every brand-returning endpoint (full brand object). */ +export interface ContextDevBrandResponse extends ToolResponse { + output: CreditFields & { + status: string + brand: Record | null + } +} + +export interface ContextDevGetBrandParams { + apiKey: string + domain: string + forceLanguage?: string + maxSpeed?: boolean + maxAgeMs?: number + timeoutMS?: number +} + +export interface ContextDevGetBrandByNameParams { + apiKey: string + name: string + countryGl?: string + forceLanguage?: string + maxSpeed?: boolean + maxAgeMs?: number + timeoutMS?: number +} + +export interface ContextDevGetBrandByEmailParams { + apiKey: string + email: string + forceLanguage?: string + maxSpeed?: boolean + maxAgeMs?: number + timeoutMS?: number +} + +export interface ContextDevGetBrandByTickerParams { + apiKey: string + ticker: string + tickerExchange?: string + forceLanguage?: string + maxSpeed?: boolean + maxAgeMs?: number + timeoutMS?: number +} + +export interface ContextDevIdentifyTransactionParams { + apiKey: string + transactionInfo: string + countryGl?: string + city?: string + mcc?: string + phone?: number + highConfidenceOnly?: boolean + forceLanguage?: string + maxSpeed?: boolean + timeoutMS?: number +} + +export interface ContextDevGetBrandSimplifiedParams { + apiKey: string + domain: string + maxAgeMs?: number + timeoutMS?: number +} + +export interface ContextDevGetBrandSimplifiedResponse extends ToolResponse { + output: CreditFields & { + status: string + brand: Record | null + } +} + +export interface ContextDevPrefetchByEmailParams { + apiKey: string + email: string + timeoutMS?: number +} + +export interface ContextDevPrefetchDomainParams { + apiKey: string + domain: string + timeoutMS?: number +} + +/** Shared response shape for the prefetch utility endpoints. 
*/
+export interface ContextDevPrefetchResponse extends ToolResponse {
+  output: CreditFields & {
+    status: string
+    message: string
+    domain: string
+  }
+}
+
+/**
+ * Output schema for a single web search result.
+ *
+ * Every entry is a `{ type, description }` pair. `url`, `title`, `description`, and
+ * `relevance` are declared as `string`; `markdown` is declared as `json` and, per its
+ * own description, applies when markdown scraping is enabled.
+ * `as const` keeps each `type` as a string literal instead of widening it to `string`.
+ */
+export const SEARCH_RESULT_OUTPUT_PROPERTIES = {
+  url: { type: 'string', description: 'Result page URL' },
+  title: { type: 'string', description: 'Result page title' },
+  description: { type: 'string', description: 'Result snippet/description' },
+  relevance: { type: 'string', description: 'Relevance rating (high, medium, low)' },
+  markdown: {
+    type: 'json',
+    description: 'Scraped markdown for the result (when markdown scraping is enabled)',
+  },
+} as const
+
+/**
+ * Output schema for a single crawled page: the page `markdown` as a string plus a
+ * `metadata` JSON bag whose description lists url, title, crawlDepth, and statusCode.
+ */
+export const CRAWL_RESULT_OUTPUT_PROPERTIES = {
+  markdown: { type: 'string', description: 'Page content as markdown' },
+  metadata: { type: 'json', description: 'Page metadata (url, title, crawlDepth, statusCode)' },
+} as const
+
+/**
+ * Output schema for a single industry classification code.
+ *
+ * All three entries are declared as `string`, including `confidence`, whose
+ * description lists the values high, medium, and low.
+ */
+export const CLASSIFICATION_CODE_OUTPUT_PROPERTIES = {
+  code: { type: 'string', description: 'Industry code' },
+  name: { type: 'string', description: 'Industry name' },
+  confidence: { type: 'string', description: 'Match confidence (high, medium, low)' },
+} as const
+
+/** Output schema for the full brand object returned by brand-intelligence endpoints.
*/
+export const BRAND_OUTPUT_PROPERTIES = {
+  domain: { type: 'string', description: 'Brand domain' },
+  title: { type: 'string', description: 'Brand title' },
+  description: { type: 'string', description: 'Brand description' },
+  slogan: { type: 'string', description: 'Brand slogan' },
+  // Structured values are declared as `json` rather than modelled field by field.
+  colors: { type: 'json', description: 'Brand colors (hex and name)' },
+  logos: { type: 'json', description: 'Brand logos with mode, colors, resolution, and type' },
+  backdrops: { type: 'json', description: 'Brand backdrop images' },
+  socials: { type: 'json', description: 'Social media profiles (type and url)' },
+  address: { type: 'json', description: 'Brand address' },
+  stock: { type: 'json', description: 'Stock info (ticker and exchange)' },
+  // NOTE(review): snake_case keys (is_nsfw, primary_language) presumably mirror the
+  // API's field names — confirm against the API response.
+  is_nsfw: { type: 'boolean', description: 'Whether the brand contains adult content' },
+  email: { type: 'string', description: 'Brand contact email' },
+  phone: { type: 'string', description: 'Brand contact phone' },
+  industries: { type: 'json', description: 'Industry taxonomy (eic industry/subindustry pairs)' },
+  links: { type: 'json', description: 'Key brand links (careers, privacy, terms, blog, pricing)' },
+  primary_language: { type: 'string', description: 'Primary language of the brand site' },
+} as const
+
+/**
+ * Output schema for the reduced brand object returned by the simplified endpoint.
+ *
+ * Holds five of the keys declared in `BRAND_OUTPUT_PROPERTIES` (domain, title, colors,
+ * logos, backdrops), each with the same `type` and `description` as its full-schema
+ * counterpart.
+ */
+export const SIMPLIFIED_BRAND_OUTPUT_PROPERTIES = {
+  domain: { type: 'string', description: 'Brand domain' },
+  title: { type: 'string', description: 'Brand title' },
+  colors: { type: 'json', description: 'Brand colors (hex and name)' },
+  logos: { type: 'json', description: 'Brand logos with mode, colors, resolution, and type' },
+  backdrops: { type: 'json', description: 'Brand backdrop images' },
+} as const
+
+/** Output schema for a single extracted product.
*/
+export const PRODUCT_OUTPUT_PROPERTIES = {
+  name: { type: 'string', description: 'Product name' },
+  description: { type: 'string', description: 'Product description' },
+  // `price` is the only numeric entry; its unit is carried separately by `currency`.
+  price: { type: 'number', description: 'Product price' },
+  currency: { type: 'string', description: 'Price currency' },
+  billing_frequency: {
+    type: 'string',
+    description: 'Billing frequency (monthly, yearly, one_time, usage_based)',
+  },
+  pricing_model: {
+    type: 'string',
+    description: 'Pricing model (per_seat, flat, tiered, freemium, custom)',
+  },
+  url: { type: 'string', description: 'Product URL' },
+  category: { type: 'string', description: 'Product category' },
+  // List-like values (features, target_audience, tags, images) are declared as `json`.
+  features: { type: 'json', description: 'Product features' },
+  target_audience: { type: 'json', description: 'Target audience' },
+  tags: { type: 'json', description: 'Product tags' },
+  image_url: { type: 'string', description: 'Primary product image URL' },
+  images: { type: 'json', description: 'Product image URLs' },
+  sku: { type: 'string', description: 'Product SKU' },
+} as const
+
+/**
+ * Output schema for a single font usage entry.
+ *
+ * Combines the family name (`font`), two `json` entries (`uses`, `fallbacks`), and four
+ * numeric usage metrics: element and word counts plus their percentage counterparts.
+ */
+export const FONT_OUTPUT_PROPERTIES = {
+  font: { type: 'string', description: 'Font family name' },
+  uses: { type: 'json', description: 'Where the font is used' },
+  fallbacks: { type: 'json', description: 'Fallback font families' },
+  num_elements: { type: 'number', description: 'Number of elements using the font' },
+  num_words: { type: 'number', description: 'Number of words rendered in the font' },
+  percent_words: { type: 'number', description: 'Percent of words using the font' },
+  percent_elements: { type: 'number', description: 'Percent of elements using the font' },
+} as const
+
+/** Output schema for a single scraped image.
*/
+export const IMAGE_OUTPUT_PROPERTIES = {
+  src: { type: 'string', description: 'Image source URL or data' },
+  element: {
+    type: 'string',
+    description: 'Source element (img, svg, link, source, video, css, object, meta, background)',
+  },
+  type: { type: 'string', description: 'Image representation (url, html, base64)' },
+  // `alt` is the only entry in this schema flagged `optional: true`.
+  alt: { type: 'string', description: 'Alt text', optional: true },
+  enrichment: {
+    type: 'json',
+    description: 'Optional enrichment (width, height, mimetype, url, type) when requested',
+  },
+} as const
diff --git a/apps/sim/tools/context_dev/utils.ts b/apps/sim/tools/context_dev/utils.ts
new file mode 100644
index 00000000000..b46a28d626a
--- /dev/null
+++ b/apps/sim/tools/context_dev/utils.ts
@@ -0,0 +1,97 @@
+/** Base URL for all Context.dev API endpoints (no trailing slash; callers append `/path`). */
+export const CONTEXT_DEV_BASE_URL = 'https://api.context.dev/v1'
+
+/**
+ * Builds the standard Context.dev request headers with Bearer authentication.
+ *
+ * @param apiKey - API key interpolated verbatim into `Authorization: Bearer <apiKey>`.
+ * @returns An object with `Authorization` and `Accept: application/json`.
+ *
+ * NOTE(review): the return type reads as bare `Record` here; its type arguments look
+ * lost — confirm against the original file.
+ */
+export function contextDevHeaders(apiKey: string): Record {
+  return {
+    Authorization: `Bearer ${apiKey}`,
+    Accept: 'application/json',
+  }
+}
+
+/**
+ * Builds JSON request headers with Bearer authentication for POST endpoints.
+ *
+ * @param apiKey - API key forwarded to `contextDevHeaders`.
+ * @returns The headers from `contextDevHeaders` plus `Content-Type: application/json`.
+ *
+ * NOTE(review): same bare `Record` return type as `contextDevHeaders` — confirm.
+ */
+export function contextDevJsonHeaders(apiKey: string): Record {
+  return {
+    ...contextDevHeaders(apiKey),
+    'Content-Type': 'application/json',
+  }
+}
+
+/**
+ * Throws a descriptive error when a Context.dev response is not successful.
+ * Returns the parsed JSON body on success.
+ *
+ * @param response - The fetch `Response` to inspect.
+ * @returns The result of `response.json()`; the body is not validated or narrowed here.
+ * @throws Error with the message `Context.dev API error (<status>): <body text>` when
+ *   `response.ok` is false. The body is read with `response.text()`, so it is included
+ *   raw rather than parsed.
+ *
+ * NOTE(review): the return type reads as bare `Promise`; its type argument looks lost —
+ * confirm against the original file.
+ */
+export async function parseContextDevResponse(response: Response): Promise {
+  if (!response.ok) {
+    const errorText = await response.text()
+    throw new Error(`Context.dev API error (${response.status}): ${errorText}`)
+  }
+  return response.json()
+}
+
+/** Shape of the credit accounting object present on every Context.dev response.
*/
+interface ContextDevKeyMetadata {
+  // Both fields are optional; extractCreditMetadata maps a missing one to `null`.
+  credits_consumed?: number
+  credits_remaining?: number
+}
+
+/**
+ * Extracts the credit accounting fields shared by every Context.dev response.
+ *
+ * @param keyMetadata - The response's `key_metadata` object, or `undefined` when absent.
+ * @returns `creditsConsumed` / `creditsRemaining` copied from the snake_case fields.
+ *   Each falls back to `null` when `keyMetadata` is undefined or the field is missing.
+ */
+export function extractCreditMetadata(keyMetadata: ContextDevKeyMetadata | undefined): {
+  creditsConsumed: number | null
+  creditsRemaining: number | null
+} {
+  return {
+    creditsConsumed: keyMetadata?.credits_consumed ?? null,
+    creditsRemaining: keyMetadata?.credits_remaining ?? null,
+  }
+}
+
+/**
+ * Normalizes a brand-returning Context.dev response into the shared tool output shape.
+ * Used by every endpoint that returns a `brand` object.
+ *
+ * @param data - Parsed response body (typed `any`; no validation is performed here).
+ * @returns `status` (defaulting to `''`), `brand` (defaulting to `null`), and the credit
+ *   fields produced by `extractCreditMetadata(data.key_metadata)`.
+ */
+export function transformBrandResponse(data: any): {
+  status: string
+  // NOTE(review): bare `Record` — its type arguments look lost; confirm against the
+  // original file.
+  brand: Record | null
+  creditsConsumed: number | null
+  creditsRemaining: number | null
+} {
+  return {
+    status: data.status ?? '',
+    brand: data.brand ?? null,
+    ...extractCreditMetadata(data.key_metadata),
+  }
+}
+
+/**
+ * Appends a parameter to a URLSearchParams instance only when it is defined and non-empty.
+ * Booleans are serialized as the literal strings 'true' / 'false'.
+ *
+ * @param search - The URLSearchParams to mutate in place.
+ * @param key - Query parameter name.
+ * @param value - Value to append; converted with `String(value)`.
+ *
+ * Only `undefined`, `null`, and the empty string are skipped, so `0` and `false` are
+ * still sent. Uses `append` rather than `set`, so calling it twice with the same key
+ * produces two entries.
+ */
+export function appendParam(
+  search: URLSearchParams,
+  key: string,
+  value: string | number | boolean | undefined | null
+): void {
+  if (value === undefined || value === null || value === '') return
+  search.append(key, String(value))
+}
+
+/** Output definitions for the credit accounting fields, reused across every tool.
*/ +export const CREDIT_OUTPUTS = { + creditsConsumed: { + type: 'number', + description: 'Credits consumed by this request', + optional: true, + }, + creditsRemaining: { + type: 'number', + description: 'Credits remaining on the API key', + optional: true, + }, +} as const diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index c8f7422f592..a5ffd6ba05d 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -522,6 +522,30 @@ import { confluenceUpdateTool, confluenceUploadAttachmentTool, } from '@/tools/confluence' +import { + contextDevClassifyNaicsTool, + contextDevClassifySicTool, + contextDevCrawlTool, + contextDevExtractProductsTool, + contextDevExtractProductTool, + contextDevExtractTool, + contextDevGetBrandByEmailTool, + contextDevGetBrandByNameTool, + contextDevGetBrandByTickerTool, + contextDevGetBrandSimplifiedTool, + contextDevGetBrandTool, + contextDevIdentifyTransactionTool, + contextDevMapTool, + contextDevPrefetchByEmailTool, + contextDevPrefetchDomainTool, + contextDevScrapeFontsTool, + contextDevScrapeHtmlTool, + contextDevScrapeImagesTool, + contextDevScrapeMarkdownTool, + contextDevScrapeStyleguideTool, + contextDevScreenshotTool, + contextDevSearchTool, +} from '@/tools/context_dev' import { convexActionTool, convexDocumentDeltasTool, @@ -5491,6 +5515,28 @@ export const tools: Record = { confluence_list_space_properties: confluenceListSpacePropertiesTool, confluence_create_space_property: confluenceCreateSpacePropertyTool, confluence_delete_space_property: confluenceDeleteSpacePropertyTool, + context_dev_scrape_markdown: contextDevScrapeMarkdownTool, + context_dev_scrape_html: contextDevScrapeHtmlTool, + context_dev_scrape_images: contextDevScrapeImagesTool, + context_dev_screenshot: contextDevScreenshotTool, + context_dev_crawl: contextDevCrawlTool, + context_dev_map: contextDevMapTool, + context_dev_search: contextDevSearchTool, + context_dev_extract: contextDevExtractTool, + context_dev_extract_product: 
contextDevExtractProductTool, + context_dev_extract_products: contextDevExtractProductsTool, + context_dev_scrape_fonts: contextDevScrapeFontsTool, + context_dev_scrape_styleguide: contextDevScrapeStyleguideTool, + context_dev_classify_naics: contextDevClassifyNaicsTool, + context_dev_classify_sic: contextDevClassifySicTool, + context_dev_get_brand: contextDevGetBrandTool, + context_dev_get_brand_by_name: contextDevGetBrandByNameTool, + context_dev_get_brand_by_email: contextDevGetBrandByEmailTool, + context_dev_get_brand_by_ticker: contextDevGetBrandByTickerTool, + context_dev_get_brand_simplified: contextDevGetBrandSimplifiedTool, + context_dev_identify_transaction: contextDevIdentifyTransactionTool, + context_dev_prefetch_domain: contextDevPrefetchDomainTool, + context_dev_prefetch_by_email: contextDevPrefetchByEmailTool, cursor_list_agents: cursorListAgentsTool, cursor_list_agents_v2: cursorListAgentsV2Tool, cursor_get_agent: cursorGetAgentTool,