diff --git a/docs.json b/docs.json
index 282bcf6..e3ef5cd 100644
--- a/docs.json
+++ b/docs.json
@@ -3,250 +3,327 @@
"theme": "mint",
"name": "ScrapeGraphAI",
"colors": {
- "primary": "#9333ea",
- "light": "#9f52eb",
- "dark": "#1f2937"
+ "primary": "#AC6DFF",
+ "light": "#AC6DFF",
+ "dark": "#AC6DFF"
},
"favicon": "/favicon.svg",
"navigation": {
- "tabs": [
+ "versions": [
{
- "tab": "Home",
- "groups": [
+ "version": "v2",
+ "default": true,
+ "tabs": [
{
- "group": "Get Started",
- "pages": [
- "introduction",
- "install",
+ "tab": "Home",
+ "groups": [
{
- "group": "Use Cases",
+ "group": "Get Started",
"pages": [
- "use-cases/overview",
- "use-cases/ai-llm",
- "use-cases/lead-generation",
- "use-cases/market-intelligence",
- "use-cases/content-aggregation",
- "use-cases/research-analysis",
- "use-cases/seo-analytics"
+ "introduction",
+ "install",
+ {
+ "group": "Use Cases",
+ "pages": [
+ "use-cases/overview",
+ "use-cases/ai-llm",
+ "use-cases/lead-generation",
+ "use-cases/market-intelligence",
+ "use-cases/content-aggregation",
+ "use-cases/research-analysis",
+ "use-cases/seo-analytics"
+ ]
+ },
+ {
+ "group": "Dashboard",
+ "pages": [
+ "dashboard/overview",
+ "dashboard/playground",
+ "dashboard/settings"
+ ]
+ }
]
},
{
- "group": "Dashboard",
+ "group": "Services",
"pages": [
- "dashboard/overview",
- "dashboard/playground",
- "dashboard/settings"
+ "services/smartscraper",
+ "services/searchscraper",
+ "services/markdownify",
+ "services/scrape",
+ "services/smartcrawler",
+ "services/sitemap",
+ "services/agenticscraper",
+ {
+ "group": "CLI",
+ "icon": "terminal",
+ "pages": [
+ "services/cli/introduction",
+ "services/cli/commands",
+ "services/cli/json-mode",
+ "services/cli/ai-agent-skill",
+ "services/cli/examples"
+ ]
+ },
+ {
+ "group": "MCP Server",
+ "icon": "/logo/mcp.svg",
+ "pages": [
+ "services/mcp-server/introduction",
+ "services/mcp-server/cursor",
+ "services/mcp-server/claude",
+ "services/mcp-server/smithery"
+ ]
+ },
+ "services/toonify",
+ {
+ "group": "Additional Parameters",
+ "pages": [
+ "services/additional-parameters/headers",
+ "services/additional-parameters/pagination",
+ "services/additional-parameters/proxy",
+ "services/additional-parameters/wait-ms"
+ ]
+ }
+ ]
+ },
+ {
+ "group": "Official SDKs",
+ "pages": [
+ "sdks/python",
+ "sdks/javascript",
+ "sdks/mocking"
+ ]
+ },
+ {
+ "group": "Integrations",
+ "pages": [
+ "integrations/langchain",
+ "integrations/llamaindex",
+ "integrations/crewai",
+ "integrations/agno",
+ "integrations/langflow",
+ "integrations/vercel_ai",
+ "integrations/google-adk",
+ "integrations/x402"
+ ]
+ },
+ {
+ "group": "LLM SDKs & Frameworks",
+ "pages": [
+ "developer-guides/llm-sdks-and-frameworks/gemini",
+ "developer-guides/llm-sdks-and-frameworks/anthropic"
+ ]
+ },
+ {
+ "group": "Contribute",
+ "pages": [
+ "contribute/opensource"
]
}
]
},
{
- "group": "Services",
- "pages": [
- "services/smartscraper",
- "services/searchscraper",
- "services/markdownify",
- "services/scrape",
- "services/smartcrawler",
- "services/sitemap",
- "services/agenticscraper",
+ "tab": "Knowledge Base",
+ "groups": [
+ {
+ "group": "Knowledge Base",
+ "pages": [
+ "knowledge-base/introduction"
+ ]
+ },
+ {
+ "group": "Scraping Tools",
+ "pages": [
+ "knowledge-base/ai-tools/lovable",
+ "knowledge-base/ai-tools/v0",
+ "knowledge-base/ai-tools/bolt",
+ "knowledge-base/ai-tools/cursor"
+ ]
+ },
{
"group": "CLI",
- "icon": "terminal",
"pages": [
- "services/cli/introduction",
- "services/cli/commands",
- "services/cli/json-mode",
- "services/cli/ai-agent-skill",
- "services/cli/examples"
+ "knowledge-base/cli/getting-started",
+ "knowledge-base/cli/json-mode",
+ "knowledge-base/cli/ai-agent-skill",
+ "knowledge-base/cli/command-examples"
]
},
{
- "group": "MCP Server",
- "icon": "/logo/mcp.svg",
+ "group": "Troubleshooting",
"pages": [
- "services/mcp-server/introduction",
- "services/mcp-server/cursor",
- "services/mcp-server/claude",
- "services/mcp-server/smithery"
+ "knowledge-base/troubleshooting/cors-error",
+ "knowledge-base/troubleshooting/empty-results",
+ "knowledge-base/troubleshooting/rate-limiting",
+ "knowledge-base/troubleshooting/timeout-errors"
]
},
- "services/toonify",
{
- "group": "Additional Parameters",
+ "group": "Scraping Guides",
"pages": [
- "services/additional-parameters/headers",
- "services/additional-parameters/pagination",
- "services/additional-parameters/proxy",
- "services/additional-parameters/wait-ms"
+ "knowledge-base/scraping/javascript-rendering",
+ "knowledge-base/scraping/pagination",
+ "knowledge-base/scraping/custom-headers",
+ "knowledge-base/scraping/proxy"
+ ]
+ },
+ {
+ "group": "Account & Credits",
+ "pages": [
+ "knowledge-base/account/api-keys",
+ "knowledge-base/account/credits",
+ "knowledge-base/account/rate-limits"
]
}
]
},
{
- "group": "Official SDKs",
- "pages": [
- "sdks/python",
- "sdks/javascript",
- "sdks/mocking"
- ]
- },
- {
- "group": "Integrations",
- "pages": [
- "integrations/langchain",
- "integrations/llamaindex",
- "integrations/crewai",
- "integrations/agno",
- "integrations/langflow",
- "integrations/vercel_ai",
- "integrations/google-adk",
- "integrations/x402"
- ]
- },
- {
- "group": "LLM SDKs & Frameworks",
- "pages": [
- "developer-guides/llm-sdks-and-frameworks/gemini",
- "developer-guides/llm-sdks-and-frameworks/anthropic"
- ]
- },
- {
- "group": "Contribute",
- "pages": [
- "contribute/opensource"
- ]
- }
- ]
- },
- {
- "tab": "Knowledge Base",
- "groups": [
- {
- "group": "Knowledge Base",
- "pages": [
- "knowledge-base/introduction"
- ]
- },
- {
- "group": "Scraping Tools",
- "pages": [
- "knowledge-base/ai-tools/lovable",
- "knowledge-base/ai-tools/v0",
- "knowledge-base/ai-tools/bolt",
- "knowledge-base/ai-tools/cursor"
- ]
- },
- {
- "group": "CLI",
- "pages": [
- "knowledge-base/cli/getting-started",
- "knowledge-base/cli/json-mode",
- "knowledge-base/cli/ai-agent-skill",
- "knowledge-base/cli/command-examples"
- ]
- },
- {
- "group": "Troubleshooting",
- "pages": [
- "knowledge-base/troubleshooting/cors-error",
- "knowledge-base/troubleshooting/empty-results",
- "knowledge-base/troubleshooting/rate-limiting",
- "knowledge-base/troubleshooting/timeout-errors"
- ]
- },
- {
- "group": "Scraping Guides",
- "pages": [
- "knowledge-base/scraping/javascript-rendering",
- "knowledge-base/scraping/pagination",
- "knowledge-base/scraping/custom-headers",
- "knowledge-base/scraping/proxy"
- ]
- },
- {
- "group": "Account & Credits",
- "pages": [
- "knowledge-base/account/api-keys",
- "knowledge-base/account/credits",
- "knowledge-base/account/rate-limits"
- ]
- }
- ]
- },
- {
- "tab": "Cookbook",
- "groups": [
- {
- "group": "Cookbook",
- "pages": [
- "cookbook/introduction"
+ "tab": "Cookbook",
+ "groups": [
+ {
+ "group": "Cookbook",
+ "pages": [
+ "cookbook/introduction"
+ ]
+ },
+ {
+ "group": "Examples",
+ "pages": [
+ "cookbook/examples/company-info",
+ "cookbook/examples/github-trending",
+ "cookbook/examples/wired",
+ "cookbook/examples/homes",
+ "cookbook/examples/research-agent",
+ "cookbook/examples/chat-webpage",
+ "cookbook/examples/pagination"
+ ]
+ }
]
},
{
- "group": "Examples",
- "pages": [
- "cookbook/examples/company-info",
- "cookbook/examples/github-trending",
- "cookbook/examples/wired",
- "cookbook/examples/homes",
- "cookbook/examples/research-agent",
- "cookbook/examples/chat-webpage",
- "cookbook/examples/pagination"
+ "tab": "API Reference",
+ "groups": [
+ {
+ "group": "API Documentation",
+ "pages": [
+ "api-reference/introduction",
+ "api-reference/errors"
+ ]
+ },
+ {
+ "group": "SmartScraper",
+ "pages": [
+ "api-reference/endpoint/smartscraper/start",
+ "api-reference/endpoint/smartscraper/get-status"
+ ]
+ },
+ {
+ "group": "SearchScraper",
+ "pages": [
+ "api-reference/endpoint/searchscraper/start",
+ "api-reference/endpoint/searchscraper/get-status"
+ ]
+ },
+ {
+ "group": "Markdownify",
+ "pages": [
+ "api-reference/endpoint/markdownify/start",
+ "api-reference/endpoint/markdownify/get-status"
+ ]
+ },
+ {
+ "group": "SmartCrawler",
+ "pages": [
+ "api-reference/endpoint/smartcrawler/start",
+ "api-reference/endpoint/smartcrawler/get-status"
+ ]
+ },
+ {
+ "group": "Sitemap",
+ "pages": [
+ "api-reference/endpoint/sitemap/start",
+ "api-reference/endpoint/sitemap/get-status"
+ ]
+ },
+ {
+ "group": "User",
+ "pages": [
+ "api-reference/endpoint/user/get-credits",
+ "api-reference/endpoint/user/submit-feedback"
+ ]
+ }
]
}
]
},
{
- "tab": "API Reference",
- "groups": [
- {
- "group": "API Documentation",
- "pages": [
- "api-reference/introduction",
- "api-reference/errors"
- ]
- },
- {
- "group": "SmartScraper",
- "pages": [
- "api-reference/endpoint/smartscraper/start",
- "api-reference/endpoint/smartscraper/get-status"
- ]
- },
- {
- "group": "SearchScraper",
- "pages": [
- "api-reference/endpoint/searchscraper/start",
- "api-reference/endpoint/searchscraper/get-status"
- ]
- },
- {
- "group": "Markdownify",
- "pages": [
- "api-reference/endpoint/markdownify/start",
- "api-reference/endpoint/markdownify/get-status"
- ]
- },
- {
- "group": "SmartCrawler",
- "pages": [
- "api-reference/endpoint/smartcrawler/start",
- "api-reference/endpoint/smartcrawler/get-status"
- ]
- },
+ "version": "v1",
+ "tabs": [
{
- "group": "Sitemap",
- "pages": [
- "api-reference/endpoint/sitemap/start",
- "api-reference/endpoint/sitemap/get-status"
+ "tab": "Home",
+ "groups": [
+ {
+ "group": "Get Started",
+ "pages": [
+ "v1/introduction",
+ "v1/quickstart"
+ ]
+ },
+ {
+ "group": "Services",
+ "pages": [
+ "v1/smartscraper",
+ "v1/searchscraper",
+ "v1/markdownify",
+ "v1/scrape",
+ "v1/smartcrawler",
+ "v1/sitemap",
+ "v1/agenticscraper",
+ {
+ "group": "CLI",
+ "icon": "terminal",
+ "pages": [
+ "v1/cli/introduction",
+ "v1/cli/commands",
+ "v1/cli/json-mode",
+ "v1/cli/ai-agent-skill",
+ "v1/cli/examples"
+ ]
+ },
+ {
+ "group": "MCP Server",
+ "icon": "/logo/mcp.svg",
+ "pages": [
+ "v1/mcp-server/introduction",
+ "v1/mcp-server/cursor",
+ "v1/mcp-server/claude",
+ "v1/mcp-server/smithery"
+ ]
+ },
+ "v1/toonify",
+ {
+ "group": "Additional Parameters",
+ "pages": [
+ "v1/additional-parameters/headers",
+ "v1/additional-parameters/pagination",
+ "v1/additional-parameters/proxy",
+ "v1/additional-parameters/wait-ms"
+ ]
+ }
+ ]
+ }
]
},
{
- "group": "User",
- "pages": [
- "api-reference/endpoint/user/get-credits",
- "api-reference/endpoint/user/submit-feedback"
+ "tab": "API Reference",
+ "groups": [
+ {
+ "group": "API Documentation",
+ "pages": [
+ "v1/api-reference/introduction"
+ ]
+ }
]
}
]
@@ -279,7 +356,18 @@
},
"background": {
"color": {
- "dark": "#101725"
+ "dark": "#242424",
+ "light": "#EFEFEF"
+ }
+ },
+ "fonts": {
+ "heading": {
+ "family": "IBM Plex Sans",
+ "weight": 500
+ },
+ "body": {
+ "family": "IBM Plex Sans",
+ "weight": 400
}
},
"navbar": {
@@ -293,7 +381,7 @@
"href": "mailto:contact@scrapegraphai.com"
},
{
- "label": "⭐ 23.2k+",
+ "label": "⭐ 23k+",
"href": "https://github.com/ScrapeGraphAI/Scrapegraph-ai"
}
],
@@ -322,4 +410,4 @@
"vscode"
]
}
-}
\ No newline at end of file
+}
diff --git a/install.md b/install.md
index 1f1165d..f39b9d0 100644
--- a/install.md
+++ b/install.md
@@ -1,6 +1,6 @@
---
title: Installation
-description: 'Install and get started with ScrapeGraphAI SDKs'
+description: 'Install and get started with ScrapeGraphAI v2 SDKs'
---
## Prerequisites
@@ -22,10 +22,10 @@ from scrapegraph_py import Client
client = Client(api_key="your-api-key-here")
-# Scrape a website
-response = client.smartscraper(
- website_url="https://scrapegraphai.com",
- user_prompt="Extract information about the company"
+# Extract data from a website
+response = client.extract(
+ url="https://scrapegraphai.com",
+ prompt="Extract information about the company"
)
print(response)
```
@@ -40,6 +40,8 @@ For more advanced usage, see the [Python SDK documentation](/sdks/python).
## JavaScript SDK
+Requires **Node.js >= 22**.
+
Install using npm, pnpm, yarn, or bun:
```bash
@@ -59,20 +61,16 @@ bun add scrapegraph-js
**Usage:**
```javascript
-import { smartScraper } from "scrapegraph-js";
+import scrapegraphai from "scrapegraph-js";
-const apiKey = "your-api-key-here";
+const sgai = scrapegraphai({ apiKey: "your-api-key-here" });
-const response = await smartScraper(apiKey, {
- website_url: "https://scrapegraphai.com",
- user_prompt: "What does the company do?",
-});
+const { data } = await sgai.extract(
+ "https://scrapegraphai.com",
+ { prompt: "What does the company do?" }
+);
-if (response.status === "error") {
- console.error("Error:", response.error);
-} else {
- console.log(response.data.result);
-}
+console.log(data);
```
@@ -85,18 +83,18 @@ For more advanced usage, see the [JavaScript SDK documentation](/sdks/javascript
## Key Concepts
-### SmartScraper
+### Extract (formerly SmartScraper)
Extract specific information from any webpage using AI. Provide a URL and a prompt describing what you want to extract. [Learn more](/services/smartscraper)
-### SearchScraper
-Search and extract information from multiple web sources using AI. Start with just a prompt - SearchScraper will find relevant websites and extract the information you need. [Learn more](/services/searchscraper)
+### Search (formerly SearchScraper)
+Search and extract information from multiple web sources using AI. Start with just a query - Search will find relevant websites and extract the information you need. [Learn more](/services/searchscraper)
+
+### Scrape
+Convert any webpage into markdown, HTML, screenshot, or branding format. Replaces the previous Markdownify endpoint with additional output formats. [Learn more](/services/scrape)
### SmartCrawler
AI-powered extraction for any webpage with crawl capabilities. Automatically navigate and extract data from multiple pages. [Learn more](/services/smartcrawler)
-### Markdownify
-Convert any webpage into clean, formatted markdown. Perfect for content aggregation and processing. [Learn more](/services/markdownify)
-
### Structured Output with Schemas
Both SDKs support structured output using schemas:
- **Python**: Use Pydantic models
@@ -119,34 +117,37 @@ class CompanyInfo(BaseModel):
industry: str = Field(description="Industry sector")
client = Client(api_key="your-api-key")
-result = client.smartscraper(
- website_url="https://scrapegraphai.com",
- user_prompt="Extract company information",
+response = client.extract(
+ url="https://scrapegraphai.com",
+ prompt="Extract company information",
output_schema=CompanyInfo
)
-print(result)
+print(response)
```
### JavaScript Example
```javascript
-import { smartScraper } from "scrapegraph-js";
+import scrapegraphai from "scrapegraph-js";
import { z } from "zod";
+const sgai = scrapegraphai({ apiKey: "your-api-key" });
+
const CompanySchema = z.object({
- company_name: z.string().describe("The company name"),
+ companyName: z.string().describe("The company name"),
description: z.string().describe("Company description"),
website: z.string().url().describe("Company website URL"),
industry: z.string().describe("Industry sector"),
});
-const apiKey = "your-api-key";
-const response = await smartScraper(apiKey, {
- website_url: "https://scrapegraphai.com",
- user_prompt: "Extract company information",
- output_schema: CompanySchema,
-});
-console.log(response.data.result);
+const { data } = await sgai.extract(
+ "https://scrapegraphai.com",
+ {
+ prompt: "Extract company information",
+ schema: CompanySchema,
+ }
+);
+console.log(data);
```
---
diff --git a/sdks/javascript.mdx b/sdks/javascript.mdx
index ed4fb55..ea1c726 100644
--- a/sdks/javascript.mdx
+++ b/sdks/javascript.mdx
@@ -1,6 +1,6 @@
---
title: "JavaScript SDK"
-description: "Official JavaScript/TypeScript SDK for ScrapeGraphAI"
+description: "Official JavaScript/TypeScript SDK for ScrapeGraphAI v2"
icon: "js"
---
@@ -22,8 +22,6 @@ icon: "js"
## Installation
-Install the package using npm, pnpm, yarn or bun:
-
```bash
# Using npm
npm i scrapegraph-js
@@ -38,82 +36,77 @@ yarn add scrapegraph-js
bun add scrapegraph-js
```
-## Features
+
+v2 requires **Node.js >= 22**.
+
-- **AI-Powered Extraction**: Smart web scraping with artificial intelligence
-- **Async by Design**: Fully asynchronous architecture
-- **Type Safety**: Built-in TypeScript support with Zod schemas
-- **Zero Exceptions**: All errors wrapped in `ApiResult` — no try/catch needed
-- **Developer Friendly**: Comprehensive error handling and debug logging
+## What's New in v2
-## Quick Start
+- **Factory pattern**: Create a client with `scrapegraphai({ apiKey })` instead of importing individual functions
+- **Renamed methods**: `smartScraper` → `extract`, `searchScraper` → `search`
+- **camelCase parameters**: All params are now camelCase (e.g., `fetchConfig` instead of `fetch_config`)
+- **Throws on error**: Methods return `{ data, requestId }` and throw on failure (no more `ApiResult` wrapper)
+- **Native Zod support**: Pass Zod schemas directly to `schema` parameter
+- **Namespace methods**: `crawl.start()`, `monitor.create()`, etc.
+- **Removed**: `agenticScraper`, `generateSchema`, `sitemap`, `checkHealth`, `markdownify`
-### Basic example
+
+v2 is a breaking release. If you're upgrading from v1, see the [Migration Guide](https://github.com/ScrapeGraphAI/scrapegraph-js/blob/main/MIGRATION.md).
+
-
- Store your API keys securely in environment variables. Use `.env` files and
- libraries like `dotenv` to load them into your app.
-
+## Quick Start
```javascript
-import { smartScraper } from "scrapegraph-js";
-import "dotenv/config";
+import scrapegraphai from "scrapegraph-js";
-const apiKey = process.env.SGAI_APIKEY;
+const sgai = scrapegraphai({ apiKey: "your-api-key" });
-const response = await smartScraper(apiKey, {
- website_url: "https://example.com",
- user_prompt: "What does the company do?",
-});
+const { data, requestId } = await sgai.extract(
+ "https://example.com",
+ { prompt: "What does the company do?" }
+);
-if (response.status === "error") {
- console.error("Error:", response.error);
-} else {
- console.log(response.data.result);
-}
+console.log(data);
```
+
+Store your API keys securely in environment variables. Use `.env` files and
+libraries like `dotenv` to load them into your app.
+
+
+### Client Options
+
+| Parameter | Type | Default | Description |
+| ---------- | ------ | -------------------------------- | ------------------------------- |
+| apiKey | string | Required | Your ScrapeGraphAI API key |
+| baseUrl | string | `https://api.scrapegraphai.com` | API base URL |
+| timeout | number | `30000` | Request timeout in ms |
+| maxRetries | number | `3` | Maximum number of retries |
+
## Services
-### SmartScraper
+### extract()
-Extract specific information from any webpage using AI:
+Extract structured data from any webpage using AI. Replaces the v1 `smartScraper` function.
```javascript
-const response = await smartScraper(apiKey, {
- website_url: "https://example.com",
- user_prompt: "Extract the main content",
-});
-```
-
-All functions return an `ApiResult` object:
-```typescript
-type ApiResult = {
- status: "success" | "error";
- data: T | null;
- error?: string;
- elapsedMs: number;
-};
+const { data, requestId } = await sgai.extract(
+ "https://example.com",
+ { prompt: "Extract the main heading and description" }
+);
```
#### Parameters
-| Parameter | Type | Required | Description |
-| --------------- | ------- | -------- | ----------------------------------------------------------------------------------- |
-| apiKey | string | Yes | The ScrapeGraph API Key (first argument). |
-| user_prompt | string | Yes | A textual description of what you want to extract. |
-| website_url | string | No* | The URL of the webpage to scrape. *One of `website_url`, `website_html`, or `website_markdown` is required. |
-| output_schema | object | No | A Zod schema (converted to JSON) that describes the structure of the response. |
-| number_of_scrolls | number | No | Number of scrolls for infinite scroll pages (0-50). |
-| stealth | boolean | No | Enable anti-detection mode (+4 credits). |
-| headers | object | No | Custom HTTP headers. |
-| mock | boolean | No | Enable mock mode for testing. |
-| wait_ms | number | No | Page load wait time in ms (default: 3000). |
-| country_code | string | No | Proxy routing country code (e.g., "us"). |
-
-
-Define a simple schema using Zod:
+| Parameter | Type | Required | Description |
+| -------------------- | ----------- | -------- | -------------------------------------------------------- |
+| url | string | Yes | The URL of the webpage to scrape |
+| options.prompt | string | Yes | A description of what you want to extract |
+| options.schema | ZodSchema / object | No | Zod schema or JSON schema for structured response |
+| options.fetchConfig | FetchConfig | No | Fetch configuration |
+| options.llmConfig | LlmConfig | No | LLM configuration |
+
```javascript
import { z } from "zod";
@@ -122,301 +115,216 @@ const ArticleSchema = z.object({
author: z.string().describe("The author's name"),
publishDate: z.string().describe("Article publication date"),
content: z.string().describe("Main article content"),
- category: z.string().describe("Article category"),
});
-const ArticlesArraySchema = z
- .array(ArticleSchema)
- .describe("Array of articles");
+const { data } = await sgai.extract(
+ "https://example.com/blog/article",
+ {
+ prompt: "Extract the article information",
+ schema: ArticleSchema,
+ }
+);
-const response = await smartScraper(apiKey, {
- website_url: "https://example.com/blog/article",
- user_prompt: "Extract the article information",
- output_schema: ArticlesArraySchema,
-});
-
-console.log(`Title: ${response.data.result.title}`);
-console.log(`Author: ${response.data.result.author}`);
-console.log(`Published: ${response.data.result.publishDate}`);
+console.log(`Title: ${data.title}`);
+console.log(`Author: ${data.author}`);
```
-
-
-Define a complex schema for nested data structures:
-
+
```javascript
-import { z } from "zod";
-
-const EmployeeSchema = z.object({
- name: z.string().describe("Employee's full name"),
- position: z.string().describe("Job title"),
- department: z.string().describe("Department name"),
- email: z.string().describe("Email address"),
-});
-
-const OfficeSchema = z.object({
- location: z.string().describe("Office location/city"),
- address: z.string().describe("Full address"),
- phone: z.string().describe("Contact number"),
-});
-
-const CompanySchema = z.object({
- name: z.string().describe("Company name"),
- description: z.string().describe("Company description"),
- industry: z.string().describe("Industry sector"),
- foundedYear: z.number().describe("Year company was founded"),
- employees: z.array(EmployeeSchema).describe("List of key employees"),
- offices: z.array(OfficeSchema).describe("Company office locations"),
- website: z.string().url().describe("Company website URL"),
-});
+const { data } = await sgai.extract(
+ "https://example.com",
+ {
+ prompt: "Extract the main heading",
+ fetchConfig: {
+ stealth: true,
+ renderJs: true,
+ waitMs: 2000,
+ scrolls: 3,
+ },
+ llmConfig: {
+ temperature: 0.3,
+ maxTokens: 1000,
+ },
+ }
+);
+```
+
-const response = await smartScraper(apiKey, {
- website_url: "https://example.com/about",
- user_prompt: "Extract detailed company information including employees and offices",
- output_schema: CompanySchema,
-});
+### search()
-console.log(`Company: ${response.data.result.name}`);
-console.log("\nKey Employees:");
-response.data.result.employees.forEach((employee) => {
- console.log(`- ${employee.name} (${employee.position})`);
-});
+Search the web and extract information. Replaces the v1 `searchScraper` function.
-console.log("\nOffice Locations:");
-response.data.result.offices.forEach((office) => {
- console.log(`- ${office.location}: ${office.address}`);
-});
+```javascript
+const { data } = await sgai.search(
+ "What are the key features and pricing of ChatGPT Plus?",
+ { numResults: 5 }
+);
```
-
+#### Parameters
-
-For modern web applications built with React, Vue, Angular, or other JavaScript frameworks:
+| Parameter | Type | Required | Description |
+| -------------------- | ----------- | -------- | -------------------------------------------------------- |
+| query | string | Yes | The search query |
+| options.numResults | number | No | Number of results (3-20). Default: 3 |
+| options.schema | ZodSchema / object | No | Schema for structured response |
+| options.fetchConfig | FetchConfig | No | Fetch configuration |
+| options.llmConfig | LlmConfig | No | LLM configuration |
+
```javascript
-import { smartScraper } from 'scrapegraph-js';
-import { z } from 'zod';
-
-const apiKey = 'your-api-key';
+import { z } from "zod";
const ProductSchema = z.object({
- name: z.string().describe('Product name'),
- price: z.string().describe('Product price'),
- description: z.string().describe('Product description'),
- availability: z.string().describe('Product availability status')
+ name: z.string().describe("Product name"),
+ price: z.string().describe("Product price"),
+ features: z.array(z.string()).describe("Key features"),
});
-const response = await smartScraper(apiKey, {
- website_url: 'https://example-react-store.com/products/123',
- user_prompt: 'Extract product details including name, price, description, and availability',
- output_schema: ProductSchema,
-});
+const { data } = await sgai.search(
+ "Find information about iPhone 15 Pro",
+ {
+ schema: ProductSchema,
+ numResults: 5,
+ }
+);
-if (response.status === 'error') {
- console.error('Error:', response.error);
-} else {
- console.log('Product:', response.data.result.name);
- console.log('Price:', response.data.result.price);
- console.log('Available:', response.data.result.availability);
-}
+console.log(`Product: ${data.name}`);
+console.log(`Price: ${data.price}`);
```
-
-### SearchScraper
+### scrape()
-Search and extract information from multiple web sources using AI:
+Convert any webpage into markdown, HTML, screenshot, or branding format. Replaces the v1 `markdownify` function.
```javascript
-const response = await searchScraper(apiKey, {
- user_prompt: "Find the best restaurants in San Francisco",
- location_geo_code: "us",
- time_range: "past_week",
-});
+const { data } = await sgai.scrape("https://example.com");
+console.log(data);
```
#### Parameters
-| Parameter | Type | Required | Description |
-| ------------------ | ------- | -------- | ---------------------------------------------------------------------------------- |
-| apiKey | string | Yes | The ScrapeGraph API Key (first argument). |
-| user_prompt | string | Yes | A textual description of what you want to achieve. |
-| num_results | number | No | Number of websites to search (3-20). Default: 3. |
-| extraction_mode | boolean | No | **true** = AI extraction mode (10 credits/page), **false** = markdown mode (2 credits/page). |
-| output_schema | object | No | Zod schema for structured response format (AI extraction mode only). |
-| location_geo_code | string | No | Geo code for location-based search (e.g., "us"). |
-| time_range | string | No | Time range filter. Options: "past_hour", "past_24_hours", "past_week", "past_month", "past_year". |
+| Parameter | Type | Required | Description |
+| -------------------- | ----------- | -------- | -------------------------------------------------------- |
+| url | string | Yes | The URL of the webpage to scrape |
+| options.format | string | No | `"markdown"`, `"html"`, `"screenshot"`, `"branding"` |
+| options.fetchConfig | FetchConfig | No | Fetch configuration |
-
-Define a simple schema using Zod:
+### crawl
-```javascript
-import { z } from "zod";
+Manage multi-page crawl operations asynchronously.
-const ArticleSchema = z.object({
- title: z.string().describe("The article title"),
- author: z.string().describe("The author's name"),
- publishDate: z.string().describe("Article publication date"),
- content: z.string().describe("Main article content"),
- category: z.string().describe("Article category"),
+```javascript
+// Start a crawl
+const job = await sgai.crawl.start("https://example.com", {
+ depth: 2,
+ includePatterns: ["/blog/*", "/docs/**"],
+ excludePatterns: ["/admin/*", "/api/*"],
});
+console.log(`Crawl started: ${job.data.id}`);
-const response = await searchScraper(apiKey, {
- user_prompt: "Find news about the latest trends in AI",
- output_schema: ArticleSchema,
- location_geo_code: "us",
- time_range: "past_week",
-});
+// Check status
+const status = await sgai.crawl.status(job.data.id);
+console.log(`Status: ${status.data.status}`);
-console.log(`Title: ${response.data.result.title}`);
-console.log(`Author: ${response.data.result.author}`);
-console.log(`Published: ${response.data.result.publishDate}`);
+// Stop / Resume
+await sgai.crawl.stop(job.data.id);
+await sgai.crawl.resume(job.data.id);
```
-
+### monitor
-
-Define a complex schema for nested data structures:
+Create and manage site monitoring jobs.
```javascript
-import { z } from "zod";
-
-const EmployeeSchema = z.object({
- name: z.string().describe("Employee's full name"),
- position: z.string().describe("Job title"),
- department: z.string().describe("Department name"),
- email: z.string().describe("Email address"),
+// Create a monitor
+const monitor = await sgai.monitor.create({
+ url: "https://example.com",
+ prompt: "Track price changes",
+ schedule: "daily",
});
-const OfficeSchema = z.object({
- location: z.string().describe("Office location/city"),
- address: z.string().describe("Full address"),
- phone: z.string().describe("Contact number"),
-});
-
-const RestaurantSchema = z.object({
- name: z.string().describe("Restaurant name"),
- address: z.string().describe("Restaurant address"),
- rating: z.number().describe("Restaurant rating"),
- website: z.string().url().describe("Restaurant website URL"),
-});
+// List all monitors
+const monitors = await sgai.monitor.list();
-const response = await searchScraper(apiKey, {
- user_prompt: "Find the best restaurants in San Francisco",
- output_schema: RestaurantSchema,
- location_geo_code: "us",
- time_range: "past_month",
-});
+// Get / Pause / Resume / Delete
+const details = await sgai.monitor.get(monitor.data.id);
+await sgai.monitor.pause(monitor.data.id);
+await sgai.monitor.resume(monitor.data.id);
+await sgai.monitor.delete(monitor.data.id);
```
-
+### credits()
-
-Use markdown mode for cost-effective content gathering:
+Check your account credit balance.
```javascript
-import { searchScraper } from 'scrapegraph-js';
+const { data } = await sgai.credits();
+console.log(`Remaining: ${data.remainingCredits}`);
+console.log(`Used: ${data.totalCreditsUsed}`);
+```
-const apiKey = 'your-api-key';
+### history()
-const response = await searchScraper(apiKey, {
- user_prompt: 'Latest developments in artificial intelligence',
- num_results: 3,
- extraction_mode: false,
- location_geo_code: "us",
- time_range: "past_week",
+Retrieve paginated request history.
+
+```javascript
+const { data } = await sgai.history({
+ page: 1,
+ perPage: 20,
+ service: "extract",
});
-if (response.status === 'error') {
- console.error('Error:', response.error);
-} else {
- const markdownContent = response.data.markdown_content;
- console.log('Markdown content length:', markdownContent.length);
- console.log('Reference URLs:', response.data.reference_urls);
- console.log('Content preview:', markdownContent.substring(0, 500) + '...');
-}
+data.items.forEach((entry) => {
+ console.log(`${entry.createdAt} - ${entry.service} - ${entry.status}`);
+});
```
-**Markdown Mode Benefits:**
-- **Cost-effective**: Only 2 credits per page (vs 10 credits for AI extraction)
-- **Full content**: Get complete page content in markdown format
-- **Faster**: No AI processing overhead
-- **Perfect for**: Content analysis, bulk data collection, building datasets
+## Configuration Objects
-
+### FetchConfig
-
-Filter search results by date range to get only recent information:
+Controls how pages are fetched.
```javascript
-import { searchScraper } from 'scrapegraph-js';
-
-const apiKey = 'your-api-key';
-
-const response = await searchScraper(apiKey, {
- user_prompt: 'Latest news about AI developments',
- num_results: 5,
- time_range: 'past_week', // Options: 'past_hour', 'past_24_hours', 'past_week', 'past_month', 'past_year'
-});
-
-if (response.status === 'error') {
- console.error('Error:', response.error);
-} else {
- console.log('Recent AI news:', response.data.result);
- console.log('Reference URLs:', response.data.reference_urls);
+{
+ stealth: true, // Anti-detection mode
+ renderJs: true, // Render JavaScript
+ waitMs: 2000, // Wait time after page load (ms)
+ scrolls: 3, // Number of scrolls
+ country: "us", // Proxy country code
+ cookies: { key: "value" },
+ headers: { "X-Custom": "header" },
}
```
-**Time Range Options:**
-- `past_hour` - Results from the past hour
-- `past_24_hours` - Results from the past 24 hours
-- `past_week` - Results from the past week
-- `past_month` - Results from the past month
-- `past_year` - Results from the past year
-
-**Use Cases:**
-- Finding recent news and updates
-- Tracking time-sensitive information
-- Getting latest product releases
-- Monitoring recent market changes
-
-
-
-### Markdownify
+### LlmConfig
-Convert any webpage into clean, formatted markdown:
+Controls LLM behavior for AI-powered methods.
```javascript
-const response = await markdownify(apiKey, {
- website_url: "https://example.com",
-});
+{
+ model: "default", // LLM model to use
+ temperature: 0.3, // Response creativity (0-1)
+ maxTokens: 1000, // Maximum response tokens
+}
```
-#### Parameters
-
-| Parameter | Type | Required | Description |
-| ----------- | ------- | -------- | ---------------------------------------------- |
-| apiKey | string | Yes | The ScrapeGraph API Key (first argument). |
-| website_url | string | Yes | The URL of the webpage to convert to markdown. |
-| wait_ms | number | No | Page load wait time in ms (default: 3000). |
-| stealth | boolean | No | Enable anti-detection mode (+4 credits). |
-| country_code| string | No | Proxy routing country code (e.g., "us"). |
+## Error Handling
-## API Credits
-
-Check your available API credits:
+v2 throws errors instead of returning `ApiResult`. Use try/catch:
```javascript
-import { getCredits } from "scrapegraph-js";
-
-const credits = await getCredits(apiKey);
-
-if (credits.status === "error") {
- console.error("Error fetching credits:", credits.error);
-} else {
- console.log("Remaining credits:", credits.data.remaining_credits);
- console.log("Total used:", credits.data.total_credits_used);
+try {
+ const { data, requestId } = await sgai.extract(
+ "https://example.com",
+ { prompt: "Extract the title" }
+ );
+ console.log(data);
+} catch (err) {
+ console.error(`Request failed: ${err.message}`);
}
```
@@ -438,9 +346,3 @@ if (credits.status === "error") {
Get help from our development team
-
-
- This project is licensed under the MIT License. See the
- [LICENSE](https://github.com/ScrapeGraphAI/scrapegraph-js/blob/main/LICENSE)
- file for details.
-
diff --git a/sdks/mocking.mdx b/sdks/mocking.mdx
index 592de50..a1a0f58 100644
--- a/sdks/mocking.mdx
+++ b/sdks/mocking.mdx
@@ -1,6 +1,6 @@
---
title: 'Mocking & Testing'
-description: 'Test ScrapeGraphAI functionality in an isolated environment without consuming API credits'
+description: 'Test ScrapeGraphAI v2 functionality without consuming API credits'
icon: 'test-tube'
---
@@ -11,584 +11,261 @@ icon: 'test-tube'
/>
-
- Test your code without making real API calls
+
+ Use familiar testing tools for mocking
-
- Override responses for specific endpoints
+
+ Test without consuming API credits
## Overview
-A mock environment is an isolated test environment. You can use mock mode to test ScrapeGraphAI functionality in your application, and experiment with new features without affecting your live integration or consuming API credits. For example, when testing in mock mode, the scraping requests you create aren't processed by our servers or counted against your credit usage.
+In v2, the built-in mock mode (`mock=True`, `mock_handler`, `mock_responses`) has been removed from the SDKs. Instead, use standard mocking libraries for your language to test ScrapeGraphAI integrations without making real API calls or consuming credits.
-## Use cases
+
+If you're migrating from v1, replace `Client(mock=True)` with standard mocking patterns shown below.
+
-Mock mode provides an environment for testing various functionalities and scenarios without the implications of real API calls. Below are some common use cases for mocking in your ScrapeGraphAI integrations:
+## Python SDK Testing
-| Scenario | Description |
-|----------|-------------|
-| **Simulate scraping responses to test without real API calls** | Use mock mode to test scraping functionality without real API calls. Create mock responses in your application to test data processing logic or use custom handlers to simulate various response scenarios. |
-| **Scale isolated testing for teams** | Your team can test in separate mock environments to make sure that data and actions are completely isolated from other tests. Changes made in one mock configuration don't interfere with changes in another. |
-| **Test without API key requirements** | You can test your integration without providing real API keys, making it easier for external developers, implementation partners, or design agencies to work with your code without access to your live API credentials. |
-| **Test in development or CI/CD pipelines** | Access mock mode from your development environment or continuous integration pipelines. Test ScrapeGraphAI functionality directly in your code or use familiar testing frameworks and fixtures. |
-
-## Test in mock mode
-
-You can simulate scraping responses and use mock data to test your integration without consuming API credits. Learn more about using mock responses to confirm that your integration works correctly.
-
-## Basic Mock Usage
-
-Enable mock mode by setting `mock=True` when initializing the client:
+### Using `unittest.mock`
```python
+from unittest.mock import patch, MagicMock
from scrapegraph_py import Client
-from scrapegraph_py.logger import sgai_logger
-
-# Set logging level for better visibility
-sgai_logger.set_logging(level="INFO")
-
-def basic_mock_usage():
- # Initialize the client with mock mode enabled
- client = Client.from_env(mock=True)
-
- print("\n-- get_credits (mock) --")
- print(client.get_credits())
-
- print("\n-- markdownify (mock) --")
- md = client.markdownify(website_url="https://example.com")
- print(md)
-
- print("\n-- get_markdownify (mock) --")
- md_status = client.get_markdownify("00000000-0000-0000-0000-000000000123")
- print(md_status)
-
- print("\n-- smartscraper (mock) --")
- ss = client.smartscraper(user_prompt="Extract title", website_url="https://example.com")
- print(ss)
-
-if __name__ == "__main__":
- basic_mock_usage()
-```
-
-
-When mock mode is enabled, all API calls return predefined mock responses instead of making real HTTP requests. This ensures your tests run quickly and don't consume API credits.
-
-
-## Custom Response Overrides
-You can override specific endpoint responses using the `mock_responses` parameter:
+def test_extract():
+ client = Client(api_key="test-key")
-```python
-def mock_with_path_overrides():
- # Initialize the client with mock mode and custom responses
- client = Client.from_env(
- mock=True,
- mock_responses={
- "/v1/credits": {"remaining_credits": 42, "total_credits_used": 58, "mock": true}
+ mock_response = {
+ "data": {
+ "title": "Test Page",
+ "content": "This is test content"
},
- )
-
- print("\n-- get_credits with override (mock) --")
- print(client.get_credits())
-```
+ "request_id": "test-request-123"
+ }
-
-You can override responses for any endpoint by providing the path and expected response:
+ with patch.object(client, "extract", return_value=mock_response):
+ response = client.extract(
+ url="https://example.com",
+ prompt="Extract title and content"
+ )
-```python
-client = Client.from_env(
- mock=True,
- mock_responses={
- "/v1/credits": {
- "remaining_credits": 100,
- "total_credits_used": 0,
- "mock": true
- },
- "/v1/smartscraper/start": {
- "job_id": "mock-job-123",
- "status": "processing",
- "mock": true
- },
- "/v1/smartscraper/status/mock-job-123": {
- "job_id": "mock-job-123",
- "status": "completed",
- "result": {
- "title": "Mock Title",
- "content": "Mock content from the webpage",
- "mock": true
- }
- },
- "/v1/markdownify/start": {
- "job_id": "mock-markdown-456",
- "status": "processing",
- "mock": true
- },
- "/v1/markdownify/status/mock-markdown-456": {
- "job_id": "mock-markdown-456",
- "status": "completed",
- "result": "# Mock Markdown\n\nThis is mock markdown content.",
- "mock": true
- }
- }
-)
+ assert response["data"]["title"] == "Test Page"
+ assert response["request_id"] == "test-request-123"
```
-
-## Custom Handler Functions
+### Using `responses` Library
-For more complex mocking scenarios, you can provide a custom handler function:
+Mock HTTP requests at the transport layer:
```python
-def mock_with_custom_handler():
- def handler(method, url, kwargs):
- return {"handled_by": "custom_handler", "method": method, "url": url}
-
- # Initialize the client with mock mode and custom handler
- client = Client.from_env(mock=True, mock_handler=handler)
+import responses
+from scrapegraph_py import Client
- print("\n-- searchscraper via custom handler (mock) --")
- resp = client.searchscraper(user_prompt="Search something")
- print(resp)
-```
+@responses.activate
+def test_extract_http():
+ responses.post(
+ "https://api.scrapegraphai.com/api/v2/extract",
+ json={
+ "data": {"title": "Mock Title"},
+ "request_id": "mock-123"
+ },
+ status=200,
+ )
-
-Create sophisticated mock responses based on request parameters:
+ client = Client(api_key="test-key")
+ response = client.extract(
+ url="https://example.com",
+ prompt="Extract the title"
+ )
-```python
-def advanced_custom_handler():
- def smart_handler(method, url, kwargs):
- # Handle different endpoints with custom logic
- if "/v1/credits" in url:
- return {
- "remaining_credits": 50,
- "total_credits_used": 50,
- "mock": true
- }
- elif "/v1/smartscraper" in url:
- # Extract user_prompt from kwargs to create contextual responses
- user_prompt = kwargs.get("user_prompt", "")
- if "title" in user_prompt.lower():
- return {
- "job_id": "mock-title-job",
- "status": "completed",
- "result": {
- "title": "Extracted Title",
- "content": "This is the extracted content",
- "mock": true
- }
- }
- else:
- return {
- "job_id": "mock-generic-job",
- "status": "completed",
- "result": {
- "data": "Generic extracted data",
- "mock": true
- }
- }
- else:
- return {"error": "Unknown endpoint", "url": url}
-
- client = Client.from_env(mock=True, mock_handler=smart_handler)
-
- # Test different scenarios
- print("Credits:", client.get_credits())
- print("Title extraction:", client.smartscraper(
- website_url="https://example.com",
- user_prompt="Extract the title"
- ))
- print("Generic extraction:", client.smartscraper(
- website_url="https://example.com",
- user_prompt="Extract some data"
- ))
+ assert response["data"]["title"] == "Mock Title"
```
-
-
-## Testing Best Practices
-### Unit Testing with Mocks
+### Using `pytest` Fixtures
```python
-import unittest
-from unittest.mock import patch
+import pytest
+from unittest.mock import MagicMock
from scrapegraph_py import Client
-class TestScrapeGraphAI(unittest.TestCase):
- def setUp(self):
- self.client = Client.from_env(mock=True)
-
- def test_get_credits(self):
- credits = self.client.get_credits()
- self.assertIn("remaining_credits", credits)
- self.assertIn("total_credits_used", credits)
-
- def test_smartscraper_with_schema(self):
- from pydantic import BaseModel, Field
-
- class TestSchema(BaseModel):
- title: str = Field(description="Page title")
- content: str = Field(description="Page content")
-
- response = self.client.smartscraper(
- website_url="https://example.com",
- user_prompt="Extract title and content",
- output_schema=TestSchema
- )
-
- self.assertIsInstance(response, TestSchema)
- self.assertIsNotNone(response.title)
- self.assertIsNotNone(response.content)
-
-if __name__ == "__main__":
- unittest.main()
-```
-
-### Integration Testing
-
-```python
-def test_integration_flow():
- """Test a complete workflow using mocks"""
- client = Client.from_env(
- mock=True,
- mock_responses={
- "/v1/credits": {"remaining_credits": 10, "total_credits_used": 90, "mock": true},
- "/v1/smartscraper/start": {
- "job_id": "test-job-123",
- "status": "processing",
- "mock": true
- },
- "/v1/smartscraper/status/test-job-123": {
- "job_id": "test-job-123",
- "status": "completed",
- "result": {
- "title": "Test Page",
- "content": "Test content",
- "mock": true
- }
- }
- }
+@pytest.fixture
+def mock_client():
+ client = Client(api_key="test-key")
+ client.extract = MagicMock(return_value={
+ "data": {"title": "Mock Title"},
+ "request_id": "mock-123"
+ })
+ client.search = MagicMock(return_value={
+ "data": {"results": []},
+ "request_id": "mock-456"
+ })
+ client.credits = MagicMock(return_value={
+ "remaining_credits": 100,
+ "total_credits_used": 0
+ })
+ return client
+
+def test_extract(mock_client):
+ response = mock_client.extract(
+ url="https://example.com",
+ prompt="Extract the title"
)
-
- # Test the complete flow
- credits = client.get_credits()
- assert credits["remaining_credits"] == 10
-
- # Start a scraping job
- job = client.smartscraper(
- website_url="https://example.com",
- user_prompt="Extract title and content"
- )
-
- # Check job status
- status = client.get_smartscraper("test-job-123")
- assert status["status"] == "completed"
- assert "title" in status["result"]
-```
-
-## Environment Variables
-
-You can also control mocking through environment variables:
+ assert response["data"]["title"] == "Mock Title"
-```bash
-# Enable mock mode via environment variable
-export SGAI_MOCK=true
-
-# Set custom mock responses (JSON format)
-export SGAI_MOCK_RESPONSES='{"\/v1\/credits": {"remaining_credits": 100, "mock": true}}'
+def test_credits(mock_client):
+ credits = mock_client.credits()
+ assert credits["remaining_credits"] == 100
```
-```python
-# The client will automatically detect mock mode from environment
-client = Client.from_env() # Will use mock mode if SGAI_MOCK=true
-```
-
-## Async Mocking
-
-Mocking works seamlessly with async clients:
+### Async Testing with `aioresponses`
```python
+import pytest
import asyncio
+from aioresponses import aioresponses
from scrapegraph_py import AsyncClient
-async def async_mock_example():
- async with AsyncClient(mock=True) as client:
- # All async methods work with mocks
- credits = await client.get_credits()
- print(f"Mock credits: {credits}")
-
- response = await client.smartscraper(
- website_url="https://example.com",
- user_prompt="Extract data"
+@pytest.mark.asyncio
+async def test_async_extract():
+ with aioresponses() as mocked:
+ mocked.post(
+ "https://api.scrapegraphai.com/api/v2/extract",
+ payload={
+ "data": {"title": "Async Mock"},
+ "request_id": "async-123"
+ },
)
- print(f"Mock response: {response}")
-
-# Run the async example
-asyncio.run(async_mock_example())
-```
-## HTTP Method Mocking with cURL
+ async with AsyncClient(api_key="test-key") as client:
+ response = await client.extract(
+ url="https://example.com",
+ prompt="Extract data"
+ )
-You can also test ScrapeGraphAI endpoints directly using cURL with mock responses. This is useful for testing API integrations without using SDKs.
-
-### Basic cURL Mock Usage
-
-```bash
-# Enable mock mode via environment variable
-export SGAI_MOCK=true
-
-# Test credits endpoint with mock
-curl -X GET "https://api.scrapegraph.ai/v1/credits" \
- -H "Authorization: Bearer $SGAI_API_KEY" \
- -H "Content-Type: application/json"
-```
-
-### Custom Mock Responses with cURL
-
-```bash
-# Set custom mock responses via environment variable
-export SGAI_MOCK_RESPONSES='{
- "/v1/credits": {
- "remaining_credits": 100,
- "total_credits_used": 0,
- "mock": true
- },
-}'
-
-# Test smartscraper endpoint
-curl -X POST "https://api.scrapegraph.ai/v1/smartscraper/" \
- -H "Authorization: Bearer $SGAI_API_KEY" \
- -H "Content-Type: application/json" \
- -d '{
- "website_url": "https://example.com",
- "user_prompt": "Extract title and content"
- "mock": true
- }'
+ assert response["data"]["title"] == "Async Mock"
```
-### Testing Different HTTP Methods
+## JavaScript SDK Testing
-```bash
-# POST request - to smartscraper
-curl --location 'https://api.scrapegraphai.com/v1/smartscraper' \
---data '{
- "website_url": "https://www.scrapegraphai.com//",
- "user_prompt": "Extract founder info ",
- "mock":true
-}'
-```
+### Using Jest / Vitest
-```bash
-# POST request - to Markdownify
-curl --location 'https://api.scrapegraphai.com/v1/markdownify' \
---data '{
- "website_url": "https://www.scrapegraphai.com//",
- "mock":true
-}'
-```
-
-```bash
-# POST request - to SearchScraper
-curl --location 'https://api.scrapegraphai.com/v1/searchscraper' \
---data '{
- "website_url": "https://www.scrapegraphai.com//",
- "mock":true
- "output_schema":{},
- "num_results":3,
-}'
+```javascript
+import { describe, it, expect, vi } from "vitest";
+import scrapegraphai from "scrapegraph-js";
+
+// Mock the module
+vi.mock("scrapegraph-js", () => ({
+ default: vi.fn(() => ({
+ extract: vi.fn().mockResolvedValue({
+ data: { title: "Mock Title" },
+ requestId: "mock-123",
+ }),
+ search: vi.fn().mockResolvedValue({
+ data: { results: [] },
+ requestId: "mock-456",
+ }),
+ credits: vi.fn().mockResolvedValue({
+ data: { remainingCredits: 100 },
+ }),
+ })),
+}));
+
+describe("ScrapeGraphAI", () => {
+ const sgai = scrapegraphai({ apiKey: "test-key" });
+
+ it("should extract data", async () => {
+ const { data } = await sgai.extract("https://example.com", {
+ prompt: "Extract the title",
+ });
+ expect(data.title).toBe("Mock Title");
+ });
+
+ it("should check credits", async () => {
+ const { data } = await sgai.credits();
+ expect(data.remainingCredits).toBe(100);
+ });
+});
```
+### Using MSW (Mock Service Worker)
-## JavaScript SDK Mocking
-
-The JavaScript SDK supports per-request mocking via the `mock` parameter. Pass `mock: true` in the params object of any function to receive mock data instead of making a real API call.
-
-### Per-Request Mock Mode
+Mock at the network level for more realistic testing:
```javascript
-import { smartScraper, scrape, searchScraper, getCredits } from 'scrapegraph-js';
-
-const API_KEY = 'your-api-key';
-
-// SmartScraper with mock
-const smartResult = await smartScraper(API_KEY, {
- website_url: 'https://example.com',
- user_prompt: 'Extract the title',
- mock: true,
-});
-console.log('SmartScraper mock:', smartResult.data);
-
-// Scrape with mock
-const scrapeResult = await scrape(API_KEY, {
- website_url: 'https://example.com',
- mock: true,
-});
-console.log('Scrape mock:', scrapeResult.data);
-
-// SearchScraper with mock
-const searchResult = await searchScraper(API_KEY, {
- user_prompt: 'Find AI news',
- mock: true,
+import { http, HttpResponse } from "msw";
+import { setupServer } from "msw/node";
+import scrapegraphai from "scrapegraph-js";
+
+const server = setupServer(
+ http.post("https://api.scrapegraphai.com/api/v2/extract", () => {
+ return HttpResponse.json({
+ data: { title: "MSW Mock Title" },
+ requestId: "msw-123",
+ });
+ }),
+ http.get("https://api.scrapegraphai.com/api/v2/credits", () => {
+ return HttpResponse.json({
+ data: { remainingCredits: 50, totalCreditsUsed: 50 },
+ });
+ })
+);
+
+beforeAll(() => server.listen());
+afterAll(() => server.close());
+afterEach(() => server.resetHandlers());
+
+test("extract returns mocked data", async () => {
+ const sgai = scrapegraphai({ apiKey: "test-key" });
+ const { data } = await sgai.extract("https://example.com", {
+ prompt: "Extract the title",
+ });
+ expect(data.title).toBe("MSW Mock Title");
});
-console.log('SearchScraper mock:', searchResult.data);
```
-
-The JavaScript SDK does not have global mock functions like `enableMock()` or `setMockResponses()`. Mock mode is controlled per-request via the `mock: true` parameter. All functions return `ApiResult` — errors are never thrown.
-
+## Testing with cURL
-## SDK Comparison
-
-
-
- - `Client(mock=True)` initialization
- - `mock_responses` parameter for overrides
- - `mock_handler` for custom logic
- - Environment variable: `SGAI_MOCK=true`
-
-
- - `mock: true` in per-request params
- - All functions support mock parameter
- - Native async/await
-
-
- - Environment variable: `SGAI_MOCK=true`
- - `SGAI_MOCK_RESPONSES` for custom responses
- - Direct HTTP method testing
- - No SDK dependencies required
-
-
-
-### Feature Comparison
-
-| Feature | Python SDK | JavaScript SDK | cURL/HTTP |
-|---------|------------|----------------|-----------|
-| **Global Mock Mode** | `Client(mock=True)` | N/A | `SGAI_MOCK=true` |
-| **Per-Request Mock** | `{mock: True}` in params | `mock: true` in params | N/A |
-| **Custom Responses** | `mock_responses` dict | N/A | `SGAI_MOCK_RESPONSES` |
-| **Custom Handler** | `mock_handler` function | N/A | N/A |
-| **Environment Variable** | `SGAI_MOCK=true` | N/A | `SGAI_MOCK=true` |
-| **Async Support** | `AsyncClient(mock=True)` | Native async/await | N/A |
-| **Dependencies** | Python SDK required | JavaScript SDK required | None |
-
-## Limitations
-
-* You can't test real-time scraping performance in mock mode.
-* Mock responses don't reflect actual website changes or dynamic content.
-* Rate limiting and credit consumption are not simulated in mock mode.
-* Some advanced features may behave differently in mock mode compared to live mode.
-
-## Troubleshooting
-
-
-
-### Mock responses not working
-- Ensure `mock=True` is set when initializing the client
-- Check that your mock response paths match the actual API endpoints
-- Verify the response format matches the expected schema
+Test API endpoints directly using cURL (point the base URL at a local mock server or staging environment to avoid consuming credits):
-### Custom handler not being called
-- Make sure you're passing the `mock_handler` parameter correctly
-- Check that your handler function accepts the correct parameters: `(method, url, kwargs)`
-- Ensure the handler returns a valid response object
+```bash
+# Test extract endpoint
+curl -X POST "https://api.scrapegraphai.com/api/v2/extract" \
+ -H "Authorization: Bearer your-api-key" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "url": "https://example.com",
+ "prompt": "Extract the title"
+ }'
-### Schema validation errors
-- Mock responses must match the expected Pydantic schema structure
-- Use the same field names and types as defined in your schema
-- Test your mock responses with the actual schema classes
+# Test credits endpoint
+curl -X GET "https://api.scrapegraphai.com/api/v2/credits" \
+ -H "Authorization: Bearer your-api-key"
+```
-
+## SDK Comparison
-## Examples
+| Feature | Python | JavaScript |
+|---------|--------|------------|
+| **Mock library** | `unittest.mock`, `responses` | Jest/Vitest mocks, MSW |
+| **HTTP-level mocking** | `responses`, `aioresponses` | MSW (Mock Service Worker) |
+| **Async mocking** | `aioresponses`, `unittest.mock` | Native async/await |
+| **Fixture support** | pytest fixtures | beforeEach/afterEach |
-
-Here's a complete example showing all mocking features:
+## Best Practices
-```python
-from scrapegraph_py import Client
-from scrapegraph_py.logger import sgai_logger
-from pydantic import BaseModel, Field
-from typing import List
-
-# Set up logging
-sgai_logger.set_logging(level="INFO")
-
-class ProductInfo(BaseModel):
- name: str = Field(description="Product name")
- price: str = Field(description="Product price")
- features: List[str] = Field(description="Product features")
-
-def complete_mock_demo():
- # Initialize with comprehensive mock responses
- client = Client.from_env(
- mock=True,
- mock_responses={
- "/v1/credits": {
- "remaining_credits": 25,
- "total_credits_used": 75,
- "mock": true
- },
- "/v1/smartscraper/start": {
- "job_id": "demo-job-789",
- "status": "processing",
- "mock": true
- },
- "/v1/smartscraper/status/demo-job-789": {
- "job_id": "demo-job-789",
- "status": "completed",
- "result": {
- "name": "iPhone 15 Pro",
- "price": "$999",
- "features": [
- "A17 Pro chip",
- "48MP camera system",
- "Titanium design",
- "Action Button"
- ],
- "mock": true
- }
- }
- }
- )
-
- print("=== ScrapeGraphAI Mock Demo ===\n")
-
- # Test credits endpoint
- print("1. Checking credits:")
- credits = client.get_credits()
- print(f" Remaining: {credits['remaining_credits']}")
- print(f" Used: {credits['total_credits_used']}\n")
-
- # Test smartscraper with schema
- print("2. Extracting product information:")
- product = client.smartscraper(
- website_url="https://apple.com/iphone-15-pro",
- user_prompt="Extract product name, price, and key features",
- output_schema=ProductInfo
- )
-
- print(f" Product: {product.name}")
- print(f" Price: {product.price}")
- print(" Features:")
- for feature in product.features:
- print(f" - {feature}")
-
- print("\n3. Testing markdownify:")
- markdown = client.markdownify(website_url="https://example.com")
- print(f" Markdown length: {len(markdown)} characters")
-
- print("\n=== Demo Complete ===")
-
-if __name__ == "__main__":
- complete_mock_demo()
-```
-
+- Mock at the **client method level** for unit tests (fastest, simplest)
+- Mock at the **HTTP level** for integration tests (validates request/response shapes)
+- Use **fixtures** to share mock configurations across tests
+- Keep mock responses **realistic** — match the actual API response structure
+- Test both **success and error** scenarios
## Support
-
+
Report bugs or request features
@@ -596,4 +273,4 @@ if __name__ == "__main__":
-Need help with mocking? Check out our [Python SDK documentation](/sdks/python) or join our [Discord community](https://discord.gg/uJN7TYcpNa) for support.
+Need help with testing? Join our [Discord community](https://discord.gg/uJN7TYcpNa) for support.
diff --git a/sdks/python.mdx b/sdks/python.mdx
index 43da3f2..19b4b51 100644
--- a/sdks/python.mdx
+++ b/sdks/python.mdx
@@ -1,6 +1,6 @@
---
title: 'Python SDK'
-description: 'Official Python SDK for ScrapeGraphAI'
+description: 'Official Python SDK for ScrapeGraphAI v2'
icon: 'python'
---
@@ -21,23 +21,23 @@ icon: 'python'
## Installation
-Install the package using pip:
-
```bash
pip install scrapegraph-py
```
-## Features
+## What's New in v2
-- **AI-Powered Extraction**: Advanced web scraping using artificial intelligence
-- **Flexible Clients**: Both synchronous and asynchronous support
-- **Type Safety**: Structured output with Pydantic schemas
-- **Production Ready**: Detailed logging and automatic retries
-- **Developer Friendly**: Comprehensive error handling
+- **Renamed methods**: `smartscraper()` → `extract()`, `searchscraper()` → `search()`
+- **Unified config objects**: `FetchConfig` and `LlmConfig` replace scattered parameters
+- **Namespace methods**: `crawl.start()`, `crawl.status()`, `monitor.create()`, etc.
+- **New endpoints**: `credits()`, `history()`, `crawl.stop()`, `crawl.resume()`
+- **Removed**: `markdownify()`, `agenticscraper()`, `sitemap()`, `healthz()`, `feedback()`, built-in mock mode
-## Quick Start
+
+v2 is a breaking release. If you're upgrading from v1, see the [Migration Guide](https://github.com/ScrapeGraphAI/scrapegraph-py/blob/main/MIGRATION_V2.md).
+
-Initialize the client with your API key:
+## Quick Start
```python
from scrapegraph_py import Client
@@ -49,30 +49,42 @@ client = Client(api_key="your-api-key-here")
You can also set the `SGAI_API_KEY` environment variable and initialize the client without parameters: `client = Client()`
+### Client Options
+
+| Parameter | Type | Default | Description |
+| ------------- | ------ | -------------------------------- | ------------------------------- |
+| api_key | string | `SGAI_API_KEY` env var | Your ScrapeGraphAI API key |
+| base_url | string | `https://api.scrapegraphai.com` | API base URL |
+| verify_ssl | bool | `True` | Verify SSL certificates |
+| timeout | int | `30` | Request timeout in seconds |
+| max_retries | int | `3` | Maximum number of retries |
+| retry_delay | float | `1.0` | Delay between retries (seconds) |
+
## Services
-### SmartScraper
+### Extract
-Extract specific information from any webpage using AI:
+Extract structured data from any webpage using AI. Replaces the v1 `smartscraper()` method.
```python
-response = client.smartscraper(
- website_url="https://example.com",
- user_prompt="Extract the main heading and description"
+response = client.extract(
+ url="https://example.com",
+ prompt="Extract the main heading and description"
)
+print(response)
```
#### Parameters
-| Parameter | Type | Required | Description |
-| ---------------- | ------- | -------- | ---------------------------------------------------------------------------------- |
-| website_url | string | Yes | The URL of the webpage that needs to be scraped. |
-| user_prompt | string | Yes | A textual description of what you want to achieve. |
-| output_schema | object | No | The Pydantic object that describes the structure and format of the response. |
-
-
-Define a simple schema for basic data extraction:
+| Parameter | Type | Required | Description |
+| ------------ | ----------- | -------- | -------------------------------------------------------- |
+| url | string | Yes | The URL of the webpage to scrape |
+| prompt | string | Yes | A description of what you want to extract |
+| output_schema | object | No | Pydantic model for structured response |
+| fetch_config | FetchConfig | No | Fetch configuration (stealth, rendering, etc.) |
+| llm_config | LlmConfig | No | LLM configuration (model, temperature, etc.) |
+
```python
from pydantic import BaseModel, Field
@@ -81,93 +93,60 @@ class ArticleData(BaseModel):
author: str = Field(description="The author's name")
publish_date: str = Field(description="Article publication date")
content: str = Field(description="Main article content")
- category: str = Field(description="Article category")
-response = client.smartscraper(
- website_url="https://example.com/blog/article",
- user_prompt="Extract the article information",
+response = client.extract(
+ url="https://example.com/blog/article",
+ prompt="Extract the article information",
output_schema=ArticleData
)
-print(f"Title: {response.title}")
-print(f"Author: {response.author}")
-print(f"Published: {response.publish_date}")
+print(f"Title: {response['data']['title']}")
+print(f"Author: {response['data']['author']}")
```
-
-Define a complex schema for nested data structures:
-
+
```python
-from typing import List
-from pydantic import BaseModel, Field
-
-class Employee(BaseModel):
- name: str = Field(description="Employee's full name")
- position: str = Field(description="Job title")
- department: str = Field(description="Department name")
- email: str = Field(description="Email address")
-
-class Office(BaseModel):
- location: str = Field(description="Office location/city")
- address: str = Field(description="Full address")
- phone: str = Field(description="Contact number")
-
-class CompanyData(BaseModel):
- name: str = Field(description="Company name")
- description: str = Field(description="Company description")
- industry: str = Field(description="Industry sector")
- founded_year: int = Field(description="Year company was founded")
- employees: List[Employee] = Field(description="List of key employees")
- offices: List[Office] = Field(description="Company office locations")
- website: str = Field(description="Company website URL")
-
-# Extract comprehensive company information
-response = client.smartscraper(
- website_url="https://example.com/about",
- user_prompt="Extract detailed company information including employees and offices",
- output_schema=CompanyData
+from scrapegraph_py import FetchConfig, LlmConfig
+
+response = client.extract(
+ url="https://example.com",
+ prompt="Extract the main heading",
+ fetch_config=FetchConfig(
+ stealth=True,
+ render_js=True,
+ wait_ms=2000,
+ scrolls=3,
+ ),
+ llm_config=LlmConfig(
+ temperature=0.3,
+ max_tokens=1000,
+ ),
)
-
-# Access nested data
-print(f"Company: {response.name}")
-print("\nKey Employees:")
-for employee in response.employees:
- print(f"- {employee.name} ({employee.position})")
-
-print("\nOffice Locations:")
-for office in response.offices:
- print(f"- {office.location}: {office.address}")
```
-### SearchScraper
+### Search
-Search and extract information from multiple web sources using AI:
+Search the web and extract information from multiple sources. Replaces the v1 `searchscraper()` method.
```python
-from scrapegraph_py.models import TimeRange
-
-response = client.searchscraper(
- user_prompt="What are the key features and pricing of ChatGPT Plus?",
- time_range=TimeRange.PAST_WEEK # Optional: Filter results by time range
+response = client.search(
+ query="What are the key features and pricing of ChatGPT Plus?"
)
```
#### Parameters
-| Parameter | Type | Required | Description |
-| ---------------- | ------- | -------- | ---------------------------------------------------------------------------------- |
-| user_prompt | string | Yes | A textual description of what you want to achieve. |
-| num_results | number | No | Number of websites to search (3-20). Default: 3. |
-| extraction_mode | boolean | No | **True** = AI extraction mode (10 credits/page), **False** = markdown mode (2 credits/page). Default: True |
-| output_schema | object | No | The Pydantic object that describes the structure and format of the response (AI extraction mode only) |
-| location_geo_code| string | No | Optional geo code for location-based search (e.g., "us") |
-| time_range | TimeRange| No | Optional time range filter for search results. Options: TimeRange.PAST_HOUR, TimeRange.PAST_24_HOURS, TimeRange.PAST_WEEK, TimeRange.PAST_MONTH, TimeRange.PAST_YEAR |
-
-
-Define a simple schema for structured search results:
+| Parameter | Type | Required | Description |
+| ------------- | ----------- | -------- | -------------------------------------------------------- |
+| query | string | Yes | The search query |
+| num_results | number | No | Number of results (3-20). Default: 3 |
+| output_schema | object | No | Pydantic model for structured response |
+| fetch_config | FetchConfig | No | Fetch configuration |
+| llm_config | LlmConfig | No | LLM configuration |
+
```python
from pydantic import BaseModel, Field
from typing import List
@@ -177,174 +156,153 @@ class ProductInfo(BaseModel):
description: str = Field(description="Product description")
price: str = Field(description="Product price")
features: List[str] = Field(description="List of key features")
- availability: str = Field(description="Availability information")
-from scrapegraph_py.models import TimeRange
-
-response = client.searchscraper(
- user_prompt="Find information about iPhone 15 Pro",
+response = client.search(
+ query="Find information about iPhone 15 Pro",
output_schema=ProductInfo,
- location_geo_code="us", # Optional: Geo code for location-based search
- time_range=TimeRange.PAST_MONTH # Optional: Filter results by time range
+ num_results=5,
)
-print(f"Product: {response.name}")
-print(f"Price: {response.price}")
-print("\nFeatures:")
-for feature in response.features:
- print(f"- {feature}")
+print(f"Product: {response['data']['name']}")
+print(f"Price: {response['data']['price']}")
```
-
-Define a complex schema for comprehensive market research:
+### Scrape
-```python
-from typing import List
-from pydantic import BaseModel, Field
+Convert any webpage into markdown, HTML, screenshot, or branding format.
-class MarketPlayer(BaseModel):
- name: str = Field(description="Company name")
- market_share: str = Field(description="Market share percentage")
- key_products: List[str] = Field(description="Main products in market")
- strengths: List[str] = Field(description="Company's market strengths")
-
-class MarketTrend(BaseModel):
- name: str = Field(description="Trend name")
- description: str = Field(description="Trend description")
- impact: str = Field(description="Expected market impact")
- timeframe: str = Field(description="Trend timeframe")
-
-class MarketAnalysis(BaseModel):
- market_size: str = Field(description="Total market size")
- growth_rate: str = Field(description="Annual growth rate")
- key_players: List[MarketPlayer] = Field(description="Major market players")
- trends: List[MarketTrend] = Field(description="Market trends")
- challenges: List[str] = Field(description="Industry challenges")
- opportunities: List[str] = Field(description="Market opportunities")
-
-from scrapegraph_py.models import TimeRange
-
-# Perform comprehensive market research
-response = client.searchscraper(
- user_prompt="Analyze the current AI chip market landscape",
- output_schema=MarketAnalysis,
- location_geo_code="us", # Optional: Geo code for location-based search
- time_range=TimeRange.PAST_MONTH # Optional: Filter results by time range
+```python
+response = client.scrape(
+ url="https://example.com"
)
-
-# Access structured market data
-print(f"Market Size: {response.market_size}")
-print(f"Growth Rate: {response.growth_rate}")
-
-print("\nKey Players:")
-for player in response.key_players:
- print(f"\n{player.name}")
- print(f"Market Share: {player.market_share}")
- print("Key Products:")
- for product in player.key_products:
- print(f"- {product}")
-
-print("\nMarket Trends:")
-for trend in response.trends:
- print(f"\n{trend.name}")
- print(f"Impact: {trend.impact}")
- print(f"Timeframe: {trend.timeframe}")
```
-
-
-Use markdown mode for cost-effective content gathering:
+#### Parameters
-```python
-from scrapegraph_py import Client
+| Parameter | Type | Required | Description |
+| ------------- | ----------- | -------- | -------------------------------------------------------- |
+| url | string | Yes | The URL of the webpage to scrape |
+| output_format | string | No | Output format: `"markdown"`, `"html"`, `"screenshot"`, `"branding"` |
+| fetch_config  | FetchConfig | No       | Fetch behavior options (see [FetchConfig](#fetchconfig)) |
-client = Client(api_key="your-api-key")
+### Crawl
-from scrapegraph_py.models import TimeRange
+Manage multi-page crawl operations asynchronously.
-# Enable markdown mode for cost-effective content gathering
-response = client.searchscraper(
- user_prompt="Latest developments in artificial intelligence",
- num_results=3,
- extraction_mode=False, # Enable markdown mode (2 credits per page vs 10 credits)
- location_geo_code="us", # Optional: Geo code for location-based search
- time_range=TimeRange.PAST_WEEK # Optional: Filter results by time range
+```python
+# Start a crawl
+job = client.crawl.start(
+ url="https://example.com",
+ depth=2,
+ include_patterns=["/blog/*", "/docs/**"],
+ exclude_patterns=["/admin/*", "/api/*"],
)
+print(f"Crawl started: {job['id']}")
-# Access the raw markdown content
-markdown_content = response['markdown_content']
-reference_urls = response['reference_urls']
+# Check status
+status = client.crawl.status(job["id"])
+print(f"Status: {status['status']}")
-print(f"Markdown content length: {len(markdown_content)} characters")
-print(f"Reference URLs: {len(reference_urls)}")
+# Stop a crawl
+client.crawl.stop(job["id"])
-# Process the markdown content
-print("Content preview:", markdown_content[:500] + "...")
+# Resume a crawl
+client.crawl.resume(job["id"])
+```
-# Save to file for analysis
-with open('ai_research_content.md', 'w', encoding='utf-8') as f:
- f.write(markdown_content)
+#### crawl.start() Parameters
-print("Content saved to ai_research_content.md")
-```
+| Parameter | Type | Required | Description |
+| ---------------- | ----------- | -------- | -------------------------------------------------------- |
+| url | string | Yes | The starting URL to crawl |
+| depth            | int         | No       | Crawl depth (how many levels of links to follow)         |
+| include_patterns | list[str] | No | URL patterns to include (`*` any chars, `**` any path) |
+| exclude_patterns | list[str] | No | URL patterns to exclude |
+| fetch_config     | FetchConfig | No       | Fetch behavior options (see [FetchConfig](#fetchconfig)) |
-**Markdown Mode Benefits:**
-- **Cost-effective**: Only 2 credits per page (vs 10 credits for AI extraction)
-- **Full content**: Get complete page content in markdown format
-- **Faster**: No AI processing overhead
-- **Perfect for**: Content analysis, bulk data collection, building datasets
+### Monitor
-
+Create and manage site monitoring jobs.
+
+```python
+# Create a monitor
+monitor = client.monitor.create(
+ url="https://example.com",
+ prompt="Track price changes",
+ schedule="daily",
+)
+
+# List all monitors
+monitors = client.monitor.list()
+
+# Get a specific monitor
+details = client.monitor.get(monitor["id"])
+
+# Pause / Resume / Delete
+client.monitor.pause(monitor["id"])
+client.monitor.resume(monitor["id"])
+client.monitor.delete(monitor["id"])
+```
+
+### Credits
-
-Filter search results by date range to get only recent information:
+Check your account credit balance.
```python
-from scrapegraph_py import Client
-from scrapegraph_py.models import TimeRange
+credits = client.credits()
+print(f"Remaining: {credits['remaining_credits']}")
+print(f"Used: {credits['total_credits_used']}")
+```
-client = Client(api_key="your-api-key")
+### History
-# Search for recent news from the past week
-response = client.searchscraper(
- user_prompt="Latest news about AI developments",
- num_results=5,
- time_range=TimeRange.PAST_WEEK # Options: PAST_HOUR, PAST_24_HOURS, PAST_WEEK, PAST_MONTH, PAST_YEAR
-)
+Retrieve paginated request history with optional service filtering.
-print("Recent AI news:", response['result'])
-print("Reference URLs:", response['reference_urls'])
+```python
+history = client.history(page=1, per_page=20, service="extract")
+for entry in history["items"]:
+ print(f"{entry['created_at']} - {entry['service']} - {entry['status']}")
```
-**Time Range Options:**
-- `TimeRange.PAST_HOUR` - Results from the past hour
-- `TimeRange.PAST_24_HOURS` - Results from the past 24 hours
-- `TimeRange.PAST_WEEK` - Results from the past week
-- `TimeRange.PAST_MONTH` - Results from the past month
-- `TimeRange.PAST_YEAR` - Results from the past year
+## Configuration Objects
-**Use Cases:**
-- Finding recent news and updates
-- Tracking time-sensitive information
-- Getting latest product releases
-- Monitoring recent market changes
+### FetchConfig
-
+Controls how pages are fetched.
-### Markdownify
+```python
+from scrapegraph_py import FetchConfig
+
+config = FetchConfig(
+ stealth=True, # Anti-detection mode
+ render_js=True, # Render JavaScript
+ wait_ms=2000, # Wait time after page load (ms)
+ scrolls=3, # Number of scrolls
+ country="us", # Proxy country code
+ cookies={"key": "value"},
+ headers={"X-Custom": "header"},
+)
+```
-Convert any webpage into clean, formatted markdown:
+### LlmConfig
+
+Controls LLM behavior for AI-powered methods.
```python
-response = client.markdownify(
- website_url="https://example.com"
+from scrapegraph_py import LlmConfig
+
+config = LlmConfig(
+ model="default", # LLM model to use
+ temperature=0.3, # Response creativity (0-1)
+ max_tokens=1000, # Maximum response tokens
+ chunker="auto", # Content chunking strategy
)
```
## Async Support
-All endpoints support asynchronous operations:
+All methods are available on the async client:
```python
import asyncio
@@ -352,38 +310,32 @@ from scrapegraph_py import AsyncClient
async def main():
async with AsyncClient() as client:
- response = await client.smartscraper(
- website_url="https://example.com",
- user_prompt="Extract the main content"
+ # Extract
+ response = await client.extract(
+ url="https://example.com",
+ prompt="Extract the main content"
)
print(response)
-asyncio.run(main())
-```
-
-## Feedback
+ # Crawl
+ job = await client.crawl.start("https://example.com", depth=2)
+ status = await client.crawl.status(job["id"])
+ print(status)
-Help us improve by submitting feedback programmatically:
+ # Credits
+ credits = await client.credits()
+ print(credits)
-```python
-client.submit_feedback(
- request_id="your-request-id",
- rating=5,
- feedback_text="Great results!"
-)
+asyncio.run(main())
```
## Support
-
+
Report issues and contribute to the SDK
Get help from our development team
-
-
- This project is licensed under the MIT License. See the [LICENSE](https://github.com/ScrapeGraphAI/scrapegraph-sdk/blob/main/LICENSE) file for details.
-
diff --git a/v1/additional-parameters/headers.mdx b/v1/additional-parameters/headers.mdx
new file mode 100644
index 0000000..b202338
--- /dev/null
+++ b/v1/additional-parameters/headers.mdx
@@ -0,0 +1,23 @@
+---
+title: 'Custom Headers'
+description: 'Pass custom HTTP headers with your requests (v1)'
+icon: 'heading'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. In v2, use `FetchConfig(headers={...})`. See the [v2 documentation](/services/additional-parameters/headers).
+
+
+## Custom Headers
+
+Pass custom HTTP headers with your scraping requests:
+
+```python
+response = client.smartscraper(
+ website_url="https://example.com",
+ user_prompt="Extract data",
+ headers={"Authorization": "Bearer token", "Accept-Language": "en-US"}
+)
+```
+
+For v2 usage with `FetchConfig`, see the [v2 documentation](/services/additional-parameters/headers).
diff --git a/v1/additional-parameters/pagination.mdx b/v1/additional-parameters/pagination.mdx
new file mode 100644
index 0000000..a32d8dd
--- /dev/null
+++ b/v1/additional-parameters/pagination.mdx
@@ -0,0 +1,15 @@
+---
+title: 'Pagination'
+description: 'Handle paginated content (v1)'
+icon: 'arrow-right'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 documentation](/services/additional-parameters/pagination).
+
+
+## Pagination
+
+Handle paginated content using the `number_of_scrolls` parameter or by specifying pagination logic in your prompt.
+
+For v2 usage, see the [v2 documentation](/services/additional-parameters/pagination).
diff --git a/v1/additional-parameters/proxy.mdx b/v1/additional-parameters/proxy.mdx
new file mode 100644
index 0000000..57744d9
--- /dev/null
+++ b/v1/additional-parameters/proxy.mdx
@@ -0,0 +1,23 @@
+---
+title: 'Proxy'
+description: 'Route requests through specific countries (v1)'
+icon: 'shield'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. In v2, use `FetchConfig(country="us")`. See the [v2 documentation](/services/additional-parameters/proxy).
+
+
+## Proxy Routing
+
+Route scraping requests through proxies in specific countries using the `country_code` parameter:
+
+```python
+response = client.smartscraper(
+ website_url="https://example.com",
+ user_prompt="Extract data",
+ country_code="us"
+)
+```
+
+For v2 usage with `FetchConfig`, see the [v2 documentation](/services/additional-parameters/proxy).
diff --git a/v1/additional-parameters/wait-ms.mdx b/v1/additional-parameters/wait-ms.mdx
new file mode 100644
index 0000000..93fbbe6
--- /dev/null
+++ b/v1/additional-parameters/wait-ms.mdx
@@ -0,0 +1,23 @@
+---
+title: 'Wait Time'
+description: 'Configure page load wait time (v1)'
+icon: 'clock'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. In v2, use `FetchConfig(wait_ms=3000)`. See the [v2 documentation](/services/additional-parameters/wait-ms).
+
+
+## Wait Time
+
+Configure how long to wait for the page to load before scraping:
+
+```python
+response = client.smartscraper(
+ website_url="https://example.com",
+ user_prompt="Extract data",
+ wait_ms=5000 # Wait 5 seconds
+)
+```
+
+For v2 usage with `FetchConfig`, see the [v2 documentation](/services/additional-parameters/wait-ms).
diff --git a/v1/agenticscraper.mdx b/v1/agenticscraper.mdx
new file mode 100644
index 0000000..9b1ee49
--- /dev/null
+++ b/v1/agenticscraper.mdx
@@ -0,0 +1,39 @@
+---
+title: 'AgenticScraper'
+description: 'Agent-based multi-step scraping (v1)'
+icon: 'robot'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. AgenticScraper has been removed in v2. Use `extract()` with `FetchConfig` for advanced scraping, or `crawl.start()` for multi-page extraction. See the [v2 documentation](/services/agenticscraper).
+
+
+## Overview
+
+AgenticScraper uses an AI agent to perform multi-step scraping operations, navigating through pages and interacting with elements as needed.
+
+## Usage
+
+
+
+```python Python
+from scrapegraph_py import Client
+
+client = Client(api_key="your-api-key")
+
+response = client.agenticscraper(
+ website_url="https://example.com",
+ user_prompt="Navigate to the pricing page and extract all plan details"
+)
+```
+
+```javascript JavaScript
+import { agenticScraper } from "scrapegraph-js";
+
+const response = await agenticScraper(apiKey, {
+ website_url: "https://example.com",
+ user_prompt: "Navigate to the pricing page and extract all plan details",
+});
+```
+
+
diff --git a/v1/api-reference/introduction.mdx b/v1/api-reference/introduction.mdx
new file mode 100644
index 0000000..660053e
--- /dev/null
+++ b/v1/api-reference/introduction.mdx
@@ -0,0 +1,51 @@
+---
+title: 'API Reference'
+description: 'ScrapeGraphAI v1 API Reference'
+icon: 'book'
+---
+
+
+You are viewing the **v1 (legacy)** API documentation. The v1 API uses `/v1/*` endpoints. Please migrate to the [v2 API](/api-reference/introduction) which uses `/api/v2/*` endpoints.
+
+
+## Base URL
+
+```
+https://api.scrapegraphai.com/v1
+```
+
+## Authentication
+
+All v1 API requests require the `SGAI-APIKEY` header:
+
+```bash
+curl -X POST "https://api.scrapegraphai.com/v1/smartscraper" \
+ -H "SGAI-APIKEY: your-api-key" \
+ -H "Content-Type: application/json" \
+ -d '{"website_url": "https://example.com", "user_prompt": "Extract data"}'
+```
+
+
+In v2, authentication uses the `Authorization: Bearer` header instead.
+
+
+## v1 Endpoints
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/v1/smartscraper` | POST | Start a SmartScraper job |
+| `/v1/smartscraper/{id}` | GET | Get SmartScraper job status |
+| `/v1/searchscraper` | POST | Start a SearchScraper job |
+| `/v1/searchscraper/{id}` | GET | Get SearchScraper job status |
+| `/v1/markdownify` | POST | Start a Markdownify job |
+| `/v1/markdownify/{id}` | GET | Get Markdownify job status |
+| `/v1/smartcrawler` | POST | Start a SmartCrawler job |
+| `/v1/smartcrawler/{id}` | GET | Get SmartCrawler job status |
+| `/v1/sitemap` | POST | Start a Sitemap job |
+| `/v1/sitemap/{id}` | GET | Get Sitemap job status |
+| `/v1/credits` | GET | Get remaining credits |
+| `/v1/feedback` | POST | Submit feedback |
+
+## Migration to v2
+
+See the [v2 API Reference](/api-reference/introduction) for the latest endpoints and authentication methods.
diff --git a/v1/cli/ai-agent-skill.mdx b/v1/cli/ai-agent-skill.mdx
new file mode 100644
index 0000000..eea9b7e
--- /dev/null
+++ b/v1/cli/ai-agent-skill.mdx
@@ -0,0 +1,15 @@
+---
+title: 'AI Agent Skill'
+description: 'Use CLI as an AI agent skill (v1)'
+icon: 'robot'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 AI Agent Skill documentation](/services/cli/ai-agent-skill).
+
+
+## Overview
+
+The ScrapeGraphAI CLI can be used as a skill within AI agent frameworks, enabling agents to scrape and extract web data.
+
+For detailed usage, see the [v2 documentation](/services/cli/ai-agent-skill).
diff --git a/v1/cli/commands.mdx b/v1/cli/commands.mdx
new file mode 100644
index 0000000..6eddc1a
--- /dev/null
+++ b/v1/cli/commands.mdx
@@ -0,0 +1,20 @@
+---
+title: 'CLI Commands'
+description: 'Available CLI commands (v1)'
+icon: 'terminal'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 CLI commands](/services/cli/commands).
+
+
+## Available Commands
+
+| Command | Description |
+|---------|-------------|
+| `sgai smartscraper` | Extract data from a webpage using AI |
+| `sgai searchscraper` | Search and extract from multiple sources |
+| `sgai markdownify` | Convert webpage to markdown |
+| `sgai credits` | Check remaining API credits |
+
+For detailed usage, see the [v2 CLI documentation](/services/cli/commands).
diff --git a/v1/cli/examples.mdx b/v1/cli/examples.mdx
new file mode 100644
index 0000000..af5365f
--- /dev/null
+++ b/v1/cli/examples.mdx
@@ -0,0 +1,31 @@
+---
+title: 'CLI Examples'
+description: 'CLI usage examples (v1)'
+icon: 'play'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 CLI examples](/services/cli/examples).
+
+
+## Examples
+
+### Extract company info
+
+```bash
+sgai smartscraper --url "https://example.com/about" --prompt "Extract the company name and description"
+```
+
+### Search the web
+
+```bash
+sgai searchscraper --prompt "Latest AI news" --num-results 5
+```
+
+### Convert to markdown
+
+```bash
+sgai markdownify --url "https://example.com/article"
+```
+
+For more examples, see the [v2 CLI documentation](/services/cli/examples).
diff --git a/v1/cli/introduction.mdx b/v1/cli/introduction.mdx
new file mode 100644
index 0000000..161dace
--- /dev/null
+++ b/v1/cli/introduction.mdx
@@ -0,0 +1,27 @@
+---
+title: 'CLI Introduction'
+description: 'ScrapeGraphAI Command Line Interface (v1)'
+icon: 'terminal'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 CLI documentation](/services/cli/introduction).
+
+
+## Overview
+
+The ScrapeGraphAI CLI provides a command-line interface for interacting with ScrapeGraphAI services directly from your terminal.
+
+## Installation
+
+```bash
+pip install scrapegraph-py
+```
+
+## Quick Start
+
+```bash
+sgai smartscraper --url "https://example.com" --prompt "Extract the title"
+```
+
+For more details, see the [v2 CLI documentation](/services/cli/introduction).
diff --git a/v1/cli/json-mode.mdx b/v1/cli/json-mode.mdx
new file mode 100644
index 0000000..932ca48
--- /dev/null
+++ b/v1/cli/json-mode.mdx
@@ -0,0 +1,17 @@
+---
+title: 'JSON Mode'
+description: 'CLI JSON output mode (v1)'
+icon: 'code'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 JSON mode documentation](/services/cli/json-mode).
+
+
+## JSON Output
+
+Use the `--json` flag to get structured JSON output from CLI commands:
+
+```bash
+sgai smartscraper --url "https://example.com" --prompt "Extract data" --json
+```
diff --git a/v1/introduction.mdx b/v1/introduction.mdx
new file mode 100644
index 0000000..23eaab7
--- /dev/null
+++ b/v1/introduction.mdx
@@ -0,0 +1,88 @@
+---
+title: Introduction
+description: 'Welcome to ScrapeGraphAI v1 - AI-Powered Web Data Extraction'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. v1 is deprecated and will be removed in a future release. Please migrate to [v2](/introduction) for the latest features and improvements.
+
+
+
+
+## Overview
+
+[ScrapeGraphAI](https://scrapegraphai.com) is a powerful suite of LLM-driven web scraping tools designed to extract structured data from any website and HTML content. Our API is designed to be easy to use and integrate with your existing workflows.
+
+### Perfect For
+
+
+
+ Feed your AI agents with structured web data for enhanced decision-making
+
+
+ Extract and structure web data for research and analysis
+
+
+ Build comprehensive datasets from web sources
+
+
+ Create scraping-powered platforms and applications
+
+
+
+## Getting Started
+
+
+
+ Sign up and access your API key from the [dashboard](https://dashboard.scrapegraphai.com)
+
+
+ Select from our specialized extraction services based on your needs
+
+
+ Begin extracting data using our SDKs or direct API calls
+
+
+
+## Core Services
+
+- **SmartScraper**: AI-powered extraction for any webpage
+- **SearchScraper**: Find and extract any data using AI starting from a prompt
+- **SmartCrawler**: AI-powered extraction for any webpage with crawl
+- **Markdownify**: Convert web content to clean Markdown format
+- **Sitemap**: Extract sitemaps from websites
+- **AgenticScraper**: Agent-based multi-step scraping
+- **Toonify**: Convert images to cartoon style
+
+## v1 SDKs
+
+### Python
+```python
+from scrapegraph_py import Client
+
+client = Client(api_key="your-api-key")
+
+response = client.smartscraper(
+ website_url="https://example.com",
+ user_prompt="Extract the main content"
+)
+```
+
+### JavaScript
+```javascript
+import { smartScraper } from "scrapegraph-js";
+
+const response = await smartScraper(apiKey, {
+ website_url: "https://example.com",
+ user_prompt: "What does the company do?",
+});
+```
+
+## Migrate to v2
+
+v2 brings significant improvements, including renamed methods, unified configuration objects, and new endpoints. See the migration guides:
+- [Python SDK Migration Guide](https://github.com/ScrapeGraphAI/scrapegraph-py/blob/main/MIGRATION_V2.md)
+- [JavaScript SDK Migration Guide](https://github.com/ScrapeGraphAI/scrapegraph-js/blob/main/MIGRATION.md)
diff --git a/v1/markdownify.mdx b/v1/markdownify.mdx
new file mode 100644
index 0000000..2a4028e
--- /dev/null
+++ b/v1/markdownify.mdx
@@ -0,0 +1,46 @@
+---
+title: 'Markdownify'
+description: 'Convert web content to clean markdown (v1)'
+icon: 'markdown'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. In v2, Markdownify has been replaced by `scrape()` with `output_format="markdown"`. See the [v2 documentation](/services/markdownify).
+
+
+## Overview
+
+Markdownify converts any webpage into clean, formatted markdown.
+
+## Usage
+
+
+
+```python Python
+from scrapegraph_py import Client
+
+client = Client(api_key="your-api-key")
+
+response = client.markdownify(
+ website_url="https://example.com"
+)
+```
+
+```javascript JavaScript
+import { markdownify } from "scrapegraph-js";
+
+const response = await markdownify(apiKey, {
+ website_url: "https://example.com",
+});
+```
+
+
+
+### Parameters
+
+| Parameter | Type | Required | Description |
+| ------------ | ------- | -------- | ---------------------------------------- |
+| website_url | string | Yes | The URL of the webpage to convert |
+| wait_ms | number | No | Page load wait time in ms |
+| stealth | boolean | No | Enable anti-detection mode |
+| country_code | string | No | Proxy routing country code |
diff --git a/v1/mcp-server/claude.mdx b/v1/mcp-server/claude.mdx
new file mode 100644
index 0000000..d03af08
--- /dev/null
+++ b/v1/mcp-server/claude.mdx
@@ -0,0 +1,11 @@
+---
+title: 'Claude Integration'
+description: 'Use ScrapeGraphAI MCP with Claude (v1)'
+icon: 'message-bot'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 Claude integration](/services/mcp-server/claude).
+
+
+For Claude MCP setup, see the [v2 documentation](/services/mcp-server/claude).
diff --git a/v1/mcp-server/cursor.mdx b/v1/mcp-server/cursor.mdx
new file mode 100644
index 0000000..dbcdfa4
--- /dev/null
+++ b/v1/mcp-server/cursor.mdx
@@ -0,0 +1,11 @@
+---
+title: 'Cursor Integration'
+description: 'Use ScrapeGraphAI MCP with Cursor (v1)'
+icon: 'code'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 Cursor integration](/services/mcp-server/cursor).
+
+
+For Cursor MCP setup, see the [v2 documentation](/services/mcp-server/cursor).
diff --git a/v1/mcp-server/introduction.mdx b/v1/mcp-server/introduction.mdx
new file mode 100644
index 0000000..3162ccf
--- /dev/null
+++ b/v1/mcp-server/introduction.mdx
@@ -0,0 +1,15 @@
+---
+title: 'MCP Server Introduction'
+description: 'ScrapeGraphAI MCP Server (v1)'
+icon: 'server'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 MCP Server documentation](/services/mcp-server/introduction).
+
+
+## Overview
+
+The ScrapeGraphAI MCP (Model Context Protocol) Server enables AI assistants and tools to use ScrapeGraphAI as a data source.
+
+For setup and usage, see the [v2 MCP Server documentation](/services/mcp-server/introduction).
diff --git a/v1/mcp-server/smithery.mdx b/v1/mcp-server/smithery.mdx
new file mode 100644
index 0000000..edee161
--- /dev/null
+++ b/v1/mcp-server/smithery.mdx
@@ -0,0 +1,11 @@
+---
+title: 'Smithery Integration'
+description: 'Use ScrapeGraphAI MCP with Smithery (v1)'
+icon: 'hammer'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 Smithery integration](/services/mcp-server/smithery).
+
+
+For Smithery MCP setup, see the [v2 documentation](/services/mcp-server/smithery).
diff --git a/v1/quickstart.mdx b/v1/quickstart.mdx
new file mode 100644
index 0000000..34fe5d8
--- /dev/null
+++ b/v1/quickstart.mdx
@@ -0,0 +1,69 @@
+---
+title: Quickstart
+description: 'Get started with ScrapeGraphAI v1 SDKs'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. Please migrate to [v2](/install) for the latest features.
+
+
+## Prerequisites
+
+- Obtain your **API key** by signing up on the [ScrapeGraphAI Dashboard](https://dashboard.scrapegraphai.com)
+
+---
+
+## Python SDK
+
+```bash
+pip install scrapegraph-py
+```
+
+```python
+from scrapegraph_py import Client
+
+client = Client(api_key="your-api-key-here")
+
+response = client.smartscraper(
+ website_url="https://scrapegraphai.com",
+ user_prompt="Extract information about the company"
+)
+print(response)
+```
+
+
+You can also set the `SGAI_API_KEY` environment variable and initialize the client without parameters: `client = Client()`
+
+
+---
+
+## JavaScript SDK
+
+```bash
+npm i scrapegraph-js
+```
+
+```javascript
+import { smartScraper } from "scrapegraph-js";
+
+const apiKey = "your-api-key-here";
+
+const response = await smartScraper(apiKey, {
+ website_url: "https://scrapegraphai.com",
+ user_prompt: "What does the company do?",
+});
+
+if (response.status === "error") {
+ console.error("Error:", response.error);
+} else {
+ console.log(response.data.result);
+}
+```
+
+---
+
+## Next Steps
+
+- Explore the [SmartScraper](/v1/smartscraper) service
+- Check out [SearchScraper](/v1/searchscraper) for search-based extraction
+- Use [Markdownify](/v1/markdownify) for HTML-to-markdown conversion
diff --git a/v1/scrape.mdx b/v1/scrape.mdx
new file mode 100644
index 0000000..5d6ba07
--- /dev/null
+++ b/v1/scrape.mdx
@@ -0,0 +1,37 @@
+---
+title: 'Scrape'
+description: 'Basic webpage scraping service (v1)'
+icon: 'spider-web'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. See the [v2 documentation](/services/scrape).
+
+
+## Overview
+
+The Scrape service provides basic webpage scraping capabilities, returning the raw content of a webpage.
+
+## Usage
+
+
+
+```python Python
+from scrapegraph_py import Client
+
+client = Client(api_key="your-api-key")
+
+response = client.scrape(
+ website_url="https://example.com"
+)
+```
+
+```javascript JavaScript
+import { scrape } from "scrapegraph-js";
+
+const response = await scrape(apiKey, {
+ website_url: "https://example.com",
+});
+```
+
+
diff --git a/v1/searchscraper.mdx b/v1/searchscraper.mdx
new file mode 100644
index 0000000..eba7042
--- /dev/null
+++ b/v1/searchscraper.mdx
@@ -0,0 +1,52 @@
+---
+title: 'SearchScraper'
+description: 'Search and extract information from multiple web sources (v1)'
+icon: 'magnifying-glass'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. In v2, SearchScraper has been renamed to `search()`. See the [v2 documentation](/services/searchscraper).
+
+
+## Overview
+
+SearchScraper enables you to search the web and extract structured information from multiple sources using AI.
+
+## Usage
+
+
+
+```python Python
+from scrapegraph_py import Client
+from scrapegraph_py.models import TimeRange
+
+client = Client(api_key="your-api-key")
+
+response = client.searchscraper(
+ user_prompt="What are the key features of ChatGPT Plus?",
+ time_range=TimeRange.PAST_WEEK
+)
+```
+
+```javascript JavaScript
+import { searchScraper } from "scrapegraph-js";
+
+const response = await searchScraper(apiKey, {
+ user_prompt: "Find the best restaurants in San Francisco",
+ location_geo_code: "us",
+ time_range: "past_week",
+});
+```
+
+
+
+### Parameters
+
+| Parameter | Type | Required | Description |
+| ----------------- | --------- | -------- | -------------------------------------------------------- |
+| user_prompt | string | Yes | Search query description |
+| num_results | number | No | Number of websites to search (3-20) |
+| extraction_mode | boolean | No | AI extraction (true) or markdown mode (false) |
+| output_schema | object | No | Schema for structured response |
+| location_geo_code | string | No | Geo code for location-based search |
+| time_range | TimeRange | No | Time range filter for results |
diff --git a/v1/sitemap.mdx b/v1/sitemap.mdx
new file mode 100644
index 0000000..284f64f
--- /dev/null
+++ b/v1/sitemap.mdx
@@ -0,0 +1,37 @@
+---
+title: 'Sitemap'
+description: 'Extract sitemaps from websites (v1)'
+icon: 'sitemap'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. The Sitemap endpoint has been removed in v2. Use `crawl.start()` with URL patterns instead. See the [v2 documentation](/services/sitemap).
+
+
+## Overview
+
+The Sitemap service extracts and parses sitemap data from any website.
+
+## Usage
+
+
+
+```python Python
+from scrapegraph_py import Client
+
+client = Client(api_key="your-api-key")
+
+response = client.sitemap(
+ website_url="https://example.com"
+)
+```
+
+```javascript JavaScript
+import { sitemap } from "scrapegraph-js";
+
+const response = await sitemap(apiKey, {
+ website_url: "https://example.com",
+});
+```
+
+
diff --git a/v1/smartcrawler.mdx b/v1/smartcrawler.mdx
new file mode 100644
index 0000000..6ceb27f
--- /dev/null
+++ b/v1/smartcrawler.mdx
@@ -0,0 +1,41 @@
+---
+title: 'SmartCrawler'
+description: 'AI-powered multi-page crawling service (v1)'
+icon: 'spider'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. In v2, crawling uses `crawl.start()`, `crawl.status()`, `crawl.stop()`, and `crawl.resume()`. See the [v2 documentation](/services/smartcrawler).
+
+
+## Overview
+
+SmartCrawler enables AI-powered extraction across multiple pages of a website, automatically navigating and collecting structured data.
+
+## Usage
+
+
+
+```python Python
+from scrapegraph_py import Client
+
+client = Client(api_key="your-api-key")
+
+response = client.crawl(
+ website_url="https://example.com",
+ user_prompt="Extract all blog post titles",
+ depth=2
+)
+```
+
+```javascript JavaScript
+import { smartCrawler } from "scrapegraph-js";
+
+const response = await smartCrawler(apiKey, {
+ website_url: "https://example.com",
+ user_prompt: "Extract all blog post titles",
+ depth: 2,
+});
+```
+
+
diff --git a/v1/smartscraper.mdx b/v1/smartscraper.mdx
new file mode 100644
index 0000000..cc082cc
--- /dev/null
+++ b/v1/smartscraper.mdx
@@ -0,0 +1,52 @@
+---
+title: 'SmartScraper'
+description: 'AI-powered web scraping for any website (v1)'
+icon: 'robot'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. In v2, SmartScraper has been renamed to `extract()`. See the [v2 documentation](/services/smartscraper).
+
+
+## Overview
+
+SmartScraper is our flagship LLM-powered web scraping service that intelligently extracts structured data from any website.
+
+## Usage
+
+
+
+```python Python
+from scrapegraph_py import Client
+
+client = Client(api_key="your-api-key")
+
+response = client.smartscraper(
+ website_url="https://example.com",
+ user_prompt="Extract the main heading and description"
+)
+```
+
+```javascript JavaScript
+import { smartScraper } from "scrapegraph-js";
+
+const response = await smartScraper(apiKey, {
+ website_url: "https://example.com",
+ user_prompt: "Extract the main content",
+});
+```
+
+
+
+### Parameters
+
+| Parameter | Type | Required | Description |
+| ------------- | ------- | -------- | --------------------------------------------------------------------------- |
+| website_url | string | Yes | The URL of the webpage to scrape |
+| user_prompt | string | Yes | A textual description of what you want to extract |
+| output_schema | object | No | Pydantic/Zod schema for structured response |
+| stealth | boolean | No | Enable anti-detection mode |
+| headers | object | No | Custom HTTP headers |
+| mock | boolean | No | Enable mock mode for testing |
+| wait_ms | number | No | Page load wait time in ms |
+| country_code | string | No | Proxy routing country code |
diff --git a/v1/toonify.mdx b/v1/toonify.mdx
new file mode 100644
index 0000000..ab5293e
--- /dev/null
+++ b/v1/toonify.mdx
@@ -0,0 +1,25 @@
+---
+title: 'Toonify'
+description: 'Convert images to cartoon style (v1)'
+icon: 'palette'
+---
+
+
+You are viewing the **v1 (legacy)** documentation. Toonify is also available in v2. See the [v2 documentation](/services/toonify).
+
+
+## Overview
+
+Toonify converts images into cartoon-style illustrations using AI.
+
+## Usage
+
+```python
+from scrapegraph_py import Client
+
+client = Client(api_key="your-api-key")
+
+response = client.toonify(
+ website_url="https://example.com/image.jpg"
+)
+```