Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions examples/async_crawl_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Async crawl example.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
async with AsyncClient() as client:
# Start crawl
job = await client.crawl.start(
"https://example.com",
depth=2,
max_pages=5,
)
print("Crawl started:", json.dumps(job, indent=2))

# Poll for completion
crawl_id = job["id"]
while True:
status = await client.crawl.status(crawl_id)
print(f"Status: {status.get('status')}")
if status.get("status") in ("completed", "failed"):
break
await asyncio.sleep(2)

print("\nResult:", json.dumps(status, indent=2))


asyncio.run(main())
17 changes: 17 additions & 0 deletions examples/async_credits_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""
Async credits check.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
async with AsyncClient() as client:
credits = await client.credits()
print(json.dumps(credits, indent=2))


asyncio.run(main())
40 changes: 40 additions & 0 deletions examples/async_extract_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
Async extract example - extract data from multiple pages concurrently.
"""

import asyncio
import json

from pydantic import BaseModel, Field

from scrapegraph_py import AsyncClient


class PageInfo(BaseModel):
    # Structured-output schema passed to `client.extract` via `output_schema`.
    # NOTE(review): the Field descriptions presumably flow into the extraction
    # schema sent to the API and guide the model — keep them meaningful.
    title: str = Field(description="Page title")
    description: str = Field(description="Brief description of the page content")


async def main():
    """Extract title/description from several URLs concurrently."""
    async with AsyncClient() as client:
        pages = [
            "https://example.com",
            "https://httpbin.org/html",
        ]

        # Fire every extraction at once; gather preserves input order.
        results = await asyncio.gather(
            *(
                client.extract(
                    url=page,
                    prompt="Extract the page title and a brief description",
                    output_schema=PageInfo,
                )
                for page in pages
            )
        )

        for page, extracted in zip(pages, results):
            print(f"\n=== {page} ===")
            print(json.dumps(extracted, indent=2))


asyncio.run(main())
27 changes: 27 additions & 0 deletions examples/async_monitor_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""
Async monitor example.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
async with AsyncClient() as client:
# Create a monitor
monitor = await client.monitor.create(
name="Async Price Tracker",
url="https://example.com/products",
prompt="Extract product prices",
cron="0 12 * * *", # Every day at noon
)
print("Created:", json.dumps(monitor, indent=2))

# List all monitors
all_monitors = await client.monitor.list()
print("\nAll monitors:", json.dumps(all_monitors, indent=2))


asyncio.run(main())
27 changes: 27 additions & 0 deletions examples/async_scrape_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""
Async scrape example - scrape multiple pages concurrently.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
async with AsyncClient() as client:
# Scrape multiple pages concurrently
urls = [
"https://example.com",
"https://httpbin.org/html",
]

tasks = [client.scrape(url) for url in urls]
results = await asyncio.gather(*tasks)

for url, result in zip(urls, results):
print(f"\n=== {url} ===")
print(json.dumps(result, indent=2))


asyncio.run(main())
26 changes: 26 additions & 0 deletions examples/async_search_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Async search example - run multiple searches concurrently.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
async with AsyncClient() as client:
queries = [
"best python frameworks 2025",
"top javascript libraries 2025",
]

tasks = [client.search(q, num_results=3) for q in queries]
results = await asyncio.gather(*tasks)

for query, result in zip(queries, results):
print(f"\n=== {query} ===")
print(json.dumps(result, indent=2))


asyncio.run(main())
35 changes: 35 additions & 0 deletions examples/crawl_basic_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
Crawl a website and get pages as markdown.

The crawl endpoint discovers and fetches multiple pages from a website,
starting from a given URL and following links up to a specified depth.
"""

import json
import time

from scrapegraph_py import Client

client = Client() # uses SGAI_API_KEY env var

# Start the crawl
job = client.crawl.start(
"https://example.com",
depth=2,
max_pages=5,
format="markdown",
)
print("Crawl started:", json.dumps(job, indent=2))

# Poll for status
crawl_id = job["id"]
while True:
status = client.crawl.status(crawl_id)
print(f"Status: {status.get('status')}")
if status.get("status") in ("completed", "failed"):
break
time.sleep(2)

print("\nFinal result:", json.dumps(status, indent=2))

client.close()
26 changes: 26 additions & 0 deletions examples/crawl_stop_resume_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Stop and resume a crawl job.

You can stop a running crawl and resume it later.
"""

import json

from scrapegraph_py import Client

client = Client() # uses SGAI_API_KEY env var

# Start a crawl
job = client.crawl.start("https://example.com", depth=3, max_pages=50)
crawl_id = job["id"]
print("Crawl started:", crawl_id)

# Stop the crawl
stopped = client.crawl.stop(crawl_id)
print("Stopped:", json.dumps(stopped, indent=2))

# Resume the crawl later
resumed = client.crawl.resume(crawl_id)
print("Resumed:", json.dumps(resumed, indent=2))

client.close()
27 changes: 27 additions & 0 deletions examples/crawl_with_fetch_config_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""
Crawl with custom fetch configuration.

Use FetchConfig to enable stealth mode, JS rendering, etc. for all
pages during the crawl.
"""

import json

from scrapegraph_py import Client, FetchConfig

client = Client() # uses SGAI_API_KEY env var

job = client.crawl.start(
"https://example.com",
depth=2,
max_pages=10,
format="html",
fetch_config=FetchConfig(
stealth=True,
render_js=True,
wait_ms=1000,
),
)
print("Crawl started:", json.dumps(job, indent=2))

client.close()
24 changes: 24 additions & 0 deletions examples/crawl_with_patterns_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""
Crawl a website with URL pattern filtering.

Use include_patterns and exclude_patterns to control which pages
the crawler visits. Patterns support * (any chars) and ** (any path segments).
"""

import json

from scrapegraph_py import Client

client = Client() # uses SGAI_API_KEY env var

job = client.crawl.start(
"https://example.com",
depth=3,
max_pages=20,
format="markdown",
include_patterns=["/blog/*", "/docs/**"],
exclude_patterns=["/admin/*", "/api/*"],
)
print("Crawl started:", json.dumps(job, indent=2))

client.close()
14 changes: 14 additions & 0 deletions examples/credits_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
Check your remaining API credits.
"""

import json

from scrapegraph_py import Client

client = Client() # uses SGAI_API_KEY env var

credits = client.credits()
print(json.dumps(credits, indent=2))

client.close()
20 changes: 20 additions & 0 deletions examples/extract_basic_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
Extract structured data from a webpage using a natural language prompt.

The extract endpoint uses AI to understand your prompt and pull out
exactly the data you need.
"""

import json

from scrapegraph_py import Client

client = Client() # uses SGAI_API_KEY env var

result = client.extract(
url="https://example.com",
prompt="Extract the page title and main description",
)
print(json.dumps(result, indent=2))

client.close()
26 changes: 26 additions & 0 deletions examples/extract_with_fetch_config_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Extract data from a JavaScript-heavy page using FetchConfig.

Use FetchConfig to enable stealth mode, JS rendering, scrolling,
and other options needed for dynamic pages.
"""

import json

from scrapegraph_py import Client, FetchConfig

client = Client() # uses SGAI_API_KEY env var

result = client.extract(
url="https://example.com",
prompt="Extract all visible text content",
fetch_config=FetchConfig(
stealth=True,
render_js=True,
wait_ms=2000,
scrolls=3,
),
)
print(json.dumps(result, indent=2))

client.close()
40 changes: 40 additions & 0 deletions examples/extract_with_json_schema_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
Extract structured data using a raw JSON Schema dict.

You can pass a JSON Schema dictionary directly if you prefer not to
use Pydantic models.
"""

import json

from scrapegraph_py import Client

schema = {
"type": "object",
"properties": {
"title": {"type": "string", "description": "Page title"},
"links": {
"type": "array",
"items": {
"type": "object",
"properties": {
"text": {"type": "string"},
"href": {"type": "string"},
},
},
"description": "All links on the page",
},
},
"required": ["title", "links"],
}

client = Client() # uses SGAI_API_KEY env var

result = client.extract(
url="https://example.com",
prompt="Extract the page title and all links",
output_schema=schema,
)
print(json.dumps(result, indent=2))

client.close()
Loading
Loading