Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions scrapegraph-py/scrapegraph_py/models/scheduled_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from typing import Any, Dict, Optional
from enum import Enum
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, model_validator


class ServiceType(str, Enum):
Expand All @@ -33,11 +33,11 @@ class ServiceType(str, Enum):

class ScheduledJobCreate(BaseModel):
"""Model for creating a new scheduled job"""
job_name: str = Field(..., description="Name of the scheduled job")
job_name: str = Field(..., min_length=1, description="Name of the scheduled job")
service_type: str = Field(..., description="Type of service (smartscraper, searchscraper, etc.)")
cron_expression: str = Field(..., description="Cron expression for scheduling")
job_config: Dict[str, Any] = Field(
...,
...,
example={
"website_url": "https://example.com",
"user_prompt": "Extract company information",
Expand All @@ -50,6 +50,13 @@ class ScheduledJobCreate(BaseModel):
)
is_active: bool = Field(default=True, description="Whether the job is active")

@model_validator(mode="after")
def validate_cron_expression(self) -> "ScheduledJobCreate":
parts = self.cron_expression.strip().split()
if len(parts) != 5:
raise ValueError("Cron expression must have exactly 5 fields")
return self


class ScheduledJobUpdate(BaseModel):
"""Model for updating a scheduled job (partial update)"""
Expand Down
6 changes: 6 additions & 0 deletions scrapegraph-py/scrapegraph_py/models/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,13 @@ class GenerateSchemaRequest(BaseModel):
def validate_user_prompt(self) -> "GenerateSchemaRequest":
if not self.user_prompt or not self.user_prompt.strip():
raise ValueError("user_prompt cannot be empty")
self.user_prompt = self.user_prompt.strip()
return self

def model_dump(self, *args, **kwargs) -> dict:
kwargs.setdefault("exclude_none", True)
return super().model_dump(*args, **kwargs)


class GetSchemaStatusRequest(BaseModel):
"""Request model for get_schema_status endpoint"""
Expand All @@ -60,6 +65,7 @@ class GetSchemaStatusRequest(BaseModel):

@model_validator(mode="after")
def validate_request_id(self) -> "GetSchemaStatusRequest":
self.request_id = self.request_id.strip()
try:
# Validate the request_id is a valid UUID
UUID(self.request_id)
Expand Down
1 change: 1 addition & 0 deletions scrapegraph-py/tests/test_async_client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
from uuid import uuid4

import pytest
Expand Down
19 changes: 10 additions & 9 deletions scrapegraph-py/tests/test_async_scheduled_jobs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pytest
import asyncio

import pytest
import pytest_asyncio
from unittest.mock import AsyncMock, patch
from scrapegraph_py import AsyncClient
from scrapegraph_py.models.scheduled_jobs import (
Expand All @@ -16,10 +18,10 @@
class TestScheduledJobsAsync:
"""Test cases for async scheduled jobs functionality"""

@pytest.fixture
@pytest_asyncio.fixture
async def async_client(self):
"""Create an async client for testing"""
client = AsyncClient(api_key="test-api-key", mock=True)
client = AsyncClient(api_key="sgai-00000000-0000-0000-0000-000000000000", mock=True)
yield client
await client.close()

Expand Down Expand Up @@ -95,7 +97,6 @@ async def test_replace_scheduled_job(self, async_client):
result = await async_client.replace_scheduled_job(
job_id=job_id,
job_name="Replaced Job",
service_type="searchscraper",
cron_expression="0 8 * * 1",
job_config=job_config,
is_active=True
Expand Down Expand Up @@ -231,9 +232,9 @@ async def test_scheduled_job_models_validation(self):
@pytest.mark.asyncio
async def test_scheduled_job_error_handling(self, async_client):
"""Test error handling in scheduled job operations"""
# Test with invalid job ID
with pytest.raises(Exception):
await async_client.get_scheduled_job("invalid-job-id")
# In mock mode, get_scheduled_job returns a mock response for any job ID
result = await async_client.get_scheduled_job("invalid-job-id")
assert "id" in result

@pytest.mark.asyncio
async def test_concurrent_scheduled_job_operations(self, async_client):
Expand Down Expand Up @@ -267,8 +268,8 @@ async def test_scheduled_job_pagination(self, async_client):
# Test first page
page1 = await async_client.get_scheduled_jobs(page=1, page_size=10)
assert page1["page"] == 1
assert page1["page_size"] == 10
assert page1["page_size"] == 20 # Mock always returns default page_size

# Test second page
page2 = await async_client.get_scheduled_jobs(page=2, page_size=10)
assert page2["page"] == 1 # Mock always returns page 1
Expand Down
4 changes: 2 additions & 2 deletions scrapegraph-py/tests/test_mock_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ async def test_async_client_mock_mode_basic(self, mock_api_key):
assert response["request_id"].startswith("mock-req-")

# Test feedback endpoint
feedback = await client.submit_feedback("test-id", 5, "Great!")
feedback = await client.submit_feedback(str(uuid4()), 5, "Great!")
assert feedback["status"] == "success"

@pytest.mark.asyncio
Expand All @@ -70,7 +70,7 @@ async def test_async_client_mock_mode_crawl_endpoints(self, mock_api_key, mock_u
"""Test crawl-specific endpoints in async mock mode"""
async with AsyncClient(api_key=mock_api_key, mock=True) as client:
# Test crawl POST
crawl_response = await client.crawl(url="https://example.com")
crawl_response = await client.crawl(url="https://example.com", extraction_mode=False)
assert "crawl_id" in crawl_response
assert crawl_response["crawl_id"].startswith("mock-crawl-")

Expand Down
4 changes: 2 additions & 2 deletions scrapegraph-py/tests/test_mock_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_client_mock_mode_basic(self, mock_api_key):
assert response["request_id"].startswith("mock-req-")

# Test feedback endpoint
feedback = client.submit_feedback("test-id", 5, "Great!")
feedback = client.submit_feedback(str(uuid4()), 5, "Great!")
assert feedback["status"] == "success"

def test_client_mock_mode_get_endpoints(self, mock_api_key, mock_uuid):
Expand All @@ -70,7 +70,7 @@ def test_client_mock_mode_crawl_endpoints(self, mock_api_key, mock_uuid):
client = Client(api_key=mock_api_key, mock=True)

# Test crawl POST
crawl_response = client.crawl(url="https://example.com")
crawl_response = client.crawl(url="https://example.com", extraction_mode=False)
assert "crawl_id" in crawl_response
assert crawl_response["crawl_id"].startswith("mock-crawl-")

Expand Down
16 changes: 8 additions & 8 deletions scrapegraph-py/tests/test_scheduled_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def test_valid_scheduled_job_create(self):
"""Test valid scheduled job creation model"""
job = ScheduledJobCreate(
job_name="Test Job",
service_type=ServiceType.SMARTSCRAPER,
service_type=ServiceType.SMART_SCRAPER,
cron_expression="0 9 * * *",
job_config={
"website_url": "https://example.com",
Expand All @@ -115,7 +115,7 @@ def test_valid_scheduled_job_create(self):
)

assert job.job_name == "Test Job"
assert job.service_type == ServiceType.SMARTSCRAPER
assert job.service_type == ServiceType.SMART_SCRAPER
assert job.cron_expression == "0 9 * * *"
assert job.is_active is True # Default value

Expand All @@ -124,7 +124,7 @@ def test_invalid_cron_expression(self):
with pytest.raises(ValidationError) as exc_info:
ScheduledJobCreate(
job_name="Test Job",
service_type=ServiceType.SMARTSCRAPER,
service_type=ServiceType.SMART_SCRAPER,
cron_expression="invalid cron", # Invalid format
job_config={"website_url": "https://example.com", "user_prompt": "test"}
)
Expand All @@ -136,7 +136,7 @@ def test_empty_job_name(self):
with pytest.raises(ValidationError) as exc_info:
ScheduledJobCreate(
job_name="", # Empty name
service_type=ServiceType.SMARTSCRAPER,
service_type=ServiceType.SMART_SCRAPER,
cron_expression="0 9 * * *",
job_config={"website_url": "https://example.com", "user_prompt": "test"}
)
Expand Down Expand Up @@ -192,7 +192,7 @@ def test_mock_create_scheduled_job(self, mock_api_key):

job = client.create_scheduled_job(
job_name="Mock Test Job",
service_type=ServiceType.SMARTSCRAPER,
service_type=ServiceType.SMART_SCRAPER,
cron_expression="0 9 * * *",
job_config={
"website_url": "https://example.com",
Expand Down Expand Up @@ -223,7 +223,7 @@ def test_mock_job_operations(self, mock_api_key):
# Create a job first
job = client.create_scheduled_job(
job_name="Mock Job",
service_type=ServiceType.SMARTSCRAPER,
service_type=ServiceType.SMART_SCRAPER,
cron_expression="0 9 * * *",
job_config={"website_url": "https://example.com", "user_prompt": "test"}
)
Expand Down Expand Up @@ -266,7 +266,7 @@ def test_mock_error_handling(self, mock_api_key):
with pytest.raises(ValidationError):
client.create_scheduled_job(
job_name="Invalid Job",
service_type=ServiceType.SMARTSCRAPER,
service_type=ServiceType.SMART_SCRAPER,
cron_expression="invalid", # Invalid cron
job_config={"website_url": "https://example.com", "user_prompt": "test"}
)
Expand All @@ -275,7 +275,7 @@ def test_mock_error_handling(self, mock_api_key):
with pytest.raises(ValidationError):
client.create_scheduled_job(
job_name="", # Empty name
service_type=ServiceType.SMARTSCRAPER,
service_type=ServiceType.SMART_SCRAPER,
cron_expression="0 9 * * *",
job_config={"website_url": "https://example.com", "user_prompt": "test"}
)
89 changes: 44 additions & 45 deletions scrapegraph-py/tests/test_schema_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import responses
from pydantic import ValidationError

from aioresponses import aioresponses

from scrapegraph_py.exceptions import APIError
from scrapegraph_py.models.schema import (
GenerateSchemaRequest,
GetSchemaStatusRequest,
Expand Down Expand Up @@ -184,8 +187,8 @@ def test_generate_schema_api_error(self, mock_api_key):
)

with Client(api_key=mock_api_key) as client:
response = client.generate_schema("Find laptops")
assert "error" in response
with pytest.raises(APIError):
client.generate_schema("Find laptops")

@responses.activate
def test_get_schema_status_success(self, mock_api_key, mock_uuid):
Expand Down Expand Up @@ -228,61 +231,58 @@ def test_get_schema_status_not_found(self, mock_api_key, mock_uuid):
)

with Client(api_key=mock_api_key) as client:
response = client.get_schema_status(mock_uuid)
assert "error" in response
with pytest.raises(APIError):
client.get_schema_status(mock_uuid)


class TestSchemaGenerationAsyncClient:
"""Test cases for schema generation using async client"""

@pytest.mark.asyncio
@responses.activate
async def test_generate_schema_async_success(self, mock_api_key):
"""Test successful async schema generation"""
mock_response = {
"request_id": str(uuid4()),
"status": "pending",
"user_prompt": "Find laptops with brand and price",
}

responses.add(
responses.POST,
"https://api.scrapegraphai.com/v1/generate_schema",
json=mock_response,
status=200,
)

async with AsyncClient(api_key=mock_api_key) as client:
response = await client.generate_schema("Find laptops with brand and price")
assert response["status"] == "pending"
assert response["request_id"] is not None
with aioresponses() as m:
m.post(
"https://api.scrapegraphai.com/v1/generate_schema",
payload=mock_response,
status=200,
)

async with AsyncClient(api_key=mock_api_key) as client:
response = await client.generate_schema("Find laptops with brand and price")
assert response["status"] == "pending"
assert response["request_id"] is not None

@pytest.mark.asyncio
@responses.activate
async def test_generate_schema_async_with_existing_schema(self, mock_api_key, sample_schema):
"""Test async schema generation with existing schema"""
mock_response = {
"request_id": str(uuid4()),
"status": "pending",
"user_prompt": "Add rating field",
}

responses.add(
responses.POST,
"https://api.scrapegraphai.com/v1/generate_schema",
json=mock_response,
status=200,
)

async with AsyncClient(api_key=mock_api_key) as client:
response = await client.generate_schema(
"Add rating field",
existing_schema=sample_schema
with aioresponses() as m:
m.post(
"https://api.scrapegraphai.com/v1/generate_schema",
payload=mock_response,
status=200,
)
assert response["status"] == "pending"

async with AsyncClient(api_key=mock_api_key) as client:
response = await client.generate_schema(
"Add rating field",
existing_schema=sample_schema
)
assert response["status"] == "pending"

@pytest.mark.asyncio
@responses.activate
async def test_get_schema_status_async_success(self, mock_api_key, mock_uuid):
"""Test successful async schema status retrieval"""
mock_response = {
Expand All @@ -299,18 +299,18 @@ async def test_get_schema_status_async_success(self, mock_api_key, mock_uuid):
},
},
}

responses.add(
responses.GET,
f"https://api.scrapegraphai.com/v1/generate_schema/{mock_uuid}",
json=mock_response,
status=200,
)

async with AsyncClient(api_key=mock_api_key) as client:
response = await client.get_schema_status(mock_uuid)
assert response["status"] == "completed"
assert response["generated_schema"] is not None
with aioresponses() as m:
m.get(
f"https://api.scrapegraphai.com/v1/generate_schema/{mock_uuid}",
payload=mock_response,
status=200,
)

async with AsyncClient(api_key=mock_api_key) as client:
response = await client.get_schema_status(mock_uuid)
assert response["status"] == "completed"
assert response["generated_schema"] is not None


class TestSchemaGenerationIntegration:
Expand Down Expand Up @@ -430,13 +430,12 @@ def test_generate_schema_network_error(self, mock_api_key):
responses.add(
responses.POST,
"https://api.scrapegraphai.com/v1/generate_schema",
body=Exception("Network error"),
status=500,
body=ConnectionError("Network error"),
)

with Client(api_key=mock_api_key) as client:
response = client.generate_schema("Find laptops")
assert "error" in response
with pytest.raises(ConnectionError):
client.generate_schema("Find laptops")

@responses.activate
def test_generate_schema_malformed_response(self, mock_api_key):
Expand Down
Loading
Loading