diff --git a/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py b/docs/guides/code_examples/playwright_crawler/navigation_hooks_example.py
similarity index 65%
rename from docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py
rename to docs/guides/code_examples/playwright_crawler/navigation_hooks_example.py
index a0894f5fa1..6abfde0dfc 100644
--- a/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py
+++ b/docs/guides/code_examples/playwright_crawler/navigation_hooks_example.py
@@ -3,8 +3,10 @@
from crawlee.crawlers import (
PlaywrightCrawler,
PlaywrightCrawlingContext,
+ PlaywrightPostNavCrawlingContext,
PlaywrightPreNavCrawlingContext,
)
+from crawlee.errors import SessionError
async def main() -> None:
@@ -24,6 +26,14 @@ async def configure_page(context: PlaywrightPreNavCrawlingContext) -> None:
# to speed up page loading
await context.block_requests()
+    @crawler.post_navigation_hook
+    async def custom_captcha_check(context: PlaywrightPostNavCrawlingContext) -> None:
+        # Treat a visible captcha input as a blocked session and let Crawlee rotate it.
+        captcha_element = context.page.locator('input[name="captcha"]').first
+        if await captcha_element.is_visible():
+            context.log.warning('Captcha detected! Rotating the session.')
+            raise SessionError('Captcha detected')
+
# Run the crawler with the initial list of URLs.
await crawler.run(['https://crawlee.dev'])
diff --git a/docs/guides/playwright_crawler.mdx b/docs/guides/playwright_crawler.mdx
index 7d2bb7fec6..17eebcc465 100644
--- a/docs/guides/playwright_crawler.mdx
+++ b/docs/guides/playwright_crawler.mdx
@@ -10,7 +10,7 @@ import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
import MultipleLaunchExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/multiple_launch_example.py';
import BrowserConfigurationExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/browser_configuration_example.py';
-import PreNavigationExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/pre_navigation_hook_example.py';
+import NavigationHooksExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/navigation_hooks_example.py';
import BrowserPoolPageHooksExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/browser_pool_page_hooks_example.py';
import PluginBrowserConfigExample from '!!raw-loader!./code_examples/playwright_crawler/plugin_browser_configuration_example.py';
@@ -67,10 +67,10 @@ For additional setup or event-driven actions around page creation and closure, t
## Navigation hooks
-Navigation hooks allow for additional configuration at specific points during page navigation. For example, the `pre_navigation_hook` is called before each navigation and provides `PlaywrightPreNavCrawlingContext` - including the [page](https://playwright.dev/python/docs/api/class-page) instance and a `block_requests` helper for filtering unwanted resource types and URL patterns. See the [block requests example](https://crawlee.dev/python/docs/examples/playwright-crawler-with-block-requests) for a dedicated walkthrough.
+Navigation hooks allow for additional configuration at specific points during page navigation. The `pre_navigation_hook` is called before each navigation and provides `PlaywrightPreNavCrawlingContext`, which includes the [page](https://playwright.dev/python/docs/api/class-page) instance and a `block_requests` helper for filtering unwanted resource types and URL patterns. See the [block requests example](https://crawlee.dev/python/docs/examples/playwright-crawler-with-block-requests) for a dedicated walkthrough. Similarly, the `post_navigation_hook` is called after each navigation and provides `PlaywrightPostNavCrawlingContext`, which is useful for post-load checks such as detecting CAPTCHAs or verifying page state. The example below uses both hooks: the pre-navigation hook blocks unneeded requests, and the post-navigation hook raises a `SessionError` when a CAPTCHA is detected.
- {PreNavigationExample}
+ {NavigationHooksExample}
## Conclusion