From 3f1e3dc9be26a627dbc884df3d78d1779aeca06c Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Thu, 19 Mar 2026 00:54:11 +0000 Subject: [PATCH] add `post_navigation_hook` in docs --- ...ion_hook_example.py => navigation_hooks_example.py} | 10 ++++++++++ docs/guides/playwright_crawler.mdx | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) rename docs/guides/code_examples/playwright_crawler/{pre_navigation_hook_example.py => navigation_hooks_example.py} (65%) diff --git a/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py b/docs/guides/code_examples/playwright_crawler/navigation_hooks_example.py similarity index 65% rename from docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py rename to docs/guides/code_examples/playwright_crawler/navigation_hooks_example.py index a0894f5fa1..6abfde0dfc 100644 --- a/docs/guides/code_examples/playwright_crawler/pre_navigation_hook_example.py +++ b/docs/guides/code_examples/playwright_crawler/navigation_hooks_example.py @@ -3,8 +3,10 @@ from crawlee.crawlers import ( PlaywrightCrawler, PlaywrightCrawlingContext, + PlaywrightPostNavCrawlingContext, PlaywrightPreNavCrawlingContext, ) +from crawlee.errors import SessionError async def main() -> None: @@ -24,6 +26,14 @@ async def configure_page(context: PlaywrightPreNavCrawlingContext) -> None: # to speed up page loading await context.block_requests() + @crawler.post_navigation_hook + async def custom_captcha_check(context: PlaywrightPostNavCrawlingContext) -> None: + # check if the page contains a captcha + captcha_element = context.page.locator('input[name="captcha"]').first + if await captcha_element.is_visible(): + context.log.warning('Captcha detected! Skipping the page.') + raise SessionError('Captcha detected') + # Run the crawler with the initial list of URLs. 
await crawler.run(['https://crawlee.dev']) diff --git a/docs/guides/playwright_crawler.mdx b/docs/guides/playwright_crawler.mdx index 7d2bb7fec6..17eebcc465 100644 --- a/docs/guides/playwright_crawler.mdx +++ b/docs/guides/playwright_crawler.mdx @@ -10,7 +10,7 @@ import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; import MultipleLaunchExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/multiple_launch_example.py'; import BrowserConfigurationExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/browser_configuration_example.py'; -import PreNavigationExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/pre_navigation_hook_example.py'; +import NavigationHooksExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/navigation_hooks_example.py'; import BrowserPoolPageHooksExample from '!!raw-loader!roa-loader!./code_examples/playwright_crawler/browser_pool_page_hooks_example.py'; import PluginBrowserConfigExample from '!!raw-loader!./code_examples/playwright_crawler/plugin_browser_configuration_example.py'; @@ -67,10 +67,10 @@ For additional setup or event-driven actions around page creation and closure, t ## Navigation hooks -Navigation hooks allow for additional configuration at specific points during page navigation. For example, the `pre_navigation_hook` is called before each navigation and provides `PlaywrightPreNavCrawlingContext` - including the [page](https://playwright.dev/python/docs/api/class-page) instance and a `block_requests` helper for filtering unwanted resource types and URL patterns. See the [block requests example](https://crawlee.dev/python/docs/examples/playwright-crawler-with-block-requests) for a dedicated walkthrough. +Navigation hooks allow for additional configuration at specific points during page navigation. 
The `pre_navigation_hook` is called before each navigation and provides a `PlaywrightPreNavCrawlingContext`, which includes the [page](https://playwright.dev/python/docs/api/class-page) instance and a `block_requests` helper for filtering unwanted resource types and URL patterns. See the [block requests example](https://crawlee.dev/python/docs/examples/playwright-crawler-with-block-requests) for a dedicated walkthrough. Similarly, the `post_navigation_hook` is called after each navigation and provides a `PlaywrightPostNavCrawlingContext`, which is useful for post-load checks such as detecting CAPTCHAs or verifying page state. - {PreNavigationExample} + {NavigationHooksExample} ## Conclusion