From a0d0784fdfdee74be084f1be32c3e4fd2da27021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mariana=20Rodr=C3=ADguez?= Date: Thu, 2 May 2024 12:38:44 -0500 Subject: [PATCH 1/3] Add option to fetch iframes' content and search for links in them --- crawler.py | 83 +++++++++++++++++++++++++++++++++++++----------------- main.py | 1 + 2 files changed, 58 insertions(+), 26 deletions(-) diff --git a/crawler.py b/crawler.py index 5b27bf3..140e343 100644 --- a/crawler.py +++ b/crawler.py @@ -51,6 +51,8 @@ class Crawler: # TODO also search for window.location={.*?} linkregex = re.compile(b']*href=[\'|"](.*?)[\'"][^>]*?>') imageregex = re.compile (b']*src=[\'|"](.*?)[\'"].*?>') + iframeregex = re.compile (b'