From 1d9551aaf20be6aa39ee4f8ddd93215faae183c0 Mon Sep 17 00:00:00 2001 From: Vikrant-Khedkar Date: Tue, 24 Mar 2026 15:18:29 +0530 Subject: [PATCH] fix: replace print() statements with proper logging across codebase Library code should never write directly to stdout. Migrated all 13 print() calls to use the existing get_logger() infrastructure with appropriate log levels (debug/info/warning). --- scrapegraphai/graphs/abstract_graph.py | 21 +++++++++++-------- scrapegraphai/graphs/base_graph.py | 12 +++++------ scrapegraphai/graphs/speech_graph.py | 5 ++++- scrapegraphai/integrations/burr_bridge.py | 8 +++++-- scrapegraphai/utils/data_export.py | 12 +++++++---- .../screenshot_preparation.py | 6 +++++- 6 files changed, 40 insertions(+), 24 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 065d3d30..d508f293 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -190,11 +190,12 @@ def _create_llm(self, llm_config: dict) -> object: If possible, try to use a model instance instead.""" ) llm_params["model_provider"] = possible_providers[0] - print( - ( - f"Found providers {possible_providers} for model {llm_params['model']}, using {llm_params['model_provider']}.\n" - "If it was not intended please specify the model provider in the graph configuration" - ) + logger.info( + "Found providers %s for model %s, using %s. " + "If it was not intended please specify the model provider in the graph configuration", + possible_providers, + llm_params["model"], + llm_params["model_provider"], ) if llm_params["model_provider"] not in known_providers: @@ -209,10 +210,12 @@ def _create_llm(self, llm_config: dict) -> object: llm_params["model"] ] except KeyError: - print( - f"""Max input tokens for model {llm_params["model_provider"]}/{llm_params["model"]} not found, - please specify the model_tokens parameter in the llm section of the graph configuration. 
- Using default token size: 8192""" + logger.warning( + "Max input tokens for model %s/%s not found, " + "please specify the model_tokens parameter in the llm section of the graph configuration. " + "Using default token size: 8192", + llm_params["model_provider"], + llm_params["model"], ) self.model_token = 8192 else: diff --git a/scrapegraphai/graphs/base_graph.py b/scrapegraphai/graphs/base_graph.py index dad3a257..d0977ece 100644 --- a/scrapegraphai/graphs/base_graph.py +++ b/scrapegraphai/graphs/base_graph.py @@ -362,18 +362,16 @@ def execute(self, initial_state: dict) -> Tuple[dict, list]: else: state, exec_info = self._execute_standard(initial_state) - # Print the result first if "answer" in state: - print(state["answer"]) + logger.info(state["answer"]) elif "parsed_doc" in state: - print(state["parsed_doc"]) + logger.info(state["parsed_doc"]) elif "generated_code" in state: - print(state["generated_code"]) + logger.info(state["generated_code"]) elif "merged_script" in state: - print(state["merged_script"]) + logger.info(state["merged_script"]) - # Then show the message ONLY ONCE - print(f"✨ Try enhanced version of ScrapegraphAI at {CLICKABLE_URL} ✨") + logger.info("✨ Try enhanced version of ScrapegraphAI at %s ✨", CLICKABLE_URL) return state, exec_info diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py index 11caea9b..6622b121 100644 --- a/scrapegraphai/graphs/speech_graph.py +++ b/scrapegraphai/graphs/speech_graph.py @@ -8,10 +8,13 @@ from ..models import OpenAITextToSpeech from ..nodes import FetchNode, GenerateAnswerNode, ParseNode, TextToSpeechNode +from ..utils.logging import get_logger from ..utils.save_audio_from_bytes import save_audio_from_bytes from .abstract_graph import AbstractGraph from .base_graph import BaseGraph +logger = get_logger(__name__) + class SpeechGraph(AbstractGraph): """ @@ -112,6 +115,6 @@ def run(self) -> str: if not audio: raise ValueError("No audio generated from the text.") 
save_audio_from_bytes(audio, self.config.get("output_path", "output.mp3"))
-        print(f"Audio saved to {self.config.get('output_path', 'output.mp3')}")
+        logger.info("Audio saved to %s", self.config.get("output_path", "output.mp3"))
 
         return self.final_state.get("answer", "No answer found.")
diff --git a/scrapegraphai/integrations/burr_bridge.py b/scrapegraphai/integrations/burr_bridge.py
index cb1d3b10..6ab96725 100644
--- a/scrapegraphai/integrations/burr_bridge.py
+++ b/scrapegraphai/integrations/burr_bridge.py
@@ -8,6 +8,10 @@
 import uuid
 from typing import Any, Dict, List, Tuple
 
+from ..utils.logging import get_logger
+
+logger = get_logger(__name__)
+
 try:
     from burr import tracking
     from burr.core import (
@@ -32,10 +36,10 @@ class PrintLnHook(PostRunStepHook, PreRunStepHook):
     """
 
     def pre_run_step(self, *, state: "State", action: "Action", **future_kwargs: Any):
-        print(f"Starting action: {action.name}")
+        logger.debug("Starting action: %s", action.name)
 
     def post_run_step(self, *, state: "State", action: "Action", **future_kwargs: Any):
-        print(f"Finishing action: {action.name}")
+        logger.debug("Finishing action: %s", action.name)
 
 
 class BurrNodeBridge(Action):
diff --git a/scrapegraphai/utils/data_export.py b/scrapegraphai/utils/data_export.py
index 9bbe8c34..498f661b 100644
--- a/scrapegraphai/utils/data_export.py
+++ b/scrapegraphai/utils/data_export.py
@@ -8,6 +8,10 @@
 import xml.etree.ElementTree as ET
 from typing import Any, Dict, List
 
+from .logging import get_logger
+
+logger = get_logger(__name__)
+
 
 def export_to_json(data: List[Dict[str, Any]], filename: str) -> None:
     """
@@ -18,7 +22,7 @@ def export_to_json(data: List[Dict[str, Any]], filename: str) -> None:
     """
     with open(filename, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
-    print(f"Data exported to {filename}")
+    logger.info("Data exported to %s", filename)
 
 
 def export_to_csv(data: List[Dict[str, Any]], filename: str) -> None:
     """
@@ -29,7 +33,7 @@ def export_to_csv(data: 
List[Dict[str, Any]], filename: str) -> None:
     :param filename: Name of the file to save the CSV data
     """
     if not data:
-        print("No data to export")
+        logger.warning("No data to export")
         return
 
     keys = data[0].keys()
@@ -37,7 +41,7 @@ def export_to_csv(data: List[Dict[str, Any]], filename: str) -> None:
         writer = csv.DictWriter(f, fieldnames=keys)
         writer.writeheader()
         writer.writerows(data)
-    print(f"Data exported to {filename}")
+    logger.info("Data exported to %s", filename)
 
 
 def export_to_xml(
@@ -59,4 +63,4 @@ def export_to_xml(
     tree = ET.ElementTree(root)
     tree.write(filename, encoding="utf-8", xml_declaration=True)
-    print(f"Data exported to {filename}")
+    logger.info("Data exported to %s", filename)
diff --git a/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py b/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py
index 861e1328..2df47a04 100644
--- a/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py
+++ b/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py
@@ -7,6 +7,10 @@
 import numpy as np
 from playwright.async_api import async_playwright
 
+from ..logging import get_logger
+
+logger = get_logger(__name__)
+
 
 async def take_screenshot(url: str, save_path: str = None, quality: int = 100):
     """
@@ -155,7 +159,7 @@ def select_area_with_ipywidget(image):
 
     img_array = np.array(image)
 
-    print(img_array.shape)
+    logger.debug("Image array shape: %s", img_array.shape)
 
     def update_plot(top_bottom, left_right, image_size):
         plt.figure(figsize=(image_size, image_size))