From 1d9551aaf20be6aa39ee4f8ddd93215faae183c0 Mon Sep 17 00:00:00 2001 From: Vikrant-Khedkar Date: Tue, 24 Mar 2026 15:18:29 +0530 Subject: [PATCH] fix: replace print() statements with proper logging across codebase Library code should never write directly to stdout. Migrated all 13 print() calls to use the existing get_logger() infrastructure with appropriate log levels (debug/info/warning). --- scrapegraphai/graphs/abstract_graph.py | 21 +++++++++++-------- scrapegraphai/graphs/base_graph.py | 12 +++++------ scrapegraphai/graphs/speech_graph.py | 5 ++++- scrapegraphai/integrations/burr_bridge.py | 8 +++++-- scrapegraphai/utils/data_export.py | 12 +++++++---- .../screenshot_preparation.py | 6 +++++- 6 files changed, 40 insertions(+), 24 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 065d3d30..d508f293 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -190,11 +190,12 @@ def _create_llm(self, llm_config: dict) -> object: If possible, try to use a model instance instead.""" ) llm_params["model_provider"] = possible_providers[0] - print( - ( - f"Found providers {possible_providers} for model {llm_params['model']}, using {llm_params['model_provider']}.\n" - "If it was not intended please specify the model provider in the graph configuration" - ) + logger.info( + "Found providers %s for model %s, using %s. " + "If it was not intended please specify the model provider in the graph configuration", + possible_providers, + llm_params["model"], + llm_params["model_provider"], ) if llm_params["model_provider"] not in known_providers: @@ -209,10 +210,12 @@ def _create_llm(self, llm_config: dict) -> object: llm_params["model"] ] except KeyError: - print( - f"""Max input tokens for model {llm_params["model_provider"]}/{llm_params["model"]} not found, - please specify the model_tokens parameter in the llm section of the graph configuration. 
- Using default token size: 8192""" + logger.warning( + "Max input tokens for model %s/%s not found, " + "please specify the model_tokens parameter in the llm section of the graph configuration. " + "Using default token size: 8192", + llm_params["model_provider"], + llm_params["model"], ) self.model_token = 8192 else: diff --git a/scrapegraphai/graphs/base_graph.py b/scrapegraphai/graphs/base_graph.py index dad3a257..d0977ece 100644 --- a/scrapegraphai/graphs/base_graph.py +++ b/scrapegraphai/graphs/base_graph.py @@ -362,18 +362,16 @@ def execute(self, initial_state: dict) -> Tuple[dict, list]: else: state, exec_info = self._execute_standard(initial_state) - # Print the result first if "answer" in state: - print(state["answer"]) + logger.info(state["answer"]) elif "parsed_doc" in state: - print(state["parsed_doc"]) + logger.info(state["parsed_doc"]) elif "generated_code" in state: - print(state["generated_code"]) + logger.info(state["generated_code"]) elif "merged_script" in state: - print(state["merged_script"]) + logger.info(state["merged_script"]) - # Then show the message ONLY ONCE - print(f"✨ Try enhanced version of ScrapegraphAI at {CLICKABLE_URL} ✨") + logger.info("✨ Try enhanced version of ScrapegraphAI at %s ✨", CLICKABLE_URL) return state, exec_info diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py index 11caea9b..6622b121 100644 --- a/scrapegraphai/graphs/speech_graph.py +++ b/scrapegraphai/graphs/speech_graph.py @@ -8,10 +8,13 @@ from ..models import OpenAITextToSpeech from ..nodes import FetchNode, GenerateAnswerNode, ParseNode, TextToSpeechNode +from ..utils.logging import get_logger from ..utils.save_audio_from_bytes import save_audio_from_bytes from .abstract_graph import AbstractGraph from .base_graph import BaseGraph +logger = get_logger(__name__) + class SpeechGraph(AbstractGraph): """ @@ -112,6 +115,6 @@ def run(self) -> str: if not audio: raise ValueError("No audio generated from the text.") 
save_audio_from_bytes(audio, self.config.get("output_path", "output.mp3"))
-        print(f"Audio saved to {self.config.get('output_path', 'output.mp3')}")
+        logger.info("Audio saved to %s", self.config.get("output_path", "output.mp3"))
 
         return self.final_state.get("answer", "No answer found.")
diff --git a/scrapegraphai/integrations/burr_bridge.py b/scrapegraphai/integrations/burr_bridge.py
index cb1d3b10..6ab96725 100644
--- a/scrapegraphai/integrations/burr_bridge.py
+++ b/scrapegraphai/integrations/burr_bridge.py
@@ -8,6 +8,10 @@
 import uuid
 from typing import Any, Dict, List, Tuple
 
+from ..utils.logging import get_logger
+
+logger = get_logger(__name__)
+
 try:
     from burr import tracking
     from burr.core import (
@@ -32,10 +36,10 @@ class PrintLnHook(PostRunStepHook, PreRunStepHook):
     """
 
     def pre_run_step(self, *, state: "State", action: "Action", **future_kwargs: Any):
-        print(f"Starting action: {action.name}")
+        logger.debug("Starting action: %s", action.name)
 
     def post_run_step(self, *, state: "State", action: "Action", **future_kwargs: Any):
-        print(f"Finishing action: {action.name}")
+        logger.debug("Finishing action: %s", action.name)
 
 
 class BurrNodeBridge(Action):
diff --git a/scrapegraphai/utils/data_export.py b/scrapegraphai/utils/data_export.py
index 9bbe8c34..498f661b 100644
--- a/scrapegraphai/utils/data_export.py
+++ b/scrapegraphai/utils/data_export.py
@@ -8,6 +8,10 @@
 import xml.etree.ElementTree as ET
 from typing import Any, Dict, List
 
+from .logging import get_logger
+
+logger = get_logger(__name__)
+
 
 def export_to_json(data: List[Dict[str, Any]], filename: str) -> None:
     """
@@ -18,7 +22,7 @@ def export_to_json(data: List[Dict[str, Any]], filename: str) -> None:
     """
     with open(filename, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
-    print(f"Data exported to {filename}")
+    logger.info("Data exported to %s", filename)
 
 
 def export_to_csv(data: List[Dict[str, Any]], filename: str) -> None:
     """
@@ -29,7 +33,7 @@ def export_to_csv(data: 
List[Dict[str, Any]], filename: str) -> None:
     :param filename: Name of the file to save the CSV data
     """
     if not data:
-        print("No data to export")
+        logger.warning("No data to export")
         return
 
     keys = data[0].keys()
@@ -37,7 +41,7 @@ def export_to_csv(data: List[Dict[str, Any]], filename: str) -> None:
         writer = csv.DictWriter(f, fieldnames=keys)
         writer.writeheader()
         writer.writerows(data)
-    print(f"Data exported to {filename}")
+    logger.info("Data exported to %s", filename)
 
 
 def export_to_xml(
@@ -59,4 +63,4 @@ def export_to_xml(
     tree = ET.ElementTree(root)
     tree.write(filename, encoding="utf-8", xml_declaration=True)
-    print(f"Data exported to {filename}")
+    logger.info("Data exported to %s", filename)
diff --git a/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py b/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py
index 861e1328..2df47a04 100644
--- a/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py
+++ b/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py
@@ -7,6 +7,10 @@
 import numpy as np
 from playwright.async_api import async_playwright
 
+from ..logging import get_logger
+
+logger = get_logger(__name__)
+
 
 async def take_screenshot(url: str, save_path: str = None, quality: int = 100):
     """
@@ -155,7 +159,7 @@ def select_area_with_ipywidget(image):
 
     img_array = np.array(image)
 
-    print(img_array.shape)
+    logger.debug("Image array shape: %s", img_array.shape)
 
     def update_plot(top_bottom, left_right, image_size):
         plt.figure(figsize=(image_size, image_size))