From 68dc2333fba0013f9dd7b845bbc9581176a7881b Mon Sep 17 00:00:00 2001
From: do420 <denizozcanmc@gmail.com>
Date: Thu, 12 Mar 2026 16:32:07 +0300
Subject: [PATCH 1/2] Enhanced logging and error handling in run_inference.py

---
 run_inference.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/run_inference.py b/run_inference.py
index f3ab727b6..66990a918 100644
--- a/run_inference.py
+++ b/run_inference.py
@@ -4,13 +4,24 @@
 import platform
 import argparse
 import subprocess
+import logging
+
+def setup_logging():
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s',
+        handlers=[
+            logging.StreamHandler()
+        ]
+    )
 
 def run_command(command, shell=False):
     """Run a system command and ensure it succeeds."""
     try:
+        logging.info(f"Executing command: {' '.join(command)}")
         subprocess.run(command, shell=shell, check=True)
     except subprocess.CalledProcessError as e:
-        print(f"Error occurred while running command: {e}")
+        logging.error(f"Error occurred while running command: {e}")
         sys.exit(1)
 
 def run_inference():
@@ -21,6 +32,11 @@ def run_inference():
             main_path = os.path.join(build_dir, "bin", "llama-cli")
     else:
         main_path = os.path.join(build_dir, "bin", "llama-cli")
+    
+    if not os.path.exists(main_path):
+        logging.error(f"The executable {main_path} does not exist. Please ensure the build directory is correct.")
+        sys.exit(1)
+    
     command = [
         f'{main_path}',
         '-m', args.model,
@@ -37,12 +53,14 @@ def run_inference():
     run_command(command)
 
 def signal_handler(sig, frame):
-    print("Ctrl+C pressed, exiting...")
+    logging.info("Ctrl+C pressed, exiting...")
     sys.exit(0)
 
 if __name__ == "__main__":
+    setup_logging()
+    logging.info("Initializing inference script.")
     signal.signal(signal.SIGINT, signal_handler)
-    # Usage: python run_inference.py -p "Microsoft Corporation is an American multinational corporation and technology company headquartered in Redmond, Washington."
+
     parser = argparse.ArgumentParser(description='Run inference')
     parser.add_argument("-m", "--model", type=str, help="Path to model file", required=False, default="models/bitnet_b1_58-3B/ggml-model-i2_s.gguf")
     parser.add_argument("-n", "--n-predict", type=int, help="Number of tokens to predict when generating text", required=False, default=128)
@@ -53,4 +71,5 @@ def signal_handler(sig, frame):
     parser.add_argument("-cnv", "--conversation", action='store_true', help="Whether to enable chat mode or not (for instruct models.)")
 
     args = parser.parse_args()
+    logging.info("Parsed arguments successfully.")
     run_inference()
\ No newline at end of file

From 55dbf1ef0b55b9904cd382de3959580aaa19849c Mon Sep 17 00:00:00 2001
From: do420 <denizozcanmc@gmail.com>
Date: Thu, 12 Mar 2026 16:46:12 +0300
Subject: [PATCH 2/2] Enhanced logging, error handling, and added GPU (CUDA)
 support to run_inference.py

---
 run_inference.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/run_inference.py b/run_inference.py
index 66990a918..5e88f8256 100644
--- a/run_inference.py
+++ b/run_inference.py
@@ -5,7 +5,6 @@
 import argparse
 import subprocess
 import logging
-
 def setup_logging():
     logging.basicConfig(
         level=logging.INFO,
@@ -15,6 +14,19 @@ def setup_logging():
         ]
     )
 
+def check_cuda():
+    """Check if CUDA is available."""
+    try:
+        import torch
+        if torch.cuda.is_available():
+            logging.info(f"CUDA is available: {torch.cuda.get_device_name(0)}")
+            return True
+        else:
+            logging.warning("CUDA is not available. Falling back to CPU.")
+    except ImportError:
+        logging.warning("PyTorch not installed. CUDA check skipped.")
+    return False
+
 def run_command(command, shell=False):
     """Run a system command and ensure it succeeds."""
     try:
@@ -32,24 +44,27 @@ def run_inference():
             main_path = os.path.join(build_dir, "bin", "llama-cli")
     else:
         main_path = os.path.join(build_dir, "bin", "llama-cli")
-    
+
     if not os.path.exists(main_path):
         logging.error(f"The executable {main_path} does not exist. Please ensure the build directory is correct.")
         sys.exit(1)
-    
+
     command = [
         f'{main_path}',
         '-m', args.model,
         '-n', str(args.n_predict),
         '-t', str(args.threads),
         '-p', args.prompt,
-        '-ngl', '0',
+        '-ngl', '1' if check_cuda() else '0',
         '-c', str(args.ctx_size),
         '--temp', str(args.temperature),
         "-b", "1",
     ]
+
     if args.conversation:
         command.append("-cnv")
+
+    logging.info("Starting inference process...")
     run_command(command)
 
 def signal_handler(sig, frame):