From a2251fad9b7605f4b3ea298d16ad02b6f6ae14e3 Mon Sep 17 00:00:00 2001
From: Saumya Rai <saumya.rai@qorix.ai>
Date: Fri, 13 Mar 2026 13:31:10 +0530
Subject: [PATCH 1/3] Fix script to handle SARIF file recategorization

Signed-off-by: Saumya Rai <saumya.rai@qorix.ai>

adding debug

adding python files for codeql scripts

adding formatting

adding filter for coding guidelines files

removed the shell script files from workflow

using existing libs

fixing repo issues

fixed the vmain issue

format fix
---
 .../workflows/codeql-multiple-repo-scan.yml   |  29 ++-
 scripts/workflow/checkout_repos.py            | 190 +++++++++++++++
 scripts/workflow/checkout_repos.sh            |  47 ----
 scripts/workflow/parse_repos.py               | 138 +++++++++++
 scripts/workflow/parse_repos.sh               |  54 -----
 scripts/workflow/recategorize_guidelines.py   | 227 ++++++++++++++++++
 scripts/workflow/recategorize_guidelines.sh   |  28 ---
 7 files changed, 573 insertions(+), 140 deletions(-)
 create mode 100755 scripts/workflow/checkout_repos.py
 delete mode 100755 scripts/workflow/checkout_repos.sh
 create mode 100755 scripts/workflow/parse_repos.py
 delete mode 100755 scripts/workflow/parse_repos.sh
 create mode 100755 scripts/workflow/recategorize_guidelines.py
 delete mode 100755 scripts/workflow/recategorize_guidelines.sh

diff --git a/.github/workflows/codeql-multiple-repo-scan.yml b/.github/workflows/codeql-multiple-repo-scan.yml
index a22531153b2..a8ded6d20fe 100644
--- a/.github/workflows/codeql-multiple-repo-scan.yml
+++ b/.github/workflows/codeql-multiple-repo-scan.yml
@@ -40,25 +40,25 @@ jobs:
     steps:
       - name: Checkout central repository
         uses: actions/checkout@v4
-      - name: Checkout CodeQL Coding Standards scripts
-        uses: actions/checkout@v4
-        with:
-          repository: github/codeql-coding-standards
-          path: codeql-coding-standards-repo # Klonen in diesen Ordner
-          ref: main # Oder eine spezifische Release-Version, z.B. 'v2.53.0-dev'
       # Add coding standard packages and dependencies
-      - name: Install Python dependencies for Coding Standards scripts
+      - name: Install Python dependencies
         run: |
           python3 -m pip install --upgrade pip
-          pip3 install pyyaml jsonpath-ng jsonschema jsonpatch jsonpointer pytest sarif-tools
+          pip3 install --break-system-packages pyyaml jsonpath-ng jsonschema jsonpatch jsonpointer pytest sarif-tools
       - name: Parse known_good.json and create repos.json
         id: parse-repos
         run: |
-          scripts/workflow/parse_repos.sh
+          python3 scripts/workflow/parse_repos.py
       - name: Checkout all pinned repositories
         id: checkout-repos
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          python3 scripts/workflow/checkout_repos.py
+      - name: List files in repos directory (debug)
         run: |
-          scripts/workflow/checkout_repos.sh
+          echo "Listing all files in repos directory before CodeQL analysis:"
+          find repos || echo "repos directory not found"
       - name: Initialize CodeQL for all repositories
         uses: github/codeql-action/init@v4
         with:
@@ -72,10 +72,17 @@ jobs:
           upload-database: false # Don't upload databases for each repo
           output: sarif-results/
           category: "multi-repo-scan"
+      # Checkout CodeQL Coding Standards AFTER analysis for recategorization
+      - name: Checkout CodeQL Coding Standards for recategorization
+        uses: actions/checkout@v4
+        with:
+          repository: github/codeql-coding-standards
+          path: codeql-coding-standards-repo
+          ref: v2.50.0 # Use frozen version instead of main
       - name: Recategorize Guidelines
         if: always()
         run: |
-          scripts/workflow/recategorize_guidelines.sh
+          python3 scripts/workflow/recategorize_guidelines.py
       - name: Generate HTML Report from SARIF
         run: |
           SARIF_FILE="sarif-results/cpp.sarif"
diff --git a/scripts/workflow/checkout_repos.py b/scripts/workflow/checkout_repos.py
new file mode 100755
index 00000000000..05d4bb1f9bd
--- /dev/null
+++ b/scripts/workflow/checkout_repos.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python3
+# *******************************************************************************
+# Copyright (c) 2025 Contributors to the Eclipse Foundation
+#
+# See the NOTICE file(s) distributed with this work for additional
+# information regarding copyright ownership.
+#
+# This program and the accompanying materials are made available under the
+# terms of the Apache License Version 2.0 which is available at
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# SPDX-License-Identifier: Apache-2.0
+# *******************************************************************************
+"""
+Checkout all pinned repositories based on repos.json configuration.
+"""
+
+import json
+import sys
+import subprocess
+import re
+import os
+from pathlib import Path
+
+
+def load_repos_config(config_file="./repos.json"):
+    """
+    Read the JSON repository list, exiting with status 1 if it cannot be loaded.
+
+    Args:
+        config_file: Path to repos.json file
+
+    Returns:
+        The decoded JSON document (expected: list of repository configurations)
+    """
+    source = Path(config_file)
+
+    if not source.exists():
+        print(f"Error: file not found '{config_file}'", file=sys.stderr)
+        sys.exit(1)
+
+    # Decode failures and read errors are reported the same way and are fatal
+    try:
+        with open(source, "r") as handle:
+            return json.load(handle)
+    except (json.JSONDecodeError, IOError) as err:
+        print(f"Error: Failed to load repos.json: {err}", file=sys.stderr)
+        sys.exit(1)
+
+
+def is_commit_hash(ref):
+    """
+    Check if reference is a full SHA-1 commit hash (exactly 40 hex characters).
+
+    Args:
+        ref: Git reference (branch, tag, or hash)
+
+    Returns:
+        True if ref matches commit hash pattern
+    """
+    return bool(re.fullmatch(r"[0-9a-fA-F]{40}", ref))  # "$" would also accept a trailing newline
+
+
+def checkout_repo(name, url, ref, path):
+    """
+    Checkout a single repository using git with GitHub token for authentication.
+
+    Args:
+        name: Repository name
+        url: Repository URL
+        ref: Git reference (branch, tag, or commit hash)
+        path: Local path to checkout into
+
+    Returns:
+        True if successful, False otherwise
+    """
+    path_obj = Path(path)
+
+    try:
+        # Create parent directory if needed
+        path_obj.parent.mkdir(parents=True, exist_ok=True)
+
+        # Use GitHub token if available to avoid rate limits
+        github_token = os.environ.get("GITHUB_TOKEN", "")
+        auth_url = url
+
+        if github_token and "github.com" in url:
+            # Inject token into URL for authenticated requests
+            # Replace https://github.com/ with https://token@github.com/
+            auth_url = url.replace("https://github.com/", f"https://{github_token}@github.com/")
+
+        if is_commit_hash(ref):
+            print(f"Checking out {name} ({ref}) to {path}")
+            print(f"  Detected commit hash. Cloning and then checking out.")
+
+            # Clone the repository
+            result = subprocess.run(["git", "clone", auth_url, path], capture_output=True, text=True)
+            if result.returncode != 0:
+                print(f"  Git error: {result.stderr}", file=sys.stderr)
+                raise subprocess.CalledProcessError(result.returncode, result.args, result.stdout, result.stderr)
+
+            # Checkout specific commit
+            result = subprocess.run(["git", "-C", path, "checkout", ref], capture_output=True, text=True)
+            if result.returncode != 0:
+                print(f"  Git error: {result.stderr}", file=sys.stderr)
+                raise subprocess.CalledProcessError(result.returncode, result.args, result.stdout, result.stderr)
+        else:
+            print(f"Checking out {name} ({ref}) to {path}")
+            print(f"  Detected branch/tag. Cloning with --branch.")
+
+            # Clone with shallow copy and specific branch/tag
+            # Try the ref as-is first
+            result = subprocess.run(
+                ["git", "clone", "--depth", "1", "--branch", ref, auth_url, path],
+                capture_output=True,
+                text=True,
+            )
+
+            if result.returncode != 0:
+                # If failed, try with 'v' prefix (common for version tags)
+                if not ref.startswith("v") and re.match(r"^\d+\.\d+", ref):
+                    print(f"  First attempt failed, retrying with 'v' prefix for version tag...")
+                    branch_ref = f"v{ref}"
+                    result = subprocess.run(
+                        ["git", "clone", "--depth", "1", "--branch", branch_ref, auth_url, path],
+                        capture_output=True,
+                        text=True,
+                    )
+                    if result.returncode != 0:
+                        print(f"  Git error: {result.stderr}", file=sys.stderr)
+                        raise subprocess.CalledProcessError(
+                            result.returncode, result.args, result.stdout, result.stderr
+                        )
+                else:
+                    print(f"  Git error: {result.stderr}", file=sys.stderr)
+                    raise subprocess.CalledProcessError(result.returncode, result.args, result.stdout, result.stderr)
+
+        return True
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error: Failed to checkout {name}: {e}", file=sys.stderr)
+        if hasattr(e, "stderr") and e.stderr:
+            print(f"  Details: {e.stderr}", file=sys.stderr)
+        return False
+
+
+def main():
+    """Main entry point."""
+    # Load repository configurations
+    repos = load_repos_config("./repos.json")
+    repo_count = len(repos)
+
+    # Track successfully checked out repositories
+    repo_paths = []
+
+    # Checkout each repository
+    for i, repo in enumerate(repos):
+        name = repo.get("name", f"repo-{i}")
+        url = repo.get("url", "")
+        ref = repo.get("version", "")
+        path = repo.get("path", "")
+
+        if not all([url, ref, path]):
+            print(f"Warning: Skipping {name} - missing required fields", file=sys.stderr)
+            continue
+
+        if checkout_repo(name, url, ref, path):
+            repo_paths.append(path)
+
+    # Output all paths (comma-separated for GitHub Actions compatibility)
+    repo_paths_output = ",".join(repo_paths)
+
+    # Write to GITHUB_OUTPUT if available
+    github_output = os.environ.get("GITHUB_OUTPUT")
+    if github_output:
+        try:
+            with open(github_output, "a") as f:
+                f.write(f"repo_paths={repo_paths_output}\n")
+        except IOError as e:
+            print(f"Warning: Failed to write GITHUB_OUTPUT: {e}", file=sys.stderr)
+
+    # Also print for debugging
+    print(f"\nSuccessfully checked out {len(repo_paths)} of {repo_count} repositories")
+    print(f"repo_paths={repo_paths_output}")
+
+    return 0 if len(repo_paths) == repo_count else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/workflow/checkout_repos.sh b/scripts/workflow/checkout_repos.sh
deleted file mode 100755
index d70cce8bc08..00000000000
--- a/scripts/workflow/checkout_repos.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-# *******************************************************************************
-# Copyright (c) 2025 Contributors to the Eclipse Foundation
-#
-# See the NOTICE file(s) distributed with this work for additional
-# information regarding copyright ownership.
-#
-# This program and the accompanying materials are made available under the
-# terms of the Apache License Version 2.0 which is available at
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# SPDX-License-Identifier: Apache-2.0
-# *******************************************************************************
-# jq is already installed by the previous step.
-
-# Read repositories from the repos.json file created by the previous step
-repos=$(cat repos.json)
-repo_count=$(echo "$repos" | jq length)
-# Initialize an empty string for paths to be outputted
-repo_paths_output=""
-for i in $(seq 0 $((repo_count-1))); do
-  name=$(echo "$repos" | jq -r ".[$i].name")
-  url=$(echo "$repos" | jq -r ".[$i].url")
-  ref=$(echo "$repos" | jq -r ".[$i].version") # This can be a branch, tag, or commit hash
-  path=$(echo "$repos" | jq -r ".[$i].path") # e.g., "repos/score_baselibs"
-  echo "Checking out $name ($ref) to $path"
-  # Create the parent directory if it doesn't exist
-  mkdir -p "$(dirname "$path")"
-  # Check if 'ref' looks like a commit hash (e.g., 40 hex characters)
-  # This is a heuristic; a more robust check might involve fetching refs first.
-  if [[ "$ref" =~ ^[0-9a-fA-F]{40}$ ]]; then
-    echo "  Detected commit hash. Cloning and then checking out."
-    git clone "$url" "$path"
-    (cd "$path" && git checkout "$ref")
-  else
-    echo "  Detected branch/tag. Cloning with --branch."
-    git clone --depth 1 --branch v"$ref" "$url" "$path"
-  fi
-  # Append the path to the list, separated by commas
-  if [ -z "$repo_paths_output" ]; then
-    repo_paths_output="$path"
-  else
-    repo_paths_output="$repo_paths_output,$path"
-  fi
-done
-# Output all paths as a single variable
-echo "repo_paths=$repo_paths_output" >> $GITHUB_OUTPUT
diff --git a/scripts/workflow/parse_repos.py b/scripts/workflow/parse_repos.py
new file mode 100755
index 00000000000..7703002052d
--- /dev/null
+++ b/scripts/workflow/parse_repos.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+# *******************************************************************************
+# Copyright (c) 2025 Contributors to the Eclipse Foundation
+#
+# See the NOTICE file(s) distributed with this work for additional
+# information regarding copyright ownership.
+#
+# This program and the accompanying materials are made available under the
+# terms of the Apache License Version 2.0 which is available at
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# SPDX-License-Identifier: Apache-2.0
+# *******************************************************************************
+"""
+Parse known_good.json and create repos.json for multi-repository CodeQL analysis.
+
+Uses scripts.tooling.lib.known_good for consistent parsing of known_good.json.
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+
+# Add scripts directory to path for imports from tooling library
+sys.path.insert(0, str(Path(__file__).parent.parent.parent / "scripts"))
+
+from tooling.lib.known_good import load_known_good
+
+
+def parse_known_good(json_file="./known_good.json"):
+    """
+    Build the repository list for the "target_sw" modules of known_good.json.
+
+    Exits with status 1 if the file is missing or load_known_good() fails.
+
+    Args:
+        json_file: Path to known_good.json file
+
+    Returns:
+        Tuple of (repos list, module count, module outputs dict)
+    """
+    json_path = Path(json_file)
+
+    if not json_path.exists():
+        print(f"Error: file not found '{json_file}'", file=sys.stderr)
+        print(f"Current directory: {Path.cwd()}", file=sys.stderr)
+        sys.exit(1)
+
+    try:
+        # Delegate parsing to the shared known_good library; any failure is fatal
+        known_good = load_known_good(json_path)
+    except Exception as e:
+        print(f"Error: Failed to parse known_good.json: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Only the "target_sw" group is used; a missing group yields no repositories
+    modules = known_good.modules.get("target_sw", {})
+
+    # NOTE(review): parse_repos.sh picked branch, then hash, then version; the order below differs - confirm
+    repos = []
+    module_outputs = {}
+
+    for name, module in modules.items():
+        repo_url = module.repo
+        ref = module.version or module.branch or module.hash  # first truthy value wins
+
+        repo_obj = {"name": name, "url": repo_url, "version": ref, "path": f"repos/{name}"}
+        repos.append(repo_obj)
+
+        # <name>_url is always emitted; version/branch/hash only when set
+        module_outputs[f"{name}_url"] = repo_url
+        if module.version:
+            module_outputs[f"{name}_version"] = module.version
+        if module.branch:
+            module_outputs[f"{name}_branch"] = module.branch
+        if module.hash:
+            module_outputs[f"{name}_hash"] = module.hash
+
+    return repos, len(modules), module_outputs
+
+
+def write_repos_json(repos, output_file="./repos.json"):
+    """Dump the repository list to output_file as JSON and echo it on stdout."""
+    destination = Path(output_file)
+
+    try:
+        with open(destination, "w") as handle:
+            json.dump(repos, handle, indent=2)
+        # Echo the generated document into the job log; the empty string
+        # yields the trailing blank line that separates it from later output.
+        print(f"Generated {output_file}:", json.dumps(repos, indent=2), "", sep="\n")
+    except IOError as err:
+        print(f"Error: Failed to write {output_file}: {err}", file=sys.stderr)
+        sys.exit(1)
+
+
+def write_github_output(outputs):
+    """
+    Append key=value lines to the file named by $GITHUB_OUTPUT.
+
+    Args:
+        outputs: Dictionary of key-value pairs to output
+    """
+    target = Path(os.environ.get("GITHUB_OUTPUT", "/dev/null"))
+
+    if not (target.exists() or target.parent.exists()):
+        return
+    try:
+        with open(target, "a") as handle:
+            handle.writelines(f"{key}={value}\n" for key, value in outputs.items())
+    except IOError as err:
+        print(f"Warning: Failed to write GITHUB_OUTPUT: {err}", file=sys.stderr)
+
+
+def main():
+    """
+    Generate repos.json from known_good.json and publish the step outputs.
+
+    Both files are resolved relative to the current working directory.
+    """
+    # Parse known_good.json
+    repos, module_count, module_outputs = parse_known_good("./known_good.json")
+
+    # Write repos.json
+    write_repos_json(repos)
+
+    # Write GitHub Actions outputs: MODULE_COUNT first, then the per-module
+    # keys collected by parse_known_good()
+    github_outputs = {"MODULE_COUNT": str(module_count)}
+    github_outputs.update(module_outputs)
+    write_github_output(github_outputs)
+
+    print("Parse complete!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/workflow/parse_repos.sh b/scripts/workflow/parse_repos.sh
deleted file mode 100755
index 71c50737a5e..00000000000
--- a/scripts/workflow/parse_repos.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-# *******************************************************************************
-# Copyright (c) 2025 Contributors to the Eclipse Foundation
-#
-# See the NOTICE file(s) distributed with this work for additional
-# information regarding copyright ownership.
-#
-# This program and the accompanying materials are made available under the
-# terms of the Apache License Version 2.0 which is available at
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# SPDX-License-Identifier: Apache-2.0
-# *******************************************************************************
-sudo apt-get update && sudo apt-get install -y jq
-JSON_FILE="./known_good.json"
-# Check if the file exists
-if [ ! -f "$JSON_FILE" ]; then
-  echo "Error file not found '$JSON_FILE' "
-  ls -la .
-  exit 1
-fi
-# Create repos.json from known_good.json
-# This jq command transforms the 'modules' object into an array of repository objects
-# with 'name', 'url', 'version' (branch/tag/hash), and 'path'.
-jq '[.modules.target_sw | to_entries[] | {
-  name: .key,
-  url: .value.repo,
-  version: (.value.branch // .value.hash // .value.version),
-  path: ("repos/" + .key)
-}]' "$JSON_FILE" > repos.json
-echo "Generated repos.json:"
-cat repos.json
-echo "" # Add a newline for better readability
-# The following GITHUB_OUTPUT variables are set for each module.
-# These might be useful for other steps, but are not directly used by the 'checkout-repos' step
-# which now reads 'repos.json' directly.
-echo "MODULE_COUNT=$(jq '.modules.target_sw | length' "$JSON_FILE")" >> $GITHUB_OUTPUT
-jq -c '.modules.target_sw | to_entries[]' "$JSON_FILE" | while read -r module_entry; do
-  module_name=$(echo "$module_entry" | jq -r '.key')
-  repo_url=$(echo "$module_entry" | jq -r '.value.repo // empty')
-  version=$(echo "$module_entry" | jq -r '.value.version // empty')
-  branch=$(echo "$module_entry" | jq -r '.value.branch // empty')
-  hash=$(echo "$module_entry" | jq -r '.value.hash // empty')
-  echo "${module_name}_url=$repo_url" >> $GITHUB_OUTPUT
-  if [ -n "$version" ]; then
-  echo "${module_name}_version=$version" >> $GITHUB_OUTPUT
-  fi
-  if [ -n "$branch" ]; then
-    echo "${module_name}_branch=$branch" >> $GITHUB_OUTPUT
-  fi
-  if [ -n "$hash" ]; then
-    echo "${module_name}_hash=$hash" >> $GITHUB_OUTPUT
-  fi
-done
diff --git a/scripts/workflow/recategorize_guidelines.py b/scripts/workflow/recategorize_guidelines.py
new file mode 100755
index 00000000000..f78b93dc194
--- /dev/null
+++ b/scripts/workflow/recategorize_guidelines.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python3
+# *******************************************************************************
+# Copyright (c) 2025 Contributors to the Eclipse Foundation
+#
+# See the NOTICE file(s) distributed with this work for additional
+# information regarding copyright ownership.
+#
+# This program and the accompanying materials are made available under the
+# terms of the Apache License Version 2.0 which is available at
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# SPDX-License-Identifier: Apache-2.0
+# *******************************************************************************
+"""
+Recategorize CodeQL SARIF results according to coding standards.
+"""
+
+import json
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+
+# Configuration paths
+RECATEGORIZE_SCRIPT = "codeql-coding-standards-repo/scripts/guideline_recategorization/recategorize.py"
+CODING_STANDARDS_CONFIG = "./.github/codeql/coding-standards.yml"
+CODING_STANDARDS_SCHEMA = "codeql-coding-standards-repo/schemas/coding-standards-schema-1.0.0.json"
+SARIF_SCHEMA = "codeql-coding-standards-repo/schemas/sarif-schema-2.1.0.json"
+SARIF_FILE = "sarif-results/cpp.sarif"
+OUTPUT_DIR = "sarif-results-recategorized"
+
+
+def validate_paths():
+    """
+    Decide whether there is a SARIF file to process and check its prerequisites.
+
+    Exits the process with status 1 when the coding-standards config is missing,
+    so that condition is not mistaken by the caller for "no SARIF file".
+
+    Returns:
+        True if the SARIF file exists, False if there is nothing to process
+    """
+    # First check if SARIF file exists - if not, nothing to recategorize
+    if not Path(SARIF_FILE).exists():
+        print(f"Info: SARIF file not found at {SARIF_FILE}", file=sys.stderr)
+        return False  # Signal to skip recategorization
+
+    # Files from the coding-standards checkout: recategorization is skipped or
+    # fails without them, but the repos/ filtering does not depend on them.
+    optional_files = [
+        RECATEGORIZE_SCRIPT,
+        CODING_STANDARDS_SCHEMA,
+        SARIF_SCHEMA,
+    ]
+
+    required_files = [
+        CODING_STANDARDS_CONFIG,
+    ]
+
+    # A missing required file is a hard error. Returning False here would make
+    # main() print "No SARIF file found" and exit 0, hiding the problem.
+    for file_path in required_files:
+        if not Path(file_path).exists():
+            print(f"Error: Required file not found: {file_path}", file=sys.stderr)
+            sys.exit(1)
+
+    # Warn about optional files but don't fail
+    missing_optional = [path for path in optional_files if not Path(path).exists()]
+    if missing_optional:
+        print(f"Warning: Some recategorization files not found: {missing_optional}", file=sys.stderr)
+        print("Recategorization will be skipped, but filtering will still be applied.", file=sys.stderr)
+
+    return True
+
+
+def recategorize_sarif():
+    """
+    Run the CodeQL recategorization script and swap its output in for SARIF_FILE.
+
+    Returns:
+        True if successful or skipped, False on failure (original SARIF is kept)
+    """
+    # Check if recategorization script exists
+    if not Path(RECATEGORIZE_SCRIPT).exists():
+        print(f"Info: Recategorization script not found at {RECATEGORIZE_SCRIPT}", file=sys.stderr)
+        print("Skipping recategorization step (will apply filtering only).", file=sys.stderr)
+        return True  # Not a failure, just skip this step
+
+    # Create output directory
+    output_path = Path(OUTPUT_DIR)
+    output_path.mkdir(parents=True, exist_ok=True)
+
+    output_file = output_path / Path(SARIF_FILE).name
+
+    print(f"Processing {SARIF_FILE} for recategorization...")
+
+    try:
+        # Run recategorization script
+        result = subprocess.run(
+            [
+                "python3",
+                RECATEGORIZE_SCRIPT,
+                "--coding-standards-schema-file",
+                CODING_STANDARDS_SCHEMA,
+                "--sarif-schema-file",
+                SARIF_SCHEMA,
+                CODING_STANDARDS_CONFIG,
+                SARIF_FILE,
+                str(output_file),
+            ],
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+
+        print("Recategorization completed successfully")
+        if result.stdout:
+            print("Output:", result.stdout)
+
+        # The script can exit 0 without producing output; check before touching
+        # the original so a missing result never costs us the analysis data.
+        if not output_file.exists():
+            print(f"Error: Recategorized SARIF not produced at {output_file}", file=sys.stderr)
+            return False
+
+        # Path.replace overwrites the destination in one step, so the original
+        # is never deleted without its replacement already being in place.
+        output_file.replace(Path(SARIF_FILE))
+        print(f"Moved recategorized SARIF to {SARIF_FILE}")
+
+        return True
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error: Recategorization script failed: {e}", file=sys.stderr)
+        if e.stderr:
+            print(f"Error output: {e.stderr}", file=sys.stderr)
+        return False
+    except (FileNotFoundError, OSError) as e:
+        print(f"Error: File operation failed: {e}", file=sys.stderr)
+        return False
+
+
+def filter_sarif_results():
+    """
+    Filter SARIF results to only include entries with paths matching repos/*.
+
+    A result is kept when the artifact URI of its first location contains a
+    "repos/" path segment; results without any location are dropped. The
+    SARIF file is rewritten in place.
+
+    Returns:
+        True if successful, False otherwise
+    """
+    sarif_path = Path(SARIF_FILE)
+
+    if not sarif_path.exists():
+        print(f"Warning: SARIF file not found: {SARIF_FILE}", file=sys.stderr)
+        return False
+
+    # Pattern: (^|/)repos/ - matches repos/ at start or after a /
+    repos_segment = re.compile(r"(^|/)repos/")
+
+    def in_repos(result):
+        """Return True if the result's first location lies under repos/."""
+        locations = result.get("locations", [])
+        if not locations:
+            return False
+        physical = locations[0].get("physicalLocation", {})
+        artifact_uri = physical.get("artifactLocation", {}).get("uri", "")
+        return bool(artifact_uri and repos_segment.search(artifact_uri))
+
+    try:
+        # SARIF is UTF-8 by specification; do not depend on the locale default
+        with open(sarif_path, "r", encoding="utf-8") as f:
+            sarif_data = json.load(f)
+
+        print("Filtering SARIF results to only include entries with paths matching repos/* ...")
+
+        runs = sarif_data["runs"] if "runs" in sarif_data else []
+        for run in runs:
+            if "results" not in run:
+                continue
+            filtered_results = [r for r in run["results"] if in_repos(r)]
+            run["results"] = filtered_results
+            print(
+                f"Run '{run.get('tool', {}).get('driver', {}).get('name', 'unknown')}' "
+                f"now has {len(filtered_results)} results"
+            )
+
+        # Write filtered SARIF back to file
+        with open(sarif_path, "w", encoding="utf-8") as f:
+            json.dump(sarif_data, f, indent=2)
+
+        print(f"Filtered SARIF written to {SARIF_FILE}")
+        return True
+
+    except (json.JSONDecodeError, UnicodeDecodeError, IOError) as e:
+        print(f"Error: Failed to filter SARIF file: {e}", file=sys.stderr)
+        return False
+
+
+def main():
+    """Entry point: recategorize if possible, then filter; a filtering failure exits with status 1."""
+    # Nothing to do unless validation passes
+    if not validate_paths():
+        # No SARIF file to process - this is normal before CodeQL analysis runs
+        print("No SARIF file found - skipping recategorization.")
+        print("This is expected if CodeQL analysis hasn't completed yet.")
+        sys.exit(0)
+
+    # A failed recategorization is only reported; filtering still runs afterwards
+    recategorized = recategorize_sarif()
+    if not recategorized:
+        print("Warning: Recategorization failed, continuing with filtering...", file=sys.stderr)
+
+    # Keep only results located under repos/*
+    filtered = filter_sarif_results()
+    if not filtered:
+        sys.exit(1)
+
+    print("Recategorization workflow completed successfully")
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/workflow/recategorize_guidelines.sh b/scripts/workflow/recategorize_guidelines.sh
deleted file mode 100755
index 8fa4b736020..00000000000
--- a/scripts/workflow/recategorize_guidelines.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# *******************************************************************************
-# Copyright (c) 2025 Contributors to the Eclipse Foundation
-#
-# See the NOTICE file(s) distributed with this work for additional
-# information regarding copyright ownership.
-#
-# This program and the accompanying materials are made available under the
-# terms of the Apache License Version 2.0 which is available at
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# SPDX-License-Identifier: Apache-2.0
-# *******************************************************************************
-RECATEGORIZE_SCRIPT="codeql-coding-standards-repo/scripts/guideline_recategorization/recategorize.py"
-CODING_STANDARDS_CONFIG="./.github/codeql/coding-standards.yml"
-CODING_STANDARDS_SCHEMA="codeql-coding-standards-repo/schemas/coding-standards-schema-1.0.0.json"
-SARIF_SCHEMA="codeql-coding-standards-repo/schemas/sarif-schema-2.1.0.json"
-SARIF_FILE="sarif-results/cpp.sarif" 
-mkdir -p sarif-results-recategorized
-echo "Processing $SARIF_FILE for recategorization..."
-python3 "$RECATEGORIZE_SCRIPT" \
-  --coding-standards-schema-file "$CODING_STANDARDS_SCHEMA" \
-  --sarif-schema-file "$SARIF_SCHEMA" \
-  "$CODING_STANDARDS_CONFIG" \
-  "$SARIF_FILE" \
-  "sarif-results-recategorized/$(basename "$SARIF_FILE")"
-  rm "$SARIF_FILE"
-  mv "sarif-results-recategorized/$(basename "$SARIF_FILE")" "$SARIF_FILE"

From 8f36092f326f2fca6d1b112e50d4bf3bca29a462 Mon Sep 17 00:00:00 2001
From: Saumya-R <saumya.rai@qorix.ai>
Date: Mon, 6 Apr 2026 23:35:59 +0530
Subject: [PATCH 2/3] adding paths to the paths to ignore

---
 .github/codeql/codeql-config.yml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.github/codeql/codeql-config.yml b/.github/codeql/codeql-config.yml
index 88d151e9ae3..06a756dec8b 100644
--- a/.github/codeql/codeql-config.yml
+++ b/.github/codeql/codeql-config.yml
@@ -19,3 +19,11 @@ paths-ignore:
   - "**/test/**"
   - "**/mock/**"
   - "**/codeql-coding-standards-repo/**"
+  - "**/examples/**"
+  - "**/docs/**"
+  - "**/target/**"
+  - "**/bazel-*/**"
+  - "**/.git/**"
+  - "**/node_modules/**"
+  - "**/build/**"
+  - "**/dist/**"

From 5a1df97b32205d03750e795f204da90271d127da Mon Sep 17 00:00:00 2001
From: Saumya-R <saumya.rai@qorix.ai>
Date: Tue, 7 Apr 2026 11:57:11 +0530
Subject: [PATCH 3/3] fixing the timeout issues

---
 .../workflows/codeql-multiple-repo-scan.yml   | 40 +++++++++++++++++--
 scripts/workflow/checkout_repos.py            | 16 ++++++--
 2 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/codeql-multiple-repo-scan.yml b/.github/workflows/codeql-multiple-repo-scan.yml
index a8ded6d20fe..4041dff9165 100644
--- a/.github/workflows/codeql-multiple-repo-scan.yml
+++ b/.github/workflows/codeql-multiple-repo-scan.yml
@@ -32,12 +32,21 @@ jobs:
   analyze-repos:
     name: Analyze Multiple Repositories
     runs-on: ubuntu-latest
+    timeout-minutes: 120 # Prevent indefinite hanging
     permissions:
       security-events: write
       packages: read
       actions: read
       contents: read
     steps:
+      - name: Check runner resources
+        run: |
+          echo "=== System Resources ==="
+          free -h
+          df -h
+          echo "=== CPU Info ==="
+          nproc
+          echo "======================="
       - name: Checkout central repository
         uses: actions/checkout@v4
       # Add coding standard packages and dependencies
@@ -49,17 +58,28 @@ jobs:
         id: parse-repos
         run: |
           python3 scripts/workflow/parse_repos.py
+      # Cache repository checkouts to speed up re-runs
+      - name: Cache repository checkouts
+        uses: actions/cache@v4
+        with:
+          path: repos/
+          key: repos-${{ hashFiles('known_good.json') }}
+          restore-keys: |
+            repos-
       - name: Checkout all pinned repositories
         id: checkout-repos
+        timeout-minutes: 30
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           python3 scripts/workflow/checkout_repos.py
       - name: List files in repos directory (debug)
         run: |
-          echo "Listing all files in repos directory before CodeQL analysis:"
-          find repos || echo "repos directory not found"
+          echo "Listing repos directory structure:"
+          du -sh repos/* 2>/dev/null || echo "repos directory not found"
+          df -h
       - name: Initialize CodeQL for all repositories
+        timeout-minutes: 15
         uses: github/codeql-action/init@v4
         with:
           languages: cpp
@@ -67,11 +87,19 @@ jobs:
           packs: codeql/misra-cpp-coding-standards
           config-file: ./.github/codeql/codeql-config.yml
       - name: Perform CodeQL Analysis
+        timeout-minutes: 60
         uses: github/codeql-action/analyze@v4
         with:
           upload-database: false # Don't upload databases for each repo
           output: sarif-results/
           category: "multi-repo-scan"
+      # Clean up large repo directories to free disk space
+      - name: Cleanup repository checkouts
+        if: always()
+        run: |
+          echo "Cleaning up checked out repositories to free disk space"
+          rm -rf repos/
+          df -h
       # Checkout CodeQL Coding Standards AFTER analysis for recategorization
       - name: Checkout CodeQL Coding Standards for recategorization
         uses: actions/checkout@v4
@@ -81,12 +109,18 @@ jobs:
           ref: v2.50.0 # Use frozen version instead of main
       - name: Recategorize Guidelines
         if: always()
+        timeout-minutes: 10
         run: |
           python3 scripts/workflow/recategorize_guidelines.py
       - name: Generate HTML Report from SARIF
+        timeout-minutes: 5
         run: |
           SARIF_FILE="sarif-results/cpp.sarif"
-          sarif html "$SARIF_FILE" --output codeql-report.html
+          if [ -f "$SARIF_FILE" ]; then
+            sarif html "$SARIF_FILE" --output codeql-report.html
+          else
+            echo "SARIF file not found, skipping HTML generation"
+          fi
       - name: Upload SARIF results as artifact
         uses: actions/upload-artifact@v4
         with:
diff --git a/scripts/workflow/checkout_repos.py b/scripts/workflow/checkout_repos.py
index 05d4bb1f9bd..c2f52f9d213 100755
--- a/scripts/workflow/checkout_repos.py
+++ b/scripts/workflow/checkout_repos.py
@@ -91,14 +91,24 @@ def checkout_repo(name, url, ref, path):
 
         if is_commit_hash(ref):
             print(f"Checking out {name} ({ref}) to {path}")
-            print(f"  Detected commit hash. Cloning and then checking out.")
+            print(f"  Detected commit hash. Cloning with shallow depth and then checking out.")
 
-            # Clone the repository
-            result = subprocess.run(["git", "clone", auth_url, path], capture_output=True, text=True)
+            # Clone the repository with shallow depth to save disk space and time
+            result = subprocess.run(
+                ["git", "clone", "--depth", "1", "--no-single-branch", auth_url, path], capture_output=True, text=True
+            )
             if result.returncode != 0:
                 print(f"  Git error: {result.stderr}", file=sys.stderr)
                 raise subprocess.CalledProcessError(result.returncode, result.args, result.stdout, result.stderr)
 
+            # Fetch the specific commit (needed when shallow cloning)
+            result = subprocess.run(
+                ["git", "-C", path, "fetch", "--depth", "1", "origin", ref], capture_output=True, text=True
+            )
+            if result.returncode != 0:
+                print(f"  Git error during fetch: {result.stderr}", file=sys.stderr)
+                raise subprocess.CalledProcessError(result.returncode, result.args, result.stdout, result.stderr)
+
             # Checkout specific commit
             result = subprocess.run(["git", "-C", path, "checkout", ref], capture_output=True, text=True)
             if result.returncode != 0: