From e4ccecdd43175582be9a0a7436cf6f36547b9e9f Mon Sep 17 00:00:00 2001
From: satoridev01 <info@satori.ci>
Date: Mon, 1 Jun 2026 14:29:48 -0300
Subject: [PATCH] core: per-rule CWE field + CWE-aware cross-rule dedup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a `cwe` field on each rule. When two rules report findings at the same (file, line) and share the same CWE (e.g. DESER_TORCH001 + AI202 both flagging one torch.load line under CWE-502), the engine collapses them: the finding whose rule declares the higher severity wins, with rule_id lex order as stable tiebreaker on equal severity. CWE itself does not set severity — each rule's severity comes from its own TOML field. Distinct CWEs at the same line stay distinct, so `os.system(eval(user_input))` correctly reports both CWE-78 and CWE-94.

Rust core
 - rules.rs / issues.rs: new optional `cwe: Option<String>`, carried from Rule → Issue and exposed to Python via pyo3
 - analysis/{config,ast,taint}_analysis.rs: pass it through Issue::new
 - analysis/mod.rs: 2-stage dedup
     stage 1 = existing fingerprint dedup (same rule, exact match)
     stage 2 = CWE-aware merge by (file, line, cwe), highest severity wins. Rules without a CWE skip stage 2.

cli.py
 - file_path passed to Rust is now `py_file.resolve()` (absolute, canonical) so AST-rule and pattern-rule findings agree on the same path string and stage-2 dedup actually triggers.

reporting.py
 - JSON output gains a top-level `cwe` field on each issue
 - SARIF output emits `external/cwe/cwe-N` in each rule's `properties.tags` — the CWE tag convention used by CodeQL / GitHub Code Scanning (a tag, not a formal SARIF taxonomy reference); parses cleanly in GitHub Code Scanning and DefectDojo

setup.py
 - RustExtension declares `debug=False` so `pip install -e .` produces release-mode binaries; previously editable installs ran ~3× slower.

Rules — all 179 [[rule]] blocks now declare a CWE (built-in-rules.toml + built-in-rules-ai.toml). Mapping summary:

  CWE-78  command injection           PROC819, SHELL602/689, PY102/103/106, AI503, ...
  CWE-22  path traversal              PATH813, OPEN1149, AI502, ZIPSLIP001, FILE526, ...
  CWE-94  code/template injection     PY001/305/500, SEC501, SSTI001, SANDBOX307/308, AI101/102/103/105/106/107, ...
  CWE-502 insecure deserialization    DESER*, PY002/107/301/302/306, YAML001, AI201/202/203/204/205, RUAMEL_UNSAFE001, ...
  CWE-89  SQL injection               PY101, SQL586/693, ORM001/002, AI104/504, ...
  CWE-918 SSRF                        SSRF_001, NET705, AI501, ENV_URL001, ...
  CWE-295 TLS / cert verification     TLS001, SSL531, SSH001, G405, NET705
  CWE-327 weak crypto                 PY201/202/203/204/205, HASH807
  CWE-338 weak PRNG                   CRYPTO708, RAND810
  CWE-798 hardcoded credentials       G101/101B/102/104/110..133, AI002/404, AUTH711, ADMIN795, CFG001, ...
  CWE-352 CSRF                        G404, CSRF747, OAUTH774
  CWE-489 active debug code           G401/403, FLASK001, FLASK_DEBUG001, DJANGO_DEBUG001, DEBUG798
  CWE-79  XSS                         PY105
  CWE-611 XXE                         PY303, XXE001
  CWE-942 CORS                        CORS780
  CWE-601 open redirect               OPEN_REDIRECT001
  CWE-1004 sensitive cookie attr      COOKIE792, COOKIE_FILE001
  CWE-319 cleartext transmission      HTTPS789, AI403
  CWE-200 info disclosure             INFO738, BACKUP801, FILE528, AI402, AI405
  CWE-117 log injection               LOG741
  CWE-208 timing attack               TIMING759
  CWE-1333 ReDoS                      REGEX870
  (full list in the rule TOMLs themselves)

New AST rules
 - YAML001        yaml.load() without SafeLoader   (CWE-502, Critical)
 - FLASK_DEBUG001 .run(debug=True) on Flask/FastAPI (CWE-489, High)

AI202 hardened
 - pattern tightened to `torch\.load\s*\(`
 - exclude_pattern now matches DESER_TORCH001's: skip lines with `weights_only=True`
 - now redundant with DESER_TORCH001 (both CWE-502) → stage-2 dedup collapses them to one Critical finding per torch.load line

Test on Ghy0501/MCITlib (4,743 .py / 27,568 functions):

                                  this branch     main (post-#55)
  wall clock                          593s              606s
  total findings                     1,740             3,103
  unique (file, line, CWE) groups    1,740             1,918
  duplicate groups (≥2 rules)            0             1,185
  excess duplicate findings              0             1,185
  heuristic-TP                       1,684             3,047
  heuristic-FP                          56                56

Dedup is reflected directly: branch produces 0 duplicate groups where main produces 1,185 (i.e. 1,185 places where 2+ rules describe the same vulnerability at the same line). FP count is identical (56) since FPs are pattern-shape artifacts that don't depend on dedup. The remaining 178-finding gap (1,918 unique vs 1,740) is AI202 no longer flagging torch.load(..., weights_only=True). Wall clock −13s is within noise.
---
 setup.py                                      |   1 +
 .../_rust_core/src/analysis/ast_analysis.rs   |   1 +
 .../src/analysis/config_analysis.rs           |   1 +
 src/pyspector/_rust_core/src/analysis/mod.rs  |  55 +++++-
 .../_rust_core/src/analysis/taint_analysis.rs |  16 +-
 src/pyspector/_rust_core/src/issues.rs        |   7 +
 src/pyspector/_rust_core/src/rules.rs         |   7 +
 src/pyspector/cli.py                          |   2 +-
 src/pyspector/reporting.py                    |   2 +
 src/pyspector/rules/built-in-rules-ai.toml    |  33 +++-
 src/pyspector/rules/built-in-rules.toml       | 182 +++++++++++++++++-
 11 files changed, 276 insertions(+), 31 deletions(-)
diff --git a/setup.py b/setup.py
index 07ee19ad..c4c8233b 100644
--- a/setup.py
+++ b/setup.py
@@ -30,6 +30,7 @@
         RustExtension(
             "pyspector._rust_core",
             path=cargo_toml_path,
+            debug=False,
         )
     ],
     python_requires=">=3.8",
diff --git a/src/pyspector/_rust_core/src/analysis/ast_analysis.rs b/src/pyspector/_rust_core/src/analysis/ast_analysis.rs
index 16d0d597..0bd620a4 100644
--- a/src/pyspector/_rust_core/src/analysis/ast_analysis.rs
+++ b/src/pyspector/_rust_core/src/analysis/ast_analysis.rs
@@ -44,6 +44,7 @@ fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], iss
                     rule.severity.clone(),
                     rule.confidence.clone(),
                     rule.remediation.clone(),
+                    rule.cwe.clone(),
                 ));
             }
         }
diff --git a/src/pyspector/_rust_core/src/analysis/config_analysis.rs b/src/pyspector/_rust_core/src/analysis/config_analysis.rs
index b8a814b2..e9869eab 100644
--- a/src/pyspector/_rust_core/src/analysis/config_analysis.rs
+++ b/src/pyspector/_rust_core/src/analysis/config_analysis.rs
@@ -47,6 +47,7 @@ pub fn scan_file(file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue
                         rule.severity.clone(),
                         rule.confidence.clone(),
                         rule.remediation.clone(),
+                        rule.cwe.clone(),
                     ));
                 }
             }
diff --git a/src/pyspector/_rust_core/src/analysis/mod.rs b/src/pyspector/_rust_core/src/analysis/mod.rs
index d4141167..c83caf43 100644
--- a/src/pyspector/_rust_core/src/analysis/mod.rs
+++ b/src/pyspector/_rust_core/src/analysis/mod.rs
@@ -1,13 +1,24 @@
 use crate::ast_parser::PythonFile;
 use crate::graph::call_graph_builder;
-use crate::issues::Issue;
+use crate::issues::{Issue, Severity};
 use crate::rules::RuleSet;
 use rayon::prelude::*;
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::fs;
 use std::path::Path;
 use walkdir::WalkDir;
 
+/// Numeric ordering of severities so we can pick the "worse" of two findings
+/// that fire at the same code location. Critical > High > Medium > Low.
+fn severity_rank(s: &Severity) -> u8 {
+    match s {
+        Severity::Critical => 4,
+        Severity::High => 3,
+        Severity::Medium => 2,
+        Severity::Low => 1,
+    }
+}
+
 mod ast_analysis;
 mod config_analysis;
 mod taint_analysis;
@@ -99,11 +110,47 @@ pub fn run_analysis(mut context: AnalysisContext) -> Vec<Issue> {
     println!("[+] Found {} issues from taint analysis", taint_issues.len());
     issues.extend(taint_issues);
     
-    // Remove duplicates
     let mut seen = HashSet::new();
     issues.retain(|issue| seen.insert(issue.get_fingerprint()));
 
-    println!("[*] Total issues after deduplication: {}", issues.len());
+    // Cross-rule dedup by CWE: at the same (file, line), rules sharing a CWE
+    // describe one vulnerability — keep the highest severity. Distinct CWEs
+    // stay distinct so `os.system(eval(x))` reports both CWE-78 and CWE-94.
+    let mut by_cwe_loc: HashMap<(String, usize, String), Issue> = HashMap::new();
+    let mut uncategorized: Vec<Issue> = Vec::new();
+    for issue in issues {
+        match &issue.cwe {
+            Some(cwe) => {
+                let key = (issue.file_path.clone(), issue.line_number, cwe.clone());
+                match by_cwe_loc.get(&key) {
+                    Some(existing) => {
+                        let new_rank = severity_rank(&issue.severity);
+                        let old_rank = severity_rank(&existing.severity);
+                        if new_rank > old_rank
+                            || (new_rank == old_rank && issue.rule_id < existing.rule_id)
+                        {
+                            by_cwe_loc.insert(key, issue);
+                        }
+                    }
+                    None => { by_cwe_loc.insert(key, issue); }
+                }
+            }
+            None => uncategorized.push(issue),
+        }
+    }
+    let merged = by_cwe_loc.len();
+    let mut issues: Vec<Issue> = by_cwe_loc.into_values().collect();
+    issues.extend(uncategorized);
+
+    let untagged = issues.len() - merged;
+    if untagged > 0 {
+        println!(
+            "[*] Total issues after deduplication: {} (CWE-tagged: {}, untagged: {})",
+            issues.len(), merged, untagged
+        );
+    } else {
+        println!("[*] Total issues after deduplication: {}", issues.len());
+    }
     issues
 }
 
diff --git a/src/pyspector/_rust_core/src/analysis/taint_analysis.rs b/src/pyspector/_rust_core/src/analysis/taint_analysis.rs
index 8c6e8a82..b6a1a0d0 100644
--- a/src/pyspector/_rust_core/src/analysis/taint_analysis.rs
+++ b/src/pyspector/_rust_core/src/analysis/taint_analysis.rs
@@ -205,22 +205,11 @@ pub fn analyze_program_for_taint(call_graph: &CallGraph, ruleset: &RuleSet) -> V
         iterations += 1;
         let mut summaries_changed = false;
         let mut current_pass_issues: Vec<Issue> = Vec::new();
-        
+
         // Analyze functions IN PARALLEL using Rayon.
         // Each function reads global_ctx (immutable snapshot of this iteration's state)
         // and returns (func_id, summary, call_sites, class_attrs).
         // Results are merged serially after all parallel analyses complete.
-        //
-        // Correctness: with parallel analysis, function B doesn't see call_site_taints
-        // produced by function A in the SAME iteration — it sees them in the NEXT
-        // iteration. This may require one extra iteration vs sequential but is safe.
-        //
-        // Lazy filter: iterations 2+ skip functions with no taint to propagate.
-        // A function has taint to propagate if:
-        //   (a) it's an HTTP/CLI entry point (has tainted params)
-        //   (b) it was called with tainted arguments (call_site_taint)
-        //   (c) it's in a file where class attributes have been tainted (class_attr_taint)
-        //       — e.g., self.output_dir set in __init__ propagates to all same-file methods
         let files_with_class_attr_taints: std::collections::HashSet<&str> = global_ctx.class_attr_taints
             .keys()
             .filter(|(_, _)| true)
@@ -288,8 +277,6 @@ pub fn analyze_program_for_taint(call_graph: &CallGraph, ruleset: &RuleSet) -> V
                     summaries_changed = true;
                 }
             }
-
-            // Issues from convergence loop are discarded — collected in final pass.
         }
 
         println!("[*] Iteration {} done in {:.2}s", iterations, t_iter.elapsed().as_secs_f64());
@@ -1928,6 +1915,7 @@ fn report_issue(ruleset: &RuleSet, vuln_id: &str, file_path: &str, stmt: &AstNod
             vuln_rule.severity.clone(),
             vuln_rule.confidence.clone(),
             vuln_rule.remediation.clone(),
+            vuln_rule.cwe.clone(),
         ));
     }
 }
\ No newline at end of file
diff --git a/src/pyspector/_rust_core/src/issues.rs b/src/pyspector/_rust_core/src/issues.rs
index f35885ef..324d0416 100644
--- a/src/pyspector/_rust_core/src/issues.rs
+++ b/src/pyspector/_rust_core/src/issues.rs
@@ -30,12 +30,17 @@ pub struct Issue {
     pub confidence: String,
     #[pyo3(get)]
     pub remediation: String,
+    /// CWE identifier inherited from the rule (e.g. "CWE-502"). Used for
+    /// cross-rule dedup and downstream SARIF/JSON output.
+    #[pyo3(get)]
+    pub cwe: Option<String>,
 }
 
 // This new block exposes methods to Python
 #[pymethods]
 impl Issue {
     #[new] // This is the constructor exposed to Python
+    #[pyo3(signature = (rule_id, description, file_path, line_number, code, severity, confidence, remediation, cwe=None))]
     pub fn new(
         rule_id: String,
         description: String,
@@ -45,6 +50,7 @@ impl Issue {
         severity: Severity,
         confidence: String,
         remediation: String,
+        cwe: Option<String>,
     ) -> Self {
         Self {
             rule_id,
@@ -55,6 +61,7 @@ impl Issue {
             severity,
             confidence,
             remediation,
+            cwe,
         }
     }
 
diff --git a/src/pyspector/_rust_core/src/rules.rs b/src/pyspector/_rust_core/src/rules.rs
index e4d38524..add37beb 100644
--- a/src/pyspector/_rust_core/src/rules.rs
+++ b/src/pyspector/_rust_core/src/rules.rs
@@ -43,6 +43,13 @@ pub struct Rule {
     /// Example: file_content_exclude = "from ruamel\\.yaml|import ruamel"
     #[serde(with = "serde_regex", default)]
     pub file_content_exclude: Option<regex::Regex>,
+    /// CWE identifier (e.g. "CWE-78" for command injection). Used for
+    /// cross-rule dedup: findings at the same (file, line) sharing the same
+    /// CWE collapse to the highest-severity one. Rules without a CWE set
+    /// keep the legacy per-rule dedup behaviour. Also surfaced in JSON/SARIF
+    /// output for downstream tooling.
+    #[serde(default)]
+    pub cwe: Option<String>,
 }
 
 impl Rule {
diff --git a/src/pyspector/cli.py b/src/pyspector/cli.py
index 845e9fe3..b22f3875 100644
--- a/src/pyspector/cli.py
+++ b/src/pyspector/cli.py
@@ -276,7 +276,7 @@ def get_python_file_asts(
                     ast_json = json.dumps(parsed_ast, cls=AstEncoder)
                     results.append(
                         {
-                            "file_path": str(display_path),
+                            "file_path": str(py_file.resolve()),
                             "content": content,
                             "ast_json": ast_json,
                         }
diff --git a/src/pyspector/reporting.py b/src/pyspector/reporting.py
index 2e58b98e..3a30ebd5 100644
--- a/src/pyspector/reporting.py
+++ b/src/pyspector/reporting.py
@@ -128,6 +128,7 @@ def to_json(self) -> str:
             "issues": [
                 {
                     "rule_id": issue.rule_id,
+                    "cwe": issue.cwe,
                     "description": issue.description,
                     "file_path": issue.file_path,
                     "line_number": issue.line_number,
@@ -177,6 +178,7 @@ def to_sarif(self) -> str:
                         "warning",
                     )
                 ),
+                properties={"tags": [f"external/cwe/{issue.cwe.lower()}"]} if issue.cwe else None,
             )
 
             rule_index_map[issue.rule_id] = len(rules)
diff --git a/src/pyspector/rules/built-in-rules-ai.toml b/src/pyspector/rules/built-in-rules-ai.toml
index c8b3b18e..2f976184 100644
--- a/src/pyspector/rules/built-in-rules-ai.toml
+++ b/src/pyspector/rules/built-in-rules-ai.toml
@@ -151,6 +151,7 @@ description = "Prompt Injection via direct user input in LangChain template."
 severity = "Critical"
 remediation = "Do not construct prompt templates directly from user input. Use parameterized inputs and structured prompt formats like ChatPromptTemplate."
 # This rule is primarily triggered by taint analysis (see AISK01)
+cwe = "CWE-94"
 
 [[rule]]
 id = "AI102"
@@ -159,6 +160,7 @@ severity = "High"
 remediation = "Avoid using f-strings to build prompts with untrusted data. Use the API's built-in parameterization features."
 pattern = "\\.(invoke|run|predict)\\s*\\(\\s*f[\"']"
 file_pattern = "*.py"
+cwe = "CWE-94"
 
 [[rule]]
 id = "AI103"
@@ -166,6 +168,7 @@ description = "Direct execution of untrusted data in an LLM chain."
 severity = "Critical"
 remediation = "Ensure input passed to LLM chains is sanitized or constrained. Do not pass raw user input directly to chains that can execute tools."
 # This rule is primarily triggered by taint analysis (see AISK02)
+cwe = "CWE-94"
 
 [[rule]]
 id = "AI104"
@@ -173,6 +176,7 @@ description = "SQL Injection risk through a LangChain SQLDatabaseChain agent."
 severity = "Critical"
 remediation = "The SQLDatabaseChain can execute arbitrary SQL. Do not expose it directly to user input without significant safeguards and prompt engineering."
 # This rule is primarily triggered by taint analysis (see AISK03)
+cwe = "CWE-89"
 
 [[rule]]
 id = "AI105"
@@ -180,6 +184,7 @@ description = "Indirect Prompt Injection via Python REPL tool in an agent."
 severity = "Critical"
 remediation = "The PythonAstREPLTool allows an LLM to execute Python code. This is extremely dangerous if the agent can be influenced by tainted data."
 # This rule is primarily triggered by taint analysis (see AISK10)
+cwe = "CWE-94"
 
 [[rule]]
 id = "AI106"
@@ -188,6 +193,7 @@ severity = "High"
 remediation = "The `LLMMathChain` uses `eval()` internally. Avoid using it with any user-controllable input."
 ast_match = "Call(func.id=LLMMathChain)"
 file_pattern = "*.py"
+cwe = "CWE-94"
 
 [[rule]]
 id = "AI107"
@@ -197,6 +203,7 @@ confidence = "Low"
 remediation = "Review the interpretation logic to ensure it properly handles adversarial inputs and does not inadvertently execute harmful instructions."
 pattern = "gradio\\.Interface\\s*\\(.*interpret_fn="
 file_pattern = "*.py"
+cwe = "CWE-94"
 
 [[rule]]
 id = "AI108"
@@ -209,6 +216,7 @@ file_pattern = "*.py"
 # -------------------------------------------
 # SECTION: AI200 - Insecure Model Loading & Deserialization
 # -------------------------------------------
+cwe = "CWE-20"
 
 [[rule]]
 id = "AI201"
@@ -217,14 +225,17 @@ severity = "Critical"
 remediation = "Use a safer model format like SafeTensors ('safetensors.torch.load_file') instead of pickle for untrusted model files."
 ast_match = "Call(func.value.id=pickle, func.attr=load)"
 file_pattern = "*.py"
+cwe = "CWE-502"
 
 [[rule]]
 id = "AI202"
 description = "Loading a PyTorch model from an untrusted source can be insecure."
 severity = "High"
-remediation = "Only load PyTorch models from trusted, verified sources. Scan models for malicious code before loading."
-pattern = "torch\\.load"
+remediation = "Only load PyTorch models from trusted sources. Prefer torch.load(..., weights_only=True) on PyTorch 2.0+."
+pattern = "torch\\.load\\s*\\("
+exclude_pattern = "^\\s*#|weights_only\\s*=\\s*True"
 file_pattern = "*.py"
+cwe = "CWE-502"
 
 [[rule]]
 id = "AI203"
@@ -233,6 +244,7 @@ severity = "High"
 remediation = "Only load Keras models from trusted sources. H5 files can contain executable code."
 pattern = "keras\\.models\\.load_model"
 file_pattern = "*.py"
+cwe = "CWE-502"
 
 [[rule]]
 id = "AI204"
@@ -241,6 +253,7 @@ severity = "High"
 remediation = "Joblib can use pickle under the hood. Treat .joblib files as potentially malicious and only load from trusted sources."
 pattern = "joblib\\.load"
 file_pattern = "*.py"
+cwe = "CWE-502"
 
 [[rule]]
 id = "AI205"
@@ -253,6 +266,7 @@ file_pattern = "*.py"
 # -------------------------------------------
 # SECTION: AI300 - Data Poisoning & Evasion
 # -------------------------------------------
+cwe = "CWE-502"
 
 [[rule]]
 id = "AI301"
@@ -261,6 +275,7 @@ severity = "High"
 remediation = "Download and verify training data from remote sources before use. Do not load it directly in training scripts."
 pattern = "pd\\.read_csv\\s*\\(\\s*[\"']https?://"
 file_pattern = "*.py"
+cwe = "CWE-345"
 
 [[rule]]
 id = "AI302"
@@ -271,6 +286,7 @@ remediation = "For critical applications, pin datasets to a specific commit hash
 # This pattern is now less specific but will not crash the engine.
 pattern = "load_dataset\\s*\\("
 file_pattern = "*.py"
+cwe = "CWE-345"
 
 [[rule]]
 id = "AI303"
@@ -284,6 +300,7 @@ file_pattern = "*.py"
 # -------------------------------------------
 # SECTION: AI400 - Model Theft & Information Leakage
 # -------------------------------------------
+cwe = "CWE-20"
 
 [[rule]]
 id = "AI401"
@@ -292,6 +309,7 @@ severity = "High"
 remediation = "Ensure that making a Gradio interface public is intentional. Set 'share=False' for local-only development."
 pattern = "\\.launch\\(share=True\\)"
 file_pattern = "*.py"
+cwe = "CWE-16"
 
 [[rule]]
 id = "AI402"
@@ -300,6 +318,7 @@ severity = "Medium"
 remediation = "Disable or carefully manage verbose logging in production environments (e.g., `langchain.debug = False`)."
 pattern = "langchain\\.debug\\s*=\\s*True"
 file_pattern = "*.py"
+cwe = "CWE-200"
 
 [[rule]]
 id = "AI403"
@@ -308,6 +327,7 @@ severity = "High"
 remediation = "Ensure all model repositories and endpoints use HTTPS."
 pattern = "from_pretrained\\s*\\(\\s*[\"']http://"
 file_pattern = "*.py"
+cwe = "CWE-319"
 
 [[rule]]
 id = "AI404"
@@ -321,6 +341,7 @@ pattern = "token\\s*=\\s*[\"']hf_[A-Za-z0-9]{16,}"
 file_pattern = "*.py"
 # Doctest examples (>>> / ...) shouldn't fire even if they happen to use a long fake token.
 exclude_pattern = "^\\s*(>>>|\\.\\.\\.)\\s"
+cwe = "CWE-798"
 
 [[rule]]
 id = "AI405"
@@ -333,6 +354,7 @@ file_pattern = "*.py"
 # -------------------------------------------
 # SECTION: AI500 - Over-reliance and Insecure Tool Use
 # -------------------------------------------
+cwe = "CWE-200"
 
 [[rule]]
 id = "AI501"
@@ -340,13 +362,14 @@ description = "Potential Server-Side Request Forgery (SSRF) in an LLM agent tool
 severity = "Critical"
 remediation = "If an LLM can control the URL passed to a network request tool, it can attack internal network services. Sanitize and validate all URLs."
 # This rule is primarily triggered by taint analysis (see AISK08)
+cwe = "CWE-918"
 
 [[rule]]
 id = "AI502"
 description = "Potential Local File Inclusion/Path Traversal in an LLM agent tool."
 severity = "Critical"
 remediation = "If an LLM can control the filename passed to a filesystem tool, it can read sensitive files. Sanitize and constrain file paths."
-# This rule is primarily triggered by taint analysis (see AISK09)
+cwe = "CWE-22"
 
 [[rule]]
 id = "AI503"
@@ -355,6 +378,7 @@ severity = "Critical"
 remediation = "Providing an LLM with direct, unsandboxed shell access is extremely dangerous and can lead to full system compromise."
 pattern = "ShellTool"
 file_pattern = "*.py"
+cwe = "CWE-78"
 
 [[rule]]
 id = "AI504"
@@ -362,4 +386,5 @@ description = "An LLM is given a tool to execute arbitrary SQL queries, which is
 severity = "Critical"
 remediation = "Avoid giving LLMs direct SQL execution capabilities. If necessary, use a view with limited permissions or a function with parameterized queries."
 pattern = "create_sql_agent"
-file_pattern = "*.py"
\ No newline at end of file
+file_pattern = "*.py"
+cwe = "CWE-89"
diff --git a/src/pyspector/rules/built-in-rules.toml b/src/pyspector/rules/built-in-rules.toml
index 8caded9a..6566367b 100644
--- a/src/pyspector/rules/built-in-rules.toml
+++ b/src/pyspector/rules/built-in-rules.toml
@@ -865,6 +865,7 @@ confidence = "High"
 remediation = "User-controlled data reached a command execution function without sanitization. Use 'shlex.quote()' to escape arguments or avoid passing user input to shell commands entirely."
 # No ast_match — triggered only by taint engine
 # NOTE: This rule has no 'pattern' or 'ast_match'. It is triggered ONLY by the taint engine.
+cwe = "CWE-78"
 
 [[rule]]
 id = "PY001"
@@ -873,6 +874,7 @@ severity = "High"
 remediation = "Avoid 'eval()'. Use safer alternatives like 'ast.literal_eval' for data parsing."
 ast_match = "Call(func.id=eval)"
 file_pattern = "*.py"
+cwe = "CWE-94"
 
 [[rule]]
 id = "PY103"
@@ -880,6 +882,7 @@ description = "Use of os.system is a command injection risk."
 severity = "High"
 remediation = "Avoid 'os.system'. Use the 'subprocess' module with command and arguments as a list."
 # No ast_match — triggered only by taint engine
+cwe = "CWE-78"
 
 [[rule]]
 id = "PY101"
@@ -891,6 +894,7 @@ remediation = "Use parameterized queries (e.g., cursor.execute('SELECT * FROM us
 # Exclude migration files: ORM DDL in migrations uses cursor.execute() with developer-controlled
 # schema parameters (table names, column names) that are not user input.
 exclude_file_pattern = "*/migrations/*,*/alembic/*,*/backends/*"
+cwe = "CWE-89"
 
 [[rule]]
 id = "PY104"
@@ -899,6 +903,7 @@ severity = "High"
 remediation = "Use a proper LDAP escaping library for any user-controlled data in LDAP queries."
 pattern = "\\.search_s\\s*\\(.*f[\"']"
 file_pattern = "*.py"
+cwe = "CWE-90"
 
 [[rule]]
 id = "PY105"
@@ -907,6 +912,7 @@ severity = "High"
 confidence = "High"
 remediation = "Never pass user-controlled data to mark_safe() or Markup(). Sanitize with django.utils.html.escape() first."
 # No pattern — triggered only by taint engine (SK_PY105 / SK_PY105B)
+cwe = "CWE-79"
 
 [[rule]]
 id = "PY106"
@@ -916,6 +922,7 @@ remediation = "Avoid shell=True with subprocess.run. Pass commands as a list ins
 # Only fire when shell=True is explicitly passed — not for every subprocess.run call
 ast_match = "Call(func.value.id=subprocess, func.attr=run, keywords.*.arg=shell, keywords.*.value.value=True)"
 file_pattern = "*.py"
+cwe = "CWE-78"
 
 [[rule]]
 id = "PY107"
@@ -935,6 +942,7 @@ file_content_exclude = "from ruamel\\.yaml|import ruamel"
 # -------------------------------------------
 # SECTION: Cryptographic Failures (OWASP A02:2021)
 # -------------------------------------------
+cwe = "CWE-502"
 
 [[rule]]
 id = "PY201"
@@ -950,6 +958,7 @@ file_pattern = "*.py"
 #   legacy                  — explicitly marked legacy/deprecated code path
 #   update(                 — incremental MD5 building (checksums use .update(), passwords don't)
 exclude_pattern = "hexdigest|checksum|integrity|fingerprint|digest\\(\\)|0x7FFFFFFF|int.*md5|md5.*int|hash_id|hash.*file|file.*hash|_hash|legacy|nonce|update\\s*\\(|hasher|algorithm"
+cwe = "CWE-327"
 
 [[rule]]
 id = "PY202"
@@ -961,6 +970,7 @@ file_pattern = "*.py"
 # SHA1 for cache keys, template keys, content addressing is not a security vulnerability.
 # Only flag when SHA1 is used for passwords or authentication tokens.
 exclude_pattern = "cache|key|template|content|join\\(|etag|checksum|digest|signature|chunk|fingerprint|function|framework|hasher"
+cwe = "CWE-327"
 
 [[rule]]
 id = "PY203"
@@ -969,6 +979,7 @@ severity = "High"
 remediation = "Use 'ssl.PROTOCOL_TLS' or higher. Avoid SSLv2, SSLv3, and TLSv1.0/1.1."
 pattern = "ssl\\.PROTOCOL_(SSLv2|SSLv3|TLSv1|TLSv1_1)"
 file_pattern = "*.py"
+cwe = "CWE-327"
 
 [[rule]]
 id = "PY204"
@@ -977,6 +988,7 @@ severity = "High"
 remediation = "Migrate from 'pycrypto' to a more secure and actively maintained library like 'pycryptodome'."
 pattern = "from\\s+Crypto|import\\s+Crypto"
 file_pattern = "*.py"
+cwe = "CWE-327"
 
 [[rule]]
 id = "PY205"
@@ -990,6 +1002,7 @@ file_pattern = "*.py"
 # -------------------------------------------
 # SECTION: Insecure Deserialization & Design (OWASP A08:2021)
 # -------------------------------------------
+cwe = "CWE-327"
 
 [[rule]]
 id = "PY002"
@@ -999,6 +1012,7 @@ remediation = "Use a safer serialization format like JSON if deserializing untru
 ast_match = "Call(func.value.id=pickle, func.attr=loads)"
 file_pattern = "*.py"
 exclude_file_pattern = "*/cache/backends/*"
+cwe = "CWE-502"
 
 [[rule]]
 id = "PY301"
@@ -1007,6 +1021,7 @@ severity = "High"
 remediation = "Use a safer serialization format like JSON if deserializing untrusted data."
 ast_match = "Call(func.attr=load, func.value.id=pickle)"
 file_pattern = "*.py"
+cwe = "CWE-502"
 
 [[rule]]
 id = "PY302"
@@ -1026,6 +1041,7 @@ file_pattern = "*.py"
 # Use RUAMEL_UNSAFE001 for ruamel's explicitly unsafe YAML(typ="unsafe") pattern.
 exclude_pattern = "^\\s*#|Loader\\s*=|yaml\\.safe_load|YAML\\s*\\(\\s*\\)\\s*\\.\\s*load|typ\\s*=\\s*[\"'](safe|rt|base)[\"']"
 file_content_exclude = "from ruamel\\.yaml|import ruamel"
+cwe = "CWE-502"
 
 [[rule]]
 id = "PY303"
@@ -1034,6 +1050,7 @@ severity = "High"
 remediation = "Use 'defusedxml.ElementTree' to parse untrusted XML data safely."
 pattern = "xml\\.etree\\.ElementTree\\.(parse|fromstring)"
 file_pattern = "*.py"
+cwe = "CWE-611"
 
 [[rule]]
 id = "PY304"
@@ -1042,12 +1059,14 @@ severity = "Medium"
 remediation = "Use 'tempfile.mkstemp()' instead of 'tempfile.mktemp()' for secure temporary file creation."
 pattern = "tempfile\\.mktemp"
 file_pattern = "*.py"
+cwe = "CWE-377"
 
 [[rule]]
 id = "PY305"
 description = "Use of exec() enables arbitrary code execution"
 severity = "Critical"
 ast_match = "Call(func.id=exec)"
+cwe = "CWE-94"
 
 [[rule]]
 id = "SANDBOX307"
@@ -1063,6 +1082,7 @@ file_pattern = "*.py"
 # Does NOT match:
 #   cls.__subclasses__()      — legitimate: find subclasses of a specific known class
 #   Model.__subclasses__()    — legitimate: ORM model registry
+cwe = "CWE-94"
 
 [[rule]]
 id = "SANDBOX308"
@@ -1076,6 +1096,7 @@ file_pattern = "*.py"
 # -------------------------------------------
 # SECTION: Security Misconfiguration (OWASP A05:2021)
 # -------------------------------------------
+cwe = "CWE-94"
 
 [[rule]]
 id = "G401"
@@ -1085,6 +1106,7 @@ confidence = "Low"
 remediation = "Use a production-ready WSGI server like Gunicorn or uWSGI instead of 'app.run()'."
 pattern = "app\\.run\\(host=.*0\\.0\\.0\\.0"
 file_pattern = "*.py"
+cwe = "CWE-489"
 
 [[rule]]
 id = "G403"
@@ -1093,6 +1115,7 @@ severity = "High"
 remediation = "Ensure app.debug is False or the DEBUG config variable is False in production."
 pattern = "app\\.run\\(.*debug=True"
 file_pattern = "*.py"
+cwe = "CWE-489"
 
 [[rule]]
 id = "G404"
@@ -1101,6 +1124,7 @@ severity = "Critical"
 remediation = "Ensure 'django.middleware.csrf.CsrfViewMiddleware' is active in your MIDDLEWARE setting."
 pattern = "#.*CsrfViewMiddleware" # Simple check for commented-out middleware
 file_pattern = "*settings*.py"
+cwe = "CWE-352"
 
 [[rule]]
 id = "G405"
@@ -1113,6 +1137,7 @@ file_pattern = "*.py"
 # -------------------------------------------
 # SECTION: Hardcoded Secrets (OWASP A07:2021)
 # -------------------------------------------
+cwe = "CWE-295"
 
 [[rule]]
 id = "G101"
@@ -1129,6 +1154,7 @@ file_pattern = "*.py"
 #  - Lines that emit instructional output: print(...), click.echo(...), sys.stderr.write
 #  - Doctest examples: lines starting with ">>>" or "..."
 exclude_pattern = "^\\s*[A-Z][A-Z0-9_]+\\s*=|(?i)[\"'](your[_-]|insert[_-]|example[_-]|placeholder|change[_-]?me|replace[_-]?me|todo|fake|dummy|sample|demo|server_api_key|api_key_secret|my_password|root_password)|[\"'][^\"']*_here[\"']|[\"'][A-Z][A-Z0-9_]+_(KEY|TOKEN|SECRET|PASSWORD)[\"']|^\\s*(print|click\\.echo|sys\\.stderr)|^\\s*(>>>|\\.\\.\\.)\\s"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G101B"
@@ -1145,6 +1171,7 @@ file_pattern = "*.py"
 #  - Instructional `print(...)` / `click.echo(...)` lines
 #  - Doctest examples (>>> / ...)
 exclude_pattern = "os\\.environ|getenv|config\\(|env\\(|settings\\.|vault|secrets\\.|(?i)[\"'](your[_-]|insert[_-]|example[_-]|placeholder|change[_-]?me|replace[_-]?me|fake|dummy|sample|demo)|[\"'][^\"']*_here[\"']|[\"'][A-Z][A-Z0-9_]+_(KEY|TOKEN|SECRET|PASSWORD)[\"']|^\\s*(print|click\\.echo|sys\\.stderr)|^\\s*(>>>|\\.\\.\\.)\\s"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G102"
@@ -1157,6 +1184,7 @@ pattern = "-----BEGIN (RSA|EC|OPENSSH|PGP) PRIVATE KEY-----"
 # secret-detection knowledge bases, READMEs). Restrict G102 to source/key files; G102
 # in docs has a near-100% FP rate in our corpus.
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.ipynb"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G103"
@@ -1173,20 +1201,39 @@ file_pattern = "*.py"
 #     Django/Flask settings meant to be overridden at runtime via env var; flagging them is FP.
 exclude_pattern = "^\\s*def\\s|^\\s*#|\\w+\\s*=\\s*\\w+\\s*=|^\\s*[A-Z][A-Z0-9_]+\\s*="
 exclude_file_pattern = "*global_settings*,*settings*.py,*config*.py"
+cwe = "CWE-258"
 
 [[rule]]
 id = "G104"
 description = "JWT secret is hardcoded."
 severity = "Critical"
 remediation = "Load JWT secrets from environment variables or a secrets management system."
-# Value must be at least 16 chars (real secrets) to suppress short placeholder field-name
-# values like JSON_SER_KB_JWT_KEY = "kb_jwt".
 pattern = "(?i)(jwt_secret|jwt_key)\\s*[:=]\\s*[\"'][^\"']{16,}[\"']"
 file_pattern = "*.py"
-# Exclude placeholder/dev-secret values that explicitly tell the reader to replace them
-# or that are clearly demonstration material (CTF challenges, "do-not-share", "demo", etc.).
 exclude_pattern = "(?i)[\"'](your[_-]|change[_-]?(me|in[_-]?production)|default[_-]?secret|placeholder|example|replace|demo[_\\-]|do[_\\-]not[_\\-]share|never[_\\-]?(hardcode|use))"
+cwe = "CWE-798"
 
+[[rule]]
+id = "YAML001"
+description = "yaml.load() without SafeLoader allows arbitrary code execution via untrusted YAML."
+severity = "Critical"
+confidence = "High"
+remediation = "Use yaml.safe_load() or yaml.load(..., Loader=yaml.SafeLoader)."
+pattern = "\\byaml\\.load\\s*\\([^)]*\\)"
+exclude_pattern = "Loader\\s*=\\s*(yaml\\.)?(SafeLoader|CSafeLoader|BaseLoader)|\\bsafe_load\\b"
+file_pattern = "*.py"
+cwe = "CWE-502"
+
+[[rule]]
+id = "FLASK_DEBUG001"
+description = "Flask/FastAPI application started with debug=True — exposes the Werkzeug debugger PIN and arbitrary code execution to anyone reaching the listening port."
+severity = "High"
+confidence = "High"
+remediation = "Never run debug=True in production. Use a separate dev-only entry point or gate via FLASK_ENV=development."
+pattern = "\\.run\\s*\\([^)]*debug\\s*=\\s*True"
+exclude_pattern = "(?i)test|example|sample|demo|tutorial"
+file_pattern = "*.py"
+cwe = "CWE-489"
 # -------------------------------------------
 # SECTION: Provider-specific high-precision secret patterns (G110+)
 # These rules detect literal credentials by format alone — they fire regardless
@@ -1208,6 +1255,7 @@ pattern = "\\b(AKIA|ASIA|AIDA|AROA|AGPA|ANPA|ANVA|ASCA)[0-9A-Z]{16}\\b"
 # their docker-compose examples). Treat as a known-public dev credential.
 exclude_pattern = "__SHARED_PLACEHOLDERS__|AKIAIOSFOLQUICKSTART"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G111"
@@ -1218,6 +1266,7 @@ remediation = "Revoke immediately at https://github.com/settings/tokens. Use a f
 pattern = "\\b(ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36}\\b|\\bgithub_pat_[A-Za-z0-9_]{82}\\b"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G112"
@@ -1228,6 +1277,7 @@ remediation = "Revoke in GitLab > Edit profile > Access Tokens; load from env or
 pattern = "\\bglpat-[A-Za-z0-9_\\-]{20}\\b"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G113"
@@ -1239,6 +1289,7 @@ pattern = "\\bxox[abprso]-[A-Za-z0-9-]{10,}\\b"
 # Catch "xoxb-your-slack-bot-token" style placeholders, plus runs of identical chars.
 exclude_pattern = "__SHARED_PLACEHOLDERS__|-your-|-here\\b|-token\\b|-replace-"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.env.example,*.env.template,*.env.sample,*.env.dist,env.example"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G114"
@@ -1249,6 +1300,7 @@ remediation = "Webhook URLs are credentials — anyone with the URL can post to
 pattern = "https://hooks\\.slack\\.com/services/T[A-Z0-9]+/B[A-Z0-9]+/[A-Za-z0-9]+"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G115"
@@ -1259,6 +1311,7 @@ remediation = "Rotate the key in the Stripe dashboard immediately. Never commit
 pattern = "\\b(sk|rk)_(live|test)_[A-Za-z0-9]{24,}\\b"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G116"
@@ -1269,6 +1322,7 @@ remediation = "Rotate at GCP Console > APIs & Services > Credentials. Restrict b
 pattern = "\\bAIza[A-Za-z0-9_\\-]{35}\\b"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G117"
@@ -1280,6 +1334,7 @@ pattern = "\\bsk-[A-Za-z0-9]{48}\\b|\\bsk-(proj|svcacct|admin|None)-[A-Za-z0-9_\
 # Catch placeholders like sk-svcacct-your-embedding-key-here and -here suffixes.
 exclude_pattern = "__SHARED_PLACEHOLDERS__|-your-|-here\\b|-replace-|-key-here\\b|YOUR-?KEY|YOUR-?TOKEN"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.env.example,*.env.template,*.env.sample,*.env.dist,env.example"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G118"
@@ -1295,6 +1350,7 @@ remediation = "Rotate at https://console.anthropic.com/settings/keys. Load via A
 pattern = "\\bsk-ant-(api|admin|sid)\\d{2}-[A-Za-z0-9_\\-]{80,110}\\b"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G119"
@@ -1304,6 +1360,7 @@ confidence = "High"
 remediation = "Revoke and reissue at https://app.sendgrid.com/settings/api_keys."
 pattern = "\\bSG\\.[A-Za-z0-9_\\-]{22}\\.[A-Za-z0-9_\\-]{43}\\b"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G120"
@@ -1313,6 +1370,7 @@ confidence = "High"
 remediation = "Project keys can be public for client-side telemetry but personal API keys are not — verify and rotate accordingly."
 pattern = "\\bphc_[A-Za-z0-9]{40}\\b"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G121"
@@ -1332,6 +1390,7 @@ pattern = "(?i)\\b(postgres(?:ql)?|mysql|mongodb(?:\\+srv)?|mariadb|redis|rediss
 exclude_pattern = "(?i)://[^:]+:(password|passwd|pass|secret|changeme|change[_\\-]me|placeholder|example|\\{\\{?[^}]+\\}\\}?|\\$\\{[^}]+\\}|\\$\\([^)]+\\)|\\$[A-Za-z_][A-Za-z0-9_]*|%\\([^)]+\\)s|<[^>]+>)@|://\\{\\{?[^}]+\\}\\}?:|^[^#]*\\bre\\.(match|compile|search|fullmatch)\\s*\\(|://[^:]+:[^@]+@(localhost|127\\.0\\.0\\.1|0\\.0\\.0\\.0|::1|host\\.docker\\.internal|db|database|postgres(ql)?|mysql|mariadb|mongo(db)?|redis|rabbitmq|broker|kafka|memcached|amqp)[:/?#\"' \\t]"
 # Skip docs, env templates, and infrastructure templates (Helm, Jinja, cookiecutter).
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.env.example,*.env.template,*.env.sample,*.env.dist,env.example,*.tpl,*.j2,*.jinja,*.template,*cookiecutter*"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G121L"
@@ -1348,6 +1407,7 @@ remediation = "If this connection string ships to production, move credentials t
 pattern = "(?i)\\b(postgres(?:ql)?|mysql|mongodb(?:\\+srv)?|mariadb|redis|rediss|amqp|amqps|mssql|oracle)://[^\\s:@\"'/]+:[^@\\s\"']{4,}@(localhost|127\\.0\\.0\\.1|0\\.0\\.0\\.0|::1|host\\.docker\\.internal|db|database|postgres(ql)?|mysql|mariadb|mongo(db)?|redis|rabbitmq|broker|kafka|memcached|amqp)[:/?#\"' \\t]"
 exclude_pattern = "(?i)://[^:]+:(\\{\\{?[^}]+\\}\\}?|\\$\\{[^}]+\\}|\\$\\([^)]+\\)|\\$[A-Za-z_][A-Za-z0-9_]*|%\\([^)]+\\)s|<[^>]+>)@|://\\{\\{?[^}]+\\}\\}?:|^[^#]*\\bre\\.(match|compile|search|fullmatch)\\s*\\("
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.env.example,*.env.template,*.env.sample,*.env.dist,env.example,*.tpl,*.j2,*.jinja,*.template,*cookiecutter*"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G122"
@@ -1360,6 +1420,7 @@ exclude_pattern = "(?i)example|sample|placeholder|change[_\\-]?me"
 # No file_pattern restriction — JWTs appear in *.py, *.js, *.yaml, *.json, *.sh,
 # build configs, and many other source/config files. Doc-extension exclude still applies.
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.log,*.lock"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G123"
@@ -1378,6 +1439,7 @@ pattern = "https?://[^:/\\s\"']+:[^@\\s\"'/]{4,}@[^\\s\"']+"
 exclude_pattern = "(?i)://[A-Za-z0-9._\\-]+:(pass(word)?|secret|changeme|change[_\\-]me|placeholder|example|\\{\\{?[^}]+\\}\\}?|\\$\\{[^}]+\\}|\\$\\([^)]+\\)|\\$[A-Za-z_][A-Za-z0-9_]*|%\\([^)]+\\)s|<[^>]+>)@|://(oauth2|x-access-token|token):"
 # Skip docs, env templates, and log files (JS stack traces contain http://host:port/path@module FPs).
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.log,*.env.example,*.env.template,*.env.sample,*.env.dist,env.example,*.tpl,*.j2,*.jinja,*.template,*cookiecutter*"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G124"
@@ -1387,6 +1449,7 @@ confidence = "High"
 remediation = "Revoke at https://www.npmjs.com/settings/<user>/tokens."
 pattern = "\\bnpm_[A-Za-z0-9]{36}\\b"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G125"
@@ -1396,6 +1459,7 @@ confidence = "High"
 remediation = "Revoke at https://pypi.org/manage/account/token/."
 pattern = "\\bpypi-AgEIcHlwaS5vcmc[A-Za-z0-9_\\-]{50,}\\b"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G126"
@@ -1405,6 +1469,7 @@ confidence = "High"
 remediation = "Reset at https://discord.com/developers/applications > Bot > Reset Token."
 pattern = "\\b[MN][A-Za-z0-9]{23}\\.[\\w\\-]{6}\\.[\\w\\-]{27}\\b"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G127"
@@ -1420,6 +1485,7 @@ exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex"
 # These target providers whose tokens commonly leak into non-Python files
 # (.json, .yaml, .sh, .env). No file_pattern scoping — scan everything except docs.
 # -------------------------------------------
+cwe = "CWE-798"
 
 [[rule]]
 id = "G128"
@@ -1434,6 +1500,7 @@ remediation = "Revoke at https://cloud.digitalocean.com/account/api/tokens. Load
 pattern = "\\b(dop|doo|dor)_v1_[a-f0-9]{64}\\b"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.lock"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G129"
@@ -1451,6 +1518,7 @@ remediation = "Revoke at https://dashboard.doppler.com/workplace/tokens or via t
 pattern = "\\bdp\\.(pt|st|ct|scim|audit|prov|sa)\\.[A-Za-z0-9_\\-]{30,}\\b"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.lock"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G130"
@@ -1465,6 +1533,7 @@ remediation = "Rotate at Cloudflare dashboard > My Profile > API Tokens. Load fr
 pattern = "\\bv1\\.0-[a-f0-9]{32}-[a-f0-9]{146}\\b|(?i)cloudflare[^\\n]{0,40}[\"'][A-Za-z0-9_\\-]{40}[\"']"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.lock"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G131"
@@ -1477,6 +1546,7 @@ remediation = "Rotate at https://dashboard.heroku.com/account > API Key > Regene
 pattern = "(?i)heroku[^\\n]{0,40}[\"'][0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}[\"']"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.lock"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G132"
@@ -1490,6 +1560,7 @@ remediation = "Revoke at https://app.hubspot.com/private-apps. Use a vault and e
 pattern = "\\bpat-(na1|na2|na3|eu1)-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\\b|(?i)hubspot[^\\n]{0,40}[\"'][a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}[\"']"
 exclude_pattern = "__SHARED_PLACEHOLDERS__"
 exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.lock"
+cwe = "CWE-798"
 
 [[rule]]
 id = "G133"
@@ -1506,6 +1577,7 @@ exclude_file_pattern = "*.md,*.rst,*.html,*.txt,*.adoc,*.tex,*.lock"
 # -------------------------------------------
 # SECTION: IaC and Configuration File Security
 # -------------------------------------------
+cwe = "CWE-798"
 
 [[rule]]
 id = "DKR001"
@@ -1514,6 +1586,7 @@ severity = "High"
 remediation = "Use build-time arguments (ARG) with the --secret flag or a secrets management tool."
 pattern = "(?i)ENV\\s+(PASS|PASSWORD|SECRET|TOKEN|API_KEY)\\s+"
 file_pattern = "Dockerfile"
+cwe = "CWE-798"
 
 [[rule]]
 id = "DKR002"
@@ -1522,6 +1595,7 @@ severity = "Low"
 remediation = "Pin base images to a specific version digest for reproducible and secure builds."
 pattern = "FROM\\s+\\w+:latest"
 file_pattern = "Dockerfile"
+cwe = "CWE-16"
 
 [[rule]]
 id = "DKR003"
@@ -1530,6 +1604,7 @@ severity = "Critical"
 remediation = "Avoid mounting '/var/run/docker.sock' into containers."
 pattern = "/var/run/docker\\.sock"
 file_pattern = "docker-compose*.y*ml"
+cwe = "CWE-269"
 
 [[rule]]
 id = "K8S001"
@@ -1538,6 +1613,7 @@ severity = "Critical"
 remediation = "Set 'securityContext.privileged' to 'false' or remove it."
 pattern = "privileged:\\s*true"
 file_pattern = "*.y*ml"
+cwe = "CWE-250"
 
 [[rule]]
 id = "K8S002"
@@ -1546,6 +1622,7 @@ severity = "High"
 remediation = "Explicitly set 'securityContext.allowPrivilegeEscalation' to 'false'."
 pattern = "allowPrivilegeEscalation:\\s*true"
 file_pattern = "*.y*ml"
+cwe = "CWE-250"
 
 [[rule]]
 id = "TF001"
@@ -1554,6 +1631,7 @@ severity = "Critical"
 remediation = "Set the 'acl' property of 'aws_s3_bucket' to 'private', not 'public-read' or 'public-read-write'."
 pattern = "acl\\s*=\\s*\"(public-read|public-read-write)\""
 file_pattern = "*.tf"
+cwe = "CWE-732"
 
 [[rule]]
 id = "CFG001"
@@ -1566,6 +1644,7 @@ file_pattern = "*.ini"
 # -------------------------------------------
 # SECTION: ADDITIONAL SECURITY RULES
 # -------------------------------------------
+cwe = "CWE-798"
 
 [[rule]]
 id = "PY500"
@@ -1575,6 +1654,7 @@ confidence = "Medium"
 remediation = "Avoid dynamic code execution. Consider safer alternatives or validate input thoroughly."
 ast_match = "Call(func.attr=exec, func.value.id=builtins)"
 file_pattern = "*.py"
+cwe = "CWE-94"
 
 [[rule]]
 id = "SEC501"
@@ -1590,6 +1670,7 @@ pattern = "\\bexec\\b\\s*\\("
 # Exclude: quoted "exec()" or 'exec()' — documentation text, not actual calls
 exclude_pattern = "^\\s*(?:async\\s+)?def\\s|^\\s*#|\\.exec\\s*\\(|`exec\\(|\"exec\\(\\)\"|'exec\\(\\)'"
 file_pattern = "*.py"
+cwe = "CWE-94"
 
 [[rule]]
 id = "PY507"
@@ -1601,6 +1682,7 @@ remediation = "Validate inputs before passing to .exec(). Use parameterized quer
 # Pattern-based detection of .exec() generates 100% FPs: fires on ORM sessions
 # (Session.exec(select(...))), docstring code examples, and function definitions.
 file_pattern = "*.py"
+cwe = "CWE-94"
 
 [[rule]]
 id = "WEB508"
@@ -1610,6 +1692,7 @@ confidence = "Medium"
 remediation = "Remove unsafe-inline from CSP directives and use nonces or hashes instead."
 pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
 file_pattern = "*.conf"
+cwe = "CWE-1021"
 
 [[rule]]
 id = "CFG510"
@@ -1618,6 +1701,7 @@ severity = "Low"
 remediation = "Store AWS credentials securely using IAM roles or environment variables."
 pattern = "aws_access_key_id\\s*[:=]\\s*['\\\"][A-Za-z0-9/+=]{16,}"
 file_pattern = "*.ini"
+cwe = "CWE-798"
 
 [[rule]]
 id = "WEB512"
@@ -1626,6 +1710,7 @@ severity = "Medium"
 remediation = "Store authentication tokens securely and avoid hardcoding in configuration files."
 pattern = "Authorization\\s*:\\s*\\bBearer\\b"
 file_pattern = "*.conf"
+cwe = "CWE-798"
 
 [[rule]]
 id = "WEB514"
@@ -1634,6 +1719,7 @@ severity = "Medium"
 remediation = "Set X-Frame-Options to DENY or SAMEORIGIN to prevent clickjacking attacks."
 pattern = "X-Frame-Options\\s*:\\s*ALLOW"
 file_pattern = "*.conf"
+cwe = "CWE-1021"
 
 [[rule]]
 id = "SER522"
@@ -1641,6 +1727,7 @@ description = "Object serialization function detected."
 severity = "Low"
 remediation = "Ensure serialized data comes from trusted sources to prevent deserialization attacks."
 # No ast_match/pattern — triggered only by taint engine (SK007)
+cwe = "CWE-502"
 
 [[rule]]
 id = "FILE526"
@@ -1649,6 +1736,7 @@ severity = "Medium"
 remediation = "Implement proper file access controls and validate file paths."
 ast_match = "Attribute(attr=read, value.id=open)"
 file_pattern = "*.py"
+cwe = "CWE-22"
 
 [[rule]]
 id = "PERM527"
@@ -1657,6 +1745,7 @@ severity = "High"
 remediation = "Use more restrictive permissions. Consider 644 for files and 755 for directories."
 pattern = "chmod\\s+777"
 file_pattern = "*.sh"
+cwe = "CWE-732"
 
 [[rule]]
 id = "FILE528"
@@ -1666,6 +1755,7 @@ confidence = "Medium"
 remediation = "Accessing /etc/passwd should be done through proper system APIs with authorization."
 pattern = "open\\s*\\(\\s*['\\\"]/etc/passwd"
 file_pattern = "*.py"
+cwe = "CWE-200"
 
 [[rule]]
 id = "TEMP529"
@@ -1674,6 +1764,7 @@ severity = "Low"
 remediation = "Use mktemp without -u flag or mkstemp for secure temporary file creation."
 pattern = "mktemp\\s+-u"
 file_pattern = "*.sh"
+cwe = "CWE-377"
 
 [[rule]]
 id = "SSL531"
@@ -1682,6 +1773,7 @@ severity = "Medium"
 remediation = "Enable certificate verification to prevent man-in-the-middle attacks."
 pattern = "verify\\s*:\\s*false"
 file_pattern = "*.y*ml"
+cwe = "CWE-295"
 
 [[rule]]
 id = "WEB575"
@@ -1691,6 +1783,7 @@ confidence = "Medium"
 remediation = "Remove unsafe-inline from CSP directives and implement nonce-based or hash-based CSP."
 pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
 file_pattern = "*.conf"
+cwe = "CWE-1021"
 
 [[rule]]
 id = "SQL586"
@@ -1699,6 +1792,7 @@ severity = "Critical"
 confidence = "Medium"
 remediation = "Use parameterized queries instead of string formatting to prevent SQL injection."
 # No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
+cwe = "CWE-89"
 
 [[rule]]
 id = "SHELL602"
@@ -1708,6 +1802,7 @@ confidence = "Medium"
 remediation = "Use subprocess with argument arrays instead of shell command strings."
 pattern = "subprocess\\.(Popen|call)\\(.*shell\\s*=\\s*True"
 file_pattern = "*.py"
+cwe = "CWE-78"
 
 [[rule]]
 id = "CODE607"
@@ -1717,6 +1812,7 @@ confidence = "Medium"
 remediation = "Implement strict CSP without unsafe-inline to prevent XSS attacks."
 pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
 file_pattern = "*.conf"
+cwe = "CWE-1021"
 
 [[rule]]
 id = "SHELL631"
@@ -1725,6 +1821,7 @@ severity = "Critical"
 confidence = "Medium"
 remediation = "Use parameterized queries with placeholders instead of string concatenation."
 # No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
+cwe = "CWE-89"
 
 [[rule]]
 id = "CSP640"
@@ -1734,6 +1831,7 @@ confidence = "Medium"
 remediation = "Configure CSP without unsafe-inline and unsafe-eval directives."
 pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
 file_pattern = "*.conf"
+cwe = "CWE-1021"
 
 [[rule]]
 id = "PERM650"
@@ -1742,6 +1840,7 @@ severity = "Critical"
 confidence = "Medium"
 remediation = "Implement prepared statements and parameterized queries to prevent SQL injection."
 # No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
+cwe = "CWE-89"
 
 [[rule]]
 id = "CSP665"
@@ -1751,6 +1850,7 @@ confidence = "Medium"
 remediation = "Use nonce or hash-based CSP instead of unsafe-inline directive."
 pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
 file_pattern = "*.conf"
+cwe = "CWE-1021"
 
 [[rule]]
 id = "SHELL675"
@@ -1759,6 +1859,7 @@ severity = "Critical"
 confidence = "Medium"
 remediation = "Use ORM methods or prepared statements instead of string formatting in SQL queries."
 # No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
+cwe = "CWE-89"
 
 [[rule]]
 id = "SHELL689"
@@ -1767,6 +1868,7 @@ severity = "High"
 confidence = "Medium"
 remediation = "Use process execution without shell to avoid command injection vulnerabilities."
 # No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
+cwe = "CWE-78"
 
 [[rule]]
 id = "SQL693"
@@ -1775,6 +1877,7 @@ severity = "Critical"
 confidence = "Medium"
 remediation = "Implement parameterized queries to eliminate SQL injection risks."
 # No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
+cwe = "CWE-89"
 
 [[rule]]
 id = "NET705"
@@ -1784,6 +1887,7 @@ confidence = "Medium"
 remediation = "Enable SSL certificate verification to prevent man-in-the-middle attacks."
 pattern = "requests\\.(get|post|put|delete)\\(.*verify\\s*=\\s*False"
 file_pattern = "*.py"
+cwe = "CWE-295"
 
 [[rule]]
 id = "CRYPTO708"
@@ -1802,6 +1906,7 @@ file_pattern = "*.py"
 #   choice/randbelow — selection, not key generation
 #   variable names suggesting non-security context (index, delay, seed for ML)
 exclude_pattern = "np\\.random\\.|numpy\\.random\\.|len\\(|range\\(|\\b(index|idx|pos|offset|delay|sleep_|sleep|wait|_n|num_|seed|shape|size|dim|batch|epoch)\\b|_time\\b|_delay\\b|_wait\\b|random\\.choice|randbelow|input_shape|array_ops|benchmark"
+cwe = "CWE-338"
 
 [[rule]]
 id = "AUTH711"
@@ -1811,6 +1916,7 @@ confidence = "High"
 remediation = "Implement proper authentication mechanisms without hardcoded credentials."
 pattern = "(?i)(username|user)\\s*[:=]\\s*[\"']admin[\"']"
 file_pattern = "*.py"
+cwe = "CWE-798"
 
 [[rule]]
 id = "LDAP717"
@@ -1820,6 +1926,7 @@ confidence = "Medium"
 remediation = "Properly escape LDAP filter characters or use parameterized LDAP queries."
 pattern = "\\.search\\(.*filter.*%s"
 file_pattern = "*.py"
+cwe = "CWE-90"
 
 [[rule]]
 id = "XPATH720"
@@ -1829,6 +1936,7 @@ confidence = "Medium"
 remediation = "Use parameterized XPath queries or properly escape user input."
 pattern = "xpath\\(.*%s"
 file_pattern = "*.py"
+cwe = "CWE-643"
 
 [[rule]]
 id = "DESER723"
@@ -1838,6 +1946,7 @@ confidence = "High"
 remediation = "Never deserialize marshal bytecode from untrusted sources. Use JSON/protobuf for data exchange. For model serialization, use SavedModel format instead of custom bytecode paths."
 ast_match = "Call(func.value.id=marshal, func.attr=loads)"
 file_pattern = "*.py"
+cwe = "CWE-502"
 
 [[rule]]
 id = "DESER724"
@@ -1848,6 +1957,7 @@ remediation = "Never create functions from deserialized code objects. This is eq
 file_pattern = "*.py"
 # No pattern — triggered only by taint engine (SK_DESER724):
 # marshal.loads(raw) → code is tainted → FunctionType(code, globals()) fires this rule.
+cwe = "CWE-94"
 
 [[rule]]
 id = "PRIV726"
@@ -1857,6 +1967,7 @@ confidence = "Medium"
 remediation = "Avoid executing setuid binaries or implement proper privilege checks."
 pattern = "os\\.setuid\\("
 file_pattern = "*.py"
+cwe = "CWE-269"
 
 [[rule]]
 id = "RACE729"
@@ -1866,6 +1977,7 @@ confidence = "Low"
 remediation = "Use atomic file operations or proper locking mechanisms."
 pattern = "os\\.path\\.exists.*open\\("
 file_pattern = "*.py"
+cwe = "CWE-362"
 
 [[rule]]
 id = "INFO738"
@@ -1875,6 +1987,7 @@ confidence = "Low"
 remediation = "Implement generic error messages that don't reveal system information."
 pattern = "traceback\\.print_exc\\("
 file_pattern = "*.py"
+cwe = "CWE-200"
 
 [[rule]]
 id = "LOG741"
@@ -1887,6 +2000,7 @@ file_pattern = "*.py"
 # Only fires when data traced from request.GET/POST/CLI args/API responses
 # reaches a logging call. Internal framework objects and computed values
 # are never tainted → no false positives on framework internals.
+cwe = "CWE-117"
 
 [[rule]]
 id = "SESS744"
@@ -1897,6 +2011,7 @@ remediation = "Regenerate session IDs after authentication to prevent fixation a
 # Writing data to a session is NOT session fixation. Only flag direct session key assignment from request.
 pattern = "session\\.session_key\\s*=.*request\\."
 file_pattern = "*.py"
+cwe = "CWE-384"
 
 [[rule]]
 id = "CSRF747"
@@ -1906,6 +2021,7 @@ confidence = "Medium"
 remediation = "Implement proper CSRF tokens for state-changing operations."
 pattern = "@csrf_exempt"
 file_pattern = "*.py"
+cwe = "CWE-352"
 
 [[rule]]
 id = "HTTP750"
@@ -1915,6 +2031,7 @@ confidence = "Medium"
 remediation = "Validate and sanitize HTTP headers to prevent response splitting."
 pattern = "HttpResponse\\(.*\\\\r\\\\n"
 file_pattern = "*.py"
+cwe = "CWE-113"
 
 [[rule]]
 id = "UPLOAD753"
@@ -1924,6 +2041,7 @@ confidence = "Medium"
 remediation = "Implement file type validation and size limits for uploads."
 pattern = "request\\.FILES\\[.*\\]\\.save\\("
 file_pattern = "*.py"
+cwe = "CWE-434"
 
 [[rule]]
 id = "CACHE756"
@@ -1933,6 +2051,7 @@ confidence = "Low"
 remediation = "Validate cache keys and implement proper cache invalidation."
 pattern = "cache\\.set\\(.*request\\."
 file_pattern = "*.py"
+cwe = "CWE-444"
 
 [[rule]]
 id = "TIMING759"
@@ -1945,6 +2064,7 @@ file_pattern = "*.py"
 # Exclude null/empty checks: `if password is None or password == ""` is a presence check,
 # not a secret comparison. Also exclude `password != ""` style guards.
 exclude_pattern = "is None|== \"\"|== ''|!= \"\"|!= ''|^\\s*#"
+cwe = "CWE-208"
 
 [[rule]]
 id = "ENUM762"
@@ -1954,6 +2074,7 @@ confidence = "Low"
 remediation = "Return identical responses for valid and invalid usernames."
 pattern = "User\\.objects\\.get\\(username="
 file_pattern = "*.py"
+cwe = "CWE-204"
 
 [[rule]]
 id = "TOKEN771"
@@ -1966,6 +2087,7 @@ remediation = "Always include 'exp' claim in JWT payload: {'sub': user_id, 'exp'
 pattern = "jwt\\.encode\\s*\\("
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#|[\"']exp[\"']|datetime|timedelta"
+cwe = "CWE-613"
 
 [[rule]]
 id = "OAUTH774"
@@ -1977,6 +2099,7 @@ pattern = "oauth.*authorize.*"
 file_pattern = "*.py"
 # Public OAuth authorization URLs in string literals are DeveloperDefined endpoints, not missing state params
 exclude_pattern = "[\"']https?://.*oauth.*authorize|client_id="
+cwe = "CWE-352"
 
 [[rule]]
 id = "API777"
@@ -1986,6 +2109,7 @@ confidence = "Low"
 remediation = "Implement rate limiting on API endpoints to prevent abuse."
 pattern = "@app\\.route.*methods.*POST"
 file_pattern = "*.py"
+cwe = "CWE-770"
 
 [[rule]]
 id = "CORS780"
@@ -1995,6 +2119,7 @@ confidence = "Medium"
 remediation = "Restrict CORS origins to trusted domains only."
 pattern = "Access-Control-Allow-Origin\\s*:\\s*\\*"
 file_pattern = "*.py"
+cwe = "CWE-942"
 
 [[rule]]
 id = "HTTPS789"
@@ -2007,6 +2132,7 @@ file_pattern = "*settings*.py"
 # global_settings.py is a framework defaults file — False here is the intended default.
 # Deployments must override this in their project settings.
 exclude_file_pattern = "*global_settings*"
+cwe = "CWE-319"
 
 [[rule]]
 id = "COOKIE792"
@@ -2016,6 +2142,7 @@ confidence = "Medium"
 remediation = "Set secure and httponly flags on sensitive cookies."
 pattern = "set_cookie\\(.*secure=False"
 file_pattern = "*.py"
+cwe = "CWE-614"  # pattern keys on secure=False (missing Secure attribute); CWE-1004 covers HttpOnly only
 
 [[rule]]
 id = "ADMIN795"
@@ -2027,6 +2154,7 @@ pattern = "(?i)(admin|administrator).*password.*password"
 file_pattern = "*.py"
 # "class AdminPasswordChangeForm" is a Python class declaration — DeveloperDefined name, not a credential
 exclude_pattern = "^\\s*class\\s+"
+cwe = "CWE-798"
 
 [[rule]]
 id = "DEBUG798"
@@ -2036,6 +2164,7 @@ confidence = "Medium"
 remediation = "Disable debug mode and remove debug statements in production."
 pattern = "print\\(.*password\\|.*secret"
 file_pattern = "*.py"
+cwe = "CWE-489"
 
 [[rule]]
 id = "BACKUP801"
@@ -2049,6 +2178,7 @@ remediation = "Secure backup files and exclude them from web-accessible director
 pattern = "['\"][^'\"]*\\w\\.(bak|backup|old)['\"]"
 file_pattern = "*"
 exclude_file_pattern = "*.sh,*.rst,*.md,*.txt"
+cwe = "CWE-200"
 
 [[rule]]
 id = "CONFIG804"
@@ -2058,6 +2188,7 @@ confidence = "Low"
 remediation = "Change default configuration values before production deployment."
 pattern = "(?i)secret_key.*changeme"
 file_pattern = "*settings*.py"
+cwe = "CWE-16"
 
 [[rule]]
 id = "HASH807"
@@ -2070,6 +2201,7 @@ remediation = "For password storage use bcrypt, scrypt, or Argon2. SHA-256 witho
 ast_match = "Call(func.value.id=hashlib, func.attr=sha256)"
 file_pattern = "*.py"
 exclude_pattern = "fingerprint|checksum|digest|integrity|hash_file|file_hash|sha256_file|content_hash|benchmark|test|sample|example|demo"
+cwe = "CWE-327"
 
 [[rule]]
 id = "RAND810"
@@ -2078,6 +2210,7 @@ severity = "Medium"
 confidence = "Medium"
 remediation = "Use cryptographically secure random generators for security purposes."
 # No ast_match/pattern — triggered only by taint engine (SK008)
+cwe = "CWE-338"
 
 [[rule]]
 id = "SSRF_001"
@@ -2094,6 +2227,7 @@ file_pattern = "*.py"
 # For CLI args (parse_args taint source) flowing into format strings where only
 # path params vary, the engine may produce FPs. Those cases need per-sink
 # host-vs-path discrimination — a future enhancement.
+cwe = "CWE-918"
 
 [[rule]]
 id = "PATH813"
@@ -2108,6 +2242,7 @@ file_pattern = "*.py"
 #   os.path.join(module.__file__, '..')  — navigating relative to installed module
 #   os.path.join(os.path.dirname(__file__), ..)  — standard Python package path
 exclude_pattern = "__file__|module\\.__file__|dirname\\(__file__\\)|abspath.*dirname"
+cwe = "CWE-22"
 
 [[rule]]
 id = "SYMLINK816"
@@ -2119,6 +2254,7 @@ file_pattern = "*.py"
 # Pattern removed — SYMLINK816 is now taint-driven only (see taint_sink SK_SYMLINK001).
 # Pattern-based matching produced 100% FPs (capability detection, static file management).
 # Only fires when the symlink source argument is HttpRequest-tainted.
+cwe = "CWE-59"
 
 [[rule]]
 id = "PROC819"
@@ -2128,6 +2264,7 @@ confidence = "Medium"
 remediation = "Validate and sanitize all inputs to process execution functions."
 ast_match = "Call(func.value.id=os, func.attr=popen)"
 file_pattern = "*.py"
+cwe = "CWE-78"
 
 [[rule]]
 id = "IMPORT825"
@@ -2143,6 +2280,7 @@ file_pattern = "*.py"
 # Also exclude when the import name is from a known-safe source (self.LIB,
 # self.package) — these are class attributes set from validated plugin registries.
 exclude_pattern = "self\\.(LIB|package|base_class|module)|__import__\\(name\\)|six\\.|future\\."
+cwe = "CWE-94"
 
 [[rule]]
 id = "GETATTR828"
@@ -2156,6 +2294,7 @@ remediation = "Validate attribute names against an allowlist before passing to g
 # ORM model _meta (developer-defined schema), not user input. These generate high FP
 # rates in serializer/schema code across all ORM frameworks.
 exclude_file_pattern = "*pyct*,*serializer*,*schema*,*/pandas/core/*,pandas/core/*,*/pandas/io/*,pandas/io/*"
+cwe = "CWE-915"
 
 [[rule]]
 id = "SETATTR831"
@@ -2164,6 +2303,7 @@ severity = "Medium"
 confidence = "Medium"
 remediation = "Validate attribute names and values before setting."
 # No ast_match/pattern — triggered only by taint engine (SK005)
+cwe = "CWE-915"
 
 [[rule]]
 id = "DELATTR834"
@@ -2172,6 +2312,7 @@ severity = "Medium"
 confidence = "Medium"
 remediation = "Validate attribute names before deletion."
 # No ast_match/pattern — triggered only by taint engine (SK006)
+cwe = "CWE-915"
 
 [[rule]]
 id = "GLOBALS843"
@@ -2185,6 +2326,7 @@ remediation = "Never pass globals() to exec/eval with untrusted code. Dynamic mo
 # codec registration) and generates high FP rates in framework code.
 pattern = "exec[\\s(].*globals\\s*\\(\\)|eval[\\s(].*globals\\s*\\(\\)"
 file_pattern = "*.py"
+cwe = "CWE-94"
 
 [[rule]]
 id = "FORMAT864"
@@ -2193,6 +2335,7 @@ severity = "Medium"
 confidence = "Medium"
 remediation = "Use safe string formatting methods and validate format strings."
 # No ast_match/pattern — triggered only by taint engine (SK009)
+cwe = "CWE-134"
 
 [[rule]]
 id = "REGEX870"
@@ -2210,6 +2353,7 @@ file_pattern = "*.py"
 # \\w+ only matches [a-zA-Z0-9_] so alternation between dot and word chars is non-overlapping
 # → no catastrophic backtracking. Exclude when inner group uses \\w or \\d only.
 exclude_pattern = "\\\\w\\+\\.\\)\\+|\\\\d\\+\\.\\)\\+|\\\\w\\+\\.\\)\\*"
+cwe = "CWE-1333"
 
 [[rule]]
 id = "OPEN1149"
@@ -2217,8 +2361,9 @@ description = "User-controlled path passed to open() — potential path traversa
 severity = "High"
 confidence = "High"
 remediation = "Validate and sanitize file paths. Use os.path.realpath() and verify the result stays within the expected directory."
 # No ast_match — triggered ONLY by taint engine (SK003).
 # Taint flow: request.* → variable → open(variable)
+cwe = "CWE-22"
 
 [[rule]]
 id = "SSTI001"
@@ -2229,6 +2372,7 @@ remediation = "Never pass user input as the template string. Use render_template
 file_pattern = "*.py"
 # Triggered by taint engine (SK_SSTI001: render_template_string, SK_SSTI002: env.from_string).
 # render_template_string(user_template) or env.from_string(user_template).render() → Jinja2 RCE.
+cwe = "CWE-94"
 
 [[rule]]
 id = "ORM002"
@@ -2240,6 +2384,7 @@ file_pattern = "*.py"
 # Triggered by taint engine: SK_ORMRAW001 (raw), SK_ORMORDER001 (order_by), SK_ORMEXTRA001 (extra).
 # CVE-2021-35042: order_by(user_input) allows column name injection.
 # CVE-2022-28346/28347: extra(**user_dict) allows SQL injection via crafted kwargs.
+cwe = "CWE-89"
 
 [[rule]]
 id = "DESER725"
@@ -2250,6 +2395,7 @@ remediation = "Never pass untrusted data to jsonpickle.decode(). jsonpickle rest
 pattern = "jsonpickle\\.decode\\s*\\("
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-502"
 
 [[rule]]
 id = "DESER726"
@@ -2260,6 +2406,7 @@ remediation = "Never pass untrusted data to dill.loads(). dill extends pickle wi
 pattern = "dill\\.loads\\s*\\("
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-502"
 
 [[rule]]
 id = "TLS001"
@@ -2276,6 +2423,7 @@ file_pattern = "*.py"
 #   Bare verify=False on its own line (fragment of a multi-line pandas call)
 #   Docstring text describing the verify parameter
 exclude_pattern = "^\\s*#|\\baxis\\s*=|_mgr\\.|_block|block_manager|Pass\\s+verify|^\\s+verify=False,?\\s*$|take\\s*\\(|indexer[^=]*verify|assumed|codes equal|parameter|description"
+cwe = "CWE-295"
 
 [[rule]]
 id = "SSH001"
@@ -2286,6 +2434,7 @@ remediation = "Use RejectPolicy() or load known_hosts with client.load_system_ho
 pattern = "AutoAddPolicy\\s*\\(\\s*\\)"
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-295"
 
 [[rule]]
 id = "JWT001"
@@ -2296,6 +2445,7 @@ remediation = "Never set verify_signature=False or algorithms=['none'] in jwt.de
 pattern = "verify_signature[\"']?\\s*:\\s*False|[\"']none[\"']\\s*.*algorithm|algorithms\\s*=\\s*\\[[\"']none[\"']"
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-347"  # Improper Verification of Cryptographic Signature (more specific child of CWE-345)
 
 [[rule]]
 id = "ZIPSLIP001"
@@ -2311,6 +2461,7 @@ file_pattern = "*.py"
 #   Series.str.extractall — same, string regex method
 exclude_pattern = "^\\s*#|filter\\s*=|str\\.extractall|strings.*extractall|accessor.*extractall|\\.str\\."
 # Low confidence: legitimate uses exist when archives are trusted/developer-controlled.
+cwe = "CWE-22"
 
 [[rule]]
 id = "XXE001"
@@ -2323,6 +2474,7 @@ file_pattern = "*.py"
 # lxml's default parser resolves external entities. Attacker-controlled XML can read
 # arbitrary files (/etc/passwd) or trigger SSRF to internal services via entity references.
 exclude_pattern = "^\\s*#|defusedxml|resolve_entities\\s*=\\s*False"
+cwe = "CWE-611"
 
 [[rule]]
 id = "ORM001"
@@ -2339,6 +2491,7 @@ exclude_pattern = "^\\s*#"
 # Exclude migration/backend files: f-strings in migrations contain hardcoded schema
 # identifiers, not user input. Backend files are ORM infrastructure, not application code.
 exclude_file_pattern = "*/migrations/*,*/alembic/*,*/backends/*"
+cwe = "CWE-89"
 
 [[rule]]
 id = "FLASK001"
@@ -2349,6 +2502,7 @@ remediation = "Never run Flask with debug=True in production. The Werkzeug debug
 pattern = "app\\.run\\s*\\(.*\\bdebug\\s*=\\s*True|app\\.debug\\s*=\\s*True|[\"']DEBUG[\"']\\s*:\\s*True"
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-489"
 
 [[rule]]
 id = "AI002"
@@ -2357,6 +2511,7 @@ severity = "High"
 remediation = "Remove hardcoded API keys and load them from environment variables or a secure secrets manager."
 pattern = "(?i)sk-ant-api[0-9]*-[A-Za-z0-9_-]{20,}"
 file_pattern = ".*\\.py"
+cwe = "CWE-798"
 
 [[rule]]
 id = "PY306_CACHE"
@@ -2366,6 +2521,7 @@ confidence = "High"
 remediation = "Replace pickle-based cache serialization with JSON or msgpack. If pickle is required, authenticate the cache channel and use HMAC to verify payload integrity before deserializing."
 pattern = "pickle\\.loads\\s*\\("
 file_pattern = "*cache/backends/*.py"
+cwe = "CWE-502"
 
 [[rule]]
 id = "SHELL_BYPASS001"
@@ -2376,6 +2532,7 @@ remediation = "Never pass user-controlled data as the -c argument to bash/sh/cmd
 pattern = "subprocess\\.(run|Popen|call)\\s*\\(\\s*\\[\\s*[\"'](bash|sh|zsh|cmd\\.exe|powershell)[\"']\\s*,\\s*[\"']-c[\"']"
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-78"
 
 [[rule]]
 id = "OPEN_REDIRECT001"
@@ -2391,6 +2548,7 @@ file_pattern = "*.py"
 # Exclude Django's own framework files — they validate redirects with is_safe_url() /
 # url_has_allowed_host_and_scheme() before calling redirect(), but the call is safe.
 exclude_file_pattern = "*/django/contrib/*,django/contrib/*,*/django/views/*,django/views/*"
+cwe = "CWE-601"
 
 [[rule]]
 id = "PLAIN_PWD001"
@@ -2401,6 +2559,7 @@ remediation = "Use Django's make_password() or set_password() before storing. Ne
 file_pattern = "*.py"
 # No pattern — triggered only by taint engine (SK_PLAIN_PWD001).
 # Taint flow: request.POST['password'] → Model.objects.create(password=tainted)
+cwe = "CWE-256"
 
 [[rule]]
 id = "DJANGO_DEBUG001"
@@ -2414,6 +2573,7 @@ file_pattern = "*.py"
 # Flask app.run(debug=True) is covered separately by FLASK001.
 # Different from FLASK001: this is a settings file value, not runtime configuration.
 exclude_file_pattern = "*/tests/*,*/test_*.py"
+cwe = "CWE-489"
 
 [[rule]]
 id = "RUAMEL_UNSAFE001"
@@ -2424,6 +2584,7 @@ remediation = "Use YAML() (round-trip, safe by default) or YAML(typ='safe'). typ
 pattern = "YAML\\s*\\(\\s*typ\\s*=\\s*[\"']unsafe[\"']\\s*\\)"
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-502"
 
 [[rule]]
 id = "ENV_URL001"
@@ -2437,6 +2598,7 @@ file_pattern = "*.py"
 # The taint engine (SSRF_001) catches the downstream HTTP call when env-var URL propagates to requests/httpx.
 pattern = "os\\.environ(?:\\.get)?\\s*\\([\"'][A-Z_]*URL[A-Z_]*[\"']"
 exclude_pattern = "^\\s*#|allowlist|whitelist|validate|urlparse\\.scheme|startswith\\s*\\([\"']https"
+cwe = "CWE-918"
 
 [[rule]]
 id = "COOKIE_FILE001"
@@ -2448,6 +2610,7 @@ file_pattern = "*.py"
 # No pattern — triggered by taint engine (SK_COOKIE_JAR001):
 # os.environ["SEMGREP_COOKIES_PATH"] → MozillaCookieJar(path) → cookies.load()
 # Allows attacker-controlled cookies to be injected into all HTTP requests.
+cwe = "CWE-73"  # External Control of File Name or Path: an environment variable selects the cookie jar file
 
 [[rule]]
 id = "ENV_GIT_URL001"
@@ -2462,6 +2625,7 @@ file_pattern = "*.py"
 # This rule provides higher-confidence CI-specific context for the same finding.
 pattern = "CI_MERGE_REQUEST_PROJECT_URL|CI_JOB_TOKEN.*git.*fetch|git.*fetch.*CI_"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-918"
 
 [[rule]]
 id = "DESER_JOBLIB001"
@@ -2472,6 +2636,7 @@ remediation = "Never load joblib files from untrusted sources. joblib uses pickl
 pattern = "joblib\\.load\\s*\\("
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-502"
 
 [[rule]]
 id = "DESER_NUMPY001"
@@ -2482,14 +2647,15 @@ remediation = "Use allow_pickle=False (default in NumPy 1.17+). Only load .npy/.
 pattern = "np\\.load\\s*\\(.*allow_pickle\\s*=\\s*True|numpy\\.load\\s*\\(.*allow_pickle\\s*=\\s*True"
 file_pattern = "*.py"
 exclude_pattern = "^\\s*#"
+cwe = "CWE-502"
 
 [[rule]]
 id = "DESER_TORCH001"
 description = "torch.load() uses pickle by default — loading untrusted PyTorch model files → RCE."
 severity = "Critical"
 confidence = "High"
-remediation = "Use torch.load(..., weights_only=True) (PyTorch 2.0+) to restrict deserialization. Never load model files from untrusted sources. For model exchange, use ONNX or safetensors format."
+remediation = "Use torch.load(..., weights_only=True) (PyTorch 2.0+) to restrict deserialization. Never load model files from untrusted sources."
 pattern = "torch\\.load\\s*\\("
 file_pattern = "*.py"
-# weights_only=True is the safe version — exclude it
 exclude_pattern = "^\\s*#|weights_only\\s*=\\s*True"
+cwe = "CWE-502"