Log-Analyzer · rh-rahulshetty · May 22, 2026 · May 22, 2026 · May 22, 2026
@@ -1,6 +1,22 @@
 # Stage 0: grab uv binary from official uv image
 FROM ghcr.io/astral-sh/uv:latest AS uvbin
 
+# Stage 1: build the drain3-rs wheel here so the Rust toolchain never reaches the final image
+FROM python:3.11-slim AS rustbuilder
+# pipefail: a failed rustup download must fail the build instead of piping nothing into sh
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential curl git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Minimal profile installs only rustc, cargo and rust-std (no docs, clippy or rustfmt)
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN pip install --no-cache-dir maturin
+# NOTE(review): maturin and the git ref are unpinned, so rebuilds can drift — consider pinning both
+RUN pip wheel --no-cache-dir git+https://github.com/Log-Analyzer/Drain3-rs.git -w /wheels
+
 # Final image: start from Red Hat UBI python image
 # More details: https://catalog.redhat.com/en/software/containers/ubi9/python-311/63f764b03f0b02a2e2d63fff#overview
 FROM registry.access.redhat.com/ubi9/python-311:9.7
@@ -30,17 +46,27 @@ RUN java -version && python3 --version && pip3 --version && uv --version
 WORKDIR /opt/app-root/src
 
 # Install all Python deps in one layer, then remove build-only packages
+COPY --from=rustbuilder /wheels/drain3*.whl /tmp/
 COPY --chmod=755 requirements.txt .
-RUN uv venv --python python3.11 \
- && uv pip install --no-cache-dir -r requirements.txt \
+RUN grep -v "^drain3" requirements.txt > /tmp/reqs_no_drain3.txt \
+ && uv venv --python python3.11 \
+ && uv pip install --no-cache-dir /tmp/drain3*.whl \
+ && uv pip install --no-cache-dir -r /tmp/reqs_no_drain3.txt \
  && uv pip install --no-cache-dir \
       torch==2.2.2+cpu \
-      --index-url https://download.pytorch.org/whl/cpu
+      --index-url https://download.pytorch.org/whl/cpu \
+ && rm /tmp/drain3*.whl /tmp/reqs_no_drain3.txt
 
 # Install logan package
 COPY --chmod=755 . .
 RUN uv pip install --no-cache-dir . --no-deps
 
+# Pre-download DuckDB WASM assets so first-run needs no network access; g+rwX keeps the cache group-writable without marking data files executable
+RUN HOME=/opt/app-root/src .venv/bin/python -c \
+    "from logan.store.duckdb_assets import ensure_duckdb_assets; ensure_duckdb_assets()" \
+ && chown -R 1001:0 /opt/app-root/src/.cache \
+ && chmod -R g+rwX /opt/app-root/src/.cache
+
 # Clean up build-only packages
 RUN yum remove -y make gcc python3-devel \
  && yum clean all \

@@ -243,11 +243,7 @@ def analyze(files, glob, time_range, output_dir, debug_mode, process_all_files,
     click.echo(click.style("\nStep 2: Generating log templates...", fg="cyan"))
     drain_config_path = os.path.join(os.path.dirname(__file__), 'drain', 'drain3.ini')
     templatizer = Templatizer(debug_mode=debug_mode_str, config_path=drain_config_path)
-    templatizer.miner(
-        preprocessing_obj.df,
-        output_dir,
-        os.path.join(output_dir, "test_templates", "tm-test.templates.json")
-    )
+    templatizer.miner(preprocessing_obj.df, output_dir)
     click.echo(click.style("  Templates generated successfully", fg="green"))
 
     # Step 3: Anomaly detection

@@ -1,15 +1,15 @@
 [SNAPSHOT]
-snapshot_interval_minutes = 1
+snapshot_interval_minutes = 60
 compress_state = False
 
 [MASKING]
 masking = [
-          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)", "mask_with": "ID"},
-          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"},
-          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"},
-          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"},
-          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)", "mask_with": "HEX"},
-          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "mask_with": "NUM"},
+          {"regex_pattern":"\\b(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))\\b", "mask_with": "ID"},
+          {"regex_pattern":"\\b(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})\\b", "mask_with": "IP"},
+          {"regex_pattern":"\\b([0-9a-f]{6,} ?){3,}\\b", "mask_with": "SEQ"},
+          {"regex_pattern":"\\b([0-9A-F]{4} ?){4,}\\b", "mask_with": "SEQ"},
+          {"regex_pattern":"\\b(0x[a-f0-9A-F]+)\\b", "mask_with": "HEX"},
+          {"regex_pattern":"\\b([\\-\\+]?\\d+)\\b", "mask_with": "NUM"},
           {"regex_pattern":"(?<=executed cmd )(\".+?\")", "mask_with": "CMD"}
           ]
 mask_prefix = <:
@@ -26,5 +26,5 @@ max_clusters = 50000
 extra_delimiters = ["_"]
 
 [PROFILING]
-enabled = True
+enabled = False
 report_sec = 30
@@ -5,10 +5,24 @@
 import time
 from drain3.template_miner import TemplateMiner
 from drain3.template_miner_config import TemplateMinerConfig
-from drain3.file_persistence import FilePersistence
+from pandarallel import pandarallel
 
 from logan.store.store import LogStore
 
+_pandarallel_initialized = False
+_pandarallel_disabled = False
+
+def _ensure_pandarallel():
+    global _pandarallel_initialized, _pandarallel_disabled
+    if _pandarallel_initialized:
+        return
+    if os.environ.get("LOGAN_DISABLE_PANDARALLEL") == "1":
+        _pandarallel_disabled = True
+        _pandarallel_initialized = True
+        return
+    pandarallel.initialize(progress_bar=False, nb_workers=os.cpu_count() or 2)
+    _pandarallel_initialized = True
+
 class Templatizer:
     """
     The Templatizer class is responsible for mining log templates using the DRAIN3 algorithm.
@@ -24,7 +38,7 @@ class Templatizer:
         compute_drain_statistics(time, output_dir):
             Logs and stores the time taken for the DRAIN3 template mining process.
 
-        miner(df, output_dir, template):
+        miner(df, output_dir):
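-            Mines log templates from the given DataFrame and saves the templates to a specified path.
+            Mines log templates from the given DataFrame without persisting Drain3 state to a template file.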
     """
 
@@ -67,7 +81,7 @@ def compute_drain_statistics(self, time_taken: float, output_dir: str):
         with open(os.path.join(output_dir, "metrics", "drain.json"), 'w') as writer:
             writer.write(json.dumps(metrics, indent=4))
 
-    def miner(self, df, output_dir: str, template: str):
+    def miner(self, df, output_dir: str):
         """
         Apply the DRAIN3 template mining algorithm to the given DataFrame.
 
@@ -77,7 +91,6 @@ def miner(self, df, output_dir: str, template: str):
         Args:
             df (pd.DataFrame): DataFrame containing log data with a 'truncated_log' column for mining.
             output_dir (str): The directory where output files, such as statistics, will be saved.
-            template (str): The file path where the mined templates will be stored.
         """
         # Record the start time of the DRAIN3 mining process
         start_time = time.time()
@@ -87,11 +100,8 @@ def miner(self, df, output_dir: str, template: str):
         config = TemplateMinerConfig()
         config.load(self.config_path)
 
-        # Set up file persistence to store the learned templates in the specified template file
-        mem_persistence = FilePersistence(template)
-
-        # Initialize the TemplateMiner with the loaded configuration and file persistence
-        template_miner_temporary = TemplateMiner(mem_persistence, config)
+        # Initialize the TemplateMiner with no persistence (single-shot analysis, no disk I/O)
+        template_miner_temporary = TemplateMiner(None, config)
 
         # Preserve original log text before Drain3 masking overwrites it
         df["original_text"] = df["text"].astype(str)
@@ -103,16 +113,20 @@ def miner(self, df, output_dir: str, template: str):
         try:
             test_ids = []
             template_strs = []
-            variables_list = []
             for log in df["truncated_log"].values:
                 result = template_miner_temporary.add_log_message(log)
                 test_ids.append(result['cluster_id'])
-                tmpl = result.get('template_mined', '')
-                template_strs.append(tmpl)
-                variables_list.append(LogStore.extract_variables(log, tmpl))
+                template_strs.append(result.get('template_mined', ''))
             df["test_ids"] = test_ids
             df["template_str"] = template_strs
-            df["variables"] = [json.dumps(v) for v in variables_list]
+
+            # Extract variables in parallel — embarrassingly parallel, independent of drain order
+            _ensure_pandarallel()
+            apply_fn = df.apply if _pandarallel_disabled else df.parallel_apply
+            df["variables"] = apply_fn(
+                lambda row: json.dumps(LogStore.extract_variables(row["truncated_log"], row["template_str"])),
+                axis=1,
+            )
 
             if (self.debug_mode == "true"):
                 template_log_dict = df.groupby("test_ids")["truncated_log"].agg(list).to_dict()

@@ -206,10 +206,7 @@ def _run_drain():
                 os.path.dirname(os.path.dirname(__file__)), "drain", "drain3.ini"
             )
             templatizer = Templatizer(debug_mode=debug_str, config_path=drain_config)
-            templatizer.miner(
-                df, output_dir,
-                os.path.join(output_dir, "test_templates", "tm-test.templates.json"),
-            )
+            templatizer.miner(df, output_dir)
         return templatizer.df
 
     templatized_df = await loop.run_in_executor(None, _run_drain)
@@ -368,10 +365,7 @@ def _run_drain():
                 os.path.dirname(os.path.dirname(__file__)), "drain", "drain3.ini"
             )
             templatizer = Templatizer(debug_mode="true", config_path=drain_config)
-            templatizer.miner(
-                df, output_dir,
-                os.path.join(output_dir, "test_templates", "tm-test.templates.json"),
-            )
+            templatizer.miner(df, output_dir)
 
             # Extract template patterns from Drain3's internal clusters
             config = TemplateMinerConfig()

@@ -19,5 +19,5 @@ transformers==4.56.2
 openpyxl==3.1.4
 numpy==1.26.4
 setuptools==70.0.0
-drain3==0.9.11
+drain3 @ git+https://github.com/Log-Analyzer/Drain3-rs.git
 click==8.3.1