diff --git a/.idea/csv-editor.xml b/.idea/csv-editor.xml
new file mode 100644
index 00000000..dee6c593
--- /dev/null
+++ b/.idea/csv-editor.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/CODE/Logicytics.py b/CODE/Logicytics.py
index 80c59791..3149d645 100644
--- a/CODE/Logicytics.py
+++ b/CODE/Logicytics.py
@@ -441,6 +441,7 @@ def __and_log(directory: str, name: str):
     log.debug(
         f"Zipping directory '{directory}' with name '{name}' under action '{ACTION}'"
     )
+    # noinspection PyUnreachableCode
    zip_values = file_management.Zip.and_hash(
         directory,
         name,
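For reference, `file_management.Zip.and_hash` is Logicytics' internal helper and its real signature is not shown in this patch; a minimal stdlib sketch of the same zip-then-hash idea (names hypothetical):

import hashlib
import zipfile
from pathlib import Path

def zip_and_hash(directory: str, name: str) -> tuple[str, str]:
    # Archive the directory, then fingerprint the archive for integrity checks.
    archive = Path(f"{name}.zip")
    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as zf:
        for file in Path(directory).rglob("*"):
            if file.is_file():
                zf.write(file, file.relative_to(directory))
    sha256 = hashlib.sha256(archive.read_bytes()).hexdigest()
    return str(archive), sha256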
diff --git a/CODE/config.ini b/CODE/config.ini
index 939b4e6a..8395bd55 100644
--- a/CODE/config.ini
+++ b/CODE/config.ini
@@ -26,8 +26,8 @@ save_preferences = true
 [System Settings]
 # Do not play with these settings unless you know what you are doing
 # Dev Mode allows a safe way to modify these settings!!
-version = 3.5.1
-files = "bluetooth_details.py, bluetooth_logger.py, browser_miner.ps1, cmd_commands.py, config.ini, dir_list.py, dump_memory.py, event_log.py, Logicytics.py, log_miner.py, media_backup.py, netadapter.ps1, network_psutil.py, packet_sniffer.py, property_scraper.ps1, registry.py, sensitive_data_miner.py, ssh_miner.py, sys_internal.py, tasklist.py, tree.ps1, vulnscan.py, wifi_stealer.py, window_feature_miner.ps1, wmic.py, logicytics\Checks.py, logicytics\Config.py, logicytics\Execute.py, logicytics\FileManagement.py, logicytics\Flag.py, logicytics\Get.py, logicytics\Logger.py, logicytics\User_History.json.gz, vulnscan\SenseMini.3n3.pth, vulnscan\vectorizer.3n3.pkl"
+version = 3.6.0
+files = "bluetooth_details.py, bluetooth_logger.py, browser_miner.ps1, cmd_commands.py, config.ini, dir_list.py, dump_memory.py, encrypted_drive_audit.py, event_log.py, Logicytics.py, log_miner.py, media_backup.py, netadapter.ps1, network_psutil.py, packet_sniffer.py, property_scraper.ps1, registry.py, sensitive_data_miner.py, ssh_miner.py, sys_internal.py, tasklist.py, tree.ps1, usb_history.py, vulnscan.py, wifi_stealer.py, window_feature_miner.ps1, wmic.py, logicytics\Checks.py, logicytics\Config.py, logicytics\Execute.py, logicytics\FileManagement.py, logicytics\Flag.py, logicytics\Get.py, logicytics\Logger.py, logicytics\User_History.json.gz, vulnscan\Model_SenseMacro.4n1.pth"
 # If you forked the project, change the USERNAME to your own to use your own fork as update material,
 # I dont advise doing this however
 config_url = https://raw.githubusercontent.com/DefinetlyNotAI/Logicytics/main/CODE/config.ini
@@ -100,93 +100,15 @@ timeout = 10
 max_retry_time = 30
 ###################################################
+
 [VulnScan Settings]
-# Following extensions to be skipped by the model
-# Format: comma-separated list with dots (e.g., .exe, .dll)
-unreadable_extensions = .exe, .dll, .so, .zip, .tar, .gz, .7z, .rar, .jpg, .jpeg, .png, .gif, .bmp, .tiff, .webp, .mp3, .wav, .flac, .aac, .ogg, .mp4, .mkv, .avi, .mov, .wmv, .flv, .pdf, .doc, .docx, .xls, .xlsx, .ppt, .pptx, .odt, .ods, .odp, .bin, .dat, .iso, .class, .pyc, .o, .obj, .sqlite, .db, .ttf, .otf, .woff, .woff2, .lnk, .url
-# In MB, max file size that the model is allowed to scan, if commented out disables the limit, you can also just say None
-max_file_size_mb = None
-# Max workers to be used, either integer or use auto to make it decide the best value
+# Max characters of text from each file to analyze. Set an integer or None to disable truncation.
+text_char_limit = None
+# Max workers to be used, either integer or use "auto" to make it decide the best value
 max_workers = auto
-
-[VulnScan.generate Settings]
-# The following settings are for the Generate module for fake training data
-extensions = .txt, .log, .md, .csv, .json, .xml, .html, .yaml, .ini, .pdf, .docx, .xlsx, .pptx
-save_path = PATH
-
-# Options include:
-# 'Sense' - Generates 50k files, each 25KB in size.
-# 'SenseNano' - Generates 5 files, each 5KB in size.
-# 'SenseMacro' - Generates 1m files, each 10KB in size.
-# 'SenseMini' - Generates 10k files, each 10KB in size.
-# 'SenseCustom' - Uses custom size settings from the configuration file.
-code_name = SenseMini
-
-# This allows more randomness in the file sizes, use 0 to disable
-# this is applied randomly every time a file is generated
-# Variation is applied in the following way:
-# size +- (size */ variation) where its random weather to add or subtract and divide or multiply
-size_variation = 0.1
-
-# Set to SenseCustom to use below size settings
-min_file_size = 5KB
-max_file_size = 50KB
-
-# Chances for the following data types in files:
-# 0.0 - 1.0, the rest will be for pure data
-full_sensitive_chance = 0.07
-partial_sensitive_chance = 0.2
-
-[VulnScan.vectorizer Settings]
-# The following settings are for the Vectorizer module for vectorizing data
-# Usually it automatically vectorizes data, but this is for manual vectorization
-
-# We advise to use this vectorization, although not knowing the vectorizer is not advised
-# as this may lead to ValueErrors due to different inputs
-# Use the vectorizer supplied for any v3 model on SenseMini
-
-# The path to the data to vectorize, either a file or a directory
-data_path = PATH
-# The path to save the vectorized data - It will automatically be appended '\Vectorizer.pkl'
-# Make sure the path is a directory, and it exists
-output_path = PATH
-
-# Vectorizer to use, options include:
-# tfidf or count - The code for the training only supports tfidf - we advise to use tfidf
-vectorizer_type = tfidf
-
-[VulnScan.train Settings]
-# The following settings are for the Train module for training models
-# NeuralNetwork seems to be the best choice for this task
-# Options: "NeuralNetwork", "LogReg",
-# "RandomForest", "ExtraTrees", "GBM",
-# "XGBoost", "DecisionTree", "NaiveBayes"
-model_name = NeuralNetwork
-
-# General Training Parameters
-epochs = 10
-batch_size = 32
-learning_rate = 0.001
-use_cuda = true
-
-# Paths to train and save data
-train_data_path = PATH
-# If all models are to be trained, this is the path to save all models,
-# and will be appended with the model codename and follow naming convention
-save_model_path = PATH
-
-[VulnScan.study Settings]
-# Here is the basics of the study module
-# This is useful to generate graphs and data that may help in understanding the model
-# Everything is found online pre-studied, so this is not necessary
-# But it is useful for understanding the model locally
-# All files be saved here, and can't be changed, PATH is "NN features/"
-
-# This is the path to the model, and the vectorizer
-model_path = PATH
-vectorizer_path = PATH
-# Number of features to visualise in the SVG Bar graph, maximum is 3000 due to limitations
-# Placing -1 will visualise first 3000 features. Bar will be a color gradient heatmap.
-number_of_features = -1
+# Sensitivity threshold (0.0–1.0) for the model to flag content as sensitive
+threshold = 0.6
+# Paths for required files
+model = vulnscan/Model_SenseMacro.4n1.pth
 ##################################################
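A minimal sketch of consuming the new [VulnScan Settings] keys with plain configparser semantics; vulnscan.py itself goes through Logicytics' own `config` object, so treat this as illustration rather than the project's API:

from configparser import ConfigParser

cfg = ConfigParser()
cfg.read("config.ini")

raw_limit = cfg.get("VulnScan Settings", "text_char_limit", fallback="None")
text_char_limit = None if raw_limit in ("None", "") else int(raw_limit)
threshold = cfg.getfloat("VulnScan Settings", "threshold", fallback=0.6)
model_path = cfg.get("VulnScan Settings", "model", fallback="vulnscan/Model_SenseMacro.4n1.pth")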
diff --git a/CODE/encrypted_drive_audit.py b/CODE/encrypted_drive_audit.py
new file mode 100644
index 00000000..290281bf
--- /dev/null
+++ b/CODE/encrypted_drive_audit.py
@@ -0,0 +1,106 @@
+import datetime
+import getpass
+import os
+import platform
+import shutil
+import subprocess
+from pathlib import Path
+
+from logicytics import check, log
+
+
+def now_iso():
+    return datetime.datetime.now().astimezone().isoformat()
+
+
+def run_cmd(cmd):
+    log.debug(f"Running command: {cmd}")
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+        if proc.returncode == 0:
+            log.debug(f"Command succeeded: {cmd}")
+        else:
+            log.warning(f"Command returned {proc.returncode}: {cmd}")
+        return proc.stdout.strip(), proc.stderr.strip(), proc.returncode
+    except FileNotFoundError:
+        log.error(f"Command not found: {cmd[0]}")
+        return "", "not found", 127
+    except subprocess.TimeoutExpired:
+        log.error(f"Command timed out: {cmd}")
+        return "", "timeout", 124
+
+
+def have(cmd_name):
+    exists = shutil.which(cmd_name) is not None
+    log.debug(f"Check if '{cmd_name}' exists: {exists}")
+    return exists
+
+
+def get_mountvol_output():
+    log.info("Gathering mounted volumes via mountvol")
+    out, err, _ = run_cmd(["mountvol"])
+    if not out:
+        return err
+    lines = out.splitlines()
+    filtered = []
+    keep = False
+    for line in lines:
+        if line.strip().startswith("\\\\?\\Volume"):
+            keep = True
+        if keep:
+            filtered.append(line)
+    return "\n".join(filtered)
+
+
+def main():
+    script_dir = Path(__file__).resolve().parent
+    report_path = script_dir / "win_encrypted_volume_report.txt"
+    log.info(f"Starting encrypted volume analysis, report will be saved to {report_path}")
+
+    with report_path.open("w", encoding="utf-8") as f:
+        f.write("=" * 80 + "\n")
+        f.write("Windows Encrypted Volume Report\n")
+        f.write("=" * 80 + "\n")
+        f.write(f"Generated at: {now_iso()}\n")
+        f.write(f"User: {getpass.getuser()}\n")
+        f.write(f"IsAdmin: {check.admin()}\n")
+        f.write(f"Hostname: {platform.node()}\n")
+        f.write(f"Version: {platform.platform()}\n\n")
+
+        # Logical drives
+        log.info("Gathering logical volumes via wmic")
+        f.write("Logical Volumes (wmic):\n")
+        out, err, _ = run_cmd(["wmic", "logicaldisk", "get",
+                               "DeviceID,DriveType,FileSystem,FreeSpace,Size,VolumeName"])
+        f.write(out + "\n" + err + "\n\n")
+
+        # Mounted volumes
+        f.write("Mounted Volumes (mountvol):\n")
+        f.write(get_mountvol_output() + "\n\n")
+
+        # BitLocker status
+        f.write("=" * 80 + "\nBitLocker Status\n" + "=" * 80 + "\n")
+        if have("manage-bde"):
+            log.info("Checking BitLocker status with manage-bde")
+            for letter in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
+                path = f"{letter}:"
+                if os.path.exists(f"{path}\\"):
+                    out, err, _ = run_cmd(["manage-bde", "-status", path])
+                    f.write(f"Drive {path}:\n{out}\n{err}\n\n")
+        else:
+            log.warning("manage-bde not found")
+
+        if have("powershell"):
+            log.info("Checking BitLocker status with PowerShell")
+            f.write("PowerShell Get-BitLockerVolume:\n")
+            ps_cmd = r"Get-BitLockerVolume | Format-List *"
+            out, err, _ = run_cmd(["powershell", "-NoProfile", "-Command", ps_cmd])
+            f.write(out + "\n" + err + "\n\n")
+        else:
+            log.warning("PowerShell not available")
+
+    log.info(f"Report successfully saved to {report_path}")
+
+
+if __name__ == "__main__":
+    main()
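manage-bde prints free text, so any downstream tooling has to parse the report itself; a rough sketch of lifting the two most useful fields (the field labels assume English-locale manage-bde output and may vary between Windows versions):

def parse_bitlocker_status(raw: str) -> dict:
    # Split "Label:   Value" lines into a dict, then pick out the two
    # fields that indicate whether a volume is actually encrypted.
    fields = {}
    for line in raw.splitlines():
        if ":" in line:
            key, _, value = line.partition(":")
            fields[key.strip()] = value.strip()
    return {
        "conversion_status": fields.get("Conversion Status"),
        "percentage_encrypted": fields.get("Percentage Encrypted"),
    }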
diff --git a/CODE/logicytics/Flag.py b/CODE/logicytics/Flag.py
index b93d98b4..0a4367f0 100644
--- a/CODE/logicytics/Flag.py
+++ b/CODE/logicytics/Flag.py
@@ -498,7 +498,7 @@ def __available_arguments(
         "and not the best, use only if the device doesnt have python installed.",
     )
 
-    # TODO v3.6.0 -> Out of beta
+    # TODO v3.6.1 -> Out of beta
     parser.add_argument(
         "--vulnscan-ai",
         action="store_true",
diff --git a/CODE/logicytics/__init__.py b/CODE/logicytics/__init__.py
index e9637974..2e540e0c 100644
--- a/CODE/logicytics/__init__.py
+++ b/CODE/logicytics/__init__.py
@@ -122,4 +122,5 @@ def wrapper(*args, **kwargs) -> callable:
     "ObjectLoadError",
     "log",
     "Log",
+    "config",
 ]
diff --git a/CODE/usb_history.py b/CODE/usb_history.py
new file mode 100644
index 00000000..912823ce
--- /dev/null
+++ b/CODE/usb_history.py
@@ -0,0 +1,90 @@
+import ctypes
+import ctypes.wintypes
+import os
+import winreg
+from datetime import datetime, timedelta
+
+from logicytics import log
+
+
+class USBHistory:
+    def __init__(self):
+        self.history_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "usb_history.txt")
+
+    def _save_history(self, message: str):
+        """Append a timestamped message to the history file and log it."""
+        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        entry = f"{timestamp} - {message}\n"
+        try:
+            with open(self.history_path, "a", encoding="utf-8") as f:
+                f.write(entry)
+            log.debug(f"Saved entry: {message}")
+        except Exception as e:
+            log.error(f"Failed to write history: {e}")
+
+    # noinspection PyUnresolvedReferences
+    @staticmethod
+    def _get_last_write_time(root_key, sub_key_path):
+        """Return the precise last write time of a registry key, or None on failure."""
+        handle = ctypes.wintypes.HANDLE()
+        try:
+            advapi32 = ctypes.windll.advapi32
+            if advapi32.RegOpenKeyExW(root_key, sub_key_path, 0, winreg.KEY_READ, ctypes.byref(handle)) != 0:
+                return None
+            ft = ctypes.wintypes.FILETIME()
+            if advapi32.RegQueryInfoKeyW(handle, None, None, None, None, None, None, None, None, None, None,
+                                         ctypes.byref(ft)) != 0:
+                return None
+            t = ((ft.dwHighDateTime << 32) + ft.dwLowDateTime) // 10
+            return datetime(1601, 1, 1) + timedelta(microseconds=t)
+        finally:
+            if handle:
+                ctypes.windll.advapi32.RegCloseKey(handle)
+
+    @staticmethod
+    def _enum_subkeys(root, path, warn_func):
+        """Yield all subkeys of a registry key, logging warnings on errors."""
+        try:
+            with winreg.OpenKey(root, path) as key:
+                subkey_count, _, _ = winreg.QueryInfoKey(key)
+                for i in range(subkey_count):
+                    try:
+                        yield winreg.EnumKey(key, i)
+                    except OSError as e:
+                        if getattr(e, "winerror", None) == 259:  # ERROR_NO_MORE_ITEMS
+                            break
+                        warn_func(f"Error enumerating {path} index {i}: {e}")
+        except OSError as e:
+            warn_func(f"Failed to open registry key {path}: {e}")
+
+    @staticmethod
+    def _get_friendly_name(dev_info_path, device_id):
+        """Return the friendly name of a device if available, else the device ID."""
+        try:
+            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, dev_info_path) as dev_key:
+                return winreg.QueryValueEx(dev_key, "FriendlyName")[0]
+        except FileNotFoundError:
+            return device_id
+        except Exception as e:
+            log.warning(f"Failed to read friendly name for {dev_info_path}: {e}")
+            return device_id
+
+    def read(self):
+        """Read all USB devices from USBSTOR and log their info."""
+        log.info("Starting USB history extraction...")
+        reg_path = r"SYSTEM\CurrentControlSet\Enum\USBSTOR"
+        try:
+            for device_class in self._enum_subkeys(winreg.HKEY_LOCAL_MACHINE, reg_path, log.warning):
+                dev_class_path = f"{reg_path}\\{device_class}"
+                for device_id in self._enum_subkeys(winreg.HKEY_LOCAL_MACHINE, dev_class_path, log.warning):
+                    dev_info_path = f"{dev_class_path}\\{device_id}"
+                    friendly_name = self._get_friendly_name(dev_info_path, device_id)
+                    last_write = self._get_last_write_time(winreg.HKEY_LOCAL_MACHINE, dev_info_path) or "Unknown"
+                    self._save_history(f"USB Device Found: {friendly_name} | LastWriteTime: {last_write}")
+            log.info(f"USB history extraction complete, saved to {self.history_path}")
+        except Exception as e:
+            log.error(f"Error during USB history extraction: {e}")
+
+
+if __name__ == "__main__":
+    USBHistory().read()
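Worth noting: `winreg.QueryInfoKey` already returns a key's last-write time as 100-nanosecond intervals since 1601-01-01, so the same timestamp can be recovered without the ctypes round-trip; a sketch for comparison (illustration only, not part of the patch):

import winreg
from datetime import datetime, timedelta

def last_write_time(root, sub_key):
    # QueryInfoKey returns (num_subkeys, num_values, last_modified),
    # where last_modified is in 100 ns units since 1601-01-01.
    with winreg.OpenKey(root, sub_key) as key:
        _, _, filetime = winreg.QueryInfoKey(key)
        return datetime(1601, 1, 1) + timedelta(microseconds=filetime // 10)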
diff --git a/CODE/vulnscan.py b/CODE/vulnscan.py
index 72c6c0ea..b31a83ad 100644
--- a/CODE/vulnscan.py
+++ b/CODE/vulnscan.py
@@ -1,226 +1,196 @@
-from __future__ import annotations
-
-import asyncio
+import csv
+import json
 import os
-import threading
-import warnings
+import shutil
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
-import aiofiles
-import joblib
-import numpy as np
-# noinspection PyPackageRequirements
 import torch
-from pathlib import Path
-from safetensors import safe_open
-from tqdm import tqdm
+from sentence_transformers import SentenceTransformer
+from torch import nn
 
 from logicytics import log, config
 
-warnings.filterwarnings("ignore")
-
-UNREADABLE_EXTENSIONS = config.get("VulnScan Settings", "unreadable_extensions").split(
-    ","
-)
-MAX_FILE_SIZE_MB = config.get("VulnScan Settings", "max_file_size_mb", fallback="None")
-raw_workers = config.get("VulnScan Settings", "max_workers", fallback="auto")
-max_workers = min(32, os.cpu_count() * 2) if raw_workers == "auto" else int(raw_workers)
-
-if MAX_FILE_SIZE_MB != "None":
-    MAX_FILE_SIZE_MB = max(int(MAX_FILE_SIZE_MB), 1)
-else:
-    MAX_FILE_SIZE_MB = None
-
-
-class _SensitiveDataScanner:
-    def __init__(self, model_path: str, vectorizer_path: str):
-        self.model_path = model_path
-        self.vectorizer_path = vectorizer_path
-        self.model_cache = {}
-        self.vectorizer_cache = {}
-        self.model_lock = threading.Lock()
-        self.vectorizer_lock = threading.Lock()
-        self.model = None
-        self.vectorizer = None
-        self._load_model()
-        self._load_vectorizer()
-
-    def _load_model(self) -> None:
-        with self.model_lock:
-            if self.model_path in self.model_cache:
-                self.model = self.model_cache[self.model_path]
-                return
-
-            if self.model_path.endswith(".pkl"):
-                self.model = joblib.load(self.model_path)
-            elif self.model_path.endswith(".safetensors"):
-                self.model = safe_open(self.model_path, framework="torch")
-            elif self.model_path.endswith(".pth"):
-                with warnings.catch_warnings():
-                    warnings.filterwarnings("ignore", category=FutureWarning)
-                    self.model = torch.load(
-                        self.model_path,
-                        map_location=torch.device(
-                            "cuda" if torch.cuda.is_available() else "cpu"
-                        ),
-                        weights_only=False,
-                    )
-                if not torch.cuda.is_available() and torch.version.cuda:
-                    log.warning(
-                        "NVIDIA GPU detected but CUDA is not available. Check your PyTorch and CUDA installation to utilise as much power as possible."
-                    )
-                log.debug(
-                    f"Model using device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}"
-                )
-            else:
-                raise ValueError("Unsupported model file format")
-
-            self.model_cache[self.model_path] = self.model
-
-    def _load_vectorizer(self) -> None:
-        with self.vectorizer_lock:
-            if self.vectorizer_path in self.vectorizer_cache:
-                self.vectorizer = self.vectorizer_cache[self.vectorizer_path]
-                return
-
-            try:
-                self.vectorizer = joblib.load(self.vectorizer_path)
-            except Exception as e:
-                log.critical(f"Failed to load vectorizer: {e}")
-                exit(1)
-
-            self.vectorizer_cache[self.vectorizer_path] = self.vectorizer
-
-    def _extract_features(self, content: str):
-        return self.vectorizer.transform([content])
-
-    def _is_sensitive(self, content: str) -> tuple[bool, float, str]:
-        features = self._extract_features(content)
-        if isinstance(self.model, torch.nn.Module):
-            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            self.model.to(device)
-            self.model.eval()
-            indices = torch.LongTensor(np.vstack(features.nonzero()))
-            values = torch.FloatTensor(features.data)
-            tensor = torch.sparse_coo_tensor(indices, values, size=features.shape).to(
-                device
-            )
-
+# ================== GLOBAL SETTINGS ==================
+
+# File scan settings
+TEXT_EXTENSIONS = {
+    ".txt", ".log", ".csv", ".json", ".xml", ".html", ".md", ".cfg", ".ini", ".yml", ".yaml",
+    ".rtf", ".tex", ".rst", ".adoc", ".properties", ".conf", ".bat", ".ps1", ".sh", ".tsv",
+    ".dat", ".env", ".toml", ".dockerfile", ".gitignore", ".gitattributes", ".npmrc", ".editorconfig"
+}
+MAX_TEXT_LENGTH = config.get("VulnScan Settings", "text_char_limit", fallback=None)
+MAX_TEXT_LENGTH = int(MAX_TEXT_LENGTH) if MAX_TEXT_LENGTH not in (None, "None", "") else None
+# Threading
+NUM_WORKERS = config.get("VulnScan Settings", "max_workers", fallback="auto")
+NUM_WORKERS = min(32, (os.cpu_count() or 1) * 2) if NUM_WORKERS == "auto" else int(NUM_WORKERS)
+# Classification threshold
+SENSITIVE_THRESHOLD = float(
+    config.get("VulnScan Settings", "threshold", fallback=0.6))  # Probability cutoff to consider a file sensitive
+
+# Paths
+SENSITIVE_PATHS = [
+    r"C:\Users\%USERNAME%\Documents",
+    r"C:\Users\%USERNAME%\Desktop",
+    r"C:\Users\%USERNAME%\Downloads",
+    r"C:\Users\%USERNAME%\AppData\Roaming",
+    r"C:\Users\%USERNAME%\AppData\Local",
+    r"C:\Users\%USERNAME%\OneDrive",
+    r"C:\Users\%USERNAME%\Dropbox",
+    r"C:\Users\%USERNAME%\Google Drive",
+]
+SAVE_DIR = r"VulnScan_Files"  # Backup folder
+MODEL_PATH = r"vulnscan/Model_SenseMacro.4n1.pth"  # Your trained model checkpoint
+REPORT_JSON = "report.json"
+REPORT_CSV = "report.csv"
+
+# ================== DEVICE SETUP ==================
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+log.debug(f"Using device: {DEVICE}")
+
+
+# ================== MODEL DEFINITION ==================
+class SimpleNN(nn.Module):
+    def __init__(self, input_dim):
+        super().__init__()
+        self.fc = nn.Sequential(
+            nn.Linear(in_features=input_dim, out_features=256),
+            nn.ReLU(),
+            nn.Linear(in_features=256, out_features=64),
+            nn.ReLU(),
+            nn.Linear(in_features=64, out_features=1),
+        )
+
+    def forward(self, x):
+        return self.fc(x)
+
+
+# ================== LOAD MODELS ==================
+# Load classifier
+checkpoint = torch.load(MODEL_PATH, map_location=DEVICE)
+model = SimpleNN(input_dim=384)
+model.load_state_dict(checkpoint["model_state_dict"])
+model.to(DEVICE)
+model.eval()
+
+# Load embedding model
+embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=DEVICE)
+
+# Make backup folder
+os.makedirs(SAVE_DIR, exist_ok=True)
+
+
+# ================== FILE PROCESSING ==================
+def process_file(filepath):
+    try:
+        _, ext = os.path.splitext(filepath)
+        if ext.lower() not in TEXT_EXTENSIONS:
+            return None
+
+        with open(filepath, "r", encoding="utf-8", errors="ignore") as f_:
+            content = f_.read()
+        if not content.strip():
+            return None
+
+        # Limit file length
+        if MAX_TEXT_LENGTH is not None:
+            content = content[:MAX_TEXT_LENGTH]
+
+        # Split content into lines
+        lines = [line_ for line_ in content.splitlines() if line_.strip()]
+        if not lines:
+            return None
+
+        # Embed all lines
+        embeddings = embed_model.encode(lines, convert_to_tensor=True, device=DEVICE)
+
+        # Predict per line
+        probs = []
+        for emb in embeddings:
             with torch.no_grad():
-                pred = self.model(tensor)
-                prob = torch.softmax(pred, dim=1).max().item()
-                reason = ", ".join(
-                    self.vectorizer.get_feature_names_out()[i]
-                    for i in np.argsort(features.data)[-5:]
-                )
-                return pred.argmax(dim=1).item() == 1, prob, reason
-        else:
-            probs = self.model.predict_proba(features)
-            top_indices = np.argsort(features.toarray()[0])[-5:]
-            reason = ", ".join(
-                self.vectorizer.get_feature_names_out()[i] for i in top_indices
-            )
-            return self.model.predict(features)[0] == 1, probs.max(), reason
-
-    async def scan_file_async(self, file_path: str) -> tuple[bool, float, str]:
-        try:
-            async with aiofiles.open(
-                file_path, "r", encoding="utf-8", errors="ignore"
-            ) as f:
-                content = await f.read()
-            return self._is_sensitive(content)
-        except Exception as e:
-            log.error(f"Failed to scan {file_path}: {e}")
-            return False, 0.0, "Error"
-
-    def cleanup(self):
-        self.model_cache.clear()
-        self.vectorizer_cache.clear()
-        self.model = None
-        self.vectorizer = None
-        log.info("Cleanup complete.")
-
-
-class VulnScan:
-    def __init__(self, model_path: str, vectorizer_path: str):
-        self.scanner = _SensitiveDataScanner(model_path, vectorizer_path)
-
-    @log.function
-    def scan_directory(self, scan_paths: list[str]) -> None:
-        log.info("Collecting files...")
-        all_files = []
-
-        for path in scan_paths:
-            try:
-                all_files.extend(str(f) for f in Path(path).rglob("*") if f.is_file())
-                log.debug(f"Found {len(all_files)} files in {path}")
-            except Exception as e:
-                log.warning(f"Skipping path {path} due to error: {e}")
-
-        log.info(f"Collected {len(all_files)} files.")
-
-        loop = asyncio.get_event_loop()
-        loop.run_until_complete(self._async_scan(all_files))
-
-    async def _async_scan(self, files: list[str]) -> None:
-        valid_files = []
-
-        for file in files:
-            try:
-                file_size_mb = os.path.getsize(file) / (1024 * 1024)
-                if MAX_FILE_SIZE_MB and file_size_mb > MAX_FILE_SIZE_MB:
-                    continue
-                if any(file.lower().endswith(ext) for ext in UNREADABLE_EXTENSIONS):
-                    continue
-                valid_files.append(file)
-            except Exception as e:
-                log.debug(f"Skipping file {file}: {e}")
-
-        log.info(f"Valid files to scan: {len(valid_files)}")
-
-        semaphore = asyncio.Semaphore(max_workers)
-        sensitive_files = []
-
-        async def scan_worker(scan_file):
-            async with semaphore:
-                result, prob, reason = await self.scanner.scan_file_async(scan_file)
-                if result:
-                    log.debug(
-                        f"SENSITIVE: {scan_file} | Confidence: {prob:.2f} | Reason: {reason}"
-                    )
-                    sensitive_files.append(scan_file)
-
-        tasks = [scan_worker(f) for f in valid_files]
-
-        with tqdm(
-            total=len(valid_files),
-            desc="\033[32mSCAN\033[0m \033[94mScanning Files\033[0m",
-            unit="file",
-            bar_format="{l_bar} {bar} {n_fmt}/{total_fmt}\n",
-        ) as pbar:
-            for f in asyncio.as_completed(tasks):
-                await f
-                pbar.update(1)
-
-        with open("Sensitive_File_Paths.txt", "a") as out:
-            out.write(
-                "\n".join(sensitive_files) + "\n"
-                if sensitive_files
-                else "No sensitive files detected.\n"
-            )
-
-        self.scanner.cleanup()
+                output = model(emb.unsqueeze(0))
+                probs.append(torch.sigmoid(output).item())
+
+        max_prob = max(probs)
+        if max_prob < SENSITIVE_THRESHOLD:
+            return None
+
+        # Get top 5 lines contributing most
+        top_lines = [lines[i] for i, p in sorted(enumerate(probs), key=lambda x: x[1], reverse=True)[:5]]
+
+        # Backup file
+        rel_path = os.path.relpath(filepath, ROOT_DIR)
+        backup_path = os.path.join(SAVE_DIR, rel_path)
+        os.makedirs(os.path.dirname(backup_path), exist_ok=True)
+        shutil.copy2(filepath, backup_path)
+
+        return {
+            "file": filepath,
+            "probability": max_prob,
+            "copied_to": backup_path,
+            "reason": top_lines
+        }
+
+    except Exception as e:
+        log.error(f"Could not process {filepath}: {e}")
+        return None
+
+
+# ================== DIRECTORY SCAN ==================
+def scan_directory(root):
+    sensitive_files = []
+    with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
+        futures = []
+        for dirpath, _, filenames in os.walk(root):
+            for file in filenames:
+                futures.append(executor.submit(process_file, os.path.join(dirpath, file)))
+
+        for future in as_completed(futures):
+            result = future.result()
+            if result:
+                sensitive_files.append(result)
+
+    return sensitive_files
+
+
+# ================== MAIN ==================
+def main():
+    log.info(f"Scanning directory: {ROOT_DIR} - This will take some time...")
+    sensitive = scan_directory(ROOT_DIR)
+    ALL_SENSITIVE.extend(sensitive)
+
+    print()
+    log.debug("Sensitive files detected and backed up:")
+    for entry in sensitive:
+        log.debug(f"  - {entry['file']} (prob={entry['probability']:.4f})")
+        for line in entry["reason"]:
+            log.debug(f"      -> {line}")
+
+
+def save_reports():
+    # Save JSON report, aggregated across every scanned path so a later
+    # path's results do not overwrite an earlier path's
+    with open(REPORT_JSON, "w", encoding="utf-8") as f:
+        json.dump(ALL_SENSITIVE, f, indent=2, ensure_ascii=False)
+
+    # Save CSV report
+    with open(REPORT_CSV, "w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=["file", "probability", "copied_to", "reason"])
+        writer.writeheader()
+        for entry in ALL_SENSITIVE:
+            # Join top lines as single string for CSV
+            entry_csv = entry.copy()
+            entry_csv["reason"] = " | ".join(entry["reason"])
+            writer.writerow(entry_csv)
+
+    print()
+    log.info("Backup completed.\n")
+    log.debug(f"Files copied into: {SAVE_DIR}")
+    log.debug(f"JSON report saved as: {REPORT_JSON}")
+    log.debug(f"CSV report saved as: {REPORT_CSV}")
 
 
 if __name__ == "__main__":
-    try:
-        base_paths = [
-            "C:\\Users\\",
-            "C:\\Windows\\Logs",
-            "C:\\Program Files",
-            "C:\\Program Files (x86)",
-        ]
-        vulnscan = VulnScan("vulnscan/SenseMini.3n3.pth", "vulnscan/vectorizer.3n3.pkl")
-        vulnscan.scan_directory(base_paths)
-    except KeyboardInterrupt:
-        log.warning("User interrupted. Exiting gracefully.")
Exiting gracefully.") - exit(0) + log.info(f"Starting VulnScan with {NUM_WORKERS} thread workers and {len(SENSITIVE_PATHS)} paths...") + for path in SENSITIVE_PATHS: + expanded_path = os.path.expandvars(path) + if os.path.exists(expanded_path): + ROOT_DIR = expanded_path + main() + else: + log.warning(f"Path does not exist and will be skipped: {expanded_path}") diff --git a/CODE/vulnscan/Model_SenseMacro.4n1.pth b/CODE/vulnscan/Model_SenseMacro.4n1.pth new file mode 100644 index 00000000..4f8182cc Binary files /dev/null and b/CODE/vulnscan/Model_SenseMacro.4n1.pth differ diff --git a/CODE/vulnscan/SenseMini.3n3.pth b/CODE/vulnscan/SenseMini.3n3.pth deleted file mode 100644 index cee8abc0..00000000 Binary files a/CODE/vulnscan/SenseMini.3n3.pth and /dev/null differ diff --git a/CODE/vulnscan/vectorizer.3n3.pkl b/CODE/vulnscan/vectorizer.3n3.pkl deleted file mode 100644 index 59af8fb5..00000000 Binary files a/CODE/vulnscan/vectorizer.3n3.pkl and /dev/null differ diff --git a/PLANS.md b/PLANS.md index 099bae94..221aa875 100644 --- a/PLANS.md +++ b/PLANS.md @@ -8,8 +8,8 @@ | Task | Version | Might or Will be done? | |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|------------------------| -| Remake VulnScan .pkl and .pth to be more accurate | v3.6.0 | ❌ | -| Encrypted Volume Detection and Analysis, Advanced USB Device History Tracker | v3.6.0 | ❌ | -| Merge `sensitive data miner` with `vulnscan` to be 1 tool | v4.0.0 | ✅ | +| Update to model 4n2 of vulnscan | v3.6.1 | ✅ | +| Merge `sensitive data miner` with `vulnscan` to be 1 tool | v4.0.0 | ❌ | | Remake Logicytics End-Execution cycle, where files created must go in `temp/` directory, and zipper takes it from there only, simplifying any code logic with this as well | v4.0.0 | ✅ | | Replace Logger.py with Util that contains (tprint), also implement the ExceptionHandler and UpdateManager from Util | v4.0.0 | ✅ | +| Make WIKI in the git repo, with a yaml file that updates it to the default github wiki | v4.0.0 | ✅ | diff --git a/README.md b/README.md index 6d27f0c9..2c447ddd 100644 --- a/README.md +++ b/README.md @@ -198,52 +198,10 @@ If those don't work attempt: Check out the [wiki](https://github.com/DefinetlyNotAI/Logicytics/wiki) for help. -## 📊 Data Extraction 📊 - -Logicytics extracts a wide range of data points on a Windows system. - -Here are some of the data points that Logicytics extracts: - -> [!IMPORTANT] -> Don't recreate the scripts/ideas below as then it's a waste of time for you, -> unless the Side-note on the script says otherwise, you can however contribute to the script itself. - > [!TIP] > You can check out future plans [here](PLANS.md), > you can contribute these plans if you have no idea's on what to contribute! 
-| File Name | About | Important Note | -|--------------------------|----------------------------------------------------------------------------------------------------------------------|----------------------------| -| browser_miner.ps1 | Mines all data related to browsers | | -| cmd_commands.py | Gets data from driverquery, sysinfo, gpresult and more | | -| log_miner.py | Gets all logs from the Windows device | | -| media_backup.py | Gets all media of the device in a neat folder | Would love to be updated | -| netadapter.ps1 | Runs Get-NetAdapter Command with many flags | | -| property_scraper.ps1 | Gets all the windows properties | | -| registry.py | Backups the registry | | -| sensitive_data_miner.py | Copies all files that can be considered sensitive in a neat folder, very slow and clunky - useful for depth scanning | | -| ssh_miner.py | Gets as much ssh private data as possible | | -| sys_internal.py | Attempts to use the Sys_Internal Suite from microsoft | | -| tasklist.py | Gets all running tasks, PID and info/data | | -| tree.ps1 | Runs and logs the tree.ps1 command, very slow and clunky - useful for depth scanning | | -| window_feature_miner.ps1 | Logs all the windows features enabled | | -| wmic.py | Logs and runs many wmic commands to gain sensitive data and information | | -| wifi_stealer.py | Gets the SSID and Password of all saved Wi-Fi | | -| dir_list.py | Produces a txt on every single file on the device, very slow and clunky - useful for depth scanning | | -| event_logs.py | Produces a multiple txt files in a folder on many event logs (Security, Applications and System) | | -| vulnscan.py | Uses AI/ML to detect sensitive files, and log their paths | In beta! We need feedback! | -| dump_memory.py | Dumps some memory as well as log some RAM details | Not completely good yet... | -| bluetooth_details.py | Gets the PNP Device ID, Status, Manufacturer, Device ID, Name, Description of all paired bluetooth devices | | -| bluetooth_logger.py | Collect, log, and analyze Bluetooth-related data, by accessing the Windows registry and Event Viewer. | | -| network_psutil.py | The `network_psutil.py` file collects and logs various network-related information. | | - -This is not an exhaustive list, -but it should give you a good idea of what data Logicytics is capable of extracting. - -> [!NOTE] -> **Any file with `_` is not counted here, -> do note they may range from custom libraries to special files/wrappers** - ### Want to create your own mod? Check out the [contributing guidlines](CONTRIBUTING.md) file for more info diff --git a/SECURITY.md b/SECURITY.md index 596a1fa9..110e6ab6 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,6 +6,7 @@ This section outlines the versions of our project that are currently supported w | Version | Supported | Major Release Date | |---------|-----------|--------------------| +| 3.6.x | ✅ | July 26, 2025 | | 3.5.x | ✅ | July 26, 2025 | | 3.4.x | ✖️ | January 3, 2025 | | 3.3.x | ✖️ | January 3, 2025 |