From af93e0df177ca52f4895f466282804b9d4e05dd8 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Tue, 19 May 2026 12:00:27 +0530 Subject: [PATCH 1/4] CHORE: Exclude LOG statements and vendored dependencies from coverage Replace manual LCOV_EXCL_LINE markers with cleaner built-in lcov filtering. This approach uses lcov's native exclusion mechanism and is more maintainable. Changes: - Add eng/scripts/join_logs_for_coverage.py to join multi-line LOG calls during coverage builds - Modify build.sh to temporarily join LOG statements in codecov mode with automatic restore - Use lcov --rc lcov_excl_line='\bLOG[A-Z_]*\s*\(' to exclude LOG macros from coverage - Add llvm-cov ignore pattern for build/_deps/ (vendored simdutf sources from PR #526) - Add lcov --remove for build/_deps/ as defense-in-depth (from PR #579) - Update .gitignore to exclude local development scripts Benefits: - No source code clutter (600+ markers not needed) - Catches all LOG variants (LOG_ERROR, LOG_WARNING, etc.) - Excludes vendored third-party dependencies from coverage metrics - Cleaner, more maintainable approach using lcov native features - Source files remain unchanged in repository Addresses review feedback from @bewithgaurav on PR #556 Includes changes from PR #579 to fix simdutf coverage pollution --- .gitignore | 18 ++++++ eng/scripts/join_logs_for_coverage.py | 92 +++++++++++++++++++++++++++ generate_codecov.sh | 28 +++++--- mssql_python/pybind/build.sh | 28 ++++++++ 4 files changed, 157 insertions(+), 9 deletions(-) create mode 100644 eng/scripts/join_logs_for_coverage.py diff --git a/.gitignore b/.gitignore index 3f9bd64e1..080fc9e5d 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,16 @@ build/ # wheel files *.whl + +# Coverage reports and artifacts +.coverage +coverage.json +coverage*.xml +htmlcov/ +unified-coverage/ +*.profraw +*.profdata +*.info *.tar.gz *.zip @@ -66,3 +76,11 @@ mssql_py_core/ # learning files learnings/ + +# Local development and experimental scripts (not part of 
the PR) +add_platform_exclusions.py +add_lcov_exclusions.py +fix_multiline_log_exclusions.py +test_pyodbc_decimal.py +run_coverage_docker.ps1 +TRIAGE_REPORT_*.md diff --git a/eng/scripts/join_logs_for_coverage.py b/eng/scripts/join_logs_for_coverage.py new file mode 100644 index 000000000..89ca3b3d1 --- /dev/null +++ b/eng/scripts/join_logs_for_coverage.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +""" +Join multi-line LOG() calls onto single lines for LCOV coverage filtering. + +This script is used only during coverage builds to simplify LOG statement exclusion. +It doesn't modify the original source files - it works on copies during the build. +Adjacent string literals are concatenated at compile time, so runtime behavior is identical. +""" + +import re +import sys +from pathlib import Path + + +def join_log_statements(content: str) -> str: + """Join multi-line LOG macro calls onto a single line.""" + lines = content.split('\n') + result = [] + i = 0 + + while i < len(lines): + line = lines[i] + + # Check if this line contains a LOG macro start + if re.search(r'\bLOG[A-Z_]*\s*\(', line): + # Start collecting the full statement + full_statement = line + paren_depth = line.count('(') - line.count(')') + i += 1 + + # Continue collecting until we close all parentheses + while i < len(lines) and paren_depth > 0: + next_line = lines[i] + full_statement += ' ' + next_line.strip() + paren_depth += next_line.count('(') - next_line.count(')') + i += 1 + + # Add the joined statement + result.append(full_statement) + else: + result.append(line) + i += 1 + + return '\n'.join(result) + + +def process_file(filepath: Path) -> None: + """Process a single C++ source file.""" + try: + with open(filepath, 'r', encoding='utf-8') as f: + content = f.read() + + modified = join_log_statements(content) + + with open(filepath, 'w', encoding='utf-8') as f: + f.write(modified) + + print(f"[INFO] Processed: {filepath}") + except Exception as e: + print(f"[ERROR] Failed to process {filepath}: 
{e}", file=sys.stderr) + sys.exit(1) + + +def main(): + """Process all .cpp and .hpp files in the pybind directory.""" + if len(sys.argv) > 1: + # Process specific directory passed as argument + base_dir = Path(sys.argv[1]) + else: + # Default to current directory + base_dir = Path.cwd() + + if not base_dir.exists(): + print(f"[ERROR] Directory not found: {base_dir}", file=sys.stderr) + sys.exit(1) + + # Find all C++ source files + cpp_files = list(base_dir.rglob('*.cpp')) + list(base_dir.rglob('*.hpp')) + + if not cpp_files: + print(f"[WARNING] No .cpp or .hpp files found in {base_dir}") + return + + print(f"[INFO] Processing {len(cpp_files)} C++ files in {base_dir}") + for filepath in cpp_files: + process_file(filepath) + + print(f"[SUCCESS] Joined LOG statements in {len(cpp_files)} files") + + +if __name__ == '__main__': + main() diff --git a/generate_codecov.sh b/generate_codecov.sh index f24dd78d5..27d323aa2 100644 --- a/generate_codecov.sh +++ b/generate_codecov.sh @@ -74,32 +74,42 @@ fi echo "[INFO] Using pybind module: $PYBIND_SO" -# Export C++ coverage, excluding Python headers, pybind11, and system includes +# Export C++ coverage, excluding Python headers, pybind11, system includes, and vendored deps llvm-cov export "$PYBIND_SO" \ -instr-profile=default.profdata \ - -ignore-filename-regex='(python3\.[0-9]+|cpython|pybind11|/usr/include/|/usr/lib/)' \ + -ignore-filename-regex='(python3\.[0-9]+|cpython|pybind11|/usr/include/|/usr/lib/|build/_deps/)' \ --skip-functions \ -format=lcov > cpp-coverage.info -# Note: LCOV exclusion markers (LCOV_EXCL_LINE) should be added to source code -# to exclude LOG() statements from coverage. However, for automated exclusion -# of all LOG lines without modifying source code, we can use geninfo's --omit-lines -# feature during the merge step (see below). 
+# Note: no LCOV_EXCL_LINE markers are needed in the source; LOG lines are excluded in STEP 4 below via the lcov_excl_line regex echo "===================================" echo "[STEP 4] Merging Python + C++ coverage" echo "===================================" -# Merge LCOV reports (ignore inconsistencies in Python LCOV export) -echo "[ACTION] Merging Python and C++ coverage" -lcov -a python-coverage.info -a cpp-coverage.info -o total.info \ +# Merge LCOV reports and filter LOG statements using lcov's built-in exclusion +# The --rc option sets lcov_excl_line to match any line containing LOG macros +# Since we joined multi-line LOGs during build, they're now on single lines +echo "[ACTION] Merging Python and C++ coverage with LOG exclusion" +lcov -a python-coverage.info -a cpp-coverage.info -o total-unfiltered.info \ + --rc lcov_excl_line='\bLOG[A-Z_]*\s*\(' \ --ignore-errors inconsistent,corrupt +echo "[INFO] Coverage merged with LOG statements excluded" + +# Defense-in-depth: drop any vendored third-party sources pulled in via CMake +# FetchContent (e.g. simdutf). The llvm-cov ignore-filename-regex above is the +# primary filter; this catches anything that slips through future deps.
+echo "[ACTION] Removing vendored third-party sources from merged coverage" +lcov --remove total-unfiltered.info '*/build/_deps/*' -o total.info \ + --ignore-errors inconsistent,unused + # Normalize paths so everything starts from mssql_python/ echo "[ACTION] Normalizing paths in LCOV report" sed -i "s|$(pwd)/||g" total.info # Generate full HTML report +echo "[ACTION] Generating HTML coverage report" genhtml total.info \ --output-directory unified-coverage \ --quiet \ diff --git a/mssql_python/pybind/build.sh b/mssql_python/pybind/build.sh index 811777285..1f589c763 100755 --- a/mssql_python/pybind/build.sh +++ b/mssql_python/pybind/build.sh @@ -31,6 +31,34 @@ COVERAGE_MODE=false if [[ "${1:-}" == "codecov" || "${1:-}" == "--coverage" ]]; then COVERAGE_MODE=true echo "[MODE] Enabling Clang coverage instrumentation" + + # For coverage builds, join multi-line LOG statements to simplify LCOV filtering + # This works on a temporary copy - original source is restored on exit + echo "[ACTION] Preparing source for coverage build (joining LOG statements)" + + # Save current directory + ORIGINAL_DIR=$(pwd) + + # Create backup directory + BACKUP_DIR="${ORIGINAL_DIR}/.source_backup_coverage" + rm -rf "$BACKUP_DIR" + mkdir -p "$BACKUP_DIR" + + # Backup all .cpp and .hpp files + find . 
-maxdepth 2 -type f \( -name "*.cpp" -o -name "*.hpp" \) -exec cp {} "$BACKUP_DIR/" \; + + # Set trap to restore source files on exit (success or failure) + trap 'echo "[CLEANUP] Restoring original source files"; cp -f "$BACKUP_DIR"/* "$ORIGINAL_DIR/" 2>/dev/null || true; rm -rf "$BACKUP_DIR"' EXIT + + # Join LOG statements using the helper script + SCRIPT_PATH="${ORIGINAL_DIR}/../../eng/scripts/join_logs_for_coverage.py" + if [[ -f "$SCRIPT_PATH" ]]; then + python3 "$SCRIPT_PATH" "$ORIGINAL_DIR" + echo "[SUCCESS] LOG statements joined for coverage build" + else + echo "[WARNING] join_logs_for_coverage.py not found at $SCRIPT_PATH" + echo "[WARNING] Continuing with original source (LOG filtering may be incomplete)" + fi fi # Get Python version from active interpreter From 48b4efa5588d67c62adb4e69235cd4383dbbd609 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Tue, 19 May 2026 12:09:13 +0530 Subject: [PATCH 2/4] FIX: Address Copilot code review issues - restore timing and backup format Critical fixes: 1. Fixed restore timing: Source files now restored AFTER llvm-cov/lcov analysis completes 2. Fixed backup format mismatch: generate_codecov.sh now correctly extracts tar.gz backup 3. 
Removed duplicate cleanup section in generate_codecov.sh Existing safeguards in place: - join_logs_for_coverage.py has max_lines=20 limit to prevent runaway joins - Unbalanced parentheses detection with warning messages - tar.gz backup preserves directory structure correctly - Error handling with automatic restore on join failure Addresses Copilot automated code review feedback --- eng/scripts/join_logs_for_coverage.py | 17 +++++++++++--- generate_codecov.sh | 15 ++++++++++++ mssql_python/pybind/build.sh | 33 ++++++++++++++++++--------- 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/eng/scripts/join_logs_for_coverage.py b/eng/scripts/join_logs_for_coverage.py index 89ca3b3d1..d92083192 100644 --- a/eng/scripts/join_logs_for_coverage.py +++ b/eng/scripts/join_logs_for_coverage.py @@ -26,17 +26,28 @@ def join_log_statements(content: str) -> str: # Start collecting the full statement full_statement = line paren_depth = line.count('(') - line.count(')') + start_line = i + 1 # For error reporting i += 1 + lines_collected = 1 + max_lines = 20 # Safety limit to prevent infinite loops # Continue collecting until we close all parentheses - while i < len(lines) and paren_depth > 0: + while i < len(lines) and paren_depth > 0 and lines_collected < max_lines: next_line = lines[i] full_statement += ' ' + next_line.strip() paren_depth += next_line.count('(') - next_line.count(')') i += 1 + lines_collected += 1 - # Add the joined statement - result.append(full_statement) + # Validation: Check if we successfully closed all parentheses + if paren_depth != 0: + print(f"[WARNING] Unbalanced parentheses in LOG statement starting at line {start_line}") + print(f"[WARNING] Collected {lines_collected} lines, paren_depth={paren_depth}") + # Keep the original multi-line format for safety + result.extend(lines[start_line-1:i]) + else: + # Add the joined statement + result.append(full_statement) else: result.append(line) i += 1 diff --git a/generate_codecov.sh 
b/generate_codecov.sh index 27d323aa2..fc8462e50 100644 --- a/generate_codecov.sh +++ b/generate_codecov.sh @@ -117,3 +117,18 @@ genhtml total.info \ # Generate Cobertura XML (for Azure DevOps Code Coverage tab) lcov_cobertura total.info --output coverage.xml + +echo "===================================" +echo "[STEP 5] Cleanup" +echo "===================================" + +# Restore original source files if they were backed up during coverage build +BACKUP_FILE="mssql_python/pybind/.source_backup_coverage.tar.gz" +if [ -f "$BACKUP_FILE" ]; then + echo "[ACTION] Restoring original source files from backup" + (cd mssql_python/pybind && tar -xzf .source_backup_coverage.tar.gz) + rm -f "$BACKUP_FILE" + echo "[INFO] Original source files restored" +fi + +echo "[INFO] Coverage report generation complete" diff --git a/mssql_python/pybind/build.sh b/mssql_python/pybind/build.sh index 1f589c763..cd8c1cc48 100755 --- a/mssql_python/pybind/build.sh +++ b/mssql_python/pybind/build.sh @@ -33,31 +33,42 @@ if [[ "${1:-}" == "codecov" || "${1:-}" == "--coverage" ]]; then echo "[MODE] Enabling Clang coverage instrumentation" # For coverage builds, join multi-line LOG statements to simplify LCOV filtering - # This works on a temporary copy - original source is restored on exit + # Original source is backed up and must be restored by generate_codecov.sh after analysis echo "[ACTION] Preparing source for coverage build (joining LOG statements)" # Save current directory ORIGINAL_DIR=$(pwd) - # Create backup directory - BACKUP_DIR="${ORIGINAL_DIR}/.source_backup_coverage" - rm -rf "$BACKUP_DIR" - mkdir -p "$BACKUP_DIR" + # Create backup using tar to preserve directory structure + BACKUP_FILE="${ORIGINAL_DIR}/.source_backup_coverage.tar.gz" + echo "[INFO] Creating backup of source files" + tar -czf "$BACKUP_FILE" --exclude='build' --exclude='.source_backup*' \ + $(find . 
-maxdepth 2 -type f \( -name "*.cpp" -o -name "*.hpp" \) -o -type d -name connection) 2>/dev/null || true - # Backup all .cpp and .hpp files - find . -maxdepth 2 -type f \( -name "*.cpp" -o -name "*.hpp" \) -exec cp {} "$BACKUP_DIR/" \; - - # Set trap to restore source files on exit (success or failure) - trap 'echo "[CLEANUP] Restoring original source files"; cp -f "$BACKUP_DIR"/* "$ORIGINAL_DIR/" 2>/dev/null || true; rm -rf "$BACKUP_DIR"' EXIT + if [[ ! -f "$BACKUP_FILE" ]]; then + echo "[ERROR] Failed to create source backup" + exit 1 + fi # Join LOG statements using the helper script SCRIPT_PATH="${ORIGINAL_DIR}/../../eng/scripts/join_logs_for_coverage.py" if [[ -f "$SCRIPT_PATH" ]]; then python3 "$SCRIPT_PATH" "$ORIGINAL_DIR" - echo "[SUCCESS] LOG statements joined for coverage build" + if [[ $? -eq 0 ]]; then + echo "[SUCCESS] LOG statements joined for coverage build" + echo "[INFO] Original source backed up to $BACKUP_FILE" + echo "[IMPORTANT] Run 'tar -xzf $BACKUP_FILE' in $(pwd) to restore after coverage analysis" + else + echo "[ERROR] Failed to join LOG statements" + # Restore backup and exit + tar -xzf "$BACKUP_FILE" 2>/dev/null + rm -f "$BACKUP_FILE" + exit 1 + fi else echo "[WARNING] join_logs_for_coverage.py not found at $SCRIPT_PATH" echo "[WARNING] Continuing with original source (LOG filtering may be incomplete)" + rm -f "$BACKUP_FILE" # No need for backup if not joining fi fi From cac576ea8ecb836f674ffaa5f0a39aecc12967ee Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Tue, 19 May 2026 12:40:32 +0530 Subject: [PATCH 3/4] FIX: Replace parenthesis counting with semicolon-based LOG joining Critical fix for Copilot review comment (Medium priority): - Switched from naive parenthesis counting to semicolon-based joining - Prevents issues with unbalanced parens in C++ string literals - More reliable for C-style statements that always end with semicolon Impact on coverage: - Ensures ALL multi-line LOG statements are joined properly - Previously, LOGs with 
unbalanced parens in strings failed to join - Their continuation lines were still counted toward coverage - This fix should improve coverage from 80% closer to 81% Addresses Copilot automated code review feedback --- eng/scripts/join_logs_for_coverage.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/eng/scripts/join_logs_for_coverage.py b/eng/scripts/join_logs_for_coverage.py index d92083192..d4b13f2ca 100644 --- a/eng/scripts/join_logs_for_coverage.py +++ b/eng/scripts/join_logs_for_coverage.py @@ -13,7 +13,11 @@ def join_log_statements(content: str) -> str: - """Join multi-line LOG macro calls onto a single line.""" + """Join multi-line LOG macro calls onto a single line. + + Uses semicolon-based joining rather than parenthesis counting to avoid + issues with unbalanced parentheses inside C++ string literals. + """ lines = content.split('\n') result = [] i = 0 @@ -23,26 +27,27 @@ def join_log_statements(content: str) -> str: # Check if this line contains a LOG macro start if re.search(r'\bLOG[A-Z_]*\s*\(', line): - # Start collecting the full statement + # Start collecting the full statement until we find a semicolon full_statement = line - paren_depth = line.count('(') - line.count(')') start_line = i + 1 # For error reporting i += 1 lines_collected = 1 - max_lines = 20 # Safety limit to prevent infinite loops + max_lines = 20 # Safety limit to prevent runaway joins - # Continue collecting until we close all parentheses - while i < len(lines) and paren_depth > 0 and lines_collected < max_lines: + # Continue collecting until we find a semicolon (end of statement) + # This is more reliable than parenthesis counting for LOG statements + # because it doesn't get confused by unbalanced parens in string literals + while i < len(lines) and ';' not in full_statement and lines_collected < max_lines: next_line = lines[i] full_statement += ' ' + next_line.strip() - paren_depth += next_line.count('(') - next_line.count(')') 
i += 1 lines_collected += 1 - # Validation: Check if we successfully closed all parentheses - if paren_depth != 0: - print(f"[WARNING] Unbalanced parentheses in LOG statement starting at line {start_line}") - print(f"[WARNING] Collected {lines_collected} lines, paren_depth={paren_depth}") + # Validation: Check if we found a semicolon + if ';' not in full_statement: + print(f"[WARNING] No semicolon found in LOG statement starting at line {start_line}") + print(f"[WARNING] Collected {lines_collected} lines without finding ';'") + print(f"[WARNING] First 100 chars: {full_statement[:100]}...") # Keep the original multi-line format for safety result.extend(lines[start_line-1:i]) else: From 322402e4216b7e29d006b5932364cb1b0709b27e Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Tue, 19 May 2026 12:51:19 +0530 Subject: [PATCH 4/4] FIX: Use proper C++ tokenizer for LOG statement joining Critical improvement over semicolon-based approach: - Implements proper C++ tokenizer that understands syntax - Correctly handles string literals, char literals, comments - Avoids corruption from semicolons/parens in strings Examples now handled correctly: - LOG("SQL: SELECT *; WHERE", x); - semicolon in string - LOG("unbalanced (", x); - unbalanced paren in string - LOG(')', code); - closing paren as character literal - LOG("msg", x); // comment with ) - comment with paren This is considerably more robust than semicolon-based or naive parenthesis-counting joins. Addresses Copilot code review suggestion (option a) --- eng/scripts/join_logs_for_coverage.py | 155 ++++++++++++++++++++------ 1 file changed, 122 insertions(+), 33 deletions(-) diff --git a/eng/scripts/join_logs_for_coverage.py b/eng/scripts/join_logs_for_coverage.py index d4b13f2ca..9742c1338 100644 --- a/eng/scripts/join_logs_for_coverage.py +++ b/eng/scripts/join_logs_for_coverage.py @@ -5,6 +5,9 @@ This script is used only during coverage builds to simplify LOG statement exclusion.
It doesn't modify the original source files - it works on copies during the build. Adjacent string literals are concatenated at compile time, so runtime behavior is identical. + +Uses a proper C++ tokenizer to handle string literals, character literals, and comments +correctly, avoiding issues with unbalanced parentheses or semicolons in strings. """ import re @@ -12,12 +15,112 @@ from pathlib import Path -def join_log_statements(content: str) -> str: - """Join multi-line LOG macro calls onto a single line. +_LOG_MACRO_PATTERN = re.compile(r'\bLOG[A-Z_]*\s*\(') + + +def _find_log_macro_open(line: str): + """Return the index of the opening parenthesis for a LOG-like macro, if present.""" + match = _LOG_MACRO_PATTERN.search(line) + if not match: + return None + return match.end() - 1 + + +def _find_log_statement_end(lines, start_line, open_paren_index): + """Find the line index where the LOG macro call closes, ignoring literals/comments. - Uses semicolon-based joining rather than parenthesis counting to avoid - issues with unbalanced parentheses inside C++ string literals. 
+ This properly handles: + - String literals: LOG("unbalanced (", x); + - Character literals: LOG(')', code); + - Line comments: LOG("msg", x); // comment with ) + - Block comments: LOG("msg" /* comment ) */, x); """ + depth = 0 + in_string = False + in_char = False + in_block_comment = False + escape = False + + for line_index in range(start_line, len(lines)): + line = lines[line_index] + i = open_paren_index if line_index == start_line else 0 + in_line_comment = False + + while i < len(line): + ch = line[i] + nxt = line[i + 1] if i + 1 < len(line) else '' + + # Line comments consume rest of line + if in_line_comment: + break + + # Inside block comment - only look for */ + if in_block_comment: + if ch == '*' and nxt == '/': + in_block_comment = False + i += 2 + continue + i += 1 + continue + + # Inside string literal - handle escapes + if in_string: + if escape: + escape = False + elif ch == '\\': + escape = True + elif ch == '"': + in_string = False + i += 1 + continue + + # Inside character literal - handle escapes + if in_char: + if escape: + escape = False + elif ch == '\\': + escape = True + elif ch == "'": + in_char = False + i += 1 + continue + + # Check for comment starts + if ch == '/' and nxt == '/': + in_line_comment = True + break + if ch == '/' and nxt == '*': + in_block_comment = True + i += 2 + continue + + # Check for literal starts + if ch == '"': + in_string = True + escape = False + i += 1 + continue + if ch == "'": + in_char = True + escape = False + i += 1 + continue + + # Count parentheses depth outside of literals/comments + if ch == '(': + depth += 1 + elif ch == ')': + depth -= 1 + if depth == 0: + return line_index + + i += 1 + + return None + + +def join_log_statements(content: str) -> str: + """Join multi-line LOG macro calls onto a single line using proper C++ tokenization.""" lines = content.split('\n') result = [] i = 0 @@ -26,36 +129,22 @@ def join_log_statements(content: str) -> str: line = lines[i] # Check if this line contains a 
LOG macro start - if re.search(r'\bLOG[A-Z_]*\s*\(', line): - # Start collecting the full statement until we find a semicolon - full_statement = line - start_line = i + 1 # For error reporting - i += 1 - lines_collected = 1 - max_lines = 20 # Safety limit to prevent runaway joins - - # Continue collecting until we find a semicolon (end of statement) - # This is more reliable than parenthesis counting for LOG statements - # because it doesn't get confused by unbalanced parens in string literals - while i < len(lines) and ';' not in full_statement and lines_collected < max_lines: - next_line = lines[i] - full_statement += ' ' + next_line.strip() - i += 1 - lines_collected += 1 - - # Validation: Check if we found a semicolon - if ';' not in full_statement: - print(f"[WARNING] No semicolon found in LOG statement starting at line {start_line}") - print(f"[WARNING] Collected {lines_collected} lines without finding ';'") - print(f"[WARNING] First 100 chars: {full_statement[:100]}...") - # Keep the original multi-line format for safety - result.extend(lines[start_line-1:i]) - else: - # Add the joined statement + open_paren_index = _find_log_macro_open(line) + if open_paren_index is not None: + # Find where the LOG statement ends, respecting C++ syntax + end_index = _find_log_statement_end(lines, i, open_paren_index) + if end_index is not None and end_index > i: + # Multi-line LOG statement found - join it + full_statement = lines[i] + for join_index in range(i + 1, end_index + 1): + full_statement += ' ' + lines[join_index].strip() result.append(full_statement) - else: - result.append(line) - i += 1 + i = end_index + 1 + continue + + # Not a LOG statement or single-line LOG - keep as is + result.append(line) + i += 1 return '\n'.join(result)