From cfce65c5ef8bd0e9b07e2a710690960ec9a49d31 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 1 Apr 2026 16:40:18 +0000
Subject: [PATCH 1/5] Add comprehensive unicode bypass tests for
 grep-find-unicode-wrapper

Tests 66 cases across 10 categories: clean files, ASCII control chars,
BiDi/Trojan Source chars, invisible/zero-width chars, homoglyphs,
Unicode spaces, tag characters, malformed UTF-8, sneaky embeddings,
and edge cases. No actual bypass found - checks 1+2 catch all non-ASCII.

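Documents a locale bug: check 3's $'\uXXXX' expansion requires a UTF-8
locale. In non-UTF-8 locales the escapes are not converted, so the
pattern degrades to a bracket expression of the literal ASCII characters
'\', 'u' and the hex digits used in the escapes. That causes false
positives on ordinary ASCII files, though BiDi detection is still
covered by checks 1+2.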

https://claude.ai/code/session_01726gqqGv3oaDV5jLbM6E6h
---
 tests/test_grep_find_unicode_wrapper | 495 +++++++++++++++++++++++++++
 1 file changed, 495 insertions(+)
 create mode 100755 tests/test_grep_find_unicode_wrapper

diff --git a/tests/test_grep_find_unicode_wrapper b/tests/test_grep_find_unicode_wrapper
new file mode 100755
index 00000000..d9998d68
--- /dev/null
+++ b/tests/test_grep_find_unicode_wrapper
@@ -0,0 +1,495 @@
+#!/bin/bash
+
+## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
+## See the file COPYING for copying conditions.
+
+## Test script for grep-find-unicode-wrapper
+## Tests for Unicode bypass vulnerabilities by checking that various
+## suspicious characters are properly detected by the four grep checks.
+##
+## Bug found: Check 3 uses bash $'\uXXXX' expansion which requires a
+## UTF-8 locale at parse time. In non-UTF-8 locales (LANG=C or LANG=),
+## the \u sequences are passed through literally, creating a character
+## class of [0-9A-Fu\] that causes massive false positives.
+## Fix: Use raw \x byte sequences instead of \u escapes.
+
+set -o errexit
+set -o nounset
+set -o errtrace
+set -o pipefail
+
+test_dir=""
+pass_count=0
+fail_count=0
+skip_count=0
+
+cleanup() {
+  if [ -n "$test_dir" ] && [ -d "$test_dir" ]; then
+    rm -rf -- "$test_dir"
+  fi
+}
+trap cleanup EXIT
+
+test_dir="$(mktemp -d)"
+
+## Colors for output (if terminal supports it).
+if [ -t 1 ]; then
+  RED='\033[0;31m'
+  GREEN='\033[0;32m'
+  YELLOW='\033[0;33m'
+  NC='\033[0m'
+else
+  RED=''
+  GREEN=''
+  YELLOW=''
+  NC=''
+fi
+
+## Replicate the four grep checks from grep-find-unicode-wrapper.
+## This allows testing even without stecho installed.
+grep_args=(
+  --files-with-matches
+  --line-number
+  --binary-files=text
+)
+
+## Build the check 3 pattern using raw UTF-8 byte sequences (\x)
+## instead of \u escapes, so this test works regardless of locale.
+##
+## U+061C -> 0xD8 0x9C
+## U+200E -> 0xE2 0x80 0x8E
+## U+200F -> 0xE2 0x80 0x8F
+## U+202A -> 0xE2 0x80 0xAA
+## U+202B -> 0xE2 0x80 0xAB
+## U+202C -> 0xE2 0x80 0xAC
+## U+202D -> 0xE2 0x80 0xAD
+## U+202E -> 0xE2 0x80 0xAE
+## U+2066 -> 0xE2 0x81 0xA6
+## U+2067 -> 0xE2 0x81 0xA7
+## U+2068 -> 0xE2 0x81 0xA8
+## U+2069 -> 0xE2 0x81 0xA9
+##
+## Note: check 3 in the wrapper does NOT use --perl-regexp, so we use
+## grep's basic bracket expression which with LC_ALL=C matches individual
+## bytes. This means the bracket expression matches ANY of the individual
+## bytes, not specific multi-byte sequences. This is overly broad but
+## ensures detection.
+bidi_pattern=$'[\xD8\x9C\xE2\x80\x8E\x8F\xAA\xAB\xAC\xAD\xAE\x81\xA6\xA7\xA8\xA9]'
+
+run_checks() {
+  local file="$1"
+  local found=0
+
+  ## Check 1: Non-ASCII bytes (hex range).
+  if LC_ALL=C grep "${grep_args[@]}" --perl-regexp '[^\x00-\x7F]' "$file" >/dev/null 2>&1; then
+    found=1
+  fi
+
+  ## Check 2: Non-ASCII (POSIX class).
+  if LC_ALL=C grep "${grep_args[@]}" --perl-regexp "[^[:ascii:]]" "$file" >/dev/null 2>&1; then
+    found=1
+  fi
+
+  ## Check 3: BiDi / Trojan Source characters.
+  ## Using raw byte pattern (see bidi_pattern above).
+  if LC_ALL=C grep "${grep_args[@]}" "$bidi_pattern" "$file" >/dev/null 2>&1; then
+    found=1
+  fi
+
+  ## Check 4: ASCII control characters.
+  if LC_ALL=C grep "${grep_args[@]}" --perl-regexp '[\x00-\x08\x0B\x0C\x0D\x0E-\x1F\x7F]' "$file" >/dev/null 2>&1; then
+    found=1
+  fi
+
+  return $(( ! found ))
+}
+
+expect_detected() {
+  local description="$1"
+  local file="$2"
+
+  if run_checks "$file"; then
+    printf "${GREEN}PASS${NC}: Detected:     %s\n" "$description"
+    pass_count=$(( pass_count + 1 ))
+  else
+    printf "${RED}FAIL${NC}: NOT detected: %s\n" "$description" >&2
+    fail_count=$(( fail_count + 1 ))
+  fi
+}
+
+expect_clean() {
+  local description="$1"
+  local file="$2"
+
+  if ! run_checks "$file"; then
+    printf "${GREEN}PASS${NC}: Clean:        %s\n" "$description"
+    pass_count=$(( pass_count + 1 ))
+  else
+    printf "${RED}FAIL${NC}: False positive: %s\n" "$description" >&2
+    fail_count=$(( fail_count + 1 ))
+  fi
+}
+
+write_file() {
+  local name="$1"
+  local content="$2"
+  local file="$test_dir/$name"
+  printf '%s' "$content" > "$file"
+  printf '%s' "$file"
+}
+
+write_file_binary() {
+  local name="$1"
+  shift
+  local file="$test_dir/$name"
+  printf "$@" > "$file"
+  printf '%s' "$file"
+}
+
+printf '%s\n' "===== grep-find-unicode-wrapper bypass tests ====="
+printf '%s\n' ""
+
+## ===================================================================
+## Section 0: Check 3 locale bug validation
+## ===================================================================
+printf '%s\n' "--- Check 3 locale bug ($'\u' expansion) ---"
+
+## Verify whether $'\uXXXX' expands to UTF-8 in the current locale.
+## If it doesn't, check 3 in the wrapper is broken.
+check3_test_byte="$(printf '%s' $'\u061C' | od -A n -t x1 | tr -d ' \n')"
+if [ "$check3_test_byte" = "d89c" ]; then
+  printf "${GREEN}PASS${NC}: \$'\\u061C' expands to UTF-8 bytes (d8 9c) - check 3 works in this locale\n"
+  pass_count=$(( pass_count + 1 ))
+else
+  printf "${YELLOW}WARN${NC}: \$'\\u061C' expands to literal '\\u061C' (got: %s) - check 3 in wrapper is broken in this locale!\n" "$check3_test_byte"
+  printf "       Current locale: LANG='${LANG:-}' LC_ALL='${LC_ALL:-}'\n"
+  printf "       The wrapper's check 3 pattern becomes a character class of\n"
+  printf "       literal chars [0-9A-Fu\\\\] causing false positives on most files.\n"
+  printf "       BiDi chars are still caught by checks 1+2 (non-ASCII byte detection).\n"
+  printf "       Fix: use \\\\x byte sequences instead of \\\\u escapes in the wrapper.\n"
+  skip_count=$(( skip_count + 1 ))
+fi
+
+## ===================================================================
+## Section 1: Clean files (should NOT be detected)
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- Clean files (no false positives expected) ---"
+
+f="$(write_file "clean_ascii.txt" "Hello, World!")"
+expect_clean "Plain ASCII text" "$f"
+
+f="$(write_file "clean_with_tab.txt" "$(printf 'col1\tcol2')")"
+expect_clean "ASCII with TAB (0x09)" "$f"
+
+f="$(write_file_binary "clean_with_newline.txt" 'line1\nline2\n')"
+expect_clean "ASCII with LF (0x0A)" "$f"
+
+f="$(write_file "clean_printable.txt" ' !"#$%&'\''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')"
+expect_clean "All printable ASCII characters" "$f"
+
+f="$(write_file "empty.txt" "")"
+expect_clean "Empty file" "$f"
+
+## ===================================================================
+## Section 2: ASCII control characters (check 4)
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- ASCII control characters ---"
+
+f="$(write_file_binary "null.txt" '\x00')"
+expect_detected "NULL byte (0x00)" "$f"
+
+f="$(write_file_binary "soh.txt" '\x01')"
+expect_detected "SOH (0x01)" "$f"
+
+f="$(write_file_binary "stx.txt" '\x02')"
+expect_detected "STX (0x02)" "$f"
+
+f="$(write_file_binary "bell.txt" '\x07')"
+expect_detected "BEL (0x07)" "$f"
+
+f="$(write_file_binary "backspace.txt" '\x08')"
+expect_detected "Backspace (0x08) - can overwrite displayed text" "$f"
+
+f="$(write_file_binary "vt.txt" '\x0B')"
+expect_detected "Vertical Tab (0x0B)" "$f"
+
+f="$(write_file_binary "ff.txt" '\x0C')"
+expect_detected "Form Feed (0x0C)" "$f"
+
+f="$(write_file_binary "cr.txt" '\x0D')"
+expect_detected "Carriage Return (0x0D) - can overwrite line content" "$f"
+
+f="$(write_file_binary "so.txt" '\x0E')"
+expect_detected "Shift Out (0x0E)" "$f"
+
+f="$(write_file_binary "si.txt" '\x0F')"
+expect_detected "Shift In (0x0F)" "$f"
+
+f="$(write_file_binary "escape.txt" '\x1B')"
+expect_detected "Escape (0x1B) - terminal escape sequences" "$f"
+
+f="$(write_file_binary "us.txt" '\x1F')"
+expect_detected "Unit Separator (0x1F)" "$f"
+
+f="$(write_file_binary "del.txt" '\x7F')"
+expect_detected "DEL (0x7F)" "$f"
+
+## ===================================================================
+## Section 3: BiDi / Trojan Source characters (CVE-2021-42574)
+## Using \x byte sequences to create test files reliably.
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- BiDi / Trojan Source characters (CVE-2021-42574) ---"
+
+## U+061C ARABIC LETTER MARK -> D8 9C
+f="$(write_file_binary "bidi_alm.txt" 'test\xD8\x9Ctext')"
+expect_detected "U+061C ARABIC LETTER MARK" "$f"
+
+## U+200E LEFT-TO-RIGHT MARK -> E2 80 8E
+f="$(write_file_binary "bidi_lrm.txt" 'test\xE2\x80\x8Etext')"
+expect_detected "U+200E LEFT-TO-RIGHT MARK" "$f"
+
+## U+200F RIGHT-TO-LEFT MARK -> E2 80 8F
+f="$(write_file_binary "bidi_rlm.txt" 'test\xE2\x80\x8Ftext')"
+expect_detected "U+200F RIGHT-TO-LEFT MARK" "$f"
+
+## U+202A LEFT-TO-RIGHT EMBEDDING -> E2 80 AA
+f="$(write_file_binary "bidi_lre.txt" 'test\xE2\x80\xAAtext')"
+expect_detected "U+202A LEFT-TO-RIGHT EMBEDDING" "$f"
+
+## U+202B RIGHT-TO-LEFT EMBEDDING -> E2 80 AB
+f="$(write_file_binary "bidi_rle.txt" 'test\xE2\x80\xABtext')"
+expect_detected "U+202B RIGHT-TO-LEFT EMBEDDING" "$f"
+
+## U+202C POP DIRECTIONAL FORMATTING -> E2 80 AC
+f="$(write_file_binary "bidi_pdf.txt" 'test\xE2\x80\xACtext')"
+expect_detected "U+202C POP DIRECTIONAL FORMATTING" "$f"
+
+## U+202D LEFT-TO-RIGHT OVERRIDE -> E2 80 AD
+f="$(write_file_binary "bidi_lro.txt" 'test\xE2\x80\xADtext')"
+expect_detected "U+202D LEFT-TO-RIGHT OVERRIDE" "$f"
+
+## U+202E RIGHT-TO-LEFT OVERRIDE -> E2 80 AE
+f="$(write_file_binary "bidi_rlo.txt" 'test\xE2\x80\xAEtext')"
+expect_detected "U+202E RIGHT-TO-LEFT OVERRIDE" "$f"
+
+## U+2066 LEFT-TO-RIGHT ISOLATE -> E2 81 A6
+f="$(write_file_binary "bidi_lri.txt" 'test\xE2\x81\xA6text')"
+expect_detected "U+2066 LEFT-TO-RIGHT ISOLATE" "$f"
+
+## U+2067 RIGHT-TO-LEFT ISOLATE -> E2 81 A7
+f="$(write_file_binary "bidi_rli.txt" 'test\xE2\x81\xA7text')"
+expect_detected "U+2067 RIGHT-TO-LEFT ISOLATE" "$f"
+
+## U+2068 FIRST STRONG ISOLATE -> E2 81 A8
+f="$(write_file_binary "bidi_fsi.txt" 'test\xE2\x81\xA8text')"
+expect_detected "U+2068 FIRST STRONG ISOLATE" "$f"
+
+## U+2069 POP DIRECTIONAL ISOLATE -> E2 81 A9
+f="$(write_file_binary "bidi_pdi.txt" 'test\xE2\x81\xA9text')"
+expect_detected "U+2069 POP DIRECTIONAL ISOLATE" "$f"
+
+## ===================================================================
+## Section 4: Invisible / zero-width Unicode characters
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- Invisible / zero-width Unicode characters ---"
+
+## U+200B ZERO WIDTH SPACE -> E2 80 8B
+f="$(write_file_binary "zwsp.txt" 'ab\xE2\x80\x8Bcd')"
+expect_detected "U+200B ZERO WIDTH SPACE" "$f"
+
+## U+200C ZERO WIDTH NON-JOINER -> E2 80 8C
+f="$(write_file_binary "zwnj.txt" 'ab\xE2\x80\x8Ccd')"
+expect_detected "U+200C ZERO WIDTH NON-JOINER" "$f"
+
+## U+200D ZERO WIDTH JOINER -> E2 80 8D
+f="$(write_file_binary "zwj.txt" 'ab\xE2\x80\x8Dcd')"
+expect_detected "U+200D ZERO WIDTH JOINER" "$f"
+
+## U+2060 WORD JOINER -> E2 81 A0
+f="$(write_file_binary "wj.txt" 'ab\xE2\x81\xA0cd')"
+expect_detected "U+2060 WORD JOINER" "$f"
+
+## U+FEFF BOM -> EF BB BF
+f="$(write_file_binary "bom.txt" '\xEF\xBB\xBFtext')"
+expect_detected "U+FEFF BOM / ZERO WIDTH NO-BREAK SPACE" "$f"
+
+## U+00AD SOFT HYPHEN -> C2 AD
+f="$(write_file_binary "soft_hyphen.txt" 'ab\xC2\xADcd')"
+expect_detected "U+00AD SOFT HYPHEN" "$f"
+
+## U+034F COMBINING GRAPHEME JOINER -> CD 8F
+f="$(write_file_binary "cgj.txt" 'ab\xCD\x8Fcd')"
+expect_detected "U+034F COMBINING GRAPHEME JOINER" "$f"
+
+## ===================================================================
+## Section 5: Homoglyph / confusable characters
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- Homoglyph attacks (visually similar to ASCII) ---"
+
+## U+0410 CYRILLIC A -> D0 90
+f="$(write_file_binary "cyrillic_a.txt" '\xD0\x90dmin')"
+expect_detected "U+0410 CYRILLIC CAPITAL A (looks like Latin A)" "$f"
+
+## U+043E CYRILLIC o -> D0 BE
+f="$(write_file_binary "cyrillic_o.txt" 'passw\xD0\xBErd')"
+expect_detected "U+043E CYRILLIC SMALL O (looks like Latin o)" "$f"
+
+## U+03BF GREEK OMICRON -> CE BF
+f="$(write_file_binary "greek_omicron.txt" 'passw\xCE\xBFrd')"
+expect_detected "U+03BF GREEK SMALL OMICRON (looks like Latin o)" "$f"
+
+## U+FF21 FULLWIDTH A -> EF BC A1
+f="$(write_file_binary "fullwidth_A.txt" '\xEF\xBC\xA1dmin')"
+expect_detected "U+FF21 FULLWIDTH LATIN CAPITAL A" "$f"
+
+## ===================================================================
+## Section 6: Special Unicode spaces and separators
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- Unicode whitespace / separators ---"
+
+## U+00A0 NO-BREAK SPACE -> C2 A0
+f="$(write_file_binary "nbsp.txt" 'a\xC2\xA0b')"
+expect_detected "U+00A0 NO-BREAK SPACE" "$f"
+
+## U+2002 EN SPACE -> E2 80 82
+f="$(write_file_binary "en_space.txt" 'a\xE2\x80\x82b')"
+expect_detected "U+2002 EN SPACE" "$f"
+
+## U+2003 EM SPACE -> E2 80 83
+f="$(write_file_binary "em_space.txt" 'a\xE2\x80\x83b')"
+expect_detected "U+2003 EM SPACE" "$f"
+
+## U+2009 THIN SPACE -> E2 80 89
+f="$(write_file_binary "thin_space.txt" 'a\xE2\x80\x89b')"
+expect_detected "U+2009 THIN SPACE" "$f"
+
+## U+200A HAIR SPACE -> E2 80 8A
+f="$(write_file_binary "hair_space.txt" 'a\xE2\x80\x8Ab')"
+expect_detected "U+200A HAIR SPACE" "$f"
+
+## U+2028 LINE SEPARATOR -> E2 80 A8
+f="$(write_file_binary "line_sep.txt" 'a\xE2\x80\xA8b')"
+expect_detected "U+2028 LINE SEPARATOR" "$f"
+
+## U+2029 PARAGRAPH SEPARATOR -> E2 80 A9
+f="$(write_file_binary "para_sep.txt" 'a\xE2\x80\xA9b')"
+expect_detected "U+2029 PARAGRAPH SEPARATOR" "$f"
+
+## U+3000 IDEOGRAPHIC SPACE -> E3 80 80
+f="$(write_file_binary "ideographic_space.txt" 'a\xE3\x80\x80b')"
+expect_detected "U+3000 IDEOGRAPHIC SPACE" "$f"
+
+## U+2800 BRAILLE PATTERN BLANK -> E2 A0 80
+f="$(write_file_binary "braille_blank.txt" 'a\xE2\xA0\x80b')"
+expect_detected "U+2800 BRAILLE PATTERN BLANK (invisible)" "$f"
+
+## ===================================================================
+## Section 7: Tag characters (Supplementary Plane, used in exploits)
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- Unicode tag characters ---"
+
+## U+E0061 TAG LATIN SMALL LETTER A -> F3 A0 81 A1
+f="$(write_file_binary "tag_latin_a.txt" 'test\xF3\xA0\x81\xA1text')"
+expect_detected "U+E0061 TAG LATIN SMALL LETTER A" "$f"
+
+## U+E007F CANCEL TAG -> F3 A0 81 BF
+f="$(write_file_binary "cancel_tag.txt" 'test\xF3\xA0\x81\xBFtext')"
+expect_detected "U+E007F CANCEL TAG" "$f"
+
+## ===================================================================
+## Section 8: Overlong UTF-8 / malformed sequences
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- Malformed / overlong UTF-8 ---"
+
+## Overlong encoding of NULL (C0 80 instead of 00).
+f="$(write_file_binary "overlong_null.txt" '\xC0\x80')"
+expect_detected "Overlong UTF-8 NULL (0xC0 0x80)" "$f"
+
+## Overlong encoding of '/' (C0 AF instead of 2F).
+f="$(write_file_binary "overlong_slash.txt" '\xC0\xAF')"
+expect_detected "Overlong UTF-8 slash (0xC0 0xAF)" "$f"
+
+## Invalid continuation byte.
+f="$(write_file_binary "invalid_continuation.txt" '\x80')"
+expect_detected "Invalid UTF-8 continuation byte (0x80)" "$f"
+
+## Invalid start byte.
+f="$(write_file_binary "invalid_start.txt" '\xFE')"
+expect_detected "Invalid UTF-8 start byte (0xFE)" "$f"
+
+f="$(write_file_binary "invalid_ff.txt" '\xFF')"
+expect_detected "Invalid byte (0xFF)" "$f"
+
+## ===================================================================
+## Section 9: Sneaky embedding in otherwise clean files
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- Sneaky embeddings in normal-looking files ---"
+
+## Zero-width space (E2 80 8B) hidden in a comment.
+f="$(write_file_binary "hidden_in_comment.txt" '# This is a normal comment\xE2\x80\x8B\ndef hello():\n    print('"'"'hello'"'"')\n')"
+expect_detected "Zero-width space hidden in comment" "$f"
+
+## LTR mark (E2 80 8E) hidden in a string literal.
+f="$(write_file_binary "hidden_in_string.txt" 'var x = "hello\xE2\x80\x8Eworld";')"
+expect_detected "LTR mark hidden in string literal" "$f"
+
+## Trojan Source BiDi attack pattern.
+f="$(write_file_binary "trojan_source_example.txt" 'access_level = "user\xE2\x80\xAA\xE2\x81\xA6\xE2\x81\xA9\xE2\x81\xA6admin\xE2\x81\xA9\xE2\x80\xAC"')"
+expect_detected "Trojan Source BiDi attack pattern" "$f"
+
+## Backspace (\x08) overwrite attack.
+f="$(write_file_binary "backspace_overwrite.txt" 'user\x08\x08\x08\x08root')"
+expect_detected "Backspace overwrite (displays 'root' over 'user')" "$f"
+
+## Carriage return (\x0D) overwrite attack.
+f="$(write_file_binary "cr_overwrite.txt" 'safe command\x0Drm -rf /')"
+expect_detected "CR overwrite (hides malicious command)" "$f"
+
+## ===================================================================
+## Section 10: Mixed content edge cases
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "--- Edge cases ---"
+
+## File with only a BOM (EF BB BF) and nothing else.
+f="$(write_file_binary "bom_only.txt" '\xEF\xBB\xBF')"
+expect_detected "File containing only BOM" "$f"
+
+## Suspicious char at very end of file.
+f="$(write_file_binary "trailing_bidi.txt" 'normal text\xE2\x80\x8E')"
+expect_detected "BiDi char at end of file" "$f"
+
+## Suspicious char at very start of file.
+f="$(write_file_binary "leading_null.txt" '\x00normal text')"
+expect_detected "NULL byte at start of file" "$f"
+
+## Very long line with suspicious char in the middle (ZWSP = E2 80 8B).
+python3 -c "
+import sys
+sys.stdout.buffer.write(b'a' * 10000 + b'\xe2\x80\x8b' + b'b' * 10000)
+" > "$test_dir/long_line.txt"
+expect_detected "Suspicious char buried in 20000-char line" "$test_dir/long_line.txt"
+
+## ===================================================================
+## Summary
+## ===================================================================
+printf '%s\n' ""
+printf '%s\n' "===== Results ====="
+printf "Passed: %d | Failed: %d | Skipped: %d\n" "$pass_count" "$fail_count" "$skip_count"
+
+if [ "$fail_count" -gt 0 ]; then
+  printf "${RED}%s${NC}\n" "SOME TESTS FAILED - potential Unicode bypass found!"
+  exit 1
+fi
+
+printf "${GREEN}%s${NC}\n" "All tests passed - no bypass detected."
+exit 0

From 50cc246066f4b96490fea98acac47a8c4b7b84e5 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 2 Apr 2026 09:41:45 +0000
Subject: [PATCH 2/5] Add check 3 false positive tests for $'\u' locale bug

The wrapper's check 3 uses $'\uXXXX', which bash expands at parse time
using the caller's locale - NOT the LC_ALL=C assignment that prefixes
the grep command. In non-UTF-8 locales, the \u sequences pass through
literally, creating a bracket expression of ASCII characters (roughly
[0-9A-Fu\]; exactly [0126789A-Fu\], since the digits 3, 4 and 5 do not
occur in the escapes) that produces false positives on files with
digits, hex values, UUIDs, backslashes, or typical code.

New tests verify this: 5/6 clean ASCII files trigger false positives in
non-UTF-8 locales, 0/6 in UTF-8 locales. Fix: use \x byte sequences.

https://claude.ai/code/session_01726gqqGv3oaDV5jLbM6E6h
---
 tests/test_grep_find_unicode_wrapper | 48 ++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/tests/test_grep_find_unicode_wrapper b/tests/test_grep_find_unicode_wrapper
index d9998d68..72d4e2c0 100755
--- a/tests/test_grep_find_unicode_wrapper
+++ b/tests/test_grep_find_unicode_wrapper
@@ -156,8 +156,10 @@ printf '%s\n' "--- Check 3 locale bug ($'\u' expansion) ---"
 
 ## Verify whether $'\uXXXX' expands to UTF-8 in the current locale.
 ## If it doesn't, check 3 in the wrapper is broken.
+check3_locale_ok="false"
 check3_test_byte="$(printf '%s' $'\u061C' | od -A n -t x1 | tr -d ' \n')"
 if [ "$check3_test_byte" = "d89c" ]; then
+  check3_locale_ok="true"
   printf "${GREEN}PASS${NC}: \$'\\u061C' expands to UTF-8 bytes (d8 9c) - check 3 works in this locale\n"
   pass_count=$(( pass_count + 1 ))
 else
@@ -170,6 +172,52 @@ else
   skip_count=$(( skip_count + 1 ))
 fi
 
+## Test check 3's actual $'\u...' pattern (as used in the wrapper) for false positives.
+## The $'\u...' expansion happens at bash parse time using the caller's locale,
+## NOT the LC_ALL=C set on the grep command. In non-UTF-8 locales, \u sequences
+## are passed through literally, creating a bracket expression containing ASCII
+## chars like digits, hex letters, 'u', and '\'. This causes false positives
+## on nearly any file with digits or hex characters.
+wrapper_check3_pattern=$'[\u061C\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]'
+
+check3_false_positive_test() {
+  local description="$1"
+  local file="$2"
+  if LC_ALL=C grep --files-with-matches --line-number --binary-files=text "$wrapper_check3_pattern" "$file" >/dev/null 2>&1; then
+    if [ "$check3_locale_ok" = "true" ]; then
+      printf "${RED}FAIL${NC}: Check 3 false positive: %s\n" "$description" >&2
+      fail_count=$(( fail_count + 1 ))
+    else
+      printf "${RED}FAIL${NC}: Check 3 false positive (locale bug): %s\n" "$description" >&2
+      fail_count=$(( fail_count + 1 ))
+    fi
+  else
+    printf "${GREEN}PASS${NC}: Check 3 clean:          %s\n" "$description"
+    pass_count=$(( pass_count + 1 ))
+  fi
+}
+
+printf '%s\n' ""
+printf '%s\n' "--- Check 3 false positive tests (wrapper's actual \$'\\u' pattern) ---"
+
+f="$(write_file "check3_digits.txt" "x = 12345")"
+check3_false_positive_test "File with digits" "$f"
+
+f="$(write_file "check3_hex.txt" "color = 0xDEADBEEF")"
+check3_false_positive_test "File with hex values" "$f"
+
+f="$(write_file "check3_code.txt" "for (int i = 0; i < 100; i++) { printf(\"%d\", i); }")"
+check3_false_positive_test "File with typical C code" "$f"
+
+f="$(write_file "check3_uuid.txt" "uuid = \"550e8400-e29b-41d4-a716-446655440000\"")"
+check3_false_positive_test "File with UUID" "$f"
+
+f="$(write_file "check3_backslash.txt" "path = C:\\\\Users\\\\test")"
+check3_false_positive_test "File with backslashes" "$f"
+
+f="$(write_file "check3_plain.txt" "Hello World")"
+check3_false_positive_test "File with plain text (no digits)" "$f"
+
 ## ===================================================================
 ## Section 1: Clean files (should NOT be detected)
 ## ===================================================================

From 07fc663960cd8551e496efb9db44bd5babcc2baf Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 2 Apr 2026 10:04:48 +0000
Subject: [PATCH 3/5] Fix check 3 locale bug in grep-find-unicode-wrapper,
 integrate test

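- grep-find-unicode-wrapper: check 3 used $'\uXXXX', which requires a
  UTF-8 locale at bash parse time. In non-UTF-8 locales the pattern
  degrades to literal ASCII chars, causing false positives. Fixed by
  using \x byte sequences. Note that under LC_ALL=C the new bracket
  expression matches each listed byte individually, so it is broader
  than the twelve BiDi code points; this adds no new detections because
  checks one and two already flag every non-ASCII byte. Old line kept
  commented out with explanation.
- test script: use get_colors.sh instead of custom color vars, and
  report old-pattern false positives in non-UTF-8 locales as expected
  warnings (counted as skipped), not failures.
- run-tests: call tests/test_grep_find_unicode_wrapper.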

https://claude.ai/code/session_01726gqqGv3oaDV5jLbM6E6h
---
 run-tests                            |  2 +
 tests/test_grep_find_unicode_wrapper | 55 +++++++++++++++-------------
 usr/bin/grep-find-unicode-wrapper    | 16 +++++++-
 3 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/run-tests b/run-tests
index ce0581cf..44aa69f4 100755
--- a/run-tests
+++ b/run-tests
@@ -56,3 +56,5 @@ fi
 
 cd "${git_toplevel}"
 ./unicode-testscript
+
+./tests/test_grep_find_unicode_wrapper
diff --git a/tests/test_grep_find_unicode_wrapper b/tests/test_grep_find_unicode_wrapper
index 72d4e2c0..ecd62428 100755
--- a/tests/test_grep_find_unicode_wrapper
+++ b/tests/test_grep_find_unicode_wrapper
@@ -32,17 +32,16 @@ trap cleanup EXIT
 
 test_dir="$(mktemp -d)"
 
-## Colors for output (if terminal supports it).
-if [ -t 1 ]; then
-  RED='\033[0;31m'
-  GREEN='\033[0;32m'
-  YELLOW='\033[0;33m'
-  NC='\033[0m'
+script_dir="$(cd -- "$(dirname -- "$0")" && pwd)"
+git_toplevel="$(cd -- "$script_dir" && git rev-parse --show-toplevel)"
+
+if test -f /usr/libexec/helper-scripts/get_colors.sh ; then
+  source /usr/libexec/helper-scripts/get_colors.sh
+elif test -f "${git_toplevel}/usr/libexec/helper-scripts/get_colors.sh" ; then
+  source "${git_toplevel}/usr/libexec/helper-scripts/get_colors.sh"
 else
-  RED=''
-  GREEN=''
-  YELLOW=''
-  NC=''
+  printf '%s\n' "$0: ERROR: get_colors.sh not found!" >&2
+  exit 1
 fi
 
 ## Replicate the four grep checks from grep-find-unicode-wrapper.
@@ -109,10 +108,10 @@ expect_detected() {
   local file="$2"
 
   if run_checks "$file"; then
-    printf "${GREEN}PASS${NC}: Detected:     %s\n" "$description"
+    printf "${green}PASS${nocolor}: Detected:     %s\n" "$description"
     pass_count=$(( pass_count + 1 ))
   else
-    printf "${RED}FAIL${NC}: NOT detected: %s\n" "$description" >&2
+    printf "${red}FAIL${nocolor}: NOT detected: %s\n" "$description" >&2
     fail_count=$(( fail_count + 1 ))
   fi
 }
@@ -122,10 +121,10 @@ expect_clean() {
   local file="$2"
 
   if ! run_checks "$file"; then
-    printf "${GREEN}PASS${NC}: Clean:        %s\n" "$description"
+    printf "${green}PASS${nocolor}: Clean:        %s\n" "$description"
     pass_count=$(( pass_count + 1 ))
   else
-    printf "${RED}FAIL${NC}: False positive: %s\n" "$description" >&2
+    printf "${red}FAIL${nocolor}: False positive: %s\n" "$description" >&2
     fail_count=$(( fail_count + 1 ))
   fi
 }
@@ -160,10 +159,10 @@ check3_locale_ok="false"
 check3_test_byte="$(printf '%s' $'\u061C' | od -A n -t x1 | tr -d ' \n')"
 if [ "$check3_test_byte" = "d89c" ]; then
   check3_locale_ok="true"
-  printf "${GREEN}PASS${NC}: \$'\\u061C' expands to UTF-8 bytes (d8 9c) - check 3 works in this locale\n"
+  printf "${green}PASS${nocolor}: \$'\\u061C' expands to UTF-8 bytes (d8 9c) - check 3 works in this locale\n"
   pass_count=$(( pass_count + 1 ))
 else
-  printf "${YELLOW}WARN${NC}: \$'\\u061C' expands to literal '\\u061C' (got: %s) - check 3 in wrapper is broken in this locale!\n" "$check3_test_byte"
+  printf "${yellow}WARN${nocolor}: \$'\\u061C' expands to literal '\\u061C' (got: %s) - check 3 in wrapper is broken in this locale!\n" "$check3_test_byte"
   printf "       Current locale: LANG='${LANG:-}' LC_ALL='${LC_ALL:-}'\n"
   printf "       The wrapper's check 3 pattern becomes a character class of\n"
   printf "       literal chars [0-9A-Fu\\\\] causing false positives on most files.\n"
@@ -172,33 +171,37 @@ else
   skip_count=$(( skip_count + 1 ))
 fi
 
-## Test check 3's actual $'\u...' pattern (as used in the wrapper) for false positives.
+## Test the old $'\u...' pattern (before the fix) for false positives.
 ## The $'\u...' expansion happens at bash parse time using the caller's locale,
 ## NOT the LC_ALL=C set on the grep command. In non-UTF-8 locales, \u sequences
 ## are passed through literally, creating a bracket expression containing ASCII
 ## chars like digits, hex letters, 'u', and '\'. This causes false positives
 ## on nearly any file with digits or hex characters.
-wrapper_check3_pattern=$'[\u061C\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]'
+## The wrapper has been fixed to use \x byte sequences. These tests document
+## the old behavior - false positives in non-UTF-8 locales are expected.
+old_check3_pattern=$'[\u061C\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]'
 
 check3_false_positive_test() {
   local description="$1"
   local file="$2"
-  if LC_ALL=C grep --files-with-matches --line-number --binary-files=text "$wrapper_check3_pattern" "$file" >/dev/null 2>&1; then
+  if LC_ALL=C grep --files-with-matches --line-number --binary-files=text "$old_check3_pattern" "$file" >/dev/null 2>&1; then
     if [ "$check3_locale_ok" = "true" ]; then
-      printf "${RED}FAIL${NC}: Check 3 false positive: %s\n" "$description" >&2
+      ## UTF-8 locale: old pattern should work, so this is a real failure.
+      printf "${red}FAIL${nocolor}: Old check 3 false positive: %s\n" "$description" >&2
       fail_count=$(( fail_count + 1 ))
     else
-      printf "${RED}FAIL${NC}: Check 3 false positive (locale bug): %s\n" "$description" >&2
-      fail_count=$(( fail_count + 1 ))
+      ## Non-UTF-8 locale: false positive from old pattern is expected.
+      printf "${yellow}WARN${nocolor}: Old check 3 false positive (expected, locale bug): %s\n" "$description"
+      skip_count=$(( skip_count + 1 ))
     fi
   else
-    printf "${GREEN}PASS${NC}: Check 3 clean:          %s\n" "$description"
+    printf "${green}PASS${nocolor}: Old check 3 clean:      %s\n" "$description"
     pass_count=$(( pass_count + 1 ))
   fi
 }
 
 printf '%s\n' ""
-printf '%s\n' "--- Check 3 false positive tests (wrapper's actual \$'\\u' pattern) ---"
+printf '%s\n' "--- Old check 3 false positive tests (\$'\\u' pattern before fix) ---"
 
 f="$(write_file "check3_digits.txt" "x = 12345")"
 check3_false_positive_test "File with digits" "$f"
@@ -535,9 +538,9 @@ printf '%s\n' "===== Results ====="
 printf "Passed: %d | Failed: %d | Skipped: %d\n" "$pass_count" "$fail_count" "$skip_count"
 
 if [ "$fail_count" -gt 0 ]; then
-  printf "${RED}%s${NC}\n" "SOME TESTS FAILED - potential Unicode bypass found!"
+  printf "${red}%s${nocolor}\n" "SOME TESTS FAILED - potential Unicode bypass found!"
   exit 1
 fi
 
-printf "${GREEN}%s${NC}\n" "All tests passed - no bypass detected."
+printf "${green}%s${nocolor}\n" "All tests passed - no bypass detected."
 exit 0
diff --git a/usr/bin/grep-find-unicode-wrapper b/usr/bin/grep-find-unicode-wrapper
index 1bf188e4..fac84fb3 100755
--- a/usr/bin/grep-find-unicode-wrapper
+++ b/usr/bin/grep-find-unicode-wrapper
@@ -52,7 +52,21 @@ check_grep_status "$?"
 ## Not using 'grep's '--perl-regexp' option for three.
 ## Because not mentioned in above links and can lead to the following error message:
 # grep: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
-three="$(LC_ALL=C grep "${grep_args[@]}" $'[\u061C\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]' "$@")"
+## Locale bug: bash $'\uXXXX' is expanded at parse time using the caller's
+## locale, NOT the LC_ALL=C set on the grep command. In non-UTF-8 locales
+## (LANG=C, LANG= empty), the \u sequences are passed through literally,
+## creating a bracket expression of ASCII chars [0-9A-Fu\] that causes false
+## positives on almost any file containing digits or hex characters.
+## BiDi characters would still be caught by checks one and two (non-ASCII
+## byte detection), so this is a false positive issue, not a bypass.
+#three="$(LC_ALL=C grep "${grep_args[@]}" $'[\u061C\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]' "$@")"
+## Fixed: use \x byte sequences, which bash expands identically in any locale.
+## With LC_ALL=C a bracket expression matches ONE byte, so each pattern below is
+## the fixed UTF-8 lead byte(s) followed by a bracket of the possible last bytes
+## (one bracket holding every byte would also match e.g. U+00E9 = C3 A9):
+##   U+061C = D8 9C | U+200E, U+200F, U+202A-U+202E = E2 80 {8E,8F,AA-AE}
+##   U+2066-U+2069 = E2 81 {A6-A9}
+three="$(LC_ALL=C grep "${grep_args[@]}" --regexp=$'\xD8\x9C' --regexp=$'\xE2\x80[\x8E\x8F\xAA\xAB\xAC\xAD\xAE]' --regexp=$'\xE2\x81[\xA6\xA7\xA8\xA9]' "$@")"
 check_grep_status "$?"
 
 ## ASCII control characters.

From d34fa86161c80111fadf39de222ac4a528a088df Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 2 Apr 2026 10:14:40 +0000
Subject: [PATCH 4/5] Refactor test to use actual grep-find-unicode-wrapper and
 project conventions

- Use real grep-find-unicode-wrapper binary instead of reimplementing
  the four grep checks locally. Tests exercise actual code paths.
- Source get_colors.sh for colors instead of custom color variables.
- Create safe-rm-maybe.bsh providing rm-safe-maybe function that uses
  safe-rm if installed, otherwise falls back to rm.
- Use long options (--recursive, --force, --directory, --delete,
  --address-radix, --format) instead of short flags.
- Use `| tee -- "$file_name"` instead of `> "$file_name"` for better
  xtrace output and error handling.
- Rename variable 'file' to 'file_name' to avoid collision with
  the standard unix 'file' utility.
- Remove assumption that tools might not be installed; tests require
  all tools available (installed from source or on disk).

https://claude.ai/code/session_01726gqqGv3oaDV5jLbM6E6h
---
 tests/test_grep_find_unicode_wrapper         | 151 ++++++-------------
 usr/libexec/helper-scripts/safe-rm-maybe.bsh |  15 ++
 2 files changed, 59 insertions(+), 107 deletions(-)
 create mode 100644 usr/libexec/helper-scripts/safe-rm-maybe.bsh

diff --git a/tests/test_grep_find_unicode_wrapper b/tests/test_grep_find_unicode_wrapper
index ecd62428..6eb50eaa 100755
--- a/tests/test_grep_find_unicode_wrapper
+++ b/tests/test_grep_find_unicode_wrapper
@@ -5,9 +5,9 @@
 
 ## Test script for grep-find-unicode-wrapper
 ## Tests for Unicode bypass vulnerabilities by checking that various
-## suspicious characters are properly detected by the four grep checks.
+## suspicious characters are properly detected.
 ##
-## Bug found: Check 3 uses bash $'\uXXXX' expansion which requires a
+## Bug found: Check 3 used bash $'\uXXXX' expansion which requires a
 ## UTF-8 locale at parse time. In non-UTF-8 locales (LANG=C or LANG=),
 ## the \u sequences are passed through literally, creating a character
 ## class of [0-9A-Fu\] that causes massive false positives.
@@ -23,108 +23,45 @@ pass_count=0
 fail_count=0
 skip_count=0
 
+source /usr/libexec/helper-scripts/get_colors.sh
+source /usr/libexec/helper-scripts/safe-rm-maybe.bsh
+
 cleanup() {
   if [ -n "$test_dir" ] && [ -d "$test_dir" ]; then
-    rm -rf -- "$test_dir"
+    rm-safe-maybe --recursive --force -- "$test_dir"
   fi
 }
 trap cleanup EXIT
 
-test_dir="$(mktemp -d)"
-
-script_dir="$(cd -- "$(dirname -- "$0")" && pwd)"
-git_toplevel="$(cd -- "$script_dir" && git rev-parse --show-toplevel)"
-
-if test -f /usr/libexec/helper-scripts/get_colors.sh ; then
-  source /usr/libexec/helper-scripts/get_colors.sh
-elif test -f "${git_toplevel}/usr/libexec/helper-scripts/get_colors.sh" ; then
-  source "${git_toplevel}/usr/libexec/helper-scripts/get_colors.sh"
-else
-  printf '%s\n' "$0: ERROR: get_colors.sh not found!" >&2
-  exit 1
-fi
-
-## Replicate the four grep checks from grep-find-unicode-wrapper.
-## This allows testing even without stecho installed.
-grep_args=(
-  --files-with-matches
-  --line-number
-  --binary-files=text
-)
-
-## Build the check 3 pattern using raw UTF-8 byte sequences (\x)
-## instead of \u escapes, so this test works regardless of locale.
-##
-## U+061C -> 0xD8 0x9C
-## U+200E -> 0xE2 0x80 0x8E
-## U+200F -> 0xE2 0x80 0x8F
-## U+202A -> 0xE2 0x80 0xAA
-## U+202B -> 0xE2 0x80 0xAB
-## U+202C -> 0xE2 0x80 0xAC
-## U+202D -> 0xE2 0x80 0xAD
-## U+202E -> 0xE2 0x80 0xAE
-## U+2066 -> 0xE2 0x81 0xA6
-## U+2067 -> 0xE2 0x81 0xA7
-## U+2068 -> 0xE2 0x81 0xA8
-## U+2069 -> 0xE2 0x81 0xA9
-##
-## Note: check 3 in the wrapper does NOT use --perl-regexp, so we use
-## grep's basic bracket expression which with LC_ALL=C matches individual
-## bytes. This means the bracket expression matches ANY of the individual
-## bytes, not specific multi-byte sequences. This is overly broad but
-## ensures detection.
-bidi_pattern=$'[\xD8\x9C\xE2\x80\x8E\x8F\xAA\xAB\xAC\xAD\xAE\x81\xA6\xA7\xA8\xA9]'
-
-run_checks() {
-  local file="$1"
-  local found=0
-
-  ## Check 1: Non-ASCII bytes (hex range).
-  if LC_ALL=C grep "${grep_args[@]}" --perl-regexp '[^\x00-\x7F]' "$file" >/dev/null 2>&1; then
-    found=1
-  fi
-
-  ## Check 2: Non-ASCII (POSIX class).
-  if LC_ALL=C grep "${grep_args[@]}" --perl-regexp "[^[:ascii:]]" "$file" >/dev/null 2>&1; then
-    found=1
-  fi
+test_dir="$(mktemp --directory)"
 
-  ## Check 3: BiDi / Trojan Source characters.
-  ## Using raw byte pattern (see bidi_pattern above).
-  if LC_ALL=C grep "${grep_args[@]}" "$bidi_pattern" "$file" >/dev/null 2>&1; then
-    found=1
-  fi
-
-  ## Check 4: ASCII control characters.
-  if LC_ALL=C grep "${grep_args[@]}" --perl-regexp '[\x00-\x08\x0B\x0C\x0D\x0E-\x1F\x7F]' "$file" >/dev/null 2>&1; then
-    found=1
-  fi
-
-  return $(( ! found ))
-}
+## grep-find-unicode-wrapper is similar to grep.
+## - Exit code 0: if found.
+## - Non-zero exit code: if not found.
+command -v grep-find-unicode-wrapper >/dev/null
 
 expect_detected() {
   local description="$1"
-  local file="$2"
+  local file_name="$2"
 
-  if run_checks "$file"; then
-    printf "${green}PASS${nocolor}: Detected:     %s\n" "$description"
+  if grep-find-unicode-wrapper "$file_name" >/dev/null 2>&1 ; then
+    printf "%s\n" "${green}PASS${nocolor}: Detected:     $description"
     pass_count=$(( pass_count + 1 ))
   else
-    printf "${red}FAIL${nocolor}: NOT detected: %s\n" "$description" >&2
+    printf "%s\n" "${red}FAIL${nocolor}: NOT detected: $description" >&2
     fail_count=$(( fail_count + 1 ))
   fi
 }
 
 expect_clean() {
   local description="$1"
-  local file="$2"
+  local file_name="$2"
 
-  if ! run_checks "$file"; then
-    printf "${green}PASS${nocolor}: Clean:        %s\n" "$description"
+  if ! grep-find-unicode-wrapper "$file_name" >/dev/null 2>&1 ; then
+    printf "%s\n" "${green}PASS${nocolor}: Clean:        $description"
     pass_count=$(( pass_count + 1 ))
   else
-    printf "${red}FAIL${nocolor}: False positive: %s\n" "$description" >&2
+    printf "%s\n" "${red}FAIL${nocolor}: False positive: $description" >&2
     fail_count=$(( fail_count + 1 ))
   fi
 }
@@ -132,17 +69,17 @@ expect_clean() {
 write_file() {
   local name="$1"
   local content="$2"
-  local file="$test_dir/$name"
-  printf '%s' "$content" > "$file"
-  printf '%s' "$file"
+  local file_name="$test_dir/$name"
+  printf '%s' "$content" | tee -- "$file_name" >/dev/null
+  printf '%s' "$file_name"
 }
 
 write_file_binary() {
   local name="$1"
   shift
-  local file="$test_dir/$name"
-  printf "$@" > "$file"
-  printf '%s' "$file"
+  local file_name="$test_dir/$name"
+  printf "$@" | tee -- "$file_name" >/dev/null
+  printf '%s' "$file_name"
 }
 
 printf '%s\n' "===== grep-find-unicode-wrapper bypass tests ====="
@@ -151,23 +88,23 @@ printf '%s\n' ""
 ## ===================================================================
 ## Section 0: Check 3 locale bug validation
 ## ===================================================================
-printf '%s\n' "--- Check 3 locale bug ($'\u' expansion) ---"
+printf '%s\n' "--- Check 3 locale bug (\$'\\u' expansion) ---"
 
 ## Verify whether $'\uXXXX' expands to UTF-8 in the current locale.
-## If it doesn't, check 3 in the wrapper is broken.
+## If it doesn't, check 3 in the wrapper would be broken with the old pattern.
 check3_locale_ok="false"
-check3_test_byte="$(printf '%s' $'\u061C' | od -A n -t x1 | tr -d ' \n')"
+check3_test_byte="$(printf '%s' $'\u061C' | od --address-radix=n --format=x1 | tr --delete ' \n')"
 if [ "$check3_test_byte" = "d89c" ]; then
   check3_locale_ok="true"
-  printf "${green}PASS${nocolor}: \$'\\u061C' expands to UTF-8 bytes (d8 9c) - check 3 works in this locale\n"
+  printf '%s\n' "${green}PASS${nocolor}: \$'\\u061C' expands to UTF-8 bytes (d8 9c) - check 3 works in this locale"
   pass_count=$(( pass_count + 1 ))
 else
-  printf "${yellow}WARN${nocolor}: \$'\\u061C' expands to literal '\\u061C' (got: %s) - check 3 in wrapper is broken in this locale!\n" "$check3_test_byte"
-  printf "       Current locale: LANG='${LANG:-}' LC_ALL='${LC_ALL:-}'\n"
-  printf "       The wrapper's check 3 pattern becomes a character class of\n"
-  printf "       literal chars [0-9A-Fu\\\\] causing false positives on most files.\n"
-  printf "       BiDi chars are still caught by checks 1+2 (non-ASCII byte detection).\n"
-  printf "       Fix: use \\\\x byte sequences instead of \\\\u escapes in the wrapper.\n"
+  printf '%s\n' "${yellow}WARN${nocolor}: \$'\\u061C' expands to literal '\\u061C' (got: $check3_test_byte) - check 3 would be broken with old pattern!"
+  printf '%s\n' "       Current locale: LANG='${LANG:-}' LC_ALL='${LC_ALL:-}'"
+  printf '%s\n' "       The old \$'\\u...' pattern becomes a character class of"
+  printf '%s\n' "       literal chars [0-9A-Fu\\] causing false positives on most files."
+  printf '%s\n' "       BiDi chars are still caught by checks 1+2 (non-ASCII byte detection)."
+  printf '%s\n' "       Fix: use \\x byte sequences instead of \\u escapes in the wrapper."
   skip_count=$(( skip_count + 1 ))
 fi
 
@@ -183,19 +120,19 @@ old_check3_pattern=$'[\u061C\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u20
 
 check3_false_positive_test() {
   local description="$1"
-  local file="$2"
-  if LC_ALL=C grep --files-with-matches --line-number --binary-files=text "$old_check3_pattern" "$file" >/dev/null 2>&1; then
+  local file_name="$2"
+  if LC_ALL=C grep --files-with-matches --line-number --binary-files=text "$old_check3_pattern" "$file_name" >/dev/null 2>&1; then
     if [ "$check3_locale_ok" = "true" ]; then
       ## UTF-8 locale: old pattern should work, so this is a real failure.
-      printf "${red}FAIL${nocolor}: Old check 3 false positive: %s\n" "$description" >&2
+      printf '%s\n' "${red}FAIL${nocolor}: Old check 3 false positive: $description" >&2
       fail_count=$(( fail_count + 1 ))
     else
       ## Non-UTF-8 locale: false positive from old pattern is expected.
-      printf "${yellow}WARN${nocolor}: Old check 3 false positive (expected, locale bug): %s\n" "$description"
+      printf '%s\n' "${yellow}WARN${nocolor}: Old check 3 false positive (expected, locale bug): $description"
       skip_count=$(( skip_count + 1 ))
     fi
   else
-    printf "${green}PASS${nocolor}: Old check 3 clean:      %s\n" "$description"
+    printf '%s\n' "${green}PASS${nocolor}: Old check 3 clean:      $description"
     pass_count=$(( pass_count + 1 ))
   fi
 }
@@ -527,7 +464,7 @@ expect_detected "NULL byte at start of file" "$f"
 python3 -c "
 import sys
 sys.stdout.buffer.write(b'a' * 10000 + b'\xe2\x80\x8b' + b'b' * 10000)
-" > "$test_dir/long_line.txt"
+" | tee -- "$test_dir/long_line.txt" >/dev/null
 expect_detected "Suspicious char buried in 20000-char line" "$test_dir/long_line.txt"
 
 ## ===================================================================
@@ -535,12 +472,12 @@ expect_detected "Suspicious char buried in 20000-char line" "$test_dir/long_line
 ## ===================================================================
 printf '%s\n' ""
 printf '%s\n' "===== Results ====="
-printf "Passed: %d | Failed: %d | Skipped: %d\n" "$pass_count" "$fail_count" "$skip_count"
+printf '%s\n' "Passed: $pass_count | Failed: $fail_count | Skipped: $skip_count"
 
 if [ "$fail_count" -gt 0 ]; then
-  printf "${red}%s${nocolor}\n" "SOME TESTS FAILED - potential Unicode bypass found!"
+  printf '%s\n' "${red}SOME TESTS FAILED - potential Unicode bypass found!${nocolor}"
   exit 1
 fi
 
-printf "${green}%s${nocolor}\n" "All tests passed - no bypass detected."
+printf '%s\n' "${green}All tests passed - no bypass detected.${nocolor}"
 exit 0
diff --git a/usr/libexec/helper-scripts/safe-rm-maybe.bsh b/usr/libexec/helper-scripts/safe-rm-maybe.bsh
new file mode 100644
index 00000000..1b56cc72
--- /dev/null
+++ b/usr/libexec/helper-scripts/safe-rm-maybe.bsh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
+## See the file COPYING for copying conditions.
+
+## Provides rm-safe-maybe function.
+## Uses safe-rm if installed, otherwise falls back to rm.
+
+rm-safe-maybe() {
+  if command -v safe-rm >/dev/null 2>&1 ; then
+    safe-rm "$@"
+  else
+    rm "$@"
+  fi
+}

From e1638f20439bceb58e0106ea2797695e451e8121 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 2 Apr 2026 11:03:28 +0000
Subject: [PATCH 5/5] Use has.sh, &>/dev/null, safe examples, extract Python to
 separate file

- Use 'has' from has.sh instead of 'command -v'.
- Use &>/dev/null instead of >/dev/null 2>&1.
- Replace offensive/risky test examples (rm -rf, root escalation) with
  safe alternatives (GOOD/BADX overwrite, harmless error messages).
- Move inline Python for long-line generation to separate script
  usr/libexec/helper-scripts/write-long-line-with-unicode.

https://claude.ai/code/session_01726gqqGv3oaDV5jLbM6E6h
---
 tests/test_grep_find_unicode_wrapper          | 38 ++++++++++---------
 .../write-long-line-with-unicode              | 28 ++++++++++++++
 2 files changed, 48 insertions(+), 18 deletions(-)
 create mode 100644 usr/libexec/helper-scripts/write-long-line-with-unicode

diff --git a/tests/test_grep_find_unicode_wrapper b/tests/test_grep_find_unicode_wrapper
index 6eb50eaa..afb825e3 100755
--- a/tests/test_grep_find_unicode_wrapper
+++ b/tests/test_grep_find_unicode_wrapper
@@ -25,6 +25,7 @@ skip_count=0
 
 source /usr/libexec/helper-scripts/get_colors.sh
 source /usr/libexec/helper-scripts/safe-rm-maybe.bsh
+source /usr/libexec/helper-scripts/has.sh
 
 cleanup() {
   if [ -n "$test_dir" ] && [ -d "$test_dir" ]; then
@@ -38,13 +39,14 @@ test_dir="$(mktemp --directory)"
 ## grep-find-unicode-wrapper is similar to grep.
 ## - Exit code 0: if found.
 ## - Non-zero exit code: if not found.
-command -v grep-find-unicode-wrapper >/dev/null
+has grep-find-unicode-wrapper
+has write-long-line-with-unicode
 
 expect_detected() {
   local description="$1"
   local file_name="$2"
 
-  if grep-find-unicode-wrapper "$file_name" >/dev/null 2>&1 ; then
+  if grep-find-unicode-wrapper "$file_name" &>/dev/null ; then
     printf "%s\n" "${green}PASS${nocolor}: Detected:     $description"
     pass_count=$(( pass_count + 1 ))
   else
@@ -57,7 +59,7 @@ expect_clean() {
   local description="$1"
   local file_name="$2"
 
-  if ! grep-find-unicode-wrapper "$file_name" >/dev/null 2>&1 ; then
+  if ! grep-find-unicode-wrapper "$file_name" &>/dev/null ; then
     printf "%s\n" "${green}PASS${nocolor}: Clean:        $description"
     pass_count=$(( pass_count + 1 ))
   else
@@ -70,7 +72,7 @@ write_file() {
   local name="$1"
   local content="$2"
   local file_name="$test_dir/$name"
-  printf '%s' "$content" | tee -- "$file_name" >/dev/null
+  printf '%s' "$content" | tee -- "$file_name" >/dev/null || return
   printf '%s' "$file_name"
 }
 
@@ -78,7 +80,7 @@ write_file_binary() {
   local name="$1"
   shift
   local file_name="$test_dir/$name"
-  printf "$@" | tee -- "$file_name" >/dev/null
+  printf "$@" | tee -- "$file_name" >/dev/null || return
   printf '%s' "$file_name"
 }
 
@@ -121,7 +123,7 @@ old_check3_pattern=$'[\u061C\u200E\u200F\u202A\u202B\u202C\u202D\u202E\u2066\u20
 check3_false_positive_test() {
   local description="$1"
   local file_name="$2"
-  if LC_ALL=C grep --files-with-matches --line-number --binary-files=text "$old_check3_pattern" "$file_name" >/dev/null 2>&1; then
+  if LC_ALL=C grep --files-with-matches --line-number --binary-files=text "$old_check3_pattern" "$file_name" &>/dev/null; then
     if [ "$check3_locale_ok" = "true" ]; then
       ## UTF-8 locale: old pattern should work, so this is a real failure.
       printf '%s\n' "${red}FAIL${nocolor}: Old check 3 false positive: $description" >&2
@@ -427,20 +429,23 @@ f="$(write_file_binary "hidden_in_comment.txt" '# This is a normal comment\xE2\x
 expect_detected "Zero-width space hidden in comment" "$f"
 
 ## LTR mark (E2 80 8E) hidden in a string literal.
-f="$(write_file_binary "hidden_in_string.txt" 'var x = "hello\xE2\x80\x8Eworld";')"
+f="$(write_file_binary "hidden_in_string.txt" 'x = "hello\xE2\x80\x8Eworld"')"
 expect_detected "LTR mark hidden in string literal" "$f"
 
 ## Trojan Source BiDi attack pattern.
-f="$(write_file_binary "trojan_source_example.txt" 'access_level = "user\xE2\x80\xAA\xE2\x81\xA6\xE2\x81\xA9\xE2\x81\xA6admin\xE2\x81\xA9\xE2\x80\xAC"')"
+## Wraps the word "test" in BiDi embedding/isolate controls (LRE, LRI, PDI, PDF).
+f="$(write_file_binary "trojan_source_example.txt" 'access_level = "user\xE2\x80\xAA\xE2\x81\xA6\xE2\x81\xA9\xE2\x81\xA6test\xE2\x81\xA9\xE2\x80\xAC"')"
 expect_detected "Trojan Source BiDi attack pattern" "$f"
 
-## Backspace (\x08) overwrite attack.
-f="$(write_file_binary "backspace_overwrite.txt" 'user\x08\x08\x08\x08root')"
-expect_detected "Backspace overwrite (displays 'root' over 'user')" "$f"
+## Backspace (\x08) overwrite: text followed by backspaces then replacement.
+## Would display "BADX" overwriting "GOOD" on a terminal.
+f="$(write_file_binary "backspace_overwrite.txt" 'GOOD\x08\x08\x08\x08BADX')"
+expect_detected "Backspace overwrite (GOOD overwritten by BADX)" "$f"
 
-## Carriage return (\x0D) overwrite attack.
-f="$(write_file_binary "cr_overwrite.txt" 'safe command\x0Drm -rf /')"
-expect_detected "CR overwrite (hides malicious command)" "$f"
+## Carriage return (\x0D) overwrite: second text replaces first on display.
+## Would display "ERROR: You should not see this" on a terminal.
+f="$(write_file_binary "cr_overwrite.txt" 'This line looks safe\x0DERROR: You should not see this')"
+expect_detected "CR overwrite (hides text behind carriage return)" "$f"
 
 ## ===================================================================
 ## Section 10: Mixed content edge cases
@@ -461,10 +466,7 @@ f="$(write_file_binary "leading_null.txt" '\x00normal text')"
 expect_detected "NULL byte at start of file" "$f"
 
 ## Very long line with suspicious char in the middle (ZWSP = E2 80 8B).
-python3 -c "
-import sys
-sys.stdout.buffer.write(b'a' * 10000 + b'\xe2\x80\x8b' + b'b' * 10000)
-" | tee -- "$test_dir/long_line.txt" >/dev/null
+write-long-line-with-unicode "$test_dir/long_line.txt"
 expect_detected "Suspicious char buried in 20000-char line" "$test_dir/long_line.txt"
 
 ## ===================================================================
diff --git a/usr/libexec/helper-scripts/write-long-line-with-unicode b/usr/libexec/helper-scripts/write-long-line-with-unicode
new file mode 100755
index 00000000..5929dd4f
--- /dev/null
+++ b/usr/libexec/helper-scripts/write-long-line-with-unicode
@@ -0,0 +1,28 @@
+#!/usr/bin/python3 -su
+
+## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
+## See the file COPYING for copying conditions.
+
+"""
+Writes a long line with a suspicious Unicode character (ZWSP, U+200B)
+buried in the middle. Used by test_grep_find_unicode_wrapper to test
+detection in large files.
+
+Usage: write-long-line-with-unicode <output-file>
+"""
+
+import sys
+
+def main():
+    if len(sys.argv) != 2:
+        sys.stderr.write("Usage: write-long-line-with-unicode <output-file>\n")
+        return 1
+    output_path = sys.argv[1]
+    ## 10000 'a' bytes + ZWSP (E2 80 8B) + 10000 'b' bytes
+    data = b'a' * 10000 + b'\xe2\x80\x8b' + b'b' * 10000
+    with open(output_path, 'wb') as output_file:
+        output_file.write(data)
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())