From 6a4006d8c132070000302f473177a545be6e5bf2 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 1 Apr 2026 16:37:04 +0000
Subject: [PATCH] fix: sanitize quotes in malicious branch and bare angle
 brackets in output

strip_markup had two security issues:
1. The malicious-input sanitization branch only replaced <, >, & with
   underscores but not " and ', which could allow HTML attribute
   injection if output is placed in an attribute context.
2. Bare < and > characters that HTMLParser doesn't treat as tags
   (e.g., "2 < 3") passed through both strip passes unchanged and
   were returned unsanitized, potentially dangerous in HTML contexts.

Now quotes are sanitized in the malicious branch, and remaining < / >
are always sanitized in the final output.

https://claude.ai/code/session_01Wjn2KfitiA5iTADcLLfdbR
---
 .../sanitize_string/tests/sanitize_string.py  |  24 +++-
 .../strip_markup/strip_markup_lib.py          |  33 +++--
 .../strip_markup/tests/strip_markup.py        | 116 ++++++++++++++++++
 3 files changed, 158 insertions(+), 15 deletions(-)

diff --git a/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py b/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py
index 31cf19b9..ece794c8 100644
--- a/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py
+++ b/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py
@@ -92,6 +92,26 @@ def test_malicious_markup_strings(self) -> None:
             sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"]
         )
 
+    def test_bare_angle_bracket_strings(self) -> None:
+        """
+        Wrapper for _test_bare_angle_bracket_strings (from TestStripMarkup)
+        specific to TestSanitizeString.
+        """
+
+        self._test_bare_angle_bracket_strings(
+            sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"]
+        )
+
+    def test_malicious_markup_quote_strings(self) -> None:
+        """
+        Wrapper for _test_malicious_markup_quote_strings (from
+        TestStripMarkup) specific to TestSanitizeString.
+        """
+
+        self._test_malicious_markup_quote_strings(
+            sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"]
+        )
+
     def test_simple_escape_cases(self) -> None:
         """
         Ensures sanitize_string.py correctly sanitizes escape sequences and
@@ -153,10 +173,10 @@ def test_malicious_cases(self) -> None:
 """,
                 """\
 
-__blowupWorld() __//__ Won't blow up world, because it's commented :) \
+__blowupWorld() __//__ Won_t blow up world, because it_s commented :) \
 _[8mor not!_[0m
 
-There really isn't bold text below, I promise!
+There really isn_t bold text below, I promise!
 _b_Not bold!_/b_
 [8mThis text might become invisible.[0m
 
diff --git a/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py b/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py
index 8379f48f..14e837e8 100644
--- a/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py
+++ b/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py
@@ -55,19 +55,26 @@ def strip_markup(untrusted_string: str) -> str:
     markup_stripper = StripMarkupEngine()
     markup_stripper.feed(strip_one_string)
     strip_two_string: str = markup_stripper.get_data()
-    if strip_one_string == strip_two_string:
-        return strip_one_string
-
-    ## If we get this far, the second strip attempt further transformed the
-    ## text, indicating an attempt to maliciously circumvent the stripper.
-    ## Sanitize the malicious text by changing all '<', '>', and '&'
-    ## characters to underscores. See
-    ## https://stackoverflow.com/a/10371699/19474638
-    ##
-    ## Note that we sanitize strip_one_string, NOT strip_two_string, so that
-    ## the neutered malicious text is displayed to the user. This is so that
-    ## the user is alerted to something odd happening.
+    if strip_one_string != strip_two_string:
+        ## If we get this far, the second strip attempt further transformed
+        ## the text, indicating an attempt to maliciously circumvent the
+        ## stripper. Sanitize the malicious text by changing all '<', '>',
+        ## '&', '"', and "'" characters to underscores. See
+        ## https://stackoverflow.com/a/10371699/19474638
+        ##
+        ## Note that we sanitize strip_one_string, NOT strip_two_string, so
+        ## that the neutered malicious text is displayed to the user. This
+        ## is so that the user is alerted to something odd happening.
+        strip_one_string = "".join(
+            "_" if char in ("<", ">", "&", '"', "'") else char
+            for char in strip_one_string
+        )
+
+    ## Sanitize any remaining '<' and '>' characters that survived both
+    ## strip passes (e.g. bare '<' in "2 < 3" which HTMLParser does not
+    ## treat as a tag). These could be misinterpreted as markup if the
+    ## output is later placed into an HTML context.
     sanitized_string: str = "".join(
-        "_" if char in ["<", ">", "&"] else char for char in strip_one_string
+        "_" if char in ("<", ">") else char for char in strip_one_string
     )
     return sanitized_string
diff --git a/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py b/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py
index f9f29122..8151063f 100644
--- a/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py
+++ b/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py
@@ -359,6 +359,104 @@ def _test_malicious_markup_strings(
                 stdin_string=test_case[0],
             )
 
+    def _test_bare_angle_bracket_strings(
+        self,
+        main_func: Callable[[], int],
+        argv0: str,
+        pos_args_prefix: list[str] | None = None,
+    ) -> None:
+        """
+        Ensure strip_markup.py sanitizes bare '<' and '>' characters that
+        are not part of valid tags but could be misinterpreted as markup
+        in downstream HTML contexts. This function is reused by
+        sanitize_string's tests.
+        """
+
+        if pos_args_prefix is None:
+            pos_args_prefix = []
+
+        test_case_list: list[tuple[str, str]] = [
+            (
+                "2 < 3",
+                "2 _ 3",
+            ),
+            (
+                "2 > 1",
+                "2 _ 1",
+            ),
+            (
+                "1 < 2 > 0",
+                "1 _ 2 _ 0",
+            ),
+            (
+                "x << y",
+                "x __ y",
+            ),
+        ]
+
+        for test_case in test_case_list:
+            self._test_args(
+                main_func=main_func,
+                argv0=argv0,
+                stdout_string=test_case[1],
+                stderr_string="",
+                exit_code=0,
+                args=[*pos_args_prefix, test_case[0]],
+            )
+            self._test_stdin(
+                main_func=main_func,
+                argv0=argv0,
+                stdout_string=test_case[1],
+                stderr_string="",
+                args=[*pos_args_prefix],
+                stdin_string=test_case[0],
+            )
+
+    def _test_malicious_markup_quote_strings(
+        self,
+        main_func: Callable[[], int],
+        argv0: str,
+        pos_args_prefix: list[str] | None = None,
+    ) -> None:
+        """
+        Ensure strip_markup.py sanitizes quote characters in strings that
+        trigger the malicious-input branch, preventing attribute injection
+        if the output is placed into an HTML attribute context. This
+        function is reused by sanitize_string's tests.
+        """
+
+        if pos_args_prefix is None:
+            pos_args_prefix = []
+
+        test_case_list: list[tuple[str, str]] = [
+            (
+                '<<b>b "onmouseover="alert(1)<</b>/b>',
+                '_b _onmouseover=_alert(1)_/b_',
+            ),
+            (
+                "<<b>b 'onmouseover='alert(1)<</b>/b>",
+                "_b _onmouseover=_alert(1)_/b_",
+            ),
+        ]
+
+        for test_case in test_case_list:
+            self._test_args(
+                main_func=main_func,
+                argv0=argv0,
+                stdout_string=test_case[1],
+                stderr_string="",
+                exit_code=0,
+                args=[*pos_args_prefix, test_case[0]],
+            )
+            self._test_stdin(
+                main_func=main_func,
+                argv0=argv0,
+                stdout_string=test_case[1],
+                stderr_string="",
+                args=[*pos_args_prefix],
+                stdin_string=test_case[0],
+            )
+
 
 class TestStripMarkup(TestStripMarkupBase):
     """
@@ -414,3 +512,21 @@ def test_malicious_markup_strings(self) -> None:
         """
 
         self._test_malicious_markup_strings(strip_markup_main, self.argv0)
+
+    def test_bare_angle_bracket_strings(self) -> None:
+        """
+        Wrapper for _test_bare_angle_bracket_strings specific to
+        TestStripMarkup.
+        """
+
+        self._test_bare_angle_bracket_strings(strip_markup_main, self.argv0)
+
+    def test_malicious_markup_quote_strings(self) -> None:
+        """
+        Wrapper for _test_malicious_markup_quote_strings specific to
+        TestStripMarkup.
+        """
+
+        self._test_malicious_markup_quote_strings(
+            strip_markup_main, self.argv0
+        )