From 6a4006d8c132070000302f473177a545be6e5bf2 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 1 Apr 2026 16:37:04 +0000 Subject: [PATCH] fix: sanitize quotes in malicious branch and bare angle brackets in output strip_markup had two security issues: 1. The malicious-input sanitization branch only replaced <, >, & with underscores but not " and ', which could allow HTML attribute injection if output is placed in an attribute context. 2. Bare < and > characters that HTMLParser doesn't treat as tags (e.g., "2 < 3") passed through both strip passes unchanged and were returned unsanitized, potentially dangerous in HTML contexts. Now quotes are sanitized in the malicious branch, and remaining < / > are always sanitized in the final output. https://claude.ai/code/session_01Wjn2KfitiA5iTADcLLfdbR --- .../sanitize_string/tests/sanitize_string.py | 24 +++- .../strip_markup/strip_markup_lib.py | 33 +++-- .../strip_markup/tests/strip_markup.py | 116 ++++++++++++++++++ 3 files changed, 158 insertions(+), 15 deletions(-) diff --git a/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py b/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py index 31cf19b9..ece794c8 100644 --- a/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py +++ b/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py @@ -92,6 +92,26 @@ def test_malicious_markup_strings(self) -> None: sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"] ) + def test_bare_angle_bracket_strings(self) -> None: + """ + Wrapper for _test_bare_angle_bracket_strings (from TestStripMarkup) + specific to TestSanitizeString. + """ + + self._test_bare_angle_bracket_strings( + sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"] + ) + + def test_malicious_markup_quote_strings(self) -> None: + """ + Wrapper for _test_malicious_markup_quote_strings (from + TestStripMarkup) specific to TestSanitizeString. + """ + + self._test_malicious_markup_quote_strings( + sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"] + ) + def test_simple_escape_cases(self) -> None: """ Ensures sanitize_string.py correctly sanitizes escape sequences and @@ -153,10 +173,10 @@ def test_malicious_cases(self) -> None: """, """\ -__blowupWorld() __//__ Won't blow up world, because it's commented :) \ +__blowupWorld() __//__ Won_t blow up world, because it_s commented :) \ _[8mor not!_[0m -There really isn't bold text below, I promise! +There really isn_t bold text below, I promise! _b_Not bold!_/b_ [8mThis text might become invisible.[0m diff --git a/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py b/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py index 8379f48f..14e837e8 100644 --- a/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py +++ b/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py @@ -55,19 +55,26 @@ def strip_markup(untrusted_string: str) -> str: markup_stripper = StripMarkupEngine() markup_stripper.feed(strip_one_string) strip_two_string: str = markup_stripper.get_data() - if strip_one_string == strip_two_string: - return strip_one_string - - ## If we get this far, the second strip attempt further transformed the - ## text, indicating an attempt to maliciously circumvent the stripper. - ## Sanitize the malicious text by changing all '<', '>', and '&' - ## characters to underscores. See - ## https://stackoverflow.com/a/10371699/19474638 - ## - ## Note that we sanitize strip_one_string, NOT strip_two_string, so that - ## the neutered malicious text is displayed to the user. This is so that - ## the user is alerted to something odd happening. + if strip_one_string != strip_two_string: + ## If we get this far, the second strip attempt further transformed + ## the text, indicating an attempt to maliciously circumvent the + ## stripper. Sanitize the malicious text by changing all '<', '>', + ## '&', '"', and "'" characters to underscores. See + ## https://stackoverflow.com/a/10371699/19474638 + ## + ## Note that we sanitize strip_one_string, NOT strip_two_string, so + ## that the neutered malicious text is displayed to the user. This + ## is so that the user is alerted to something odd happening. + strip_one_string = "".join( + "_" if char in ("<", ">", "&", '"', "'") else char + for char in strip_one_string + ) + + ## Sanitize any remaining '<' and '>' characters that survived both + ## strip passes (e.g. bare '<' in "2 < 3" which HTMLParser does not + ## treat as a tag). These could be misinterpreted as markup if the + ## output is later placed into an HTML context. sanitized_string: str = "".join( - "_" if char in ["<", ">", "&"] else char for char in strip_one_string + "_" if char in ("<", ">") else char for char in strip_one_string ) return sanitized_string diff --git a/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py b/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py index f9f29122..8151063f 100644 --- a/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py +++ b/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py @@ -359,6 +359,104 @@ def _test_malicious_markup_strings( stdin_string=test_case[0], ) + def _test_bare_angle_bracket_strings( + self, + main_func: Callable[[], int], + argv0: str, + pos_args_prefix: list[str] | None = None, + ) -> None: + """ + Ensure strip_markup.py sanitizes bare '<' and '>' characters that + are not part of valid tags but could be misinterpreted as markup + in downstream HTML contexts. This function is reused by + sanitize_string's tests. + """ + + if pos_args_prefix is None: + pos_args_prefix = [] + + test_case_list: list[tuple[str, str]] = [ + ( + "2 < 3", + "2 _ 3", + ), + ( + "2 > 1", + "2 _ 1", + ), + ( + "1 < 2 > 0", + "1 _ 2 _ 0", + ), + ( + "x << y", + "x __ y", + ), + ] + + for test_case in test_case_list: + self._test_args( + main_func=main_func, + argv0=argv0, + stdout_string=test_case[1], + stderr_string="", + exit_code=0, + args=[*pos_args_prefix, test_case[0]], + ) + self._test_stdin( + main_func=main_func, + argv0=argv0, + stdout_string=test_case[1], + stderr_string="", + args=[*pos_args_prefix], + stdin_string=test_case[0], + ) + + def _test_malicious_markup_quote_strings( + self, + main_func: Callable[[], int], + argv0: str, + pos_args_prefix: list[str] | None = None, + ) -> None: + """ + Ensure strip_markup.py sanitizes quote characters in strings that + trigger the malicious-input branch, preventing attribute injection + if the output is placed into an HTML attribute context. This + function is reused by sanitize_string's tests. + """ + + if pos_args_prefix is None: + pos_args_prefix = [] + + test_case_list: list[tuple[str, str]] = [ + ( + '<b "onmouseover="alert(1)</b>', + '_b _onmouseover=_alert(1)_/b_', + ), + ( + "<b 'onmouseover='alert(1)</b>", + "_b _onmouseover=_alert(1)_/b_", + ), + ] + + for test_case in test_case_list: + self._test_args( + main_func=main_func, + argv0=argv0, + stdout_string=test_case[1], + stderr_string="", + exit_code=0, + args=[*pos_args_prefix, test_case[0]], + ) + self._test_stdin( + main_func=main_func, + argv0=argv0, + stdout_string=test_case[1], + stderr_string="", + args=[*pos_args_prefix], + stdin_string=test_case[0], + ) + class TestStripMarkup(TestStripMarkupBase): """ @@ -414,3 +512,21 @@ def test_malicious_markup_strings(self) -> None: """ self._test_malicious_markup_strings(strip_markup_main, self.argv0) + + def test_bare_angle_bracket_strings(self) -> None: + """ + Wrapper for _test_bare_angle_bracket_strings specific to + TestStripMarkup. + """ + + self._test_bare_angle_bracket_strings(strip_markup_main, self.argv0) + + def test_malicious_markup_quote_strings(self) -> None: + """ + Wrapper for _test_malicious_markup_quote_strings specific to + TestStripMarkup. + """ + + self._test_malicious_markup_quote_strings( + strip_markup_main, self.argv0 + )