diff --git a/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py b/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py
index 31cf19b9..ece794c8 100644
--- a/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py
+++ b/usr/lib/python3/dist-packages/sanitize_string/tests/sanitize_string.py
@@ -92,6 +92,26 @@ def test_malicious_markup_strings(self) -> None:
sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"]
)
+ def test_bare_angle_bracket_strings(self) -> None:
+ """
+ Wrapper for _test_bare_angle_bracket_strings (from TestStripMarkup)
+ specific to TestSanitizeString; prefixes args with "nolimit".
+ """
+
+ self._test_bare_angle_bracket_strings(
+ sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"]
+ )
+
+ def test_malicious_markup_quote_strings(self) -> None:
+ """
+ Wrapper for _test_malicious_markup_quote_strings (from
+ TestStripMarkup) for TestSanitizeString; prefixes args with "nolimit".
+ """
+
+ self._test_malicious_markup_quote_strings(
+ sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"]
+ )
+
def test_simple_escape_cases(self) -> None:
"""
Ensures sanitize_string.py correctly sanitizes escape sequences and
@@ -153,10 +173,10 @@ def test_malicious_cases(self) -> None:
""",
"""\
-__blowupWorld() __//__ Won't blow up world, because it's commented :) \
+__blowupWorld() __//__ Won_t blow up world, because it_s commented :) \
_[8mor not!_[0m
-There really isn't bold text below, I promise!
+There really isn_t bold text below, I promise!
_b_Not bold!_/b_
[8mThis text might become invisible.[0m
diff --git a/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py b/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py
index 8379f48f..14e837e8 100644
--- a/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py
+++ b/usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py
@@ -55,19 +55,26 @@ def strip_markup(untrusted_string: str) -> str:
markup_stripper = StripMarkupEngine()
markup_stripper.feed(strip_one_string)
strip_two_string: str = markup_stripper.get_data()
- if strip_one_string == strip_two_string:
- return strip_one_string
-
- ## If we get this far, the second strip attempt further transformed the
- ## text, indicating an attempt to maliciously circumvent the stripper.
- ## Sanitize the malicious text by changing all '<', '>', and '&'
- ## characters to underscores. See
- ## https://stackoverflow.com/a/10371699/19474638
- ##
- ## Note that we sanitize strip_one_string, NOT strip_two_string, so that
- ## the neutered malicious text is displayed to the user. This is so that
- ## the user is alerted to something odd happening.
+ if strip_one_string != strip_two_string:
+ ## The second strip pass changed the text again, which indicates an
+ ## attempt to maliciously circumvent the stripper. Neutralize the
+ ## malicious text by replacing every '<', '>', '&', '"', and "'"
+ ## character with an underscore. See
+ ## https://stackoverflow.com/a/10371699/19474638
+ ##
+ ## Note that we sanitize strip_one_string, NOT strip_two_string, so
+ ## that the neutered malicious text is displayed to the user. This
+ ## is so that the user is alerted to something odd happening.
+ strip_one_string = "".join(
+ "_" if char in ("<", ">", "&", '"', "'") else char
+ for char in strip_one_string
+ )
+
+ ## Replace any '<' and '>' characters that survived both strip passes
+ ## with underscores (e.g. the bare '<' in "2 < 3", which HTMLParser
+ ## does not treat as a tag). These could be misinterpreted as markup
+ ## if the output is later placed into an HTML context.
sanitized_string: str = "".join(
- "_" if char in ["<", ">", "&"] else char for char in strip_one_string
+ "_" if char in ("<", ">") else char for char in strip_one_string
)
return sanitized_string
diff --git a/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py b/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py
index f9f29122..8151063f 100644
--- a/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py
+++ b/usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py
@@ -359,6 +359,104 @@ def _test_malicious_markup_strings(
stdin_string=test_case[0],
)
+ def _test_bare_angle_bracket_strings(
+ self,
+ main_func: Callable[[], int],
+ argv0: str,
+ pos_args_prefix: list[str] | None = None,
+ ) -> None:
+ """
+ Ensure strip_markup.py replaces bare '<' and '>' characters (not part
+ of valid tags, but possibly misread as markup in downstream HTML
+ contexts) with '_'. Each case runs through both _test_args and
+ _test_stdin. This function is reused by sanitize_string's tests.
+ """
+
+ if pos_args_prefix is None:
+ pos_args_prefix = []
+
+ test_case_list: list[tuple[str, str]] = [
+ (
+ "2 < 3",
+ "2 _ 3",
+ ),
+ (
+ "2 > 1",
+ "2 _ 1",
+ ),
+ (
+ "1 < 2 > 0",
+ "1 _ 2 _ 0",
+ ),
+ (
+ "x << y",
+ "x __ y",
+ ),
+ ]
+
+ for test_case in test_case_list:
+ self._test_args(
+ main_func=main_func,
+ argv0=argv0,
+ stdout_string=test_case[1],
+ stderr_string="",
+ exit_code=0,
+ args=[*pos_args_prefix, test_case[0]],
+ )
+ self._test_stdin(
+ main_func=main_func,
+ argv0=argv0,
+ stdout_string=test_case[1],
+ stderr_string="",
+ args=[*pos_args_prefix],
+ stdin_string=test_case[0],
+ )
+
+ def _test_malicious_markup_quote_strings(
+ self,
+ main_func: Callable[[], int],
+ argv0: str,
+ pos_args_prefix: list[str] | None = None,
+ ) -> None:
+ """
+ Ensure strip_markup.py replaces '"' and "'" with '_' in strings that
+ trigger the malicious-input branch, preventing attribute injection
+ if the output is placed into an HTML attribute context. This
+ function is reused by sanitize_string's tests.
+ """
+
+ if pos_args_prefix is None:
+ pos_args_prefix = []
+
+ test_case_list: list[tuple[str, str]] = [
+ (
+ '<b "onmouseover="alert(1)</b>',
+ '_b _onmouseover=_alert(1)_/b_',
+ ),
+ (
+ "<b 'onmouseover='alert(1)</b>",
+ "_b _onmouseover=_alert(1)_/b_",
+ ),
+ ]
+
+ for test_case in test_case_list:
+ self._test_args(
+ main_func=main_func,
+ argv0=argv0,
+ stdout_string=test_case[1],
+ stderr_string="",
+ exit_code=0,
+ args=[*pos_args_prefix, test_case[0]],
+ )
+ self._test_stdin(
+ main_func=main_func,
+ argv0=argv0,
+ stdout_string=test_case[1],
+ stderr_string="",
+ args=[*pos_args_prefix],
+ stdin_string=test_case[0],
+ )
+
class TestStripMarkup(TestStripMarkupBase):
"""
@@ -414,3 +512,21 @@ def test_malicious_markup_strings(self) -> None:
"""
self._test_malicious_markup_strings(strip_markup_main, self.argv0)
+
+ def test_bare_angle_bracket_strings(self) -> None:
+ """
+ Wrapper for _test_bare_angle_bracket_strings specific to
+ TestStripMarkup.
+ """
+
+ self._test_bare_angle_bracket_strings(strip_markup_main, self.argv0)
+
+ def test_malicious_markup_quote_strings(self) -> None:
+ """
+ Wrapper for _test_malicious_markup_quote_strings specific to
+ TestStripMarkup.
+ """
+
+ self._test_malicious_markup_quote_strings(
+ strip_markup_main, self.argv0
+ )