Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,26 @@ def test_malicious_markup_strings(self) -> None:
sanitize_string_main, self.argv0, pos_args_prefix=["nolimit"]
)

def test_bare_angle_bracket_strings(self) -> None:
    """
    Run the shared bare-angle-bracket checks
    (_test_bare_angle_bracket_strings, from TestStripMarkup) against
    sanitize_string_main, passing ["nolimit"] as the positional
    argument prefix.
    """

    self._test_bare_angle_bracket_strings(
        main_func=sanitize_string_main,
        argv0=self.argv0,
        pos_args_prefix=["nolimit"],
    )

def test_malicious_markup_quote_strings(self) -> None:
    """
    Run the shared malicious-markup quote checks
    (_test_malicious_markup_quote_strings, from TestStripMarkup)
    against sanitize_string_main, passing ["nolimit"] as the positional
    argument prefix.
    """

    self._test_malicious_markup_quote_strings(
        main_func=sanitize_string_main,
        argv0=self.argv0,
        pos_args_prefix=["nolimit"],
    )

def test_simple_escape_cases(self) -> None:
"""
Ensures sanitize_string.py correctly sanitizes escape sequences and
Expand Down Expand Up @@ -153,10 +173,10 @@ def test_malicious_cases(self) -> None:
""",
"""\

__blowupWorld() __//__ Won't blow up world, because it's commented :) \
__blowupWorld() __//__ Won_t blow up world, because it_s commented :) \
_[8mor not!_[0m

There really isn't bold text below, I promise!
There really isn_t bold text below, I promise!
_b_Not bold!_/b_
[8mThis text might become invisible.[0m

Expand Down
33 changes: 20 additions & 13 deletions usr/lib/python3/dist-packages/strip_markup/strip_markup_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,19 +55,26 @@ def strip_markup(untrusted_string: str) -> str:
markup_stripper = StripMarkupEngine()
markup_stripper.feed(strip_one_string)
strip_two_string: str = markup_stripper.get_data()
if strip_one_string == strip_two_string:
return strip_one_string

## If we get this far, the second strip attempt further transformed the
## text, indicating an attempt to maliciously circumvent the stripper.
## Sanitize the malicious text by changing all '<', '>', and '&'
## characters to underscores. See
## https://stackoverflow.com/a/10371699/19474638
##
## Note that we sanitize strip_one_string, NOT strip_two_string, so that
## the neutered malicious text is displayed to the user. This is so that
## the user is alerted to something odd happening.
if strip_one_string != strip_two_string:
## If we get this far, the second strip attempt further transformed
## the text, indicating an attempt to maliciously circumvent the
## stripper. Sanitize the malicious text by changing all '<', '>',
## '&', '"', and "'" characters to underscores. See
## https://stackoverflow.com/a/10371699/19474638
##
## Note that we sanitize strip_one_string, NOT strip_two_string, so
## that the neutered malicious text is displayed to the user. This
## is so that the user is alerted to something odd happening.
strip_one_string = "".join(
"_" if char in ("<", ">", "&", '"', "'") else char
for char in strip_one_string
)

## Sanitize any remaining '<' and '>' characters that survived both
## strip passes (e.g. bare '<' in "2 < 3" which HTMLParser does not
## treat as a tag). These could be misinterpreted as markup if the
## output is later placed into an HTML context.
sanitized_string: str = "".join(
"_" if char in ["<", ">", "&"] else char for char in strip_one_string
"_" if char in ("<", ">") else char for char in strip_one_string
)
return sanitized_string
116 changes: 116 additions & 0 deletions usr/lib/python3/dist-packages/strip_markup/tests/strip_markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,104 @@ def _test_malicious_markup_strings(
stdin_string=test_case[0],
)

def _test_bare_angle_bracket_strings(
    self,
    main_func: Callable[[], int],
    argv0: str,
    pos_args_prefix: list[str] | None = None,
) -> None:
    """
    Check that stray '<' and '>' characters (ones that do not form a
    valid tag, but which a downstream HTML context could still read as
    markup) are replaced with underscores by strip_markup.py.

    Every case is exercised twice: first with the untrusted text given
    as the last command-line argument, then with it supplied on stdin.
    Any entries in pos_args_prefix are placed before the text on the
    command line. sanitize_string's tests reuse this helper.
    """

    prefix_args: list[str] = (
        [] if pos_args_prefix is None else pos_args_prefix
    )

    ## Each entry is (untrusted input, exact stdout expected for it).
    cases: list[tuple[str, str]] = [
        ("2 < 3", "2 _ 3"),
        ("2 > 1", "2 _ 1"),
        ("1 < 2 > 0", "1 _ 2 _ 0"),
        ("x << y", "x __ y"),
    ]

    for untrusted_text, expected_stdout in cases:
        ## Input passed as a positional argument.
        self._test_args(
            main_func=main_func,
            argv0=argv0,
            stdout_string=expected_stdout,
            stderr_string="",
            exit_code=0,
            args=[*prefix_args, untrusted_text],
        )
        ## Same input passed on stdin instead.
        self._test_stdin(
            main_func=main_func,
            argv0=argv0,
            stdout_string=expected_stdout,
            stderr_string="",
            args=[*prefix_args],
            stdin_string=untrusted_text,
        )

def _test_malicious_markup_quote_strings(
    self,
    main_func: Callable[[], int],
    argv0: str,
    pos_args_prefix: list[str] | None = None,
) -> None:
    """
    Check that strip_markup.py replaces double and single quote
    characters with underscores for inputs that take the
    malicious-input branch, so the output cannot be used for attribute
    injection if it is later placed into an HTML attribute context.

    Every case is exercised twice: first with the untrusted text given
    as the last command-line argument, then with it supplied on stdin.
    Any entries in pos_args_prefix are placed before the text on the
    command line. sanitize_string's tests reuse this helper.
    """

    prefix_args: list[str] = (
        [] if pos_args_prefix is None else pos_args_prefix
    )

    ## Each entry is (untrusted input, exact stdout expected for it).
    ## The two inputs differ only in which quote character they use.
    cases: list[tuple[str, str]] = [
        (
            '<<b>b "onmouseover="alert(1)<</b>/b>',
            '_b _onmouseover=_alert(1)_/b_',
        ),
        (
            "<<b>b 'onmouseover='alert(1)<</b>/b>",
            "_b _onmouseover=_alert(1)_/b_",
        ),
    ]

    for untrusted_text, expected_stdout in cases:
        ## Input passed as a positional argument.
        self._test_args(
            main_func=main_func,
            argv0=argv0,
            stdout_string=expected_stdout,
            stderr_string="",
            exit_code=0,
            args=[*prefix_args, untrusted_text],
        )
        ## Same input passed on stdin instead.
        self._test_stdin(
            main_func=main_func,
            argv0=argv0,
            stdout_string=expected_stdout,
            stderr_string="",
            args=[*prefix_args],
            stdin_string=untrusted_text,
        )


class TestStripMarkup(TestStripMarkupBase):
"""
Expand Down Expand Up @@ -414,3 +512,21 @@ def test_malicious_markup_strings(self) -> None:
"""

self._test_malicious_markup_strings(strip_markup_main, self.argv0)

def test_bare_angle_bracket_strings(self) -> None:
    """
    Run the shared bare-angle-bracket checks
    (_test_bare_angle_bracket_strings) against strip_markup_main with
    no positional argument prefix.
    """

    self._test_bare_angle_bracket_strings(
        main_func=strip_markup_main,
        argv0=self.argv0,
    )

def test_malicious_markup_quote_strings(self) -> None:
    """
    Run the shared malicious-markup quote checks
    (_test_malicious_markup_quote_strings) against strip_markup_main
    with no positional argument prefix.
    """

    self._test_malicious_markup_quote_strings(
        main_func=strip_markup_main,
        argv0=self.argv0,
    )
Loading