Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 10 additions & 12 deletions Lib/test/test_binascii.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,23 +240,21 @@ def assertNonBase64Data(data, expected, ignorechars):

def test_base64_excess_data(self):
# Test excess data exceptions
def assertExcessData(data, non_strict_expected,
ignore_padchar_expected=None):
def assertExcessData(data, expected):
assert_regex = r'(?i)Excess data'
data = self.type2test(data)
with self.assertRaisesRegex(binascii.Error, assert_regex):
binascii.a2b_base64(data, strict_mode=True)
self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
non_strict_expected)
if ignore_padchar_expected is not None:
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
ignorechars=b'='),
ignore_padchar_expected)
self.assertEqual(binascii.a2b_base64(data), non_strict_expected)

assertExcessData(b'ab==c', b'i')
assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test used to highlight the difference between strict and non-strict mode. We should keep a test highlighting that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In strict mode we get an error. We get a result only when strict_mode=False or when the new argument ignorechars contains "=", and those two cases gave different results. Now this difference has been fixed.

expected)
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
ignorechars=b'='),
expected)
self.assertEqual(binascii.a2b_base64(data), expected)

assertExcessData(b'ab==c=', b'i\xb7')
assertExcessData(b'ab==cd', b'i\xb7\x1d')
assertExcessData(b'abc=d', b'i\xb7\x1d')

def test_base64errors(self):
# Test base64 with invalid padding
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc.) no
longer ignores excess data after the first padded quad in non-strict
(default) mode. Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores
the pad character, "=", if it is present before the end of the encoded data.
51 changes: 22 additions & 29 deletions Modules/binascii.c
Original file line number Diff line number Diff line change
Expand Up @@ -640,40 +640,33 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
*/
if (this_ch == BASE64_PAD) {
pads++;

if (strict_mode) {
if (quad_pos >= 2 && quad_pos + pads <= 4) {
continue;
}
if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
continue;
}
if (quad_pos == 1) {
/* Set an error below. */
break;
}
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error,
(quad_pos == 0 && ascii_data == data->buf)
? "Leading padding not allowed"
: "Excess padding not allowed");
}
goto error_end;
if (quad_pos >= 2 && quad_pos + pads <= 4) {
continue;
}
else {
if (quad_pos >= 2 && quad_pos + pads >= 4) {
/* A pad sequence means we should not parse more input.
** We've already interpreted the data from the quad at this point.
*/
goto done;
}
// See RFC 4648, section-3.3: "specifications MAY ignore the
// pad character, "=", treating it as non-alphabet data, if
// it is present before the end of the encoded data" and
// "the excess pad characters MAY also be ignored."
if (!strict_mode || ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment in this block linking to the RFC section.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

continue;
}
if (quad_pos == 1) {
/* Set an error below. */
break;
}
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error,
(quad_pos == 0 && ascii_data == data->buf)
? "Leading padding not allowed"
: "Excess padding not allowed");
}
goto error_end;
}

unsigned char v = table_a2b_base64[this_ch];
if (v >= 64) {
// See RFC 4648, section-3.3.
if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) {
state = get_binascii_state(module);
if (state) {
Expand All @@ -684,7 +677,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
continue;
}

// Characters that are not '=', in the middle of the padding, are not allowed
// Characters that are not '=', in the middle of the padding, are
// not allowed (except when they are). See RFC 4648, section-3.3.
if (pads && strict_mode &&
!ignorechar(BASE64_PAD, ignorechars, ignorecache))
{
Expand Down Expand Up @@ -748,7 +742,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
goto error_end;
}

done:
return PyBytesWriter_FinishWithPointer(writer, bin_data);

error_end:
Expand Down
Loading