Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
341 changes: 334 additions & 7 deletions src/persist-client/src/internal/encoding.rs

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions src/persist-client/src/internal/state_diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1360,6 +1360,27 @@ mod tests {

use super::*;

#[mz_ore::test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function on OS `linux`
fn proto_state_diff_invalid_field_diffs_is_error() {
// A `ProtoStateDiff` decoded from an untrusted blob whose field diffs
// fail `validate()` must convert to an error, not panic. We previously
// `debug_assert`ed validity, which panicked under debug assertions /
// fuzzing. Regression for the state_diff_proto_roundtrip cargo-fuzz
// finding.
use crate::internal::state::ProtoStateDiff;
use mz_proto::ProtoType;
use prost::Message;

let bytes: &[u8] = &[0x2a, 0x04, 0x08, 0x00, 0x68, 0x00, 0x40, 0x48];
let proto = ProtoStateDiff::decode(bytes).expect("crash input decodes as a proto");
let result: Result<StateDiff<u64>, _> = proto.into_rust();
assert!(
result.is_err(),
"invalid field diffs must be a decode error"
);
}

/// Model a situation where a "leader" is constantly making changes to its state, and a "follower"
/// is applying those changes as diffs.
#[mz_ore::test]
Expand Down
275 changes: 190 additions & 85 deletions src/persist-client/src/internal/trace.rs

Large diffs are not rendered by default.

41 changes: 35 additions & 6 deletions src/persist-types/src/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,22 @@ fn from_proto_with_type(
for b in buffers.into_iter().map(|b| b.into_rust()) {
builder = builder.add_buffer(b?);
}
for c in children
.into_iter()
.zip_eq(fields_for_type(&data_type))
.map(|(c, field)| from_proto_with_type(c, Some(field.data_type())))
{
builder = builder.add_child_data(c?);
// `children` is reconstructed from the wire, so its count may disagree with
// what `data_type` expects. Guard the length explicitly and reject a mismatch
// as a decode error (corrupted/crafted parts must not crash readers); the
// `zip_eq` below is then infallible.
let fields = fields_for_type(&data_type);
if children.len() != fields.len() {
return Err(TryFromProtoError::RowConversionError(format!(
"ProtoArrayData for {:?} has {} children but its data type expects {}",
data_type,
children.len(),
fields.len(),
)));
}
for (c, field) in children.into_iter().zip_eq(fields) {
let c = from_proto_with_type(c, Some(field.data_type()))?;
builder = builder.add_child_data(c);
}

// Construct the builder which validates all inputs and aligns data.
Expand Down Expand Up @@ -810,6 +820,25 @@ mod tests {
use mz_proto::ProtoType;
use std::sync::Arc;

#[mz_ore::test]
fn from_proto_child_count_mismatch_is_error() {
// A `ProtoArrayData` whose child count disagrees with its declared data
// type must decode to an error, not panic via `zip_eq`. Regression for
// the array_data_proto_roundtrip cargo-fuzz finding.
use prost::Message;
let bytes: &[u8] = &[
0x0a, 0x0a, 0x18, 0x32, 0x22, 0x00, 0x18, 0x0a, 0x18, 0x32, 0x22, 0x00, 0x2a, 0x00,
0x2a, 0x00, 0xe0, 0x32, 0x24,
];
let proto =
crate::arrow::ProtoArrayData::decode(bytes).expect("crash input decodes as a proto");
let result: Result<arrow::array::ArrayData, _> = proto.into_rust();
assert!(
result.is_err(),
"child-count mismatch must be a decode error"
);
}

#[mz_ore::test]
fn trim_proto() {
let nested_fields: Fields = vec![Field::new("a", DataType::UInt64, true)].into();
Expand Down
9 changes: 5 additions & 4 deletions src/persist-types/src/codec_impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,13 +618,14 @@ mod tests {
.to_string()
)
);
// Too short to be a UUID: rejected before the `uuid` crate (which
// panics building an error for some such inputs), so the message is
// generic rather than the crate's "invalid length".
assert_eq!(
ShardId::from_str("s0"),
Err(
"invalid ShardId s0: invalid length: expected length 32 for simple format, found 1"
.to_string()
)
Err("invalid ShardId s0: malformed UUID".to_string())
);
// Long enough and ASCII, so the `uuid` crate's own error survives.
assert_eq!(
ShardId::from_str("s00000000-0000-0000-0000-000000000000FOO"),
Err("invalid ShardId s00000000-0000-0000-0000-000000000000FOO: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-fA-F-], found `O` at 38".to_string())
Expand Down
7 changes: 7 additions & 0 deletions src/persist-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,13 @@ impl std::str::FromStr for ShardId {
Some(x) => x,
None => return Err(format!("invalid ShardId {}: incorrect prefix", s)),
};
// `Uuid::parse_str` panics (rather than erroring) while building its
// parse error for some malformed inputs — e.g. non-ASCII bytes make it
// slice the input on a non-char boundary. Reject anything that can't be
// a UUID first: the shortest valid form is 32 hex digits, all ASCII.
if uuid_encoded.len() < 32 || !uuid_encoded.is_ascii() {
return Err(format!("invalid ShardId {}: malformed UUID", s));
}
let uuid = Uuid::parse_str(uuid_encoded)
.map_err(|err| format!("invalid ShardId {}: {}", s, err))?;
Ok(ShardId(*uuid.as_bytes()))
Expand Down
39 changes: 35 additions & 4 deletions src/pgrepr/src/value/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,20 +205,24 @@ impl<'a> FromSql<'a> for Numeric {
_ => return Err("bad sign in numeric".into()),
}

let mut scale = (units - weight - 1) * 4;
// Compute in i32: `units`/`weight` are attacker-controlled `i16`s from
// the wire, so `(units - weight - 1) * 4` overflows `i16` for crafted
// headers (panic under overflow checks, silent wraparound otherwise).
let in_scale = i32::from(in_scale);
let mut scale = (i32::from(units) - i32::from(weight) - 1) * 4;

// Adjust scales
if scale < 0 {
// Multiply by 10^scale
cx.scaleb(&mut d, &AdtNumeric::from(-i32::from(scale)));
cx.scaleb(&mut d, &AdtNumeric::from(-scale));
scale = 0;
} else if scale > in_scale {
// Divide by 10^(difference in scale and in_scale)
cx.scaleb(&mut d, &AdtNumeric::from(-i32::from(scale - in_scale)));
cx.scaleb(&mut d, &AdtNumeric::from(-(scale - in_scale)));
scale = in_scale;
}

cx.scaleb(&mut d, &AdtNumeric::from(-i32::from(scale)));
cx.scaleb(&mut d, &AdtNumeric::from(-scale));
cx.reduce(&mut d);

let mut cx = cx_datum();
Expand Down Expand Up @@ -299,3 +303,30 @@ fn test_to_from_sql_roundtrip() {
let d_from_sql = Numeric::from_sql(&Type::NUMERIC, &out).unwrap();
assert_eq!(r.0, d_from_sql.0);
}

#[mz_ore::test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `decContextDefault` on OS `linux`
fn test_from_sql_extreme_weight_no_overflow() {
// `(units - weight - 1) * 4` is computed in i32 because `weight` is an
// attacker-controlled i16 from the wire: extreme values overflowed the old
// i16 arithmetic (panic under overflow checks, silent wraparound
// otherwise). `from_sql` must return a result, never panic. Regression for
// the value_decode_binary cargo-fuzz finding.
fn header(units: i16, weight: i16, sign: u16, in_scale: i16) -> Vec<u8> {
let mut b = Vec::new();
b.extend_from_slice(&units.to_be_bytes());
b.extend_from_slice(&weight.to_be_bytes());
b.extend_from_slice(&sign.to_be_bytes());
b.extend_from_slice(&in_scale.to_be_bytes());
b
}
for (weight, in_scale) in [
(i16::MAX, 0),
(i16::MIN, 0),
(-28174, -28271), // exact fuzz repro
(13606, -28272),
] {
// units = 0 (no trailing digits); the header alone triggered the overflow.
let _ = Numeric::from_sql(&Type::NUMERIC, &header(0, weight, 0, in_scale));
}
}
2 changes: 1 addition & 1 deletion src/pgwire/src/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ where
}
}

struct Codec {
pub struct Codec {
decode_state: DecodeState,
encode_state: Vec<(mz_pgrepr::Type, mz_pgwire_common::Format)>,
/// When true, skip the aggregate buffer size check in `decode()`.
Expand Down
30 changes: 19 additions & 11 deletions src/postgres-util/src/desc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,16 @@ impl RustType<ProtoPostgresColumnDesc> for PostgresColumnDesc {
}

fn from_proto(proto: ProtoPostgresColumnDesc) -> Result<Self, TryFromProtoError> {
let col_num_u32: u32 = proto
.col_num
.into_rust_if_some("ProtoPostgresColumnDesc::col_num")?;
// `col_num` is `u16` on the Rust side; reject u32 values that don't
// fit instead of panicking — reachable from untrusted proto bytes.
let col_num = u16::try_from(col_num_u32)
.map_err(|e| TryFromProtoError::InvalidFieldError(e.to_string()))?;
Ok(PostgresColumnDesc {
name: proto.name,
col_num: {
let v: u32 = proto
.col_num
.into_rust_if_some("ProtoPostgresColumnDesc::col_num")?;
u16::try_from(v).expect("u16 must roundtrip")
},
col_num,
type_oid: proto.type_oid,
type_mod: proto.type_mod,
nullable: proto.nullable,
Expand Down Expand Up @@ -257,14 +259,20 @@ impl RustType<ProtoPostgresKeyDesc> for PostgresKeyDesc {
}

fn from_proto(proto: ProtoPostgresKeyDesc) -> Result<Self, TryFromProtoError> {
// `cols` is `Vec<u16>` on the Rust side but `Vec<u32>` on the wire;
// a u32 value above 65535 used to panic via `.expect`, which is
// reachable from untrusted proto bytes.
let cols = proto
.cols
.into_iter()
.map(|c| {
u16::try_from(c).map_err(|e| TryFromProtoError::InvalidFieldError(e.to_string()))
})
.collect::<Result<Vec<_>, _>>()?;
Ok(PostgresKeyDesc {
oid: proto.oid,
name: proto.name,
cols: proto
.cols
.into_iter()
.map(|c| c.try_into().expect("values roundtrip"))
.collect(),
cols,
is_primary: proto.is_primary,
nulls_not_distinct: proto.nulls_not_distinct,
})
Expand Down
Loading