Skip to content

Commit d56cb23

Browse files
committed
mcp: validate data product names before interpolating into SQL
1 parent 860ea8e commit d56cb23

2 files changed

Lines changed: 97 additions & 34 deletions

File tree

src/environmentd/src/http/mcp.rs

Lines changed: 95 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,77 @@ async fn get_data_product_details(
768768
format_rows_response(rows, max_response_size)
769769
}
770770

771+
/// Validates that a data product name is a safe SQL identifier.
772+
///
773+
/// The name is interpolated into `SELECT * FROM {name}`, so we parse a
774+
/// synthetic query and inspect the AST to ensure nothing beyond a bare
775+
/// table reference was smuggled in (preventing SQL injection).
776+
fn validate_data_product_name(name: &str) -> Result<(), McpRequestError> {
777+
let name = name.trim();
778+
if name.is_empty() {
779+
return Err(McpRequestError::QueryValidationFailed(
780+
"Data product name cannot be empty".to_string(),
781+
));
782+
}
783+
784+
let make_err = || {
785+
McpRequestError::QueryValidationFailed(format!(
786+
"Invalid data product name: {}. Expected a valid object name, \
787+
e.g. '\"database\".\"schema\".\"name\"' or 'my_view'",
788+
name
789+
))
790+
};
791+
792+
// Parse a synthetic query to verify the name is a valid table reference.
793+
let probe = format!("SELECT 1 FROM {}", name);
794+
let stmts = parse(&probe).map_err(|_| make_err())?;
795+
796+
if stmts.len() != 1 {
797+
return Err(make_err());
798+
}
799+
800+
// Inspect the AST to ensure the parsed query is a bare SELECT with
801+
// exactly one table and no extra clauses (WHERE, ORDER BY, UNION, etc.)
802+
// that would indicate injected SQL beyond the table name.
803+
use mz_sql_parser::ast::{CteBlock, SetExpr, Statement};
804+
let Statement::Select(select_stmt) = &stmts[0].ast else {
805+
return Err(make_err());
806+
};
807+
let query = &select_stmt.query;
808+
809+
// No CTEs, ORDER BY, LIMIT, or OFFSET
810+
let has_ctes = match &query.ctes {
811+
CteBlock::Simple(ctes) => !ctes.is_empty(),
812+
CteBlock::MutuallyRecursive(_) => true,
813+
};
814+
if has_ctes || !query.order_by.is_empty() || query.limit.is_some() || query.offset.is_some() {
815+
return Err(make_err());
816+
}
817+
818+
// Body must be a simple Select, not a UNION/EXCEPT/INTERSECT
819+
let SetExpr::Select(select) = &query.body else {
820+
return Err(make_err());
821+
};
822+
823+
// No WHERE, GROUP BY, HAVING, QUALIFY, DISTINCT, or OPTIONS
824+
if select.selection.is_some()
825+
|| !select.group_by.is_empty()
826+
|| select.having.is_some()
827+
|| select.qualify.is_some()
828+
|| select.distinct.is_some()
829+
|| !select.options.is_empty()
830+
{
831+
return Err(make_err());
832+
}
833+
834+
// Exactly one table in FROM, no JOINs
835+
if select.from.len() != 1 || !select.from[0].joins.is_empty() {
836+
return Err(make_err());
837+
}
838+
839+
Ok(())
840+
}
841+
771842
/// Read rows from a data product. Issues a single read-only query.
772843
///
773844
/// When `cluster_override` is provided, sets the cluster explicitly.
@@ -784,6 +855,9 @@ async fn read_data_product(
784855
) -> Result<McpResult, McpRequestError> {
785856
debug!(name = %name, limit = limit, cluster_override = ?cluster_override, "Executing read_data_product");
786857

858+
// Validate the name is a safe identifier before interpolating into SQL.
859+
validate_data_product_name(name)?;
860+
787861
// Lightweight existence check: verify the data product is visible in the
788862
// catalog before running the read query. This gives a clean DataProductNotFound
789863
// error for missing or inaccessible products (including RBAC revocations)
@@ -1498,46 +1572,35 @@ mod tests {
14981572
}
14991573
}
15001574

1501-
// ── Response size cap tests ────────────────────────────────────────
1575+
// ── Data product name validation tests ─────────────────────────────
15021576

15031577
#[mz_ore::test]
1504-
fn test_format_rows_response_within_limit() {
1505-
let rows = vec![vec![json!("a"), json!(1)], vec![json!("b"), json!(2)]];
1506-
let result = format_rows_response(rows, 1_000_000).unwrap();
1507-
let McpResult::ToolContent(content) = result else {
1508-
panic!("Expected ToolContent");
1509-
};
1510-
assert_eq!(content.content.len(), 1);
1511-
assert!(content.content[0].text.contains("\"a\""));
1512-
assert!(content.content[0].text.contains("\"b\""));
1578+
fn test_validate_data_product_name_valid() {
1579+
// Fully qualified quoted identifiers
1580+
assert!(validate_data_product_name(r#""materialize"."public"."my_view""#).is_ok());
1581+
assert!(validate_data_product_name(r#""my_db"."my_schema"."orders""#).is_ok());
1582+
// Two-part name
1583+
assert!(validate_data_product_name(r#""public"."my_view""#).is_ok());
1584+
// Unquoted simple name
1585+
assert!(validate_data_product_name("my_view").is_ok());
15131586
}
15141587

15151588
#[mz_ore::test]
1516-
fn test_format_rows_response_errors_when_over_limit() {
1517-
let rows: Vec<Vec<serde_json::Value>> = (0..100)
1518-
.map(|i| vec![json!(format!("row_{}", i)), json!(i)])
1519-
.collect();
1520-
let err = format_rows_response(rows, 500).unwrap_err();
1521-
let msg = err.to_string();
1522-
assert!(
1523-
msg.contains("exceeds the 500 byte limit"),
1524-
"Error should mention the size limit, got: {msg}"
1525-
);
1526-
assert!(
1527-
msg.contains("Use LIMIT or WHERE"),
1528-
"Error should suggest narrowing the query, got: {msg}"
1529-
);
1589+
fn test_validate_data_product_name_rejects_empty() {
1590+
assert!(validate_data_product_name("").is_err());
1591+
assert!(validate_data_product_name(" ").is_err());
15301592
}
15311593

15321594
#[mz_ore::test]
1533-
fn test_format_rows_response_empty_rows() {
1534-
let rows: Vec<Vec<serde_json::Value>> = vec![];
1535-
let result = format_rows_response(rows, 1000).unwrap();
1536-
let McpResult::ToolContent(content) = result else {
1537-
panic!("Expected ToolContent");
1538-
};
1539-
assert_eq!(content.content.len(), 1);
1540-
assert_eq!(content.content[0].text, "[]");
1595+
fn test_validate_data_product_name_rejects_sql_injection() {
1596+
// Attempted injection via semicolon
1597+
assert!(validate_data_product_name("my_view; DROP TABLE users").is_err());
1598+
// Attempted injection via subquery
1599+
assert!(validate_data_product_name("my_view UNION SELECT * FROM secrets").is_err());
1600+
// Multiple table references via comma
1601+
assert!(validate_data_product_name("my_view, secrets").is_err());
1602+
// SQL keywords
1603+
assert!(validate_data_product_name("my_view WHERE 1=1 --").is_err());
15411604
}
15421605

15431606
#[mz_ore::test]

src/environmentd/tests/server.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5353,8 +5353,8 @@ fn test_mcp_agents_with_data_product() {
53535353
body["error"]["message"]
53545354
.as_str()
53555355
.unwrap()
5356-
.contains("Data product not found"),
5357-
"SQL injection in read_data_product name should be safely handled"
5356+
.contains("Invalid data product name"),
5357+
"SQL injection in read_data_product name should be rejected by validation"
53585358
);
53595359

53605360
// SQL injection attempt in cluster override.

0 commit comments

Comments
 (0)