From 899c08bf0407e16bfab0ed921df8a119b02d0d1b Mon Sep 17 00:00:00 2001 From: Viicos <65306057+Viicos@users.noreply.github.com> Date: Wed, 25 Feb 2026 12:15:15 +0100 Subject: [PATCH 1/2] Only parse FROM identifier in CTE if using Hive --- src/parser/mod.rs | 2 +- tests/sqlparser_common.rs | 123 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bb11d79c2..b4bdd79e0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -13958,7 +13958,7 @@ impl<'a> Parser<'a> { closing_paren_token: closing_paren_token.into(), } }; - if self.parse_keyword(Keyword::FROM) { + if dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::FROM) { cte.from = Some(self.parse_identifier()?); } Ok(cte) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a3b5404d3..8c7595f6c 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -16073,6 +16073,129 @@ fn test_select_from_first() { } } +#[test] +fn test_select_from_first_with_cte() { + let dialects = all_dialects_where(|d| d.supports_from_first_select()); + let q = "WITH test AS (FROM t SELECT a) FROM test SELECT 1"; + + let ast = dialects.verified_query(q); + + let expected = Query { + with: Some(With { + with_token: AttachedToken::empty(), + recursive: false, + cte_tables: vec![Cte { + alias: TableAlias { + explicit: false, + name: Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }, + columns: vec![], + }, + query: Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + select_token: AttachedToken::empty(), + optimizer_hints: vec![], + distinct: None, + select_modifiers: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "a".to_string(), + quote_style: None, + span: Span::empty(), + }))], + exclude: None, + top_before_distinct: false, + into: None, + from: vec![TableWithJoins { + relation: 
table_from_name(ObjectName::from(vec![Ident { + value: "t".to_string(), + quote_style: None, + span: Span::empty(), + }])), + joins: vec![], + }], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + window_before_qualify: false, + qualify: None, + value_table_mode: None, + connect_by: vec![], + flavor: SelectFlavor::FromFirst, + }))), + order_by: None, + limit_clause: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + pipe_operators: vec![], + }), + from: None, + materialized: None, + closing_paren_token: AttachedToken::empty(), + }], + }), + body: Box::new(SetExpr::Select(Box::new(Select { + select_token: AttachedToken::empty(), + optimizer_hints: vec![], + distinct: None, + select_modifiers: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan { + value: test_utils::number("1"), + span: Span::empty(), + }))], + exclude: None, + top_before_distinct: false, + into: None, + from: vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), + joins: vec![], + }], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + window_before_qualify: false, + qualify: None, + value_table_mode: None, + connect_by: vec![], + flavor: SelectFlavor::FromFirst, + }))), + order_by: None, + limit_clause: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + pipe_operators: vec![], + }; + assert_eq!(expected, ast); + assert_eq!(ast.to_string(), q); +} + #[test] fn test_geometric_unary_operators() { // Number of points in 
path or polygon From 09854d818289a5d6d2c88996f278cd480327718f Mon Sep 17 00:00:00 2001 From: Viicos <65306057+Viicos@users.noreply.github.com> Date: Tue, 14 Apr 2026 16:19:15 +0200 Subject: [PATCH 2/2] Feedback --- src/dialect/hive.rs | 7 ++ src/dialect/mod.rs | 15 +++++ src/parser/mod.rs | 2 +- tests/sqlparser_common.rs | 134 ++++++--------------------------------------- 4 files changed, 42 insertions(+), 116 deletions(-) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index b39232ad5..085d1598c 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -72,4 +72,11 @@ impl Dialect for HiveDialect { fn supports_group_by_with_modifier(&self) -> bool { true } + + // TODO: The parsing of the FROM keyword seems wrong, as it happens within the CTE. + // See https://github.com/apache/datafusion-sqlparser-rs/issues/2236 for more details. + /// See [`Dialect::supports_from_first_insert`]. + fn supports_from_first_insert(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index bcca455ec..3eef5b49c 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -648,6 +648,21 @@ pub trait Dialect: Debug + Any { false } + /// Return true if the dialect supports "FROM-first" inserts. + /// + /// Example: + /// ```sql + /// WITH cte AS (SELECT key FROM src) + /// FROM cte + /// INSERT OVERWRITE table my_table + /// SELECT * + /// ``` + /// + /// See the Hive DML language manual ("Inserting data into Hive Tables from queries"). + fn supports_from_first_insert(&self) -> bool { + false + } + /// Return true if the dialect supports pipe operator. 
/// /// Example: diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b4bdd79e0..7d988cfe2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -13958,7 +13958,7 @@ impl<'a> Parser<'a> { closing_paren_token: closing_paren_token.into(), } }; - if dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::FROM) { + if self.dialect.supports_from_first_insert() && self.parse_keyword(Keyword::FROM) { cte.from = Some(self.parse_identifier()?); } Ok(cte) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8c7595f6c..19ea751b3 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -16069,7 +16069,6 @@ fn test_select_from_first() { pipe_operators: vec![], }; assert_eq!(expected, ast); - assert_eq!(ast.to_string(), q); } } @@ -16080,120 +16079,25 @@ fn test_select_from_first_with_cte() { let ast = dialects.verified_query(q); - let expected = Query { - with: Some(With { - with_token: AttachedToken::empty(), - recursive: false, - cte_tables: vec![Cte { - alias: TableAlias { - explicit: false, - name: Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }, - columns: vec![], - }, - query: Box::new(Query { - with: None, - body: Box::new(SetExpr::Select(Box::new(Select { - select_token: AttachedToken::empty(), - optimizer_hints: vec![], - distinct: None, - select_modifiers: None, - top: None, - projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { - value: "a".to_string(), - quote_style: None, - span: Span::empty(), - }))], - exclude: None, - top_before_distinct: false, - into: None, - from: vec![TableWithJoins { - relation: table_from_name(ObjectName::from(vec![Ident { - value: "t".to_string(), - quote_style: None, - span: Span::empty(), - }])), - joins: vec![], - }], - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: 
vec![], - window_before_qualify: false, - qualify: None, - value_table_mode: None, - connect_by: vec![], - flavor: SelectFlavor::FromFirst, - }))), - order_by: None, - limit_clause: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - pipe_operators: vec![], - }), - from: None, - materialized: None, - closing_paren_token: AttachedToken::empty(), - }], - }), - body: Box::new(SetExpr::Select(Box::new(Select { - select_token: AttachedToken::empty(), - optimizer_hints: vec![], - distinct: None, - select_modifiers: None, - top: None, - projection: vec![SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan { - value: test_utils::number("1"), - span: Span::empty(), - }))], - exclude: None, - top_before_distinct: false, - into: None, - from: vec![TableWithJoins { - relation: table_from_name(ObjectName::from(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }])), - joins: vec![], - }], - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - window_before_qualify: false, - qualify: None, - value_table_mode: None, - connect_by: vec![], - flavor: SelectFlavor::FromFirst, - }))), - order_by: None, - limit_clause: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - pipe_operators: vec![], - }; - assert_eq!(expected, ast); - assert_eq!(ast.to_string(), q); + let ast_select = ast.body.as_select().unwrap(); + + let expected_body_select_projection = + vec![SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan { + value: test_utils::number("1"), + span: Span::empty(), + }))]; + + let expected_body_from = vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), + joins: vec![], + }]; + + 
assert_eq!(ast_select.projection, expected_body_select_projection); + assert_eq!(ast_select.from, expected_body_from); } #[test]