From 899c08bf0407e16bfab0ed921df8a119b02d0d1b Mon Sep 17 00:00:00 2001
From: Viicos <65306057+Viicos@users.noreply.github.com>
Date: Wed, 25 Feb 2026 12:15:15 +0100
Subject: [PATCH 1/2] Only parse FROM identifier in CTE if using Hive
---
src/parser/mod.rs | 2 +-
tests/sqlparser_common.rs | 123 ++++++++++++++++++++++++++++++++++++++
2 files changed, 124 insertions(+), 1 deletion(-)
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index bb11d79c2..b4bdd79e0 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -13958,7 +13958,7 @@ impl<'a> Parser<'a> {
closing_paren_token: closing_paren_token.into(),
}
};
- if self.parse_keyword(Keyword::FROM) {
+ if dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::FROM) {
cte.from = Some(self.parse_identifier()?);
}
Ok(cte)
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index a3b5404d3..8c7595f6c 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -16073,6 +16073,129 @@ fn test_select_from_first() {
}
}
+#[test]
+fn test_select_from_first_with_cte() {
+ let dialects = all_dialects_where(|d| d.supports_from_first_select());
+ let q = "WITH test AS (FROM t SELECT a) FROM test SELECT 1";
+
+ let ast = dialects.verified_query(q);
+
+ let expected = Query {
+ with: Some(With {
+ with_token: AttachedToken::empty(),
+ recursive: false,
+ cte_tables: vec![Cte {
+ alias: TableAlias {
+ explicit: false,
+ name: Ident {
+ value: "test".to_string(),
+ quote_style: None,
+ span: Span::empty(),
+ },
+ columns: vec![],
+ },
+ query: Box::new(Query {
+ with: None,
+ body: Box::new(SetExpr::Select(Box::new(Select {
+ select_token: AttachedToken::empty(),
+ optimizer_hints: vec![],
+ distinct: None,
+ select_modifiers: None,
+ top: None,
+ projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
+ value: "a".to_string(),
+ quote_style: None,
+ span: Span::empty(),
+ }))],
+ exclude: None,
+ top_before_distinct: false,
+ into: None,
+ from: vec![TableWithJoins {
+ relation: table_from_name(ObjectName::from(vec![Ident {
+ value: "t".to_string(),
+ quote_style: None,
+ span: Span::empty(),
+ }])),
+ joins: vec![],
+ }],
+ lateral_views: vec![],
+ prewhere: None,
+ selection: None,
+ group_by: GroupByExpr::Expressions(vec![], vec![]),
+ cluster_by: vec![],
+ distribute_by: vec![],
+ sort_by: vec![],
+ having: None,
+ named_window: vec![],
+ window_before_qualify: false,
+ qualify: None,
+ value_table_mode: None,
+ connect_by: vec![],
+ flavor: SelectFlavor::FromFirst,
+ }))),
+ order_by: None,
+ limit_clause: None,
+ fetch: None,
+ locks: vec![],
+ for_clause: None,
+ settings: None,
+ format_clause: None,
+ pipe_operators: vec![],
+ }),
+ from: None,
+ materialized: None,
+ closing_paren_token: AttachedToken::empty(),
+ }],
+ }),
+ body: Box::new(SetExpr::Select(Box::new(Select {
+ select_token: AttachedToken::empty(),
+ optimizer_hints: vec![],
+ distinct: None,
+ select_modifiers: None,
+ top: None,
+ projection: vec![SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan {
+ value: test_utils::number("1"),
+ span: Span::empty(),
+ }))],
+ exclude: None,
+ top_before_distinct: false,
+ into: None,
+ from: vec![TableWithJoins {
+ relation: table_from_name(ObjectName::from(vec![Ident {
+ value: "test".to_string(),
+ quote_style: None,
+ span: Span::empty(),
+ }])),
+ joins: vec![],
+ }],
+ lateral_views: vec![],
+ prewhere: None,
+ selection: None,
+ group_by: GroupByExpr::Expressions(vec![], vec![]),
+ cluster_by: vec![],
+ distribute_by: vec![],
+ sort_by: vec![],
+ having: None,
+ named_window: vec![],
+ window_before_qualify: false,
+ qualify: None,
+ value_table_mode: None,
+ connect_by: vec![],
+ flavor: SelectFlavor::FromFirst,
+ }))),
+ order_by: None,
+ limit_clause: None,
+ fetch: None,
+ locks: vec![],
+ for_clause: None,
+ settings: None,
+ format_clause: None,
+ pipe_operators: vec![],
+ };
+ assert_eq!(expected, ast);
+ assert_eq!(ast.to_string(), q);
+}
+
#[test]
fn test_geometric_unary_operators() {
// Number of points in path or polygon
From 09854d818289a5d6d2c88996f278cd480327718f Mon Sep 17 00:00:00 2001
From: Viicos <65306057+Viicos@users.noreply.github.com>
Date: Tue, 14 Apr 2026 16:19:15 +0200
Subject: [PATCH 2/2] Gate CTE FROM parsing on Dialect::supports_from_first_insert
---
src/dialect/hive.rs | 7 ++
src/dialect/mod.rs | 15 +++++
src/parser/mod.rs | 2 +-
tests/sqlparser_common.rs | 134 ++++++--------------------------------
4 files changed, 42 insertions(+), 116 deletions(-)
diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs
index b39232ad5..085d1598c 100644
--- a/src/dialect/hive.rs
+++ b/src/dialect/hive.rs
@@ -72,4 +72,11 @@ impl Dialect for HiveDialect {
fn supports_group_by_with_modifier(&self) -> bool {
true
}
+
+ // TODO: The parsing of the FROM keyword seems wrong, as it happens within the CTE.
+ // See https://github.com/apache/datafusion-sqlparser-rs/issues/2236 for more details.
+ /// See [`Dialect::supports_from_first_insert`].
+ fn supports_from_first_insert(&self) -> bool {
+ true
+ }
}
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index bcca455ec..3eef5b49c 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -648,6 +648,21 @@ pub trait Dialect: Debug + Any {
false
}
+ /// Return true if the dialect supports "FROM-first" inserts.
+ ///
+ /// Example:
+ /// ```sql
+ /// WITH cte AS (SELECT key FROM src)
+ /// FROM cte
+ /// INSERT OVERWRITE table my_table
+ /// SELECT *
+ /// ```
+ ///
+ /// See <https://github.com/apache/datafusion-sqlparser-rs/issues/2236> for background.
+ fn supports_from_first_insert(&self) -> bool {
+ false
+ }
+
/// Return true if the dialect supports pipe operator.
///
/// Example:
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index b4bdd79e0..7d988cfe2 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -13958,7 +13958,7 @@ impl<'a> Parser<'a> {
closing_paren_token: closing_paren_token.into(),
}
};
- if dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::FROM) {
+ if self.dialect.supports_from_first_insert() && self.parse_keyword(Keyword::FROM) {
cte.from = Some(self.parse_identifier()?);
}
Ok(cte)
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 8c7595f6c..19ea751b3 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -16069,7 +16069,6 @@ fn test_select_from_first() {
pipe_operators: vec![],
};
assert_eq!(expected, ast);
- assert_eq!(ast.to_string(), q);
}
}
@@ -16080,120 +16079,25 @@ fn test_select_from_first_with_cte() {
let ast = dialects.verified_query(q);
- let expected = Query {
- with: Some(With {
- with_token: AttachedToken::empty(),
- recursive: false,
- cte_tables: vec![Cte {
- alias: TableAlias {
- explicit: false,
- name: Ident {
- value: "test".to_string(),
- quote_style: None,
- span: Span::empty(),
- },
- columns: vec![],
- },
- query: Box::new(Query {
- with: None,
- body: Box::new(SetExpr::Select(Box::new(Select {
- select_token: AttachedToken::empty(),
- optimizer_hints: vec![],
- distinct: None,
- select_modifiers: None,
- top: None,
- projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
- value: "a".to_string(),
- quote_style: None,
- span: Span::empty(),
- }))],
- exclude: None,
- top_before_distinct: false,
- into: None,
- from: vec![TableWithJoins {
- relation: table_from_name(ObjectName::from(vec![Ident {
- value: "t".to_string(),
- quote_style: None,
- span: Span::empty(),
- }])),
- joins: vec![],
- }],
- lateral_views: vec![],
- prewhere: None,
- selection: None,
- group_by: GroupByExpr::Expressions(vec![], vec![]),
- cluster_by: vec![],
- distribute_by: vec![],
- sort_by: vec![],
- having: None,
- named_window: vec![],
- window_before_qualify: false,
- qualify: None,
- value_table_mode: None,
- connect_by: vec![],
- flavor: SelectFlavor::FromFirst,
- }))),
- order_by: None,
- limit_clause: None,
- fetch: None,
- locks: vec![],
- for_clause: None,
- settings: None,
- format_clause: None,
- pipe_operators: vec![],
- }),
- from: None,
- materialized: None,
- closing_paren_token: AttachedToken::empty(),
- }],
- }),
- body: Box::new(SetExpr::Select(Box::new(Select {
- select_token: AttachedToken::empty(),
- optimizer_hints: vec![],
- distinct: None,
- select_modifiers: None,
- top: None,
- projection: vec![SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan {
- value: test_utils::number("1"),
- span: Span::empty(),
- }))],
- exclude: None,
- top_before_distinct: false,
- into: None,
- from: vec![TableWithJoins {
- relation: table_from_name(ObjectName::from(vec![Ident {
- value: "test".to_string(),
- quote_style: None,
- span: Span::empty(),
- }])),
- joins: vec![],
- }],
- lateral_views: vec![],
- prewhere: None,
- selection: None,
- group_by: GroupByExpr::Expressions(vec![], vec![]),
- cluster_by: vec![],
- distribute_by: vec![],
- sort_by: vec![],
- having: None,
- named_window: vec![],
- window_before_qualify: false,
- qualify: None,
- value_table_mode: None,
- connect_by: vec![],
- flavor: SelectFlavor::FromFirst,
- }))),
- order_by: None,
- limit_clause: None,
- fetch: None,
- locks: vec![],
- for_clause: None,
- settings: None,
- format_clause: None,
- pipe_operators: vec![],
- };
- assert_eq!(expected, ast);
- assert_eq!(ast.to_string(), q);
+ let ast_select = ast.body.as_select().unwrap();
+
+ let expected_body_select_projection =
+ vec![SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan {
+ value: test_utils::number("1"),
+ span: Span::empty(),
+ }))];
+
+ let expected_body_from = vec![TableWithJoins {
+ relation: table_from_name(ObjectName::from(vec![Ident {
+ value: "test".to_string(),
+ quote_style: None,
+ span: Span::empty(),
+ }])),
+ joins: vec![],
+ }];
+
+ assert_eq!(ast_select.projection, expected_body_select_projection);
+ assert_eq!(ast_select.from, expected_body_from);
}
#[test]