From 2fcd8f20a30e47ba8f07e2283ddfa4dad33ba873 Mon Sep 17 00:00:00 2001 From: David Levin Date: Mon, 9 Mar 2026 10:54:06 -0400 Subject: [PATCH] feat(substrait): implement consume_nested for List expressions Converts Substrait Nested::List expressions into DataFusion make_array(...) scalar function calls, enabling inline array constructors like ARRAY['name', 'city'] to flow through the Substrait consumer without error. Co-Authored-By: Claude Sonnet 4.6 --- .../src/logical_plan/consumer/expr/mod.rs | 2 + .../src/logical_plan/consumer/expr/nested.rs | 60 +++++++++++++++++++ .../consumer/substrait_consumer.rs | 8 +-- 3 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 datafusion/substrait/src/logical_plan/consumer/expr/nested.rs diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs b/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs index 5d98850c72cca..037d4ff688c25 100644 --- a/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs +++ b/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs @@ -21,6 +21,7 @@ mod field_reference; mod function_arguments; mod if_then; mod literal; +mod nested; mod scalar_function; mod singular_or_list; mod subquery; @@ -32,6 +33,7 @@ pub use field_reference::*; pub use function_arguments::*; pub use if_then::*; pub use literal::*; +pub use nested::*; pub use scalar_function::*; pub use singular_or_list::*; pub use subquery::*; diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/nested.rs b/datafusion/substrait/src/logical_plan/consumer/expr/nested.rs new file mode 100644 index 0000000000000..59ce49dd37cd7 --- /dev/null +++ b/datafusion/substrait/src/logical_plan/consumer/expr/nested.rs @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::logical_plan::consumer::SubstraitConsumer; +use crate::logical_plan::consumer::expr::from_substrait_rex; +use datafusion::common::{DFSchema, not_impl_err, substrait_err}; +use datafusion::execution::FunctionRegistry; +use datafusion::logical_expr::Expr; +use substrait::proto::expression::Nested; +use substrait::proto::expression::nested::NestedType; + +/// Convert a Substrait Nested expression (List, Struct, Map constructors) to a DataFusion Expr. +/// +/// Nested::List is converted to a `make_array(...)` scalar function call. 
+pub async fn from_nested(
+    consumer: &impl SubstraitConsumer,
+    nested: &Nested,
+    input_schema: &DFSchema,
+) -> datafusion::common::Result<Expr> {
+    let Some(nested_type) = nested.nested_type.as_ref() else {
+        return substrait_err!("Nested expression must set nested_type");
+    };
+
+    match nested_type {
+        NestedType::List(list) => {
+            let mut args = Vec::with_capacity(list.values.len()); // element order kept
+            for expr in &list.values {
+                args.push(from_substrait_rex(consumer, expr, input_schema).await?);
+            }
+
+            let make_array_udf = consumer.get_function_registry().udf("make_array")?;
+            Ok(Expr::ScalarFunction(
+                datafusion::logical_expr::expr::ScalarFunction::new_udf(
+                    make_array_udf, // moved, not cloned
+                    args,
+                ),
+            ))
+        }
+        NestedType::Struct(_) => {
+            not_impl_err!("Nested Struct expression is not yet supported")
+        }
+        NestedType::Map(_) => {
+            not_impl_err!("Nested Map expression is not yet supported")
+        }
+    }
+}
diff --git a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
index a23f1faed1eb0..c2bc16ea89be6 100644
--- a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
@@ -18,7 +18,7 @@
 use super::{
     from_aggregate_rel, from_cast, from_cross_rel, from_exchange_rel, from_fetch_rel,
     from_field_reference, from_filter_rel, from_if_then, from_join_rel, from_literal,
-    from_project_rel, from_read_rel, from_scalar_function, from_set_rel,
+    from_nested, from_project_rel, from_read_rel, from_scalar_function, from_set_rel,
     from_singular_or_list, from_sort_rel, from_subquery, from_substrait_rel,
     from_substrait_rex, from_window_function,
 };
@@ -342,10 +342,10 @@ pub trait SubstraitConsumer: Send + Sync + Sized {
 
     async fn consume_nested(
         &self,
-        _expr: &Nested,
-        _input_schema: &DFSchema,
+        expr: &Nested,
+        input_schema: &DFSchema,
     ) -> datafusion::common::Result<Expr> {
-        not_impl_err!("Nested expression not supported")
+        from_nested(self, expr, input_schema).await
     }
 
     async fn consume_enum(