From f6809a1c7da93148874dc7a86f475f34d4792c16 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Sun, 18 Jan 2026 16:54:32 +0100 Subject: [PATCH 01/83] Add a default FileStatisticsCache implementation for the ListingTable --- datafusion-cli/src/main.rs | 49 -- datafusion/catalog-listing/src/table.rs | 30 +- datafusion/common/src/heap_size.rs | 458 ++++++++++++++++++ datafusion/common/src/lib.rs | 2 + .../src/datasource/listing_table_factory.rs | 4 +- datafusion/core/src/execution/context/mod.rs | 11 +- .../core/tests/parquet/file_statistics.rs | 2 +- datafusion/core/tests/sql/runtime_config.rs | 46 ++ .../execution/src/cache/cache_manager.rs | 68 ++- datafusion/execution/src/cache/cache_unit.rs | 249 +++++++++- datafusion/execution/src/runtime_env.rs | 26 +- .../test_files/encrypted_parquet.slt | 4 + .../test_files/information_schema.slt | 2 + .../test_files/parquet_sorted_statistics.slt | 4 + .../sqllogictest/test_files/set_variable.slt | 16 + 15 files changed, 868 insertions(+), 103 deletions(-) create mode 100644 datafusion/common/src/heap_size.rs diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 6bfe1160ecdd6..1909ed392afd3 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -689,55 +689,6 @@ mod tests { // When the cache manager creates a StatisticsCache by default, // the contents will show up here - let sql = "SELECT split_part(path, '/', -1) as filename, file_size_bytes, num_rows, num_columns, table_size_bytes from statistics_cache() order by filename"; - let df = ctx.sql(sql).await?; - let rbs = df.collect().await?; - assert_snapshot!(batches_to_string(&rbs),@r" - ++ - ++ - "); - - Ok(()) - } - - // Can be removed when https://github.com/apache/datafusion/issues/19217 is resolved - #[tokio::test] - async fn test_statistics_cache_override() -> Result<(), DataFusionError> { - // Install a specific StatisticsCache implementation - let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); - 
let cache_config = CacheManagerConfig::default() - .with_files_statistics_cache(Some(file_statistics_cache.clone())); - let runtime = RuntimeEnvBuilder::new() - .with_cache_manager(cache_config) - .build()?; - let config = SessionConfig::new().with_collect_statistics(true); - let ctx = SessionContext::new_with_config_rt(config, Arc::new(runtime)); - - ctx.register_udtf( - "statistics_cache", - Arc::new(StatisticsCacheFunc::new( - ctx.task_ctx().runtime_env().cache_manager.clone(), - )), - ); - - for filename in [ - "alltypes_plain", - "alltypes_tiny_pages", - "lz4_raw_compressed_larger", - ] { - ctx.sql( - format!( - "create external table {filename} - stored as parquet - location '../parquet-testing/data/{filename}.parquet'", - ) - .as_str(), - ) - .await? - .collect() - .await?; - } - let sql = "SELECT split_part(path, '/', -1) as filename, file_size_bytes, num_rows, num_columns, table_size_bytes from statistics_cache() order by filename"; let df = ctx.sql(sql).await?; let rbs = df.collect().await?; diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 06ba8c8113fac..0ed2b452bc626 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -36,7 +36,6 @@ use datafusion_datasource::{ }; use datafusion_execution::cache::TableScopedPath; use datafusion_execution::cache::cache_manager::FileStatisticsCache; -use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; use datafusion_expr::dml::InsertOp; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType}; @@ -187,7 +186,7 @@ pub struct ListingTable { /// The SQL definition for this table, if any definition: Option, /// Cache for collected file statistics - collected_statistics: Arc, + collected_statistics: Option>, /// Constraints applied to this table constraints: Constraints, /// Column default expressions for columns that are not physically 
present in the data files @@ -231,7 +230,7 @@ impl ListingTable { schema_source, options, definition: None, - collected_statistics: Arc::new(DefaultFileStatisticsCache::default()), + collected_statistics: None, constraints: Constraints::default(), column_defaults: HashMap::new(), expr_adapter_factory: config.expr_adapter_factory, @@ -260,10 +259,8 @@ impl ListingTable { /// Setting a statistics cache on the `SessionContext` can avoid refetching statistics /// multiple times in the same session. /// - /// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query. pub fn with_cache(mut self, cache: Option>) -> Self { - self.collected_statistics = - cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default())); + self.collected_statistics = cache; self } @@ -806,7 +803,8 @@ impl ListingTable { let meta = &part_file.object_meta; // Check cache first - if we have valid cached statistics and ordering - if let Some(cached) = self.collected_statistics.get(path) + if let Some(cache) = &self.collected_statistics + && let Some(cached) = cache.get(path) && cached.is_valid_for(meta) { // Return cached statistics and ordering @@ -823,14 +821,16 @@ impl ListingTable { let statistics = Arc::new(file_meta.statistics); // Store in cache - self.collected_statistics.put( - path, - CachedFileMetadata::new( - meta.clone(), - Arc::clone(&statistics), - file_meta.ordering.clone(), - ), - ); + if let Some(cache) = &self.collected_statistics { + cache.put( + path, + CachedFileMetadata::new( + meta.clone(), + Arc::clone(&statistics), + file_meta.ordering.clone(), + ), + ); + } Ok((statistics, file_meta.ordering)) } diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs new file mode 100644 index 0000000000000..6dee7d5c0a373 --- /dev/null +++ b/datafusion/common/src/heap_size.rs @@ -0,0 +1,458 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::stats::Precision; +use crate::{ColumnStatistics, ScalarValue, Statistics}; +use arrow::array::{ + Array, FixedSizeListArray, LargeListArray, ListArray, MapArray, StructArray, +}; +use arrow::datatypes::{ + DataType, Field, Fields, IntervalDayTime, IntervalMonthDayNano, IntervalUnit, + TimeUnit, UnionFields, UnionMode, i256, +}; +use chrono::{DateTime, Utc}; +use half::f16; +use std::collections::HashMap; +use std::fmt::Debug; +use std::sync::Arc; + +/// This is a temporary solution until the two upstream issues it is waiting on +/// are resolved (TODO: restore the issue links missing from this sentence). +/// Trait for calculating the size of various containers +pub trait DFHeapSize { + /// Return the size of any bytes allocated on the heap by this object, + /// including heap memory in those structures + /// + /// Note that the size of the type itself is not included in the result -- + /// instead, that size is added by the caller (e.g. container).
+ fn heap_size(&self) -> usize; +} + +impl DFHeapSize for Statistics { + fn heap_size(&self) -> usize { + self.num_rows.heap_size() + + self.total_byte_size.heap_size() + + self + .column_statistics + .iter() + .map(|s| s.heap_size()) + .sum::() + } +} + +impl DFHeapSize + for Precision +{ + fn heap_size(&self) -> usize { + self.get_value().map_or_else(|| 0, |v| v.heap_size()) + } +} + +impl DFHeapSize for ColumnStatistics { + fn heap_size(&self) -> usize { + self.null_count.heap_size() + + self.max_value.heap_size() + + self.min_value.heap_size() + + self.sum_value.heap_size() + + self.distinct_count.heap_size() + + self.byte_size.heap_size() + } +} + +impl DFHeapSize for ScalarValue { + fn heap_size(&self) -> usize { + use crate::scalar::ScalarValue::*; + match self { + Null => 0, + Boolean(b) => b.heap_size(), + Float16(f) => f.heap_size(), + Float32(f) => f.heap_size(), + Float64(f) => f.heap_size(), + Decimal32(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Decimal64(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Decimal128(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Decimal256(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Int8(i) => i.heap_size(), + Int16(i) => i.heap_size(), + Int32(i) => i.heap_size(), + Int64(i) => i.heap_size(), + UInt8(u) => u.heap_size(), + UInt16(u) => u.heap_size(), + UInt32(u) => u.heap_size(), + UInt64(u) => u.heap_size(), + Utf8(u) => u.heap_size(), + Utf8View(u) => u.heap_size(), + LargeUtf8(l) => l.heap_size(), + Binary(b) => b.heap_size(), + BinaryView(b) => b.heap_size(), + FixedSizeBinary(a, b) => a.heap_size() + b.heap_size(), + LargeBinary(l) => l.heap_size(), + FixedSizeList(f) => f.heap_size(), + List(l) => l.heap_size(), + LargeList(l) => l.heap_size(), + Struct(s) => s.heap_size(), + Map(m) => m.heap_size(), + Date32(d) => d.heap_size(), + Date64(d) => d.heap_size(), + Time32Second(t) => t.heap_size(), + Time32Millisecond(t) => t.heap_size(), + 
Time64Microsecond(t) => t.heap_size(), + Time64Nanosecond(t) => t.heap_size(), + TimestampSecond(a, b) => a.heap_size() + b.heap_size(), + TimestampMillisecond(a, b) => a.heap_size() + b.heap_size(), + TimestampMicrosecond(a, b) => a.heap_size() + b.heap_size(), + TimestampNanosecond(a, b) => a.heap_size() + b.heap_size(), + IntervalYearMonth(i) => i.heap_size(), + IntervalDayTime(i) => i.heap_size(), + IntervalMonthDayNano(i) => i.heap_size(), + DurationSecond(d) => d.heap_size(), + DurationMillisecond(d) => d.heap_size(), + DurationMicrosecond(d) => d.heap_size(), + DurationNanosecond(d) => d.heap_size(), + Union(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + Dictionary(a, b) => a.heap_size() + b.heap_size(), + } + } +} + +impl DFHeapSize for DataType { + fn heap_size(&self) -> usize { + use DataType::*; + match self { + Null => 0, + Boolean => 0, + Int8 => 0, + Int16 => 0, + Int32 => 0, + Int64 => 0, + UInt8 => 0, + UInt16 => 0, + UInt32 => 0, + UInt64 => 0, + Float16 => 0, + Float32 => 0, + Float64 => 0, + Timestamp(t, s) => t.heap_size() + s.heap_size(), + Date32 => 0, + Date64 => 0, + Time32(t) => t.heap_size(), + Time64(t) => t.heap_size(), + Duration(t) => t.heap_size(), + Interval(i) => i.heap_size(), + Binary => 0, + FixedSizeBinary(i) => i.heap_size(), + LargeBinary => 0, + BinaryView => 0, + Utf8 => 0, + LargeUtf8 => 0, + Utf8View => 0, + List(v) => v.heap_size(), + ListView(v) => v.heap_size(), + FixedSizeList(f, i) => f.heap_size() + i.heap_size(), + LargeList(l) => l.heap_size(), + LargeListView(l) => l.heap_size(), + Struct(s) => s.heap_size(), + Union(u, m) => u.heap_size() + m.heap_size(), + Dictionary(a, b) => a.heap_size() + b.heap_size(), + Decimal32(u8, i8) => u8.heap_size() + i8.heap_size(), + Decimal64(u8, i8) => u8.heap_size() + i8.heap_size(), + Decimal128(u8, i8) => u8.heap_size() + i8.heap_size(), + Decimal256(u8, i8) => u8.heap_size() + i8.heap_size(), + Map(m, b) => m.heap_size() + b.heap_size(), + RunEndEncoded(a, b) => 
a.heap_size() + b.heap_size(), + } + } +} + +impl DFHeapSize for Vec { + fn heap_size(&self) -> usize { + let item_size = size_of::(); + // account for the contents of the Vec + (self.capacity() * item_size) + + // add any heap allocations by contents + self.iter().map(|t| t.heap_size()).sum::() + } +} + +impl DFHeapSize for HashMap { + fn heap_size(&self) -> usize { + let capacity = self.capacity(); + if capacity == 0 { + return 0; + } + + // HashMap doesn't provide a way to get its heap size, so this is an approximation based on + // the behavior of hashbrown::HashMap as at version 0.16.0, and may become inaccurate + // if the implementation changes. + let key_val_size = size_of::<(K, V)>(); + // Overhead for the control tags group, which may be smaller depending on architecture + let group_size = 16; + // 1 byte of metadata stored per bucket. + let metadata_size = 1; + + // Compute the number of buckets for the capacity. Based on hashbrown's capacity_to_buckets + let buckets = if capacity < 15 { + let min_cap = match key_val_size { + 0..=1 => 14, + 2..=3 => 7, + _ => 3, + }; + let cap = min_cap.max(capacity); + if cap < 4 { + 4 + } else if cap < 8 { + 8 + } else { + 16 + } + } else { + (capacity.saturating_mul(8) / 7).next_power_of_two() + }; + + group_size + + (buckets * (key_val_size + metadata_size)) + + self.keys().map(|k| k.heap_size()).sum::() + + self.values().map(|v| v.heap_size()).sum::() + } +} + +impl DFHeapSize for Arc { + fn heap_size(&self) -> usize { + // Arc stores weak and strong counts on the heap alongside an instance of T + 2 * size_of::() + size_of::() + self.as_ref().heap_size() + } +} + +impl DFHeapSize for Arc { + fn heap_size(&self) -> usize { + 2 * size_of::() + size_of_val(self.as_ref()) + self.as_ref().heap_size() + } +} + +impl DFHeapSize for Fields { + fn heap_size(&self) -> usize { + self.into_iter().map(|f| f.heap_size()).sum::() + } +} + +impl DFHeapSize for StructArray { + fn heap_size(&self) -> usize { + 
self.get_array_memory_size() + } +} + +impl DFHeapSize for LargeListArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + +impl DFHeapSize for ListArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + +impl DFHeapSize for FixedSizeListArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} +impl DFHeapSize for MapArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + +impl DFHeapSize for Arc { + fn heap_size(&self) -> usize { + 2 * size_of::() + self.as_ref().heap_size() + } +} + +impl DFHeapSize for Box { + fn heap_size(&self) -> usize { + size_of::() + self.as_ref().heap_size() + } +} + +impl DFHeapSize for Option { + fn heap_size(&self) -> usize { + self.as_ref().map(|inner| inner.heap_size()).unwrap_or(0) + } +} + +impl DFHeapSize for (A, B) +where + A: DFHeapSize, + B: DFHeapSize, +{ + fn heap_size(&self) -> usize { + self.0.heap_size() + self.1.heap_size() + } +} + +impl DFHeapSize for String { + fn heap_size(&self) -> usize { + self.capacity() + } +} + +impl DFHeapSize for str { + fn heap_size(&self) -> usize { + self.to_string().capacity() + } +} + +impl DFHeapSize for UnionFields { + fn heap_size(&self) -> usize { + self.iter().map(|f| f.0.heap_size() + f.1.heap_size()).sum() + } +} + +impl DFHeapSize for UnionMode { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for TimeUnit { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for IntervalUnit { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for Field { + fn heap_size(&self) -> usize { + self.name().heap_size() + + self.data_type().heap_size() + + self.is_nullable().heap_size() + + self.dict_is_ordered().heap_size() + + self.metadata().heap_size() + } +} + +impl DFHeapSize for IntervalMonthDayNano { + fn heap_size(&self) -> usize { + self.days.heap_size() + self.months.heap_size() + 
self.nanoseconds.heap_size() + } +} + +impl DFHeapSize for IntervalDayTime { + fn heap_size(&self) -> usize { + self.days.heap_size() + self.milliseconds.heap_size() + } +} + +impl DFHeapSize for DateTime { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for bool { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} +impl DFHeapSize for u8 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for u16 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for u32 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for u64 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i8 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i16 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i32 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} +impl DFHeapSize for i64 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i128 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for i256 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for f16 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for f32 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} +impl DFHeapSize for f64 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} + +impl DFHeapSize for usize { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 996c563f0d8a2..40af6b1b3882c 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -46,6 +46,7 @@ pub mod error; pub mod file_options; pub mod format; pub mod hash_utils; +pub mod 
heap_size; pub mod instant; pub mod metadata; pub mod nested_struct; @@ -61,6 +62,7 @@ pub mod test_util; pub mod tree_node; pub mod types; pub mod utils; + /// Reexport arrow crate pub use arrow; pub use column::Column; diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index a5139346752a9..ce0f214c06d26 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -476,7 +476,7 @@ mod tests { // Test with collect_statistics enabled let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); let cache_config = CacheManagerConfig::default() - .with_files_statistics_cache(Some(file_statistics_cache.clone())); + .with_file_statistics_cache(Some(file_statistics_cache.clone())); let runtime = RuntimeEnvBuilder::new() .with_cache_manager(cache_config) .build_arc() @@ -506,7 +506,7 @@ mod tests { // Test with collect_statistics disabled let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); let cache_config = CacheManagerConfig::default() - .with_files_statistics_cache(Some(file_statistics_cache.clone())); + .with_file_statistics_cache(Some(file_statistics_cache.clone())); let runtime = RuntimeEnvBuilder::new() .with_cache_manager(cache_config) .build_arc() diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 87170f595f413..767e52005f52c 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -102,6 +102,7 @@ use datafusion_session::SessionStore; use async_trait::async_trait; use chrono::{DateTime, Utc}; +use datafusion_execution::cache::cache_unit::DEFAULT_FILE_STATISTICS_MEMORY_LIMIT; use object_store::ObjectStore; use parking_lot::RwLock; use url::Url; @@ -1183,6 +1184,10 @@ impl SessionContext { let duration = Self::parse_duration(variable, value)?; 
builder.with_object_list_cache_ttl(Some(duration)) } + "file_statistics_cache_limit" => { + let limit = Self::parse_memory_limit(value)?; + builder.with_file_statistics_cache_limit(limit) + } _ => return plan_err!("Unknown runtime configuration: {variable}"), // Remember to update `reset_runtime_variable()` when adding new options }; @@ -1222,9 +1227,13 @@ impl SessionContext { builder = builder.with_object_list_cache_ttl(DEFAULT_LIST_FILES_CACHE_TTL); } + "file_statistics_cache_limit" => { + builder = builder.with_file_statistics_cache_limit( + DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, + ); + } _ => return plan_err!("Unknown runtime configuration: {variable}"), }; - *state = SessionStateBuilder::from(state.clone()) .with_runtime_env(Arc::new(builder.build()?)) .build(); diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 84396be8a6a67..da89b89cee116 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -267,7 +267,7 @@ fn get_cache_runtime_state() -> ( let list_file_cache = Arc::new(DefaultListFilesCache::default()); let cache_config = cache_config - .with_files_statistics_cache(Some(file_static_cache.clone())) + .with_file_statistics_cache(Some(file_static_cache.clone())) .with_list_files_cache(Some(list_file_cache.clone())); let rt = RuntimeEnvBuilder::new() diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index cf5237d725805..6b7f0568309ce 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -25,6 +25,7 @@ use datafusion::execution::context::TaskContext; use datafusion::prelude::SessionConfig; use datafusion_execution::cache::DefaultListFilesCache; use datafusion_execution::cache::cache_manager::CacheManagerConfig; +use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; use 
datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_physical_plan::common::collect; @@ -344,6 +345,51 @@ async fn test_list_files_cache_ttl() { assert_eq!(get_limit(&ctx), Duration::from_secs(90)); } +#[tokio::test] +async fn test_file_statistics_cache_limit() { + let list_files_cache = Arc::new(DefaultFileStatisticsCache::default()); + + let rt = RuntimeEnvBuilder::new() + .with_cache_manager( + CacheManagerConfig::default() + .with_file_statistics_cache(Some(list_files_cache)), + ) + .build_arc() + .unwrap(); + + let ctx = SessionContext::new_with_config_rt(SessionConfig::default(), rt); + + let update_limit = async |ctx: &SessionContext, limit: &str| { + ctx.sql( + format!("SET datafusion.runtime.file_statistics_cache_limit = '{limit}'") + .as_str(), + ) + .await + .unwrap() + .collect() + .await + .unwrap(); + }; + + let get_limit = |ctx: &SessionContext| -> usize { + ctx.task_ctx() + .runtime_env() + .cache_manager + .get_file_statistic_cache() + .unwrap() + .cache_limit() + }; + + update_limit(&ctx, "1M").await; + assert_eq!(get_limit(&ctx), 1024 * 1024); + + update_limit(&ctx, "42G").await; + assert_eq!(get_limit(&ctx), 42 * 1024 * 1024 * 1024); + + update_limit(&ctx, "23K").await; + assert_eq!(get_limit(&ctx), 23 * 1024); +} + #[tokio::test] async fn test_unknown_runtime_config() { let ctx = SessionContext::new(); diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 0868c968c3a2f..7de993210b7b7 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -17,10 +17,14 @@ use crate::cache::CacheAccessor; use crate::cache::DefaultListFilesCache; -use crate::cache::cache_unit::DefaultFilesMetadataCache; +use crate::cache::cache_unit::{ + DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, DefaultFileStatisticsCache, + DefaultFilesMetadataCache, +}; use crate::cache::list_files_cache::ListFilesEntry; use 
crate::cache::list_files_cache::TableScopedPath; use datafusion_common::TableReference; +use datafusion_common::heap_size::DFHeapSize; use datafusion_common::stats::Precision; use datafusion_common::{Result, Statistics}; use datafusion_physical_expr_common::sort_expr::LexOrdering; @@ -41,7 +45,7 @@ pub use super::list_files_cache::{ /// /// This struct embeds the [`ObjectMeta`] used for cache validation, /// along with the cached statistics and ordering information. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CachedFileMetadata { /// File metadata used for cache validation (size, last_modified). pub meta: ObjectMeta, @@ -81,7 +85,7 @@ impl CachedFileMetadata { /// - Statistics for the file /// - Ordering information for the file /// -/// If enabled via [`CacheManagerConfig::with_files_statistics_cache`] this +/// If enabled via [`CacheManagerConfig::with_file_statistics_cache`] this /// cache avoids inferring the same file statistics repeatedly during the /// session lifetime. /// @@ -92,10 +96,26 @@ impl CachedFileMetadata { /// /// See [`crate::runtime_env::RuntimeEnv`] for more details pub trait FileStatisticsCache: CacheAccessor { + fn cache_limit(&self) -> usize; + + /// Updates the cache with a new memory limit in bytes. + fn update_cache_limit(&self, limit: usize); + /// Retrieves the information about the entries currently cached. fn list_entries(&self) -> HashMap; } +impl DFHeapSize for CachedFileMetadata { + fn heap_size(&self) -> usize { + self.meta.size.heap_size() + + self.meta.last_modified.heap_size() + + self.meta.version.heap_size() + + self.meta.e_tag.heap_size() + + self.meta.location.as_ref().heap_size() + + self.statistics.heap_size() + } +} + /// Represents information about a cached statistics entry. /// This is used to expose the statistics cache contents to outside modules. 
#[derive(Debug, Clone, PartialEq, Eq)] @@ -330,8 +350,19 @@ pub struct CacheManager { impl CacheManager { pub fn try_new(config: &CacheManagerConfig) -> Result> { - let file_statistic_cache = - config.table_files_statistics_cache.as_ref().map(Arc::clone); + let file_statistic_cache = match &config.file_statistics_cache { + Some(fsc) if config.file_statistics_cache_limit > 0 => { + fsc.update_cache_limit(config.file_statistics_cache_limit); + Some(Arc::clone(fsc)) + } + None if config.file_statistics_cache_limit > 0 => { + let fsc: Arc = Arc::new( + DefaultFileStatisticsCache::new(config.file_statistics_cache_limit), + ); + Some(fsc) + } + _ => None, + }; let list_files_cache = match &config.list_files_cache { Some(lfc) if config.list_files_cache_limit > 0 => { @@ -371,11 +402,18 @@ impl CacheManager { })) } - /// Get the cache of listing files statistics. + /// Get the file statistics cache. pub fn get_file_statistic_cache(&self) -> Option> { self.file_statistic_cache.clone() } + /// Get the memory limit of the file statistics cache. + pub fn get_file_statistic_cache_limit(&self) -> usize { + self.file_statistic_cache + .as_ref() + .map_or(0, |c| c.cache_limit()) + } + /// Get the cache for storing the result of listing [`ObjectMeta`]s under the same path. pub fn get_list_files_cache(&self) -> Option> { self.list_files_cache.clone() @@ -411,7 +449,9 @@ pub struct CacheManagerConfig { /// Enable caching of file statistics when listing files. /// Enabling the cache avoids repeatedly reading file statistics in a DataFusion session. /// Default is disabled. Currently only Parquet files are supported. - pub table_files_statistics_cache: Option>, + pub file_statistics_cache: Option>, + /// Limit of the file statistics cache, in bytes. Default: 1MiB. + pub file_statistics_cache_limit: usize, /// Enable caching of file metadata when listing files. 
/// Enabling the cache avoids repeat list and object metadata fetch operations, which may be /// expensive in certain situations (e.g. remote object storage), for objects under paths that @@ -437,7 +477,8 @@ pub struct CacheManagerConfig { impl Default for CacheManagerConfig { fn default() -> Self { Self { - table_files_statistics_cache: Default::default(), + file_statistics_cache: Default::default(), + file_statistics_cache_limit: DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, list_files_cache: Default::default(), list_files_cache_limit: DEFAULT_LIST_FILES_CACHE_MEMORY_LIMIT, list_files_cache_ttl: DEFAULT_LIST_FILES_CACHE_TTL, @@ -448,14 +489,19 @@ impl Default for CacheManagerConfig { } impl CacheManagerConfig { - /// Set the cache for files statistics. + /// Set the cache for file statistics. /// /// Default is `None` (disabled). - pub fn with_files_statistics_cache( + pub fn with_file_statistics_cache( mut self, cache: Option>, ) -> Self { - self.table_files_statistics_cache = cache; + self.file_statistics_cache = cache; + self + } + + pub fn with_file_statistics_cache_limit(mut self, limit: usize) -> Self { + self.file_statistics_cache_limit = limit; self } diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 49e16ca4b6cbf..4e2b1eeac988e 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -15,17 +15,17 @@ // specific language governing permissions and limitations // under the License. 
-use std::collections::HashMap; - use crate::cache::CacheAccessor; use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; - -use dashmap::DashMap; -use object_store::path::Path; +use std::collections::HashMap; +use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; +use crate::cache::lru_queue::LruQueue; +use datafusion_common::heap_size::DFHeapSize; +use object_store::path::Path; /// Default implementation of [`FileStatisticsCache`] /// @@ -41,32 +41,137 @@ pub use crate::cache::DefaultFilesMetadataCache; /// [`FileStatisticsCache`]: crate::cache::cache_manager::FileStatisticsCache #[derive(Default)] pub struct DefaultFileStatisticsCache { - cache: DashMap, + state: Mutex, +} + +impl DefaultFileStatisticsCache { + pub fn new(memory_limit: usize) -> Self { + Self { + state: Mutex::new(DefaultFileStatisticsCacheState::new(memory_limit)), + } + } + + /// Returns the size of the cached memory, in bytes. + pub fn memory_used(&self) -> usize { + let state = self.state.lock().unwrap(); + state.memory_used + } +} + +pub struct DefaultFileStatisticsCacheState { + lru_queue: LruQueue, + memory_limit: usize, + memory_used: usize, +} + +pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 1024 * 1024; // 1MiB + +impl Default for DefaultFileStatisticsCacheState { + fn default() -> Self { + Self { + lru_queue: LruQueue::new(), + memory_limit: DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, + memory_used: 0, + } + } } +impl DefaultFileStatisticsCacheState { + fn new(memory_limit: usize) -> Self { + Self { + lru_queue: LruQueue::new(), + memory_limit, + memory_used: 0, + } + } + fn get(&mut self, key: &Path) -> Option { + self.lru_queue.get(key).cloned() + } + + fn put( + &mut self, + key: &Path, + value: CachedFileMetadata, + ) -> Option { + let entry_size = value.heap_size(); + + if entry_size > self.memory_limit { + return None; + } + + let old_value = self.lru_queue.put(key.clone(), value); + self.memory_used += 
entry_size; + + if let Some(old_entry) = &old_value { + self.memory_used -= old_entry.heap_size(); + } + + self.evict_entries(); + + old_value + } + + fn remove(&mut self, k: &Path) -> Option { + self.lru_queue.remove(k) + } + + fn contains_key(&self, k: &Path) -> bool { + self.lru_queue.contains_key(k) + } + + fn len(&self) -> usize { + self.lru_queue.len() + } + + fn clear(&mut self) { + self.lru_queue.clear(); + self.memory_used = 0; + } + + fn evict_entries(&mut self) { + while self.memory_used > self.memory_limit { + if let Some(removed) = self.lru_queue.pop() { + self.memory_used -= removed.1.heap_size(); + } else { + // cache is empty while memory_used > memory_limit, cannot happen + debug_assert!( + false, + "cache is empty while memory_used > memory_limit, cannot happen" + ); + return; + } + } + } +} impl CacheAccessor for DefaultFileStatisticsCache { fn get(&self, key: &Path) -> Option { - self.cache.get(key).map(|entry| entry.value().clone()) + let mut state = self.state.lock().unwrap(); + state.get(key) } fn put(&self, key: &Path, value: CachedFileMetadata) -> Option { - self.cache.insert(key.clone(), value) + let mut state = self.state.lock().unwrap(); + state.put(key, value) } - fn remove(&self, k: &Path) -> Option { - self.cache.remove(k).map(|(_, entry)| entry) + fn remove(&self, key: &Path) -> Option { + let mut state = self.state.lock().unwrap(); + state.remove(key) } fn contains_key(&self, k: &Path) -> bool { - self.cache.contains_key(k) + let state = self.state.lock().unwrap(); + state.contains_key(k) } fn len(&self) -> usize { - self.cache.len() + let state = self.state.lock().unwrap(); + state.len() } fn clear(&self) { - self.cache.clear(); + let mut state = self.state.lock().unwrap(); + state.clear(); } fn name(&self) -> String { @@ -75,12 +180,22 @@ impl CacheAccessor for DefaultFileStatisticsCache { } impl FileStatisticsCache for DefaultFileStatisticsCache { + fn cache_limit(&self) -> usize { + let state = self.state.lock().unwrap(); + 
state.memory_limit + } + + fn update_cache_limit(&self, limit: usize) { + let mut state = self.state.lock().unwrap(); + state.memory_limit = limit; + state.evict_entries(); + } + fn list_entries(&self) -> HashMap { let mut entries = HashMap::::new(); - - for entry in self.cache.iter() { - let path = entry.key(); - let cached = entry.value(); + for entry in self.state.lock().unwrap().lru_queue.list_entries() { + let path = entry.0.clone(); + let cached = entry.1.clone(); entries.insert( path.clone(), FileStatisticsCacheEntry { @@ -88,7 +203,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { num_rows: cached.statistics.num_rows, num_columns: cached.statistics.column_statistics.len(), table_size_bytes: cached.statistics.total_byte_size, - statistics_size_bytes: 0, // TODO: set to the real size in the future + statistics_size_bytes: cached.statistics.heap_size(), has_ordering: cached.ordering.is_some(), }, ); @@ -104,11 +219,12 @@ mod tests { use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; - use arrow::array::RecordBatch; + use arrow::array::{Int32Array, ListArray, RecordBatch}; + use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use chrono::DateTime; - use datafusion_common::Statistics; use datafusion_common::stats::Precision; + use datafusion_common::{ColumnStatistics, ScalarValue, Statistics}; use datafusion_expr::ColumnarValue; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; @@ -389,7 +505,7 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 0, + statistics_size_bytes: 72, has_ordering: false, } ), @@ -400,11 +516,100 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 0, + statistics_size_bytes: 72, 
has_ordering: true, } ), ]) ); } + + #[test] + fn test_cache_entry_added_when_entries_are_within_cache_limit() { + let (meta_1, value_1) = create_cached_file_metadata_with_stats("test1.parquet"); + let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); + let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); + + let limit_for_2_entries = value_1.heap_size() + value_2.heap_size(); + + // create a cache with a limit which fits exactly 2 entries + let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); + + cache.put(&meta_1.location, value_1.clone()); + cache.put(&meta_2.location, value_2.clone()); + + assert_eq!(cache.len(), 2); + assert_eq!(cache.memory_used(), limit_for_2_entries); + + let result_1 = cache.get(&meta_1.location); + let result_2 = cache.get(&meta_2.location); + assert_eq!(result_1.unwrap(), value_1); + assert_eq!(result_2.unwrap(), value_2); + + // adding the third entry evicts the first entry + cache.put(&meta_3.location, value_3.clone()); + assert_eq!(cache.len(), 2); + assert_eq!(cache.memory_used(), limit_for_2_entries); + + let result_1 = cache.get(&meta_1.location); + assert!(result_1.is_none()); + + let result_2 = cache.get(&meta_2.location); + let result_3 = cache.get(&meta_3.location); + + assert_eq!(result_2.unwrap(), value_2); + assert_eq!(result_3.unwrap(), value_3); + + cache.remove(&meta_2.location); + + assert_eq!(cache.len(), 1); + + cache.clear(); + assert_eq!(cache.len(), 0); + } + + #[test] + fn test_cache_rejects_entry_which_is_too_large() { + let (meta, value) = create_cached_file_metadata_with_stats("test1.parquet"); + + let limit_less_than_the_entry = value.heap_size() - 1; + + // create a cache with a size less than the entry + let cache = DefaultFileStatisticsCache::new(limit_less_than_the_entry); + + cache.put(&meta.location, value); + + assert_eq!(cache.len(), 0); + assert_eq!(cache.memory_used(), 0); + } + + fn create_cached_file_metadata_with_stats( + 
file_name: &str, + ) -> (ObjectMeta, CachedFileMetadata) { + let series: Vec = (0..=10).step_by(1).collect(); + let values = Int32Array::from(series); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0])); + let field = Arc::new(Field::new_list_field(DataType::Int32, false)); + let list_array = ListArray::new(field, offsets, Arc::new(values), None); + + let column_statistics = ColumnStatistics { + null_count: Precision::Exact(1), + max_value: Precision::Exact(ScalarValue::List(Arc::new(list_array.clone()))), + min_value: Precision::Exact(ScalarValue::List(Arc::new(list_array.clone()))), + sum_value: Precision::Exact(ScalarValue::List(Arc::new(list_array.clone()))), + distinct_count: Precision::Exact(10), + byte_size: Precision::Absent, + }; + + let stats = Statistics { + num_rows: Precision::Exact(100), + total_byte_size: Precision::Exact(100), + column_statistics: vec![column_statistics.clone()], + }; + + let object_meta = create_test_meta(file_name, stats.heap_size() as u64); + let value = + CachedFileMetadata::new(object_meta.clone(), Arc::new(stats.clone()), None); + (object_meta, value) + } } diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index 67604c424c766..e393a7a127873 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -103,6 +103,7 @@ fn create_runtime_config_entries( metadata_cache_limit: Option, list_files_cache_limit: Option, list_files_cache_ttl: Option, + file_statistics_cache_limit: Option, ) -> Vec { vec![ ConfigEntry { @@ -135,6 +136,11 @@ fn create_runtime_config_entries( value: list_files_cache_ttl, description: "TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes.", }, + ConfigEntry { + key: "datafusion.runtime.file_statistics_cache_limit".to_string(), + value: file_statistics_cache_limit, + description: "Maximum memory to use for file statistics cache. 
Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.", + }, ] } @@ -296,6 +302,14 @@ impl RuntimeEnv { .get_list_files_cache_ttl() .map(format_duration); + let file_statistics_cache_limit = + self.cache_manager.get_file_statistic_cache_limit(); + let file_statistics_cache_value = format_byte_size( + file_statistics_cache_limit + .try_into() + .expect("File statistics cache size conversion failed"), + ); + create_runtime_config_entries( memory_limit_value, Some(max_temp_dir_value), @@ -303,6 +317,7 @@ impl RuntimeEnv { Some(metadata_cache_value), Some(list_files_cache_value), list_files_cache_ttl, + Some(file_statistics_cache_value), ) } } @@ -438,6 +453,11 @@ impl RuntimeEnvBuilder { self } + pub fn with_file_statistics_cache_limit(mut self, limit: usize) -> Self { + self.cache_manager = self.cache_manager.with_file_statistics_cache_limit(limit); + self + } + /// Build a RuntimeEnv pub fn build(self) -> Result { let Self { @@ -475,9 +495,10 @@ impl RuntimeEnvBuilder { /// Create a new RuntimeEnvBuilder from an existing RuntimeEnv pub fn from_runtime_env(runtime_env: &RuntimeEnv) -> Self { let cache_config = CacheManagerConfig { - table_files_statistics_cache: runtime_env + file_statistics_cache: runtime_env.cache_manager.get_file_statistic_cache(), + file_statistics_cache_limit: runtime_env .cache_manager - .get_file_statistic_cache(), + .get_file_statistic_cache_limit(), list_files_cache: runtime_env.cache_manager.get_list_files_cache(), list_files_cache_limit: runtime_env .cache_manager @@ -514,6 +535,7 @@ impl RuntimeEnvBuilder { Some("50M".to_owned()), Some("1M".to_owned()), None, + Some("1M".to_owned()), ) } diff --git a/datafusion/sqllogictest/test_files/encrypted_parquet.slt b/datafusion/sqllogictest/test_files/encrypted_parquet.slt index d580b7d1ad2b8..fd375778b7a53 100644 --- a/datafusion/sqllogictest/test_files/encrypted_parquet.slt +++ b/datafusion/sqllogictest/test_files/encrypted_parquet.slt @@ -77,6 +77,10 
@@ ORDER BY double_field 3 4 5 6 +# Disable file statistics cache because file statistics have been previously created +statement ok +set datafusion.runtime.file_statistics_cache_limit = "0K"; + statement count 0 CREATE EXTERNAL TABLE parquet_table ( diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index b04c78bd2774c..b4faa414e3acb 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -331,6 +331,7 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.subset_repartition_threshold 4 datafusion.optimizer.top_down_join_key_reordering true +datafusion.runtime.file_statistics_cache_limit 1M datafusion.optimizer.use_statistics_registry false datafusion.runtime.list_files_cache_limit 1M datafusion.runtime.list_files_cache_ttl NULL @@ -478,6 +479,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.subset_repartition_threshold 4 Partition count threshold for subset satisfaction optimization. When the current partition count is >= this threshold, DataFusion will skip repartitioning if the required partitioning expression is a subset of the current partition expression such as Hash(a) satisfies Hash(a, b). When the current partition count is < this threshold, DataFusion will repartition to increase parallelism even when subset satisfaction applies. Set to 0 to always repartition (disable subset satisfaction optimization). Set to a high value to always use subset satisfaction. 
Example (subset_repartition_threshold = 4): ```text Hash([a]) satisfies Hash([a, b]) because (Hash([a, b]) is subset of Hash([a]) If current partitions (3) < threshold (4), repartition: AggregateExec: mode=FinalPartitioned, gby=[a, b], aggr=[SUM(x)] RepartitionExec: partitioning=Hash([a, b], 8), input_partitions=3 AggregateExec: mode=Partial, gby=[a, b], aggr=[SUM(x)] DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 3) If current partitions (8) >= threshold (4), use subset satisfaction: AggregateExec: mode=SinglePartitioned, gby=[a, b], aggr=[SUM(x)] DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 8) ``` datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys +datafusion.runtime.file_statistics_cache_limit 1M Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. datafusion.optimizer.use_statistics_registry false When set to true, the physical plan optimizer uses the pluggable `StatisticsRegistry` for statistics propagation across operators. This enables more accurate cardinality estimates compared to each operator's built-in `partition_statistics`. datafusion.runtime.list_files_cache_limit 1M Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. datafusion.runtime.list_files_cache_ttl NULL TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. 
diff --git a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index a4a613e383ec8..53ec7e72d9f16 100644 --- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -177,6 +177,10 @@ physical_plan statement ok DROP TABLE test_table; +# Disable file statistics cache because file statistics have been previously created +statement ok +set datafusion.runtime.file_statistics_cache_limit = "0K"; + statement ok CREATE EXTERNAL TABLE test_table ( partition_col TEXT NOT NULL, diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index f270b9b169572..42a12ab6a7542 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -351,6 +351,12 @@ RESET datafusion.runtime.memory_limit statement ok EXPLAIN ANALYZE SELECT * FROM generate_series(1, 1000) AS t1(v1) ORDER BY v1 +statement ok +SET datafusion.runtime.file_statistics_cache_limit = '1K' + +statement ok +RESET datafusion.runtime.file_statistics_cache_limit + statement ok SET datafusion.runtime.list_files_cache_limit = '1K' @@ -605,6 +611,15 @@ SHOW datafusion.runtime.max_temp_directory_size ---- datafusion.runtime.max_temp_directory_size 10G +# Test SET and SHOW rruntime.file_statistics_cache_limit +statement ok +SET datafusion.runtime.file_statistics_cache_limit = '42M' + +query TT +SHOW datafusion.runtime.file_statistics_cache_limit +---- +datafusion.runtime.file_statistics_cache_limit 42M + # Test SET and SHOW runtime.metadata_cache_limit statement ok SET datafusion.runtime.metadata_cache_limit = '200M' @@ -639,6 +654,7 @@ datafusion.runtime.list_files_cache_ttl 1m30s query T SELECT name FROM information_schema.df_settings WHERE name LIKE 'datafusion.runtime.%' ORDER BY name ---- +datafusion.runtime.file_statistics_cache_limit 
datafusion.runtime.list_files_cache_limit datafusion.runtime.list_files_cache_ttl datafusion.runtime.max_temp_directory_size From 0ef1b064adde443797d1f0a5cf251f8d783ff216 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 28 Jan 2026 17:00:13 +0100 Subject: [PATCH 02/83] fixup! Add a default FileStatisticsCache implementation for the ListingTable --- docs/source/user-guide/configs.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 46039f3c99c27..6a8014ddf1d8f 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -229,14 +229,15 @@ SET datafusion.runtime.memory_limit = '2G'; The following runtime configuration settings are available: -| key | default | description | -| ------------------------------------------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. | -| datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.memory_limit | NULL | Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.metadata_cache_limit | 50M | Maximum memory to use for file metadata cache such as Parquet metadata. 
Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | -| datafusion.runtime.temp_directory | NULL | The path to the temporary file directory. | +| key | default | description | +| ---------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| datafusion.runtime.file_statistics_cache_limit | 1M | Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. | +| datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.memory_limit | NULL | Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.metadata_cache_limit | 50M | Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| datafusion.runtime.temp_directory | NULL | The path to the temporary file directory. 
| # Tuning Guide From 994aecd9a09b96d774e627115b7df861d2ccbaa5 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 4 Feb 2026 12:59:27 +0100 Subject: [PATCH 03/83] Adapt memory usage when removing entries --- datafusion/execution/src/cache/cache_unit.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 4e2b1eeac988e..fe7950c218593 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -112,7 +112,12 @@ impl DefaultFileStatisticsCacheState { } fn remove(&mut self, k: &Path) -> Option { - self.lru_queue.remove(k) + if let Some(old_entry) = self.lru_queue.remove(k) { + self.memory_used -= old_entry.heap_size(); + Some(old_entry) + } else { + None + } } fn contains_key(&self, k: &Path) -> bool { @@ -561,11 +566,14 @@ mod tests { assert_eq!(result_3.unwrap(), value_3); cache.remove(&meta_2.location); - assert_eq!(cache.len(), 1); + assert_eq!(cache.memory_used(), value_3.heap_size()); + cache.clear(); assert_eq!(cache.len(), 0); + assert_eq!(cache.memory_used(), 0); + } #[test] From 5cf0b111319858d62421ae089c0d2070940ea99b Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 4 Feb 2026 13:00:37 +0100 Subject: [PATCH 04/83] Adapt heapsize for &str --- datafusion/common/src/heap_size.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 6dee7d5c0a373..479e6aa85f73b 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -314,7 +314,7 @@ impl DFHeapSize for String { impl DFHeapSize for str { fn heap_size(&self) -> usize { - self.to_string().capacity() + self.as_bytes().len() } } From d8372f8af319fc3d7f667d7749900797519cfd81 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 4 Feb 2026 13:08:03 +0100 Subject: [PATCH 05/83] Fix formatting --- 
datafusion/execution/src/cache/cache_unit.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index fe7950c218593..6db79ea7b16f5 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -569,11 +569,9 @@ mod tests { assert_eq!(cache.len(), 1); assert_eq!(cache.memory_used(), value_3.heap_size()); - cache.clear(); assert_eq!(cache.len(), 0); assert_eq!(cache.memory_used(), 0); - } #[test] From de9789d5203ecba068a62f39efafc2e95978e2e3 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 4 Feb 2026 13:08:26 +0100 Subject: [PATCH 06/83] Adapt heapsize for &str and add another scalarvalue --- datafusion/common/src/heap_size.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 479e6aa85f73b..ca9c03894c23d 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -125,6 +125,7 @@ impl DFHeapSize for ScalarValue { DurationNanosecond(d) => d.heap_size(), Union(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), Dictionary(a, b) => a.heap_size() + b.heap_size(), + RunEndEncoded(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), } } } @@ -314,7 +315,7 @@ impl DFHeapSize for String { impl DFHeapSize for str { fn heap_size(&self) -> usize { - self.as_bytes().len() + self.len() } } From e9cdb5fca88d6e10f25d75bafe5e1770443671f4 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:33:39 +0100 Subject: [PATCH 07/83] Add better error message --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 6db79ea7b16f5..512f8c94d7efd 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ 
b/datafusion/execution/src/cache/cache_unit.rs @@ -141,7 +141,7 @@ impl DefaultFileStatisticsCacheState { // cache is empty while memory_used > memory_limit, cannot happen debug_assert!( false, - "cache is empty while memory_used > memory_limit, cannot happen" + "This is a bug! Please report it to the Apache DataFusion developers" ); return; } From f8591508339a0b8b13878d868043abb2fd4cd401 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:35:33 +0100 Subject: [PATCH 08/83] Add todo to add heapsize for ordering in CachedFileMetadata --- datafusion/execution/src/cache/cache_manager.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 7de993210b7b7..da1e1081da0b6 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -113,6 +113,7 @@ impl DFHeapSize for CachedFileMetadata { + self.meta.e_tag.heap_size() + self.meta.location.as_ref().heap_size() + self.statistics.heap_size() + //TODO add ordering once LexOrdering /PhysicalExpr implements DFHeapSize } } From c7274d8936678129f1001d4e3611592df98b48c4 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:39:31 +0100 Subject: [PATCH 09/83] Fix comment/docs on DefaultFileStatisticsCache --- datafusion/execution/src/cache/cache_unit.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 512f8c94d7efd..8e34950a701b6 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -16,9 +16,7 @@ // under the License. 
use crate::cache::CacheAccessor; -use crate::cache::cache_manager::{ - CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, -}; +use crate::cache::cache_manager::{CachedFileMetadata, FileMetadataCache, FileStatisticsCache, FileStatisticsCacheEntry}; use std::collections::HashMap; use std::sync::Mutex; @@ -36,7 +34,13 @@ use object_store::path::Path; /// 2. If `Some(cached)`, validate with `cached.is_valid_for(¤t_meta)` /// 3. If invalid or missing, compute new value and call `put(path, new_value)` /// -/// Uses DashMap for lock-free concurrent access. +/// # Internal details +/// +/// The `memory_limit` controls the maximum size of the cache, which uses a +/// Least Recently Used eviction algorithm. When adding a new entry, if the total +/// size of the cached entries exceeds `memory_limit`, the least recently used entries +/// are evicted until the total size is lower than `memory_limit`. +/// /// /// [`FileStatisticsCache`]: crate::cache::cache_manager::FileStatisticsCache #[derive(Default)] From 24d91cb6ccd27c8752b4c3c69e60ce19b809742c Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:44:20 +0100 Subject: [PATCH 10/83] Simplify test data generation --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 8e34950a701b6..9f26f6104cc2f 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -596,7 +596,7 @@ mod tests { fn create_cached_file_metadata_with_stats( file_name: &str, ) -> (ObjectMeta, CachedFileMetadata) { - let series: Vec = (0..=10).step_by(1).collect(); + let series: Vec = (0..=10).collect(); let values = Int32Array::from(series); let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0])); let field = Arc::new(Field::new_list_field(DataType::Int32, false)); From 5adc600cb0312e89bf18a879bdf8215063a5e083 Mon 
Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:47:07 +0100 Subject: [PATCH 11/83] Remove potential stale entry, if entry is too large --- datafusion/execution/src/cache/cache_unit.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 9f26f6104cc2f..9166f4c3da685 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -100,6 +100,8 @@ impl DefaultFileStatisticsCacheState { let entry_size = value.heap_size(); if entry_size > self.memory_limit { + // Remove stale entry if exists + self.remove(key); return None; } From 9a8b538f15bad65b73718b05a774d3b9465d39ba Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:48:06 +0100 Subject: [PATCH 12/83] Fix typo in sql logic test comment --- datafusion/sqllogictest/test_files/set_variable.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index 42a12ab6a7542..aea9b9aeea41f 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -611,7 +611,7 @@ SHOW datafusion.runtime.max_temp_directory_size ---- datafusion.runtime.max_temp_directory_size 10G -# Test SET and SHOW rruntime.file_statistics_cache_limit +# Test SET and SHOW runtime.file_statistics_cache_limit statement ok SET datafusion.runtime.file_statistics_cache_limit = '42M' From 40587c704f0916d5701f75f75bd03864958449a7 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 10:52:36 +0100 Subject: [PATCH 13/83] Fix comment about default behaviour in cache manager --- datafusion/execution/src/cache/cache_manager.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 
da1e1081da0b6..3ec0c1898b8bf 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -449,7 +449,7 @@ pub const DEFAULT_METADATA_CACHE_LIMIT: usize = 50 * 1024 * 1024; // 50M pub struct CacheManagerConfig { /// Enable caching of file statistics when listing files. /// Enabling the cache avoids repeatedly reading file statistics in a DataFusion session. - /// Default is disabled. Currently only Parquet files are supported. + /// Default is enabled with 1MiB. Currently only Parquet files are supported. pub file_statistics_cache: Option>, /// Limit of the file statistics cache, in bytes. Default: 1MiB. pub file_statistics_cache_limit: usize, From 38b45f75e128f2053b5f27c3c23dbc23cad28553 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 11:05:30 +0100 Subject: [PATCH 14/83] Fix variable name in test --- datafusion/core/tests/sql/runtime_config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index 6b7f0568309ce..9f8c2575d7530 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -260,11 +260,11 @@ async fn test_test_metadata_cache_limit() { #[tokio::test] async fn test_list_files_cache_limit() { - let list_files_cache = Arc::new(DefaultListFilesCache::default()); + let file_statistics_cache = Arc::new(DefaultListFilesCache::default()); let rt = RuntimeEnvBuilder::new() .with_cache_manager( - CacheManagerConfig::default().with_list_files_cache(Some(list_files_cache)), + CacheManagerConfig::default().with_list_files_cache(Some(file_statistics_cache)), ) .build_arc() .unwrap(); From b4c7b907f9e0657b8bb561959f990a32b85b817e Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 11:07:29 +0100 Subject: [PATCH 15/83] Fix variable name in test --- datafusion/core/tests/sql/runtime_config.rs | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index 9f8c2575d7530..5998148c42d0b 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -260,11 +260,11 @@ async fn test_test_metadata_cache_limit() { #[tokio::test] async fn test_list_files_cache_limit() { - let file_statistics_cache = Arc::new(DefaultListFilesCache::default()); + let list_files_cache = Arc::new(DefaultListFilesCache::default()); let rt = RuntimeEnvBuilder::new() .with_cache_manager( - CacheManagerConfig::default().with_list_files_cache(Some(file_statistics_cache)), + CacheManagerConfig::default().with_list_files_cache(Some(list_files_cache)), ) .build_arc() .unwrap(); @@ -347,12 +347,12 @@ async fn test_list_files_cache_ttl() { #[tokio::test] async fn test_file_statistics_cache_limit() { - let list_files_cache = Arc::new(DefaultFileStatisticsCache::default()); + let file_statistics_cache = Arc::new(DefaultFileStatisticsCache::default()); let rt = RuntimeEnvBuilder::new() .with_cache_manager( CacheManagerConfig::default() - .with_file_statistics_cache(Some(list_files_cache)), + .with_file_statistics_cache(Some(file_statistics_cache)), ) .build_arc() .unwrap(); From e900ddbb3249c9de7af6a3def3d1d3ad4fd015f3 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 10 Feb 2026 12:31:05 +0100 Subject: [PATCH 16/83] Disable cache for sql logic test --- datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt index 85f9549357138..80a1a838cb7e9 100644 --- a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt +++ b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt @@ -37,6 +37,9 @@ COPY ( ) TO 
'test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet' STORED AS PARQUET; +statement ok +set datafusion.runtime.file_statistics_cache_limit = "0K"; + ## Create table without filter pushdown ## (pushdown setting is part of the table, but is copied from the session settings) From e6c05b93c2f5ec63e8ce0aaa4c2b9cd4025d9c61 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 11 Feb 2026 15:35:43 +0100 Subject: [PATCH 17/83] Include key into memory estimation --- datafusion/common/Cargo.toml | 2 +- datafusion/common/src/heap_size.rs | 7 +++++++ datafusion/execution/src/cache/cache_unit.rs | 11 +++++++---- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 740d4e45b8d05..e8b03bedc57e4 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -77,7 +77,7 @@ indexmap = { workspace = true } itertools = { workspace = true } libc = "0.2.185" log = { workspace = true } -object_store = { workspace = true, optional = true } +object_store = { workspace = true, optional = true, default-features = true } parquet = { workspace = true, optional = true, default-features = true } recursive = { workspace = true, optional = true } sqlparser = { workspace = true, optional = true } diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index ca9c03894c23d..c9f6b4671e1f5 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -29,6 +29,7 @@ use half::f16; use std::collections::HashMap; use std::fmt::Debug; use std::sync::Arc; +use object_store::path::Path; /// This is a temporary solution until and /// are resolved. 
@@ -179,6 +180,12 @@ impl DFHeapSize for DataType { } } +impl DFHeapSize for Path { + fn heap_size(&self) -> usize { + self.as_ref().heap_size() + } +} + impl DFHeapSize for Vec { fn heap_size(&self) -> usize { let item_size = size_of::(); diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 9166f4c3da685..90dbaff621810 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -97,16 +97,17 @@ impl DefaultFileStatisticsCacheState { key: &Path, value: CachedFileMetadata, ) -> Option { + let key_size = key.heap_size(); let entry_size = value.heap_size(); - if entry_size > self.memory_limit { + if entry_size + key_size > self.memory_limit { // Remove stale entry if exists self.remove(key); return None; } let old_value = self.lru_queue.put(key.clone(), value); - self.memory_used += entry_size; + self.memory_used += entry_size + key_size; if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); @@ -119,6 +120,7 @@ impl DefaultFileStatisticsCacheState { fn remove(&mut self, k: &Path) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { + self.memory_used -= k.heap_size(); self.memory_used -= old_entry.heap_size(); Some(old_entry) } else { @@ -142,6 +144,7 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { + self.memory_used -= removed.0.heap_size(); self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen @@ -541,7 +544,7 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries = value_1.heap_size() + value_2.heap_size(); + let limit_for_2_entries = &meta_1.location.heap_size() + value_1.heap_size() + 
&meta_2.location.heap_size() + value_2.heap_size(); // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); @@ -573,7 +576,7 @@ mod tests { cache.remove(&meta_2.location); assert_eq!(cache.len(), 1); - assert_eq!(cache.memory_used(), value_3.heap_size()); + assert_eq!(cache.memory_used(), &meta_3.location.heap_size() + value_3.heap_size()); cache.clear(); assert_eq!(cache.len(), 0); From 2fa8a6121b2eb6d0342d7b1b97aac077cde4af79 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 11 Feb 2026 15:45:29 +0100 Subject: [PATCH 18/83] Fix fmt --- datafusion/common/src/heap_size.rs | 4 ++-- datafusion/execution/src/cache/cache_unit.rs | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index c9f6b4671e1f5..ad0855bbe9d4c 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -26,10 +26,10 @@ use arrow::datatypes::{ }; use chrono::{DateTime, Utc}; use half::f16; +use object_store::path::Path; use std::collections::HashMap; use std::fmt::Debug; use std::sync::Arc; -use object_store::path::Path; /// This is a temporary solution until and /// are resolved. @@ -182,7 +182,7 @@ impl DFHeapSize for DataType { impl DFHeapSize for Path { fn heap_size(&self) -> usize { - self.as_ref().heap_size() + self.as_ref().heap_size() } } diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 90dbaff621810..9f13c9b8e7418 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -16,14 +16,16 @@ // under the License. 
use crate::cache::CacheAccessor; -use crate::cache::cache_manager::{CachedFileMetadata, FileMetadataCache, FileStatisticsCache, FileStatisticsCacheEntry}; +use crate::cache::cache_manager::{ + CachedFileMetadata, FileMetadataCache, FileStatisticsCache, FileStatisticsCacheEntry, +}; +use object_store::path::Path; use std::collections::HashMap; use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; use crate::cache::lru_queue::LruQueue; use datafusion_common::heap_size::DFHeapSize; -use object_store::path::Path; /// Default implementation of [`FileStatisticsCache`] /// @@ -544,7 +546,10 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries = &meta_1.location.heap_size() + value_1.heap_size() + &meta_2.location.heap_size() + value_2.heap_size(); + let limit_for_2_entries = &meta_1.location.heap_size() + + value_1.heap_size() + + &meta_2.location.heap_size() + + value_2.heap_size(); // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); @@ -576,7 +581,10 @@ mod tests { cache.remove(&meta_2.location); assert_eq!(cache.len(), 1); - assert_eq!(cache.memory_used(), &meta_3.location.heap_size() + value_3.heap_size()); + assert_eq!( + cache.memory_used(), + &meta_3.location.heap_size() + value_3.heap_size() + ); cache.clear(); assert_eq!(cache.len(), 0); From e4dea07e0db7648dda72e8f5f922be4a32a9e014 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 11 Feb 2026 15:48:51 +0100 Subject: [PATCH 19/83] Fix clippy --- datafusion/execution/src/cache/cache_unit.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 9f13c9b8e7418..a2e747e099622 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ 
b/datafusion/execution/src/cache/cache_unit.rs @@ -17,7 +17,7 @@ use crate::cache::CacheAccessor; use crate::cache::cache_manager::{ - CachedFileMetadata, FileMetadataCache, FileStatisticsCache, FileStatisticsCacheEntry, + CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; use object_store::path::Path; use std::collections::HashMap; @@ -546,9 +546,9 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries = &meta_1.location.heap_size() + let limit_for_2_entries = meta_1.location.heap_size() + value_1.heap_size() - + &meta_2.location.heap_size() + + meta_2.location.heap_size() + value_2.heap_size(); // create a cache with a limit which fits exactly 2 entries @@ -583,7 +583,7 @@ mod tests { assert_eq!(cache.len(), 1); assert_eq!( cache.memory_used(), - &meta_3.location.heap_size() + value_3.heap_size() + meta_3.location.heap_size() + value_3.heap_size() ); cache.clear(); From 241480deb50711bb9a74222d0c6c00605a24af03 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 11 Feb 2026 16:09:38 +0100 Subject: [PATCH 20/83] minor --- datafusion/common/src/heap_size.rs | 8 ++++---- datafusion/execution/src/cache/cache_manager.rs | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index ad0855bbe9d4c..d510d3389d4c7 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -170,10 +170,10 @@ impl DFHeapSize for DataType { Struct(s) => s.heap_size(), Union(u, m) => u.heap_size() + m.heap_size(), Dictionary(a, b) => a.heap_size() + b.heap_size(), - Decimal32(u8, i8) => u8.heap_size() + i8.heap_size(), - Decimal64(u8, i8) => u8.heap_size() + i8.heap_size(), - Decimal128(u8, i8) => u8.heap_size() + i8.heap_size(), - Decimal256(u8, i8) => u8.heap_size() + i8.heap_size(), + Decimal32(p, s) => 
p.heap_size() + s.heap_size(), + Decimal64(p, s) => p.heap_size() + s.heap_size(), + Decimal128(p, s) => p.heap_size() + s.heap_size(), + Decimal256(p, s) => p.heap_size() + s.heap_size(), Map(m, b) => m.heap_size() + b.heap_size(), RunEndEncoded(a, b) => a.heap_size() + b.heap_size(), } diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 3ec0c1898b8bf..933c7df8c7646 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -113,7 +113,7 @@ impl DFHeapSize for CachedFileMetadata { + self.meta.e_tag.heap_size() + self.meta.location.as_ref().heap_size() + self.statistics.heap_size() - //TODO add ordering once LexOrdering /PhysicalExpr implements DFHeapSize + //TODO add ordering once LexOrdering/PhysicalExpr implements DFHeapSize } } @@ -501,6 +501,7 @@ impl CacheManagerConfig { self } + /// Specifies the memory limit for the file statistics cache, in bytes. pub fn with_file_statistics_cache_limit(mut self, limit: usize) -> Self { self.file_statistics_cache_limit = limit; self From af56b068975caebb1011e1b466732a267d3651c3 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 09:13:37 +0100 Subject: [PATCH 21/83] Add more key memory accounting --- datafusion/execution/src/cache/cache_unit.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index a2e747e099622..988d9886241f9 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -103,16 +103,18 @@ impl DefaultFileStatisticsCacheState { let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { - // Remove stale entry if exists + // Remove potential stale entry self.remove(key); return None; } let old_value = self.lru_queue.put(key.clone(), value); - self.memory_used += entry_size + key_size; 
+ self.memory_used += entry_size; if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); + } else { + self.memory_used += key.heap_size(); } self.evict_entries(); From 43f7d643fce5f7ee9268fff82732bdf72aebf938 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 09:28:48 +0100 Subject: [PATCH 22/83] Fix Formatting --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 988d9886241f9..a7def2c3d89df 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -103,7 +103,7 @@ impl DefaultFileStatisticsCacheState { let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { - // Remove potential stale entry + // Remove potential stale entry self.remove(key); return None; } From 47a585396561beeb58f77224f54788188e0235a5 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 14:38:00 +0100 Subject: [PATCH 23/83] Account path as string and remove dependency to object_store --- datafusion/common/Cargo.toml | 2 +- datafusion/common/src/heap_size.rs | 7 ------- datafusion/execution/src/cache/cache_unit.rs | 14 +++++++------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index e8b03bedc57e4..740d4e45b8d05 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -77,7 +77,7 @@ indexmap = { workspace = true } itertools = { workspace = true } libc = "0.2.185" log = { workspace = true } -object_store = { workspace = true, optional = true, default-features = true } +object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } recursive = { workspace = true, optional = true } sqlparser = { workspace = true, optional = true } diff --git 
a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index d510d3389d4c7..f5aa4704be15c 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -26,7 +26,6 @@ use arrow::datatypes::{ }; use chrono::{DateTime, Utc}; use half::f16; -use object_store::path::Path; use std::collections::HashMap; use std::fmt::Debug; use std::sync::Arc; @@ -180,12 +179,6 @@ impl DFHeapSize for DataType { } } -impl DFHeapSize for Path { - fn heap_size(&self) -> usize { - self.as_ref().heap_size() - } -} - impl DFHeapSize for Vec { fn heap_size(&self) -> usize { let item_size = size_of::(); diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index a7def2c3d89df..1c047ed74e974 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -99,7 +99,7 @@ impl DefaultFileStatisticsCacheState { key: &Path, value: CachedFileMetadata, ) -> Option { - let key_size = key.heap_size(); + let key_size = key.as_ref().heap_size(); let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { @@ -114,7 +114,7 @@ impl DefaultFileStatisticsCacheState { if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); } else { - self.memory_used += key.heap_size(); + self.memory_used += key.as_ref().heap_size(); } self.evict_entries(); @@ -124,7 +124,7 @@ impl DefaultFileStatisticsCacheState { fn remove(&mut self, k: &Path) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { - self.memory_used -= k.heap_size(); + self.memory_used -= k.as_ref().heap_size(); self.memory_used -= old_entry.heap_size(); Some(old_entry) } else { @@ -148,7 +148,7 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { - self.memory_used -= removed.0.heap_size(); + self.memory_used -= removed.0.as_ref().heap_size(); 
self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen @@ -548,9 +548,9 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries = meta_1.location.heap_size() + let limit_for_2_entries = meta_1.location.as_ref().heap_size() + value_1.heap_size() - + meta_2.location.heap_size() + + meta_2.location.as_ref().heap_size() + value_2.heap_size(); // create a cache with a limit which fits exactly 2 entries @@ -585,7 +585,7 @@ mod tests { assert_eq!(cache.len(), 1); assert_eq!( cache.memory_used(), - meta_3.location.heap_size() + value_3.heap_size() + meta_3.location.as_ref().heap_size() + value_3.heap_size() ); cache.clear(); From 49478cc0548450748633145a2b279ba64fc03f32 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:15:04 +0100 Subject: [PATCH 24/83] Improve error handling --- datafusion/execution/src/cache/cache_unit.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 1c047ed74e974..6e2540bd4a4c9 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -64,7 +64,7 @@ impl DefaultFileStatisticsCache { } } -pub struct DefaultFileStatisticsCacheState { +struct DefaultFileStatisticsCacheState { lru_queue: LruQueue, memory_limit: usize, memory_used: usize, @@ -152,10 +152,17 @@ impl DefaultFileStatisticsCacheState { self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen + log::error!( + "File statistics cache memory accounting bug: memory_used={} but cache is empty. \ + Please report this to the Apache DataFusion developers.", + self.memory_used + ); debug_assert!( false, - "This is a bug! 
Please report it to the Apache DataFusion developers" + "memory_used={} but cache is empty", + self.memory_used ); + self.memory_used = 0; return; } } From 0dc67ea8e04399d54ac481d1a224250e8d0c205b Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:15:30 +0100 Subject: [PATCH 25/83] Fix fmt --- datafusion/execution/src/cache/cache_manager.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 933c7df8c7646..e02a4763ba0cc 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -96,6 +96,7 @@ impl CachedFileMetadata { /// /// See [`crate::runtime_env::RuntimeEnv`] for more details pub trait FileStatisticsCache: CacheAccessor { + /// Cache memory limit in bytes. fn cache_limit(&self) -> usize; /// Updates the cache with a new memory limit in bytes. From 9ed4bce11c519dfba7c40265459ae6fc3f6e0df5 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:18:30 +0100 Subject: [PATCH 26/83] Remove path.clone --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 6e2540bd4a4c9..2887d3ca090be 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -222,7 +222,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { let path = entry.0.clone(); let cached = entry.1.clone(); entries.insert( - path.clone(), + path, FileStatisticsCacheEntry { object_meta: cached.meta.clone(), num_rows: cached.statistics.num_rows, From 69483d1dabbcb36fd20516329939ad9a1f226c8b Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:19:00 +0100 Subject: [PATCH 27/83] Simplify accounting for statistics --- datafusion/common/src/heap_size.rs | 6 +----- 1 file changed, 1 
insertion(+), 5 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index f5aa4704be15c..c39b6de7daf5c 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -46,11 +46,7 @@ impl DFHeapSize for Statistics { fn heap_size(&self) -> usize { self.num_rows.heap_size() + self.total_byte_size.heap_size() - + self - .column_statistics - .iter() - .map(|s| s.heap_size()) - .sum::() + + self.column_statistics.heap_size() } } From 101a801ee6e20f9a56fc047e1450e4d0cb0daf6a Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:22:06 +0100 Subject: [PATCH 28/83] Adapt offset buffer --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 2887d3ca090be..6529c63a72aca 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -620,7 +620,7 @@ mod tests { ) -> (ObjectMeta, CachedFileMetadata) { let series: Vec = (0..=10).collect(); let values = Int32Array::from(series); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0])); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 11])); let field = Arc::new(Field::new_list_field(DataType::Int32, false)); let list_array = ListArray::new(field, offsets, Arc::new(values), None); From ec35784d44da5aca6b36af86c1f2ce738d7ad915 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 15:24:36 +0100 Subject: [PATCH 29/83] Fix heap size for Arc --- datafusion/common/src/heap_size.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index c39b6de7daf5c..1acc3486eb51c 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -230,7 +230,7 @@ impl DFHeapSize for HashMap { impl DFHeapSize for Arc { 
fn heap_size(&self) -> usize { // Arc stores weak and strong counts on the heap alongside an instance of T - 2 * size_of::() + size_of::() + self.as_ref().heap_size() + 2 * size_of::() + self.as_ref().heap_size() } } From 838708fc4987cbbd91bd66904ce6fdf81e485966 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 19:59:56 +0100 Subject: [PATCH 30/83] Adapt estimate in test --- datafusion/execution/src/cache/cache_unit.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 6529c63a72aca..1c1b6b9e6e692 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -530,7 +530,7 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 72, + statistics_size_bytes: 304, has_ordering: false, } ), @@ -541,7 +541,7 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 72, + statistics_size_bytes: 304, has_ordering: true, } ), From 7f6f92ae04ab86a09383b050ec8b25e9507c73cb Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 12 Feb 2026 20:23:50 +0100 Subject: [PATCH 31/83] Fix sql logic test --- datafusion/sqllogictest/test_files/array.slt | 9949 ++++++++++++++++++ 1 file changed, 9949 insertions(+) create mode 100644 datafusion/sqllogictest/test_files/array.slt diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt new file mode 100644 index 0000000000000..45cf02700c39a --- /dev/null +++ b/datafusion/sqllogictest/test_files/array.slt @@ -0,0 +1,9949 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############# +## Array Expressions Tests +############# + +### Tables + +statement ok +CREATE TABLE values( + a INT, + b INT, + c INT, + d FLOAT, + e VARCHAR, + f VARCHAR +) AS VALUES + (1, 1, 2, 1.1, 'Lorem', 'A'), + (2, 3, 4, 2.2, 'ipsum', ''), + (3, 5, 6, 3.3, 'dolor', 'BB'), + (4, 7, 8, 4.4, 'sit', NULL), + (NULL, 9, 10, 5.5, 'amet', 'CCC'), + (5, NULL, 12, 6.6, ',', 'DD'), + (6, 11, NULL, 7.7, 'consectetur', 'E'), + (7, 13, 14, NULL, 'adipiscing', 'F'), + (8, 15, 16, 8.8, NULL, '') +; + +statement ok +CREATE TABLE values_without_nulls +AS VALUES + (1, 1, 2, 1.1, 'Lorem', 'A'), + (2, 3, 4, 2.2, 'ipsum', ''), + (3, 5, 6, 3.3, 'dolor', 'BB'), + (4, 7, 8, 4.4, 'sit', NULL), + (5, 9, 10, 5.5, 'amet', 'CCC'), + (6, 11, 12, 6.6, ',', 'DD'), + (7, 13, 14, 7.7, 'consectetur', 'E'), + (8, 15, 16, 8.8, 'adipiscing', 'F'), + (9, 17, 18, 9.9, 'elit', '') +; + +statement ok +CREATE TABLE arrays +AS VALUES + (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2, 3.3), make_array('L', 'o', 'r', 'e', 'm')), + (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6), make_array('i', 'p', NULL, 'u', 'm')), + (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), make_array('d', NULL, 'l', 'o', 'r')), + (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 12.2), make_array('s', 'i', 't')), + (NULL, make_array(13.3, 14.4, 15.5), 
make_array('a', 'm', 'e', 't')), + (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')), + (make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 18.8), NULL) +; + +statement ok +CREATE TABLE large_arrays +AS + SELECT + arrow_cast(column1, 'LargeList(List(Int64))') AS column1, + arrow_cast(column2, 'LargeList(Float64)') AS column2, + arrow_cast(column3, 'LargeList(Utf8)') AS column3 + FROM arrays +; + +statement ok +CREATE TABLE fixed_size_arrays +AS VALUES + (arrow_cast(make_array(make_array(NULL, 2),make_array(3, NULL)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('L', 'o', 'r', 'e', 'm'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(3, 4),make_array(5, 6)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(NULL, 5.5, 6.6), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('i', 'p', NULL, 'u', 'm'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(5, 6),make_array(7, 8)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(7.7, 8.8, 9.9), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('d', NULL, 'l', 'o', 'r'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(7, NULL),make_array(9, 10)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(10.1, NULL, 12.2), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('s', 'i', 't', 'a', 'b'), 'FixedSizeList(5, Utf8)')), + (NULL, arrow_cast(make_array(13.3, 14.4, 15.5), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('a', 'm', 'e', 't', 'x'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(11, 12),make_array(13, 14)), 'FixedSizeList(2, List(Int64))'), NULL, arrow_cast(make_array(',','a','b','c','d'), 'FixedSizeList(5, Utf8)')), + (arrow_cast(make_array(make_array(15, 16),make_array(NULL, 18)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(16.6, 17.7, 18.8), 'FixedSizeList(3, Float64)'), NULL) +; + 
+statement ok +CREATE TABLE slices +AS VALUES + (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 2, -4), + (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 0, 0), + (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), -4, -7), + (NULL, 4, 5), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), + (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 5, NULL) +; + +statement ok +CREATE TABLE fixed_slices +AS VALUES + (arrow_cast(make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 'FixedSizeList(10, Int64)'), 1, 1), + (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 'FixedSizeList(10, Int64)'), 2, -4), + (arrow_cast(make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 0, 0), + (arrow_cast(make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), -4, -7), + (arrow_cast(make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), 'FixedSizeList(10, Int64)'), NULL, 6), + (arrow_cast(make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60),'FixedSizeList(10, Int64)'), 5, NULL) +; + +statement ok +CREATE TABLE arrayspop +AS VALUES + (make_array(1, 2, NULL)), + (make_array(3, 4, 5, NULL)), + (make_array(6, 7, 8, NULL, 9)), + (make_array(NULL, NULL, 100)), + (NULL), + (make_array(NULL, 10, 11, 12)) +; + +statement ok +CREATE TABLE large_arrayspop +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1 +FROM arrayspop +; + +statement ok +CREATE TABLE nested_arrays +AS VALUES + (make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), make_array(7, 8, 9), 2, make_array([[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]), make_array(11, 12, 13)), + (make_array(make_array(4, 5, 6), make_array(10, 11, 12), make_array(4, 9, 8), make_array(7, 8, 9), make_array(10, 11, 12), make_array(1, 8, 7)), make_array(10, 11, 12), 3, make_array([[11, 12, 13], [14, 15, 16]], 
[[17, 18, 19], [20, 21, 22]]), make_array(121, 131, 141)) +; + +statement ok +CREATE TABLE large_nested_arrays +AS + SELECT + arrow_cast(column1, 'LargeList(LargeList(Int64))') AS column1, + arrow_cast(column2, 'LargeList(Int64)') AS column2, + column3, + arrow_cast(column4, 'LargeList(LargeList(List(Int64)))') AS column4, + arrow_cast(column5, 'LargeList(Int64)') AS column5 + FROM nested_arrays +; + +statement ok +CREATE TABLE fixed_size_nested_arrays +AS VALUES + (arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(7, 8, 9), 'FixedSizeList(3, Int64)'), 2, arrow_cast(make_array([[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(11, 12, 13), 'FixedSizeList(3, Int64)')), + (arrow_cast(make_array(make_array(4, 5, 6), make_array(10, 11, 12), make_array(4, 9, 8), make_array(7, 8, 9), make_array(10, 11, 12), make_array(1, 8, 7)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(10, 11, 12), 'FixedSizeList(3, Int64)'), 3, arrow_cast(make_array([[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(121, 131, 141), 'FixedSizeList(3, Int64)')) +; + +statement ok +CREATE TABLE arrays_values +AS VALUES + (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'), + (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'), + (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'), + (NULL, 44, 5, '@'), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'), + (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'), + (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL) +; + +statement ok +CREATE TABLE large_arrays_values +AS SELECT + arrow_cast(column1, 
'LargeList(Int64)') AS column1, + column2, + column3, + column4 +FROM arrays_values +; + +statement ok +CREATE TABLE fixed_arrays_values +AS SELECT + arrow_cast(column1, 'FixedSizeList(10, Int64)') AS column1, + column2, + column3, + column4 +FROM arrays_values +; + +statement ok +CREATE TABLE arrays_values_v2 +AS VALUES + (make_array(NULL, 2, 3), make_array(4, 5, NULL), 12, make_array([30, 40, 50])), + (NULL, make_array(7, NULL, 8), 13, make_array(make_array(NULL,NULL,60))), + (make_array(9, NULL, 10), NULL, 14, make_array(make_array(70,NULL,NULL))), + (make_array(NULL, 1), make_array(NULL, 21), NULL, NULL), + (make_array(11, 12), NULL, NULL, NULL), + (NULL, NULL, NULL, NULL) +; + +statement ok +CREATE TABLE large_arrays_values_v2 +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1, + arrow_cast(column2, 'LargeList(Int64)') AS column2, + column3, + arrow_cast(column4, 'LargeList(LargeList(Int64))') AS column4 +FROM arrays_values_v2 +; + +statement ok +CREATE TABLE flatten_table +AS VALUES + (make_array([1], [2], [3]), make_array([[1, 2, 3]], [[4, 5]], [[6]]), make_array([[[1]]], [[[2, 3]]]), make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4])), + (make_array([1, 2], [3, 4], [5, 6]), make_array([[8]]), make_array([[[1,2]]], [[[3]]]), make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0])) +; + +statement ok +CREATE TABLE large_flatten_table +AS + SELECT + arrow_cast(column1, 'LargeList(LargeList(Int64))') AS column1, + arrow_cast(column2, 'LargeList(LargeList(LargeList(Int64)))') AS column2, + arrow_cast(column3, 'LargeList(LargeList(LargeList(LargeList(Int64))))') AS column3, + arrow_cast(column4, 'LargeList(LargeList(Float64))') AS column4 + FROM flatten_table +; + +statement ok +CREATE TABLE fixed_size_flatten_table +AS VALUES + (arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), + arrow_cast(make_array([[1, 2, 3]], [[4, 5]], [[6]]), 'FixedSizeList(3, List(List(Int64)))'), + arrow_cast(make_array([[[1]]], [[[2, 3]]]), 'FixedSizeList(2, 
List(List(List(Int64))))'), + arrow_cast(make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4]), 'FixedSizeList(3, List(Float64))') + ), + ( + arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), + arrow_cast(make_array([[8]], [[9, 10]], [[11, 12, 13]]), 'FixedSizeList(3, List(List(Int64)))'), + arrow_cast(make_array([[[1,2]]], [[[3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), + arrow_cast(make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]), 'FixedSizeList(3, List(Float64))') + ) +; + +statement ok +CREATE TABLE array_has_table_1D +AS VALUES + (make_array(1, 2), 1, make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3,5)), + (make_array(3, 4, 5), 2, make_array(1,2,3,4), make_array(2,5), make_array(2,4,6), make_array(1,3,5)) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_1D +AS VALUES + (arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(1,3,5), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2, 4, 6, 8, 1, 3, 5), 'FixedSizeList(7, Int64)')), + (arrow_cast(make_array(3, 4, 5), 'FixedSizeList(3, Int64)'), 2, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(2,5), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(2,4,6), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3, 5, 7, 9, 11, 13), 'FixedSizeList(7, Int64)')) +; + +statement ok +CREATE TABLE array_has_table_1D_Float +AS VALUES + (make_array(1.0, 2.0), 1.0, make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)), + (make_array(3.0, 4.0, 5.0), 2.0, make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33)) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_1D_Float +AS VALUES + (arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 1.0, arrow_cast(make_array(1.0, 2.0, 3.0, 
4.0), 'FixedSizeList(4, Float64)'), arrow_cast(make_array(1.0,3.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 2.22), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 3.33), 'FixedSizeList(2, Float64)')), + (arrow_cast(make_array(3.0, 4.0, 5.0), 'FixedSizeList(3, Float64)'), 2.0, arrow_cast(make_array(1.0, 2.0, 3.0, 4.0), 'FixedSizeList(4, Float64)'), arrow_cast(make_array(2.0,5.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 1.11), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 3.33), 'FixedSizeList(2, Float64)')) +; + +statement ok +CREATE TABLE array_has_table_1D_Boolean +AS VALUES + (make_array(true, true, true), false, make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)), + (make_array(false, false, false), false, make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true)) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_1D_Boolean +AS VALUES + (arrow_cast(make_array(true, true, true), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, true, false, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(false, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)')), + (arrow_cast(make_array(false, false, false), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, false, true, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, true, false), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(true, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(false,false,true), 'FixedSizeList(3, Boolean)')) +; + +statement ok +CREATE TABLE array_has_table_1D_UTF8 +AS VALUES + (make_array('a', 'bc', 'def'), 'bc', make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 
'python'), make_array('data')), + (make_array('a', 'bc', 'def'), 'defg', make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow')) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_1D_UTF8 +AS VALUES + (arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'bc', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'datafusion', 'rust'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('data', 'fusion', 'rust'), 'FixedSizeList(3, Utf8)')), + (arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'defg', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow', 'python'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)')) +; + +statement ok +CREATE TABLE array_has_table_2D +AS VALUES + (make_array([1,2]), make_array(1,3), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])), + (make_array([3,4], [5]), make_array(5), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10])) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_2D +AS VALUES + (arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3], [4,5], [6,7]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([4,5], [6,7], [1,2,3]), 'FixedSizeList(3, List(Int64))')), + (arrow_cast(make_array([3,4], [5]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(5, 3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3,4], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))'), 
arrow_cast(make_array([1,2,3], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))')) +; + +statement ok +CREATE TABLE array_has_table_2D_float +AS VALUES + (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])), + (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3])) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_2D_Float +AS VALUES + (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.1, 2.2], [3.3], [4.4]), 'FixedSizeList(3, List(Float64))')), + (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))')) +; + +statement ok +CREATE TABLE array_has_table_3D +AS VALUES + (make_array([[1,2]]), make_array([1])), + (make_array([[1,2]]), make_array([1,2])), + (make_array([[1,2]]), make_array([1,2,3])), + (make_array([[1], [2]]), make_array([2])), + (make_array([[1], [2]]), make_array([1], [2])), + (make_array([[1], [2]], [[2], [3]]), make_array([1], [2], [3])), + (make_array([[1], [2]], [[2], [3]]), make_array([1], [2])) +; + +statement ok +CREATE TABLE fixed_size_array_has_table_3D +AS VALUES + (arrow_cast(make_array([[1,2]], [[3, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2], [3, 4]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2,3], [1]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([2], [3]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], 
[2]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), + (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')) +; + +statement ok +CREATE TABLE array_has_table_null +AS VALUES + (make_array(1, 2), 1), + (make_array(1, NULL), 1), + (make_array(3, 4, 5), 2), + (make_array(3, NULL, 5), 2), + (make_array(NULL, NULL, NULL), 2) +; + +statement ok +CREATE TABLE array_has_table_empty +AS VALUES + (make_array(1, 3, 5), 1), + (make_array(), 1), + (NULL, 1) +; + +statement ok +CREATE TABLE array_distinct_table_1D +AS VALUES + (make_array(1, 1, 2, 2, 3)), + (make_array(1, 2, 3, 4, 5)), + (make_array(3, 5, 3, 3, 3)) +; + +statement ok +CREATE TABLE array_distinct_table_1D_UTF8 +AS VALUES + (make_array('a', 'a', 'bc', 'bc', 'def')), + (make_array('a', 'bc', 'def', 'defg', 'defg')), + (make_array('defg', 'defg', 'defg', 'defg', 'defg')) +; + +statement ok +CREATE TABLE array_distinct_table_2D +AS VALUES + (make_array([1,2], [1,2], [3,4], [3,4], [5,6])), + (make_array([1,2], [3,4], [5,6], [7,8], [9,10])), + (make_array([5,6], [5,6], NULL)) +; + +statement ok +CREATE TABLE array_distinct_table_1D_large +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_UTF8_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Utf8)') AS column1 +FROM array_distinct_table_1D_UTF8 +; + +statement ok +CREATE TABLE array_distinct_table_2D_fixed +AS VALUES + (arrow_cast(make_array([1,2], [1,2], [3,4], [3,4], [5,6]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([1,2], [3,4], [5,6], 
[7,8], [9,10]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([5,6], [5,6], NULL, NULL, NULL), 'FixedSizeList(5, List(Int64))')) +; + +statement ok +CREATE TABLE array_intersect_table_1D +AS VALUES + (make_array(1, 2), make_array(1), make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3)), + (make_array(11, 22), make_array(11), make_array(11,22,33), make_array(11,33), make_array(11,33,55), make_array(22,44,66,88,11,33)) +; + +statement ok +CREATE TABLE large_array_intersect_table_1D +AS + SELECT + arrow_cast(column1, 'LargeList(Int64)') as column1, + arrow_cast(column2, 'LargeList(Int64)') as column2, + arrow_cast(column3, 'LargeList(Int64)') as column3, + arrow_cast(column4, 'LargeList(Int64)') as column4, + arrow_cast(column5, 'LargeList(Int64)') as column5, + arrow_cast(column6, 'LargeList(Int64)') as column6 +FROM array_intersect_table_1D +; + +statement ok +CREATE TABLE array_intersect_table_1D_Float +AS VALUES + (make_array(1.0, 2.0), make_array(1.0), make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)), + (make_array(3.0, 4.0, 5.0), make_array(2.0), make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33)) +; + +statement ok +CREATE TABLE large_array_intersect_table_1D_Float +AS + SELECT + arrow_cast(column1, 'LargeList(Float64)') as column1, + arrow_cast(column2, 'LargeList(Float64)') as column2, + arrow_cast(column3, 'LargeList(Float64)') as column3, + arrow_cast(column4, 'LargeList(Float64)') as column4, + arrow_cast(column5, 'LargeList(Float64)') as column5, + arrow_cast(column6, 'LargeList(Float64)') as column6 +FROM array_intersect_table_1D_Float +; + +statement ok +CREATE TABLE array_intersect_table_1D_Boolean +AS VALUES + (make_array(true, true, true), make_array(false), make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)), + (make_array(false, false, false), 
make_array(false), make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true)) +; + +statement ok +CREATE TABLE large_array_intersect_table_1D_Boolean +AS + SELECT + arrow_cast(column1, 'LargeList(Boolean)') as column1, + arrow_cast(column2, 'LargeList(Boolean)') as column2, + arrow_cast(column3, 'LargeList(Boolean)') as column3, + arrow_cast(column4, 'LargeList(Boolean)') as column4, + arrow_cast(column5, 'LargeList(Boolean)') as column5, + arrow_cast(column6, 'LargeList(Boolean)') as column6 +FROM array_intersect_table_1D_Boolean +; + +statement ok +CREATE TABLE array_intersect_table_1D_UTF8 +AS VALUES + (make_array('a', 'bc', 'def'), make_array('bc'), make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 'python'), make_array('data')), + (make_array('a', 'bc', 'def'), make_array('defg'), make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow')) +; + +statement ok +CREATE TABLE large_array_intersect_table_1D_UTF8 +AS + SELECT + arrow_cast(column1, 'LargeList(Utf8)') as column1, + arrow_cast(column2, 'LargeList(Utf8)') as column2, + arrow_cast(column3, 'LargeList(Utf8)') as column3, + arrow_cast(column4, 'LargeList(Utf8)') as column4, + arrow_cast(column5, 'LargeList(Utf8)') as column5, + arrow_cast(column6, 'LargeList(Utf8)') as column6 +FROM array_intersect_table_1D_UTF8 +; + +statement ok +CREATE TABLE array_intersect_table_1D_NULL +AS VALUES + ([1, 2, 2, 3], [2, 3, 4]), + ([2, 3, 3], [3]), + ([3], [3, 3, 4]), + (null, [3, 4]), + ([1, 2], null), + (null, null) +; + +statement ok +CREATE TABLE array_intersect_table_2D +AS VALUES + (make_array([1,2]), make_array([1,3]), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])), + (make_array([3,4], [5]), make_array([3,4]), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10])) +; + 
+statement ok +CREATE TABLE large_array_intersect_table_2D +AS + SELECT + arrow_cast(column1, 'LargeList(List(Int64))') as column1, + arrow_cast(column2, 'LargeList(List(Int64))') as column2, + arrow_cast(column3, 'LargeList(List(Int64))') as column3, + arrow_cast(column4, 'LargeList(List(Int64))') as column4 +FROM array_intersect_table_2D +; + +statement ok +CREATE TABLE array_intersect_table_2D_float +AS VALUES + (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])), + (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3])) +; + +statement ok +CREATE TABLE large_array_intersect_table_2D_Float +AS + SELECT + arrow_cast(column1, 'LargeList(List(Float64))') as column1, + arrow_cast(column2, 'LargeList(List(Float64))') as column2 +FROM array_intersect_table_2D_Float +; + +statement ok +CREATE TABLE array_intersect_table_3D +AS VALUES + (make_array([[1,2]]), make_array([[1]])), + (make_array([[1,2]]), make_array([[1,2]])) +; + +statement ok +CREATE TABLE large_array_intersect_table_3D +AS + SELECT + arrow_cast(column1, 'LargeList(List(List(Int64)))') as column1, + arrow_cast(column2, 'LargeList(List(List(Int64)))') as column2 +FROM array_intersect_table_3D +; + +statement ok +CREATE TABLE arrays_values_without_nulls +AS VALUES + (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ',', [2,3]), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.', [4,5]), + (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-', [6,7]), + (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok', [8,9]) +; + +statement ok +CREATE TABLE large_arrays_values_without_nulls +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1, + column2, + column3, + column4, + arrow_cast(column5, 'LargeList(Int64)') AS column5 +FROM arrays_values_without_nulls +; + +statement ok +CREATE TABLE fixed_size_arrays_values_without_nulls +AS VALUES + (arrow_cast(make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 
'FixedSizeList(10, Int64)'), 1, 1, ',', [2,3]), + (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 'FixedSizeList(10, Int64)'), 12, 2, '.', [4,5]), + (arrow_cast(make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 23, 3, '-', [6,7]), + (arrow_cast(make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), 34, 4, 'ok', [8,9]) +; + +statement ok +CREATE TABLE arrays_range +AS VALUES + (3, 10, 2), + (4, 13, 3) +; + +statement ok +CREATE TABLE arrays_with_repeating_elements +AS VALUES + (make_array(1, 2, 1, 3, 2, 2, 1, 3, 2, 3), 2, 4, 3), + (make_array(4, 4, 5, 5, 6, 5, 5, 5, 4, 4), 4, 7, 2), + (make_array(7, 7, 7, 8, 7, 9, 7, 8, 7, 7), 7, 10, 5), + (make_array(10, 11, 12, 10, 11, 12, 10, 11, 12, 10), 10, 13, 10) +; + +statement ok +CREATE TABLE large_arrays_with_repeating_elements +AS + SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1, + column2, + column3, + column4 + FROM arrays_with_repeating_elements +; + +statement ok +CREATE TABLE fixed_arrays_with_repeating_elements +AS VALUES + (arrow_cast(make_array(1, 2, 1, 3, 2, 2, 1, 3, 2, 3), 'FixedSizeList(10, Int64)'), 2, 4, 3), + (arrow_cast(make_array(4, 4, 5, 5, 6, 5, 5, 5, 4, 4), 'FixedSizeList(10, Int64)'), 4, 7, 2), + (arrow_cast(make_array(7, 7, 7, 8, 7, 9, 7, 8, 7, 7), 'FixedSizeList(10, Int64)'), 7, 10, 5), + (arrow_cast(make_array(10, 11, 12, 10, 11, 12, 10, 11, 12, 10), 'FixedSizeList(10, Int64)'), 10, 13, 10) +; + +statement ok +CREATE TABLE nested_arrays_with_repeating_elements +AS VALUES + (make_array([1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]), [4, 5, 6], [10, 11, 12], 3), + (make_array([10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]), [10, 11, 12], [19, 20, 21], 2), + (make_array([19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 
21], [22, 23, 24], [19, 20, 21], [19, 20, 21]), [19, 20, 21], [28, 29, 30], 5), + (make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), [28, 29, 30], [37, 38, 39], 10) +; + +statement ok +CREATE TABLE large_nested_arrays_with_repeating_elements +AS + SELECT + arrow_cast(column1, 'LargeList(List(Int64))') AS column1, + column2, + column3, + column4 + FROM nested_arrays_with_repeating_elements +; + +statement ok +CREATE TABLE fixed_size_nested_arrays_with_repeating_elements +AS VALUES + (arrow_cast(make_array([1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(10, List(Int64))'), [4, 5, 6], [10, 11, 12], 3), + (arrow_cast(make_array([10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]), 'FixedSizeList(10, List(Int64))'), [10, 11, 12], [19, 20, 21], 2), + (arrow_cast(make_array([19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]), 'FixedSizeList(10, List(Int64))'), [19, 20, 21], [28, 29, 30], 5), + (arrow_cast(make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), 'FixedSizeList(10, List(Int64))'), [28, 29, 30], [28, 29, 30], 10) +; + +statement ok +CREATE TABLE arrays_distance_table +AS VALUES + (make_array(1, 2, 3), make_array(1, 2, 3), make_array(1.1, 2.2, 3.3) , make_array(1.1, NULL, 3.3)), + (make_array(1, 2, 3), make_array(4, 5, 6), make_array(4.4, 5.5, 6.6), make_array(4.4, NULL, 6.6)), + (make_array(1, 2, 3), make_array(7, 8, 9), make_array(7.7, 8.8, 9.9), make_array(7.7, NULL, 9.9)), + (make_array(1, 2, 3), make_array(10, 11, 12), make_array(10.1, 11.2, 12.3), make_array(10.1, NULL, 12.3)) +; + +statement ok +CREATE TABLE 
large_arrays_distance_table +AS + SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1, + arrow_cast(column2, 'LargeList(Int64)') AS column2, + arrow_cast(column3, 'LargeList(Float64)') AS column3, + arrow_cast(column4, 'LargeList(Float64)') AS column4 +FROM arrays_distance_table +; + +statement ok +CREATE TABLE fixed_size_arrays_distance_table +AS + SELECT + arrow_cast(column1, 'FixedSizeList(3, Int64)') AS column1, + arrow_cast(column2, 'FixedSizeList(3, Int64)') AS column2, + arrow_cast(column3, 'FixedSizeList(3, Float64)') AS column3, + arrow_cast(column4, 'FixedSizeList(3, Float64)') AS column4 +FROM arrays_distance_table +; + +# Disable file statistics cache because file statistics have been previously created +statement ok +set datafusion.runtime.file_statistics_cache_limit = "0K"; + + +# Array literal + +## boolean coercion is not supported +query error +select [1, true, null] + +## wrapped in array_length to get deterministic results +query I +SELECT array_length([now()]) +---- +1 + +## array literal with functions +query ? +select [abs(-1.2), sin(-1), log(2), ceil(3.141)] +---- +[1.2, -0.8414709848078965, 0.30102999566398114, 4.0] + +## array literal with nested types +query ??? +select + [struct('foo', 1)], + [struct('foo', [1,2,3])], + [struct('foo', [struct(3, 'x')])] +; +---- +[{c0: foo, c1: 1}] [{c0: foo, c1: [1, 2, 3]}] [{c0: foo, c1: [{c0: 3, c1: x}]}] + +query TTT +select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) from arrays; +---- +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) +List(List(Int64)) List(Float64) List(Utf8) + +# arrays table +query ??? 
+select column1, column2, column3 from arrays; +---- +[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] +[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] +[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] +[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] +NULL [13.3, 14.4, 15.5] [a, m, e, t] +[[11, 12], [13, 14]] NULL [,] +[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL + +# nested_arrays table +query ??I?? +select column1, column2, column3, column4, column5 from nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [7, 8, 9] 2 [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]] [11, 12, 13] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [10, 11, 12] 3 [[[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]] [121, 131, 141] + +# values table +query IIIRT +select a, b, c, d, e from values; +---- +1 1 2 1.1 Lorem +2 3 4 2.2 ipsum +3 5 6 3.3 dolor +4 7 8 4.4 sit +NULL 9 10 5.5 amet +5 NULL 12 6.6 , +6 11 NULL 7.7 consectetur +7 13 14 NULL adipiscing +8 15 16 8.8 NULL + +# arrays_values table +query ?IIT +select column1, column2, column3, column4 from arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 12 2 . +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 23 3 - +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] 34 4 ok +NULL 44 5 @ +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $ +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 55 NULL ^ +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL + +# slices table +query ?II +select column1, column2, column3 from slices; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 2 -4 +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 0 0 +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -4 -7 +NULL 4 5 +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 5 NULL + +query ??I? 
+select column1, column2, column3, column4 from arrays_values_v2; +---- +[NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] +NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] +[9, NULL, 10] NULL 14 [[70, NULL, NULL]] +[NULL, 1] [NULL, 21] NULL NULL +[11, 12] NULL NULL NULL +NULL NULL NULL NULL + +# arrays_values_without_nulls table +query ?IIT +select column1, column2, column3, column4 from arrays_values_without_nulls; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2 . +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3 - +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4 ok + +# arrays_with_repeating_elements table +query ?III +select column1, column2, column3, column4 from arrays_with_repeating_elements; +---- +[1, 2, 1, 3, 2, 2, 1, 3, 2, 3] 2 4 3 +[4, 4, 5, 5, 6, 5, 5, 5, 4, 4] 4 7 2 +[7, 7, 7, 8, 7, 9, 7, 8, 7, 7] 7 10 5 +[10, 11, 12, 10, 11, 12, 10, 11, 12, 10] 10 13 10 + +# nested_arrays_with_repeating_elements table +query ???I +select column1, column2, column3, column4 from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [4, 5, 6] [10, 11, 12] 3 +[[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [10, 11, 12] [19, 20, 21] 2 +[[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [19, 20, 21] [28, 29, 30] 5 +[[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [28, 29, 30] [37, 38, 39] 10 + + +### Array index + + +## array[i] + +# single index with scalars #1 (positive index) +query IRT +select make_array(1, 2, 3)[1], make_array(1.0, 2.0, 3.0)[2], make_array('h', 'e', 'l', 'l', 'o')[3]; +---- +1 2 l + +# single index with scalars #2 (zero index) +query I +select make_array(1, 
2, 3)[0]; +---- +NULL + +# single index with scalars #3 (negative index) +query IRT +select make_array(1, 2, 3)[-1], make_array(1.0, 2.0, 3.0)[-2], make_array('h', 'e', 'l', 'l', 'o')[-3]; +---- +3 2 l + +# single index with scalars #4 (complex index) +query IRT +select make_array(1, 2, 3)[1 + 2 - 1], make_array(1.0, 2.0, 3.0)[2 * 1 * 0 - 2], make_array('h', 'e', 'l', 'l', 'o')[2 - 3]; +---- +2 2 o + +# single index with columns #1 (positive index) +query ?RT +select column1[2], column2[3], column3[1] from arrays; +---- +[3, NULL] 3.3 L +[5, 6] 6.6 i +[7, 8] 9.9 d +[9, 10] 12.2 s +NULL 15.5 a +[13, 14] NULL , +[NULL, 18] 18.8 NULL + +# single index with columns #2 (zero index) +query ?RT +select column1[0], column2[0], column3[0] from arrays; +---- +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL + +# single index with columns #3 (negative index) +query ?RT +select column1[-2], column2[-3], column3[-1] from arrays; +---- +[NULL, 2] 1.1 m +[3, 4] NULL m +[5, 6] 7.7 r +[7, NULL] 10.1 t +NULL 13.3 t +[11, 12] NULL , +[15, 16] 16.6 NULL + +# single index with columns #4 (complex index) +query ?RT +select column1[9 - 7], column2[2 * 0], column3[1 - 3] from arrays; +---- +[3, NULL] NULL e +[5, 6] NULL u +[7, 8] NULL o +[9, 10] NULL i +NULL NULL e +[13, 14] NULL NULL +[NULL, 18] NULL NULL + +# TODO: support index as column +# single index with columns #5 (index as column) +# query ? +# select make_array(1, 2, 3, 4, 5)[column2] from arrays_with_repeating_elements; +# ---- + +# TODO: support argument and index as columns +# single index with columns #6 (argument and index as columns) +# query I +# select column1[column2] from arrays_with_repeating_elements; +# ---- + +## array[i:j] + +# multiple index with scalars #1 (positive index) +query ??? +select make_array(1, 2, 3)[1:2], make_array(1.0, 2.0, 3.0)[2:3], make_array('h', 'e', 'l', 'l', 'o')[2:4]; +---- +[1, 2] [2.0, 3.0] [e, l, l] + +query ??? 
+select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1:2], + arrow_cast([1.0, 2.0, 3.0], 'LargeList(Int64)')[2:3], + arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)')[2:4] +; +---- +[1, 2] [2, 3] [e, l, l] + +# multiple index with scalars #2 (zero index) +query ??? +select make_array(1, 2, 3)[0:0], make_array(1.0, 2.0, 3.0)[0:2], make_array('h', 'e', 'l', 'l', 'o')[0:6]; +---- +[] [1.0, 2.0] [h, e, l, l, o] + +query ??? +select arrow_cast([1, 2, 3], 'LargeList(Int64)')[0:0], + arrow_cast([1.0, 2.0, 3.0], 'LargeList(Int64)')[0:2], + arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)')[0:6] +; +---- +[] [1, 2] [h, e, l, l, o] + +query I +select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1]; +---- +1 + +# TODO: support multiple negative index +# multiple index with scalars #3 (negative index) +# query II +# select make_array(1, 2, 3)[-3:-1], make_array(1.0, 2.0, 3.0)[-3:-1], make_array('h', 'e', 'l', 'l', 'o')[-2:0]; +# ---- + +# TODO: support complex index +# multiple index with scalars #4 (complex index) +# query III +# select make_array(1, 2, 3)[2 + 1 - 1:10], make_array(1.0, 2.0, 3.0)[2 | 2:10], make_array('h', 'e', 'l', 'l', 'o')[6 ^ 6:10]; +# ---- + +# multiple index with columns #1 (positive index) +query ??? +select column1[2:4], column2[1:4], column3[3:4] from arrays; +---- +[[3, NULL]] [1.1, 2.2, 3.3] [r, e] +[[5, 6]] [NULL, 5.5, 6.6] [NULL, u] +[[7, 8]] [7.7, 8.8, 9.9] [l, o] +[[9, 10]] [10.1, NULL, 12.2] [t] +NULL [13.3, 14.4, 15.5] [e, t] +[[13, 14]] NULL [] +[[NULL, 18]] [16.6, 17.7, 18.8] NULL + +# multiple index with columns #2 (zero index) +query ??? 
+select column1[0:5], column2[0:3], column3[0:9] from arrays; +---- +[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] +[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] +[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] +[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] +NULL [13.3, 14.4, 15.5] [a, m, e, t] +[[11, 12], [13, 14]] NULL [,] +[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL + +# TODO: support negative index +# multiple index with columns #3 (negative index) +# query ?RT +# select column1[-2:-4], column2[-3:-5], column3[-1:-4] from arrays; +# ---- +# [NULL, 2] 1.1 m + +# TODO: support complex index +# multiple index with columns #4 (complex index) +# query ?RT +# select column1[9 - 7:2 + 2], column2[1 * 0:2 * 3], column3[1 + 1 - 0:5 % 3] from arrays; +# ---- + +# TODO: support first index as column +# multiple index with columns #5 (first index as column) +# query ? +# select make_array(1, 2, 3, 4, 5)[column2:4] from arrays_with_repeating_elements +# ---- + +# TODO: support last index as column +# multiple index with columns #6 (last index as column) +# query ?RT +# select make_array(1, 2, 3, 4, 5)[2:column3] from arrays_with_repeating_elements; +# ---- + +# TODO: support argument and indices as column +# multiple index with columns #7 (argument and indices as column) +# query ?RT +# select column1[column2:column3] from arrays_with_repeating_elements; +# ---- + +## array[i:j:k] + +# multiple index with scalars #1 (positive index) +query ??? +select make_array(1, 2, 3)[1:2:2], make_array(1.0, 2.0, 3.0)[2:3:2], make_array('h', 'e', 'l', 'l', 'o')[2:4:2]; +---- +[1] [2.0] [e, l] + +# multiple index with scalars #2 (zero index) +query ??? +select make_array(1, 2, 3)[0:0:2], make_array(1.0, 2.0, 3.0)[0:2:2], make_array('h', 'e', 'l', 'l', 'o')[0:6:2]; +---- +[] [1.0] [h, l, o] + +#TODO: sqlparser does not support negative index +## multiple index with scalars #3 (negative index) +#query ??? 
+#select make_array(1, 2, 3)[-1:-2:-2], make_array(1.0, 2.0, 3.0)[-2:-3:-2], make_array('h', 'e', 'l', 'l', 'o')[-2:-4:-2]; +#---- +#[1] [2.0] [e, l] + +# multiple index with columns #1 (positive index) +query ??? +select column1[2:4:2], column2[1:4:2], column3[3:4:2] from arrays; +---- +[[3, NULL]] [1.1, 3.3] [r] +[[5, 6]] [NULL, 6.6] [NULL] +[[7, 8]] [7.7, 9.9] [l] +[[9, 10]] [10.1, 12.2] [t] +NULL [13.3, 15.5] [e] +[[13, 14]] NULL [] +[[NULL, 18]] [16.6, 18.8] NULL + +# multiple index with columns #2 (zero index) +query ??? +select column1[0:5:2], column2[0:3:2], column3[0:9:2] from arrays; +---- +[[NULL, 2]] [1.1, 3.3] [L, r, m] +[[3, 4]] [NULL, 6.6] [i, NULL, m] +[[5, 6]] [7.7, 9.9] [d, l, r] +[[7, NULL]] [10.1, 12.2] [s, t] +NULL [13.3, 15.5] [a, e] +[[11, 12]] NULL [,] +[[15, 16]] [16.6, 18.8] NULL + + +### Array function tests + + +## make_array (aliases: `make_list`) + +# make_array scalar function #1 +query ??? +select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o'); +---- +[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] + +# make_array scalar function #2 +query ??? +select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), make_array([[[[1], [2]]]]); +---- +[1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]] + +# make_array scalar function #3 +query ?? +select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 4]], [[5, 6], [7, 8]]); +---- +[[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] + +# make_array scalar function #4 +query ?? +select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o'); +---- +[[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o] + +# make_array scalar function #5 +query ? +select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12)))) +---- +[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]] + +# make_array scalar function #6 +query ? 
+select make_array() +---- +[] + +# make_array scalar function #7 +query ?? +select make_array(make_array()), make_array(make_array(make_array())) +---- +[[]] [[[]]] + +# make_list scalar function #8 (function alias: `make_array`) +query ??? +select make_list(1, 2, 3), make_list(1.0, 2.0, 3.0), make_list('h', 'e', 'l', 'l', 'o'); +---- +[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] + +# make_array scalar function with nulls +query ??? +select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', NULL, 'l', NULL, 'o'); +---- +[1, NULL, 3] [NULL, 2.0, NULL] [h, NULL, l, NULL, o] + +# make_array scalar function with nulls #2 +query ?? +select make_array(1, 2, NULL), make_array(make_array(NULL, 2), make_array(NULL, 3)); +---- +[1, 2, NULL] [[NULL, 2], [NULL, 3]] + +# make_array scalar function with nulls #3 +query ??? +select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NULL, NULL), make_array(NULL, NULL)); +---- +[NULL] [NULL, NULL, NULL] [[NULL, NULL], [NULL, NULL]] + +# make_array with 1 columns +query ??? +select make_array(a), make_array(d), make_array(e) from values; +---- +[1] [1.1] [Lorem] +[2] [2.2] [ipsum] +[3] [3.3] [dolor] +[4] [4.4] [sit] +[NULL] [5.5] [amet] +[5] [6.6] [,] +[6] [7.7] [consectetur] +[7] [NULL] [adipiscing] +[8] [8.8] [NULL] + +# make_array with 2 columns #1 +query ?? +select make_array(b, c), make_array(e, f) from values; +---- +[1, 2] [Lorem, A] +[3, 4] [ipsum, ] +[5, 6] [dolor, BB] +[7, 8] [sit, NULL] +[9, 10] [amet, CCC] +[NULL, 12] [,, DD] +[11, NULL] [consectetur, E] +[13, 14] [adipiscing, F] +[15, 16] [NULL, ] + +# make_array with 4 columns +query ? +select make_array(a, b, c, d) from values; +---- +[1.0, 1.0, 2.0, 1.1] +[2.0, 3.0, 4.0, 2.2] +[3.0, 5.0, 6.0, 3.3] +[4.0, 7.0, 8.0, 4.4] +[NULL, 9.0, 10.0, 5.5] +[5.0, NULL, 12.0, 6.6] +[6.0, 11.0, NULL, 7.7] +[7.0, 13.0, 14.0, NULL] +[8.0, 15.0, 16.0, 8.8] + +# make_array with column of list +query ?? 
+select column1, column5 from arrays_values_without_nulls; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 3] +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] [4, 5] +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] [6, 7] +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] [8, 9] + +# make array with arrays of different types +query ? +select make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)')) +---- +[[1], [-1]] + +query T +select arrow_typeof(make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)'))); +---- +List(LargeList(Int64)) + + +query ??? +select make_array(column1), + make_array(column1, column5), + make_array(column1, make_array(50,51,52)) +from arrays_values_without_nulls; +---- +[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [2, 3]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [50, 51, 52]] +[[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [4, 5]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [50, 51, 52]] +[[21, 22, 23, 24, 25, 26, 27, 28, 29, 30]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [6, 7]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [50, 51, 52]] +[[31, 32, 33, 34, 35, 26, 37, 38, 39, 40]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [8, 9]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [50, 51, 52]] + +## array_element (aliases: array_extract, list_extract, list_element) + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_element' does not support zero arguments +select array_element(); + +# array_element error +query error +select array_element(1, 2); + +# array_element with null +query I +select array_element([1, 2], NULL); +---- +NULL + +query ? 
+select array_element(NULL, 2); +---- +NULL + +# array_element scalar function #1 (with positive index) +query IT +select array_element(make_array(1, 2, 3, 4, 5), 2), array_element(make_array('h', 'e', 'l', 'l', 'o'), 3); +---- +2 l + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +---- +2 l + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + +# array_element scalar function #2 (with positive index; out of bounds) +query IT +select array_element(make_array(1, 2, 3, 4, 5), 7), array_element(make_array('h', 'e', 'l', 'l', 'o'), 11); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 11); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 11); +---- +NULL NULL + +# array_element scalar function #3 (with zero) +query IT +select array_element(make_array(1, 2, 3, 4, 5), 0), array_element(make_array('h', 'e', 'l', 'l', 'o'), 0); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 0), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 0); +---- +NULL NULL + +# array_element scalar function #4 (with NULL) +query IT +select array_element(make_array(1, 2, 3, 4, 5), NULL), 
array_element(make_array('h', 'e', 'l', 'l', 'o'), NULL); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), NULL); +---- +NULL NULL + +# array_element scalar function #5 (with negative index) +query IT +select array_element(make_array(1, 2, 3, 4, 5), -2), array_element(make_array('h', 'e', 'l', 'l', 'o'), -3); +---- +4 l + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3); +---- +4 l + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -3); +---- +4 l + +# array_element scalar function #6 (with negative index; out of bounds) +query IT +select array_element(make_array(1, 2, 3, 4, 5), -11), array_element(make_array('h', 'e', 'l', 'l', 'o'), -7); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -11), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -7); +---- +NULL NULL + +query IT +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -11), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -7); +---- +NULL NULL + +# array_element scalar function #7 (nested array) +query ? +select array_element(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 1); +---- +[1, 2, 3, 4, 5] + +query ? 
+select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), 1); +---- +[1, 2, 3, 4, 5] + +query ? +select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'FixedSizeList(2, List(Int64))'), 1); +---- +[1, 2, 3, 4, 5] + +# array_extract scalar function #8 (function alias `array_element`) +query IT +select array_extract(make_array(1, 2, 3, 4, 5), 2), array_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); +---- +2 l + +query IT +select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +---- +2 l + +query IT +select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + +# list_element scalar function #9 (function alias `array_element`) +query IT +select list_element(make_array(1, 2, 3, 4, 5), 2), list_element(make_array('h', 'e', 'l', 'l', 'o'), 3); +---- +2 l + +query IT +select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +---- +2 l + +query IT +select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), list_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + +# list_extract scalar function #10 (function alias `array_element`) +query IT +select list_extract(make_array(1, 2, 3, 4, 5), 2), list_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); +---- +2 l + +query IT +select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); +---- +2 l + +query IT +select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 
'FixedSizeList(5, Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); +---- +2 l + +# array_element with columns +query I +select array_element(column1, column2) from slices; +---- +NULL +12 +NULL +37 +NULL +NULL +55 + +query I +select array_element(arrow_cast(column1, 'LargeList(Int64)'), column2) from slices; +---- +NULL +12 +NULL +37 +NULL +NULL +55 + +query I +select array_element(column1, column2) from fixed_slices; +---- +NULL +12 +NULL +37 +NULL +55 + +# array_element with columns and scalars +query II +select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from slices; +---- +1 3 +2 13 +NULL 23 +2 33 +4 NULL +NULL 43 +5 NULL + +query II +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2), array_element(arrow_cast(column1, 'LargeList(Int64)'), 3) from slices; +---- +1 3 +2 13 +NULL 23 +2 33 +4 NULL +NULL 43 +5 NULL + +query II +select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from fixed_slices; +---- +1 3 +2 13 +NULL 23 +2 33 +NULL 43 +5 NULL + +# array_element of empty array +query T +select coalesce(array_element([], 1), array_element(NULL, 1), 'ok'); +---- +ok + + +## array_max +# array_max scalar function #1 +query I +select array_max(make_array(5, 3, 6, 4)); +---- +6 + +query I +select array_max(make_array(5, 3, 4, NULL, 6, NULL)); +---- +6 + +query ? 
+select array_max(make_array(NULL, NULL)); +---- +NULL + +query T +select array_max(make_array('h', 'e', 'o', 'l', 'l')); +---- +o + +query T +select array_max(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); +---- +o + +query B +select array_max(make_array(false, true, false, true)); +---- +true + +query B +select array_max(make_array(false, true, NULL, false, true)); +---- +true + +query D +select array_max(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); +---- +1999-05-01 + +query D +select array_max(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); +---- +1999-05-01 + +query P +select array_max(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); +---- +1995-06-01T00:00:00 + +query P +select array_max(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); +---- +1996-10-01T00:00:00 + +query R +select array_max(make_array(5.1, -3.2, 6.3, 4.9)); +---- +6.3 + +query ?I +select input, array_max(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) +---- +[-1, 0, 1] 1 +[9, 10, 11] 11 +[19, 20, 21] 21 +[29, 30, 31] 31 +[NULL, NULL, NULL] NULL + +query II +select array_max(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_max(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +3 1 + +query II +select array_max(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_max(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +3 1 + +query ? 
+select array_max(make_array()); +---- +NULL + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_max' does not support zero arguments +select array_max(); + +# array_max over multiple rows (exercises the offsets-based iteration) +query I +select array_max(column1) from (values + (make_array(1, 5, 3)), + (make_array(10, 2, 8)), + (NULL), + (make_array(NULL, 7, NULL)), + (make_array(100)) +) as t(column1); +---- +5 +10 +NULL +7 +100 + +# array_max with NaN values (NaN orders above all other floats, so it is returned as max) +query R +select array_max(make_array(1.0, 'NaN'::double, 3.0)); +---- +NaN + +query R +select array_max(make_array('NaN'::double, 'NaN'::double)); +---- +NaN + +query R +select array_max(make_array('NaN'::double, NULL)); +---- +NaN + +# array_max with Int32 (exercises a different primitive type than Int64) +query I +select array_max(arrow_cast(make_array(10, -5, 3), 'List(Int32)')); +---- +10 + +## array_min + +query I +select array_min(make_array(5, 3, 6, 4)); +---- +3 + +query I +select array_min(make_array(5, 3, 4, NULL, 6, NULL)); +---- +3 + +query ? 
+select array_min(make_array(NULL, NULL)); +---- +NULL + +query T +select array_min(make_array('h', 'e', 'o', 'l', 'l')); +---- +e + +query T +select array_min(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); +---- +e + +query B +select array_min(make_array(false, true, false, true)); +---- +false + +query B +select array_min(make_array(false, true, NULL, false, true)); +---- +false + +query D +select array_min(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); +---- +1985-11-01 + +query D +select array_min(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); +---- +1993-03-01 + +query P +select array_min(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); +---- +1984-10-01T00:00:00 + +query P +select array_min(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); +---- +1995-06-01T00:00:00 + +query R +select array_min(make_array(5.1, -3.2, 6.3, 4.9)); +---- +-3.2 + +query ?I +select input, array_min(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) +---- +[-1, 0, 1] -1 +[9, 10, 11] 9 +[19, 20, 21] 19 +[29, 30, 31] 29 +[NULL, NULL, NULL] NULL + +query II +select array_min(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_min(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +1 1 + +query II +select array_min(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_min(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +1 1 + +query ? 
+select array_min(make_array()); +---- +NULL + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_min' does not support zero arguments +select array_min(); + +# array_min over multiple rows (exercises the offsets-based iteration) +query I +select array_min(column1) from (values + (make_array(1, 5, 3)), + (make_array(10, 2, 8)), + (NULL), + (make_array(NULL, 7, NULL)), + (make_array(100)) +) as t(column1); +---- +1 +2 +NULL +7 +100 + +# array_min with NaN values (NaN should not be returned as min) +query R +select array_min(make_array(1.0, 'NaN'::double, 3.0)); +---- +1 + +query R +select array_min(make_array('NaN'::double, 'NaN'::double)); +---- +NaN + +query R +select array_min(make_array('NaN'::double, NULL)); +---- +NaN + +# array_min with Int32 (exercises a different primitive type than Int64) +query I +select array_min(arrow_cast(make_array(10, -5, 3), 'List(Int32)')); +---- +-5 + +# array_min/array_max preserve parameterized primitive metadata +query PPTT +select + array_min(ts_list), + array_max(ts_list), + arrow_typeof(array_min(ts_list)), + arrow_typeof(array_max(ts_list)) +from ( + select arrow_cast( + make_array( + arrow_cast(20, 'Timestamp(Nanosecond, Some("UTC"))'), + arrow_cast(10, 'Timestamp(Nanosecond, Some("UTC"))'), + arrow_cast(30, 'Timestamp(Nanosecond, Some("UTC"))') + ), + 'List(Timestamp(Nanosecond, Some("UTC")))' + ) as ts_list +) t; +---- +1970-01-01T00:00:00.000000010Z 1970-01-01T00:00:00.000000030Z Timestamp(ns, "UTC") Timestamp(ns, "UTC") + +query RRTT +select + array_min(dec_list), + array_max(dec_list), + arrow_typeof(array_min(dec_list)), + arrow_typeof(array_max(dec_list)) +from ( + select arrow_cast( + make_array( + arrow_cast(200, 'Decimal128(20, 4)'), + arrow_cast(100, 'Decimal128(20, 4)'), + arrow_cast(300, 'Decimal128(20, 4)') + ), + 'List(Decimal128(20, 4))' + ) as dec_list +) t; +---- +100 300 Decimal128(20, 4) Decimal128(20, 4) + + +## array_pop_back 
(aliases: `list_pop_back`) + +# array_pop_back scalar function with null +#TODO: https://github.com/apache/datafusion/issues/7142 +# follow clickhouse and duckdb +#query ? +#select array_pop_back(null); +#---- +#NULL + +# array_pop_back scalar function #1 +query ?? +select array_pop_back(make_array(1, 2, 3, 4, 5)), array_pop_back(make_array('h', 'e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [h, e, l, l] + +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [h, e, l, l] + +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [h, e, l, l] + +# array_pop_back scalar function #2 (after array_pop_back, array is empty) +query ? +select array_pop_back(make_array(1)); +---- +[] + +query ? +select array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +[] + +query ? +select array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + +# array_pop_back scalar function #3 (array_pop_back the empty array) +query ? +select array_pop_back(array_pop_back(make_array(1))); +---- +[] + +query ? +select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)'))); +---- +[] + +query ? +select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + +# array_pop_back scalar function #4 (array_pop_back the arrays which have NULL) +query ?? +select array_pop_back(make_array(1, 2, 3, 4, NULL)), array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o')); +---- +[1, 2, 3, 4] [NULL, e, l, NULL] + +query ?? 
+select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [NULL, e, l, NULL] + +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [NULL, e, l, NULL] + +# array_pop_back scalar function #5 (array_pop_back the nested arrays) +query ? +select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'LargeList(List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +# array_pop_back scalar function #6 (array_pop_back the nested arrays with NULL) +query ? +select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL)); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'LargeList(List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? 
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +# array_pop_back scalar function #7 (array_pop_back the nested arrays with NULL) +query ? +select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4))); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'LargeList(List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'FixedSizeList(5, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] + +# array_pop_back scalar function #8 (after array_pop_back, nested array is empty) +query ? +select array_pop_back(make_array(make_array(1, 2, 3))); +---- +[] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(List(Int64))')); +---- +[] + +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + +# array_pop_back with columns +query ? +select array_pop_back(column1) from arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, NULL] +[NULL, NULL] +NULL +[NULL, 10, 11] + +query ? +select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, NULL] +[NULL, NULL] +NULL +[NULL, 10, 11] + +query ? +select array_pop_back(column1) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, NULL] +[NULL, NULL] +NULL +[NULL, 10, 11] + +query ? 
+select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, NULL] +[NULL, NULL] +NULL +[NULL, 10, 11] + +## array_pop_front (aliases: `list_pop_front`) + +#TODO:https://github.com/apache/datafusion/issues/7142 +# array_pop_front scalar function with null +# follow clickhouse and duckdb +#query ? +#select array_pop_front(null); +#---- +#NULL + +# array_pop_front scalar function #1 +query ?? +select array_pop_front(make_array(1, 2, 3, 4, 5)), array_pop_front(make_array('h', 'e', 'l', 'l', 'o')); +---- +[2, 3, 4, 5] [e, l, l, o] + +query ?? +select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[2, 3, 4, 5] [e, l, l, o] + +query ?? +select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[2, 3, 4, 5] [e, l, l, o] + +# array_pop_front scalar function #2 (after array_pop_front, array is empty) +query ? +select array_pop_front(make_array(1)); +---- +[] + +query ? +select array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +[] + +query ? +select array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + +# array_pop_front scalar function #3 (array_pop_front the empty array) +query ? +select array_pop_front(array_pop_front(make_array(1))); +---- +[] + +query ? +select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)'))); +---- +[] + +query ? +select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + +# array_pop_front scalar function #5 (array_pop_front the nested arrays) +query ? 
+select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); +---- +[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] + +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'LargeList(List(Int64))')); +---- +[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] + +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] + +# array_pop_front scalar function #6 (array_pop_front the nested arrays with NULL) +query ? +select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4))); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'LargeList(List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +query ? +select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + +# array_pop_front scalar function #8 (after array_pop_front, nested array is empty) +query ? +select array_pop_front(make_array(make_array(1, 2, 3))); +---- +[] + +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(List(Int64))')); +---- +[] + +query ? 
+select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + +## array_slice (aliases: list_slice) + +# array_slice scalar function #1 (with positive indexes) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 2); +---- +[2, 3, 4] [h, e] + +query ???? +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 2), + array_slice(make_array(1, 2, 3, 4, 5), 0, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 5, 2); +---- +[1, 3, 5] [h, l, o] [1, 3, 5] [h, l, o] + +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, -1); +---- +[] [] + +query error Execution error: array_slice got invalid stride: 0, it cannot be 0 +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 0), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 0); + +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 5, 1, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 5, 1, -2); +---- +[5, 3, 1] [o, l, h] + +# Test NULL stride +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, NULL); +---- +NULL NULL + +# Test NULL stride +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 1, 5, NULL), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 5, NULL); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 2); +---- +[2, 3, 4] [h, e] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2, 4), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 1, 2); +---- +[2, 3, 4] [h, e] + +# array_slice scalar function #2 (with positive indexes; full array) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 0, 6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 5); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 5); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +# TODO make error message nicer: https://github.com/apache/datafusion/issues/19004 +# Expected output (once supported): +# ---- +# [1, 2, 3, 4, 5] [h, e, l, l, o] +query error Failed to coerce arguments to satisfy a call to 'array_slice' function: +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'ListView(Int64)'), 0, 6), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'ListView(Utf8)'), 0, 5); + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 0, 6), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 0, 5); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +# array_slice scalar function #3 (with positive indexes; first index = second index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 4, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, 3); +---- +[4] [l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 4, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, 3); +---- +[4] [l] + +# array_slice scalar function #4 (with positive indexes; first index > second_index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, 1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 4, 1); +---- +[] [] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 4, 1); +---- +[] [] + +# array_slice scalar function #5 (with positive indexes; out of bounds) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, 6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, 7); +---- +[2, 3, 4, 5] [l, l, o] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, 7); +---- +[2, 3, 4, 5] [l, l, o] + +# TODO: Enable once array_slice supports LargeListView types. +# Expected output (once supported): +# ---- +# [2, 3, 4, 5] [l, l, o] +query error Failed to coerce arguments to satisfy a call to 'array_slice' function: +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeListView(Int64)'), 2, 6), + array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeListView(Utf8)'), 3, 7); + + +# array_slice scalar function #6 (with positive indexes; nested array) +query ? +select array_slice(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 1, 1); +---- +[[1, 2, 3, 4, 5]] + +query ? +select array_slice(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), 1, 1); +---- +[[1, 2, 3, 4, 5]] + +# array_slice scalar function #7 (with zero and positive number) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 0, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 3); +---- +[1, 2, 3, 4] [h, e, l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 3); +---- +[1, 2, 3, 4] [h, e, l] + +# array_slice scalar function #8 (with NULL and positive number) +query ?? 
+select array_slice(make_array(1, 2, 3, 4, 5), NULL, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL, 3); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL, 3); +---- +NULL NULL + +# array_slice scalar function #9 (with positive number and NULL) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, NULL); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, NULL); +---- +NULL NULL + +# array_slice scalar function #10 (with zero-zero) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 0, 0), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 0); +---- +[] [] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 0), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 0); +---- +[] [] + +# array_slice scalar function #11 (with NULL-NULL) +query error +select array_slice(make_array(1, 2, 3, 4, 5), NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL); + +query error +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); + +# array_slice scalar function #12 (with zero and negative number) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 0, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, -3); +---- +[1, 2] [h, e, l] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, -3); +---- +[1, 2] [h, e, l] + +# array_slice scalar function #13 (with negative number and NULL) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -2, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, NULL); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2, NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, NULL); +---- +NULL NULL + +# array_slice scalar function #14 (with NULL and negative number) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), NULL, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL, -3); +---- +NULL NULL + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL, -3); +---- +NULL NULL + +# array_slice scalar function #15 (with negative indexes) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -4, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -1); +---- +[2, 3, 4, 5] [l, l, o] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -1); +---- +[2, 3, 4, 5] [l, l, o] + +# array_slice scalar function #16 (with negative indexes; full array, since the -1 end index is inclusive) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -5, -1); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -5, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -5, -1); +---- +[1, 2, 3, 4, 5] [h, e, l, l, o] + +# array_slice scalar function #17 (with negative indexes; first index = second index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -4, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -3); +---- +[2] [l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -3); +---- +[2] [l] + +# array_slice scalar function #18 (with negative indexes; first index > second_index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -4, -6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -6); +---- +[] [] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -6); +---- +[] [] + +# array_slice scalar function #19 (with negative indexes; out of bounds) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -7, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -7, -3); +---- +[1, 2, 3, 4] [h, e, l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -7, -2), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -7, -3); +---- +[1, 2, 3, 4] [h, e, l] + +# array_slice scalar function #20 (with negative indexes; nested array) +query ?? +select array_slice(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), -2, -1), array_slice(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), -1, -1); +---- +[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] + +query ?? 
+select array_slice(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), -2, -1), array_slice(arrow_cast(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), 'LargeList(List(Int64))'), -1, -1); +---- +[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] + + +# array_slice scalar function #21 (with first positive index and last negative index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, -3), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, -2); +---- +[2, 3] [e, l, l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, -3), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, -2); +---- +[2, 3] [e, l, l] + +# array_slice scalar function #22 (with first negative index and last positive index) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -2, 5), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, 4); +---- +[4, 5] [l, l] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2, 5), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, 4); +---- +[4, 5] [l, l] + +# list_slice scalar function #23 (function alias `array_slice`) +query ?? +select list_slice(make_array(1, 2, 3, 4, 5), 2, 4), list_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 2); +---- +[2, 3, 4] [h, e] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 2); +---- +[2, 3, 4] [h, e] + +# array_slice scalar function #24 (with first negative index larger than len) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), -2147483648, 1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), -2147483648, 1); +---- +[1] [h] + +query ?? 
+select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -9223372036854775808, 1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -9223372036854775808, 1); +---- +[1] [h] + +# array_slice scalar function #25 (with negative step and equal indexes) +query ?? +select array_slice(make_array(1, 2, 3, 4, 5), 2, 2, -1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, 2, -1); +---- +[2] [e] + +query ?? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 2, -1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, 2, -1); +---- +[2] [e] + +# array_slice with columns +query ? +select array_slice(column1, column2, column3) from slices; +---- +[NULL] +[12, 13, 14, 15, 16, 17] +[] +[] +NULL +NULL +NULL + +query ? +select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from slices; +---- +[NULL] +[12, 13, 14, 15, 16, 17] +[] +[] +NULL +NULL +NULL + +# TODO: support NULLS in output instead of `[]` +# array_slice with columns and scalars +query ??? +select array_slice(make_array(1, 2, 3, 4, 5), column2, column3), array_slice(column1, 3, column3), array_slice(column1, column2, 5) from slices; +---- +[1] [] [NULL, 2, 3, 4, 5] +[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] +[] [] [21, 22, 23, NULL, 25] +[] [33, 34] [] +[4, 5] NULL NULL +NULL [43, 44, 45, 46] NULL +NULL NULL [55] + +query ??? +select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), 3, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, 5) from slices; +---- +[1] [] [NULL, 2, 3, 4, 5] +[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] +[] [] [21, 22, 23, NULL, 25] +[] [33, 34] [] +[4, 5] NULL NULL +NULL [43, 44, 45, 46] NULL +NULL NULL [55] + +# Test issue: https://github.com/apache/datafusion/issues/10425 +# `from` may be larger than `to` and `stride` is positive +query ???? 
+select array_slice(a, -1, 2, 1), array_slice(a, -1, 2), + array_slice(a, 3, 2, 1), array_slice(a, 3, 2) + from (values ([1.0, 2.0, 3.0, 3.0]), ([4.0, 5.0, 3.0]), ([6.0])) t(a); +---- +[] [] [] [] +[] [] [] [] +[6.0] [6.0] [] [] + +# array_slice with overlapping nulls across multiple inputs +query ? +select array_slice(column1, column2, column3) from ( + values + (make_array(1, 2, 3), NULL, NULL), + (NULL, NULL, 3), + (NULL, 1, NULL), + (make_array(4, 5, 6), 1, 3) +) as t(column1, column2, column3); +---- +NULL +NULL +NULL +[4, 5, 6] + +query ? +select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from ( + values + (make_array(1, 2, 3), NULL, NULL), + (NULL, NULL, 3), + (NULL, 1, NULL), + (make_array(4, 5, 6), 1, 3) +) as t(column1, column2, column3); +---- +NULL +NULL +NULL +[4, 5, 6] + +# array_slice with overlapping nulls including stride +query ? +select array_slice(column1, column2, column3, column4) from ( + values + (make_array(1, 2, 3, 4, 5), 1, 5, NULL), + (NULL, NULL, 3, 2), + (make_array(1, 2, 3, 4, 5), NULL, NULL, NULL), + (make_array(1, 2, 3, 4, 5), 1, 5, 2) +) as t(column1, column2, column3, column4); +---- +NULL +NULL +NULL +[1, 3, 5] + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_slice' does not support zero arguments +select array_slice(); + +query error Failed to coerce arguments +select array_slice(3.5, NULL, NULL); + +## array_any_value (aliases: list_any_value) + +# Testing with empty arguments should result in an error +query error +select array_any_value(); + +# Testing with non-array arguments should result in an error +query error +select array_any_value(1), array_any_value('a'), array_any_value(NULL); + +# array_any_value scalar function #1 (with null and non-null elements) + +query IT?I +select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(make_array(NULL, 'h', 'e', 'l', 'l', 'o')), array_any_value(make_array(NULL, 
NULL)), array_any_value(make_array(NULL, NULL, 1, 2, 3)); +---- +1 h NULL 1 + +query ITITI +select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), array_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'LargeList(Int64)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'LargeList(Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL, 1, 2, 3), 'LargeList(Int64)'));; +---- +1 h NULL NULL 1 + +query ITITI +select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'FixedSizeList(6, Int64)')), array_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'FixedSizeList(6, Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Int64)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL, 1, 2, 3, 4), 'FixedSizeList(6, Int64)')); +---- +1 h NULL NULL 1 + +# array_any_value scalar function #2 (with nested array) + +query ? +select array_any_value(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10))); +---- +[NULL, 1, 2, 3, 4, 5] + +query ? +select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'LargeList(List(Int64))')); +---- +[NULL, 1, 2, 3, 4, 5] + +query ? 
+select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'FixedSizeList(3, List(Int64))')); +---- +[NULL, 1, 2, 3, 4, 5] + +# array_any_value scalar function #3 (using function alias `list_any_value`) +query IT +select list_any_value(make_array(NULL, 1, 2, 3, 4, 5)), list_any_value(make_array(NULL, 'h', 'e', 'l', 'l', 'o')); +---- +1 h + +query IT +select list_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), list_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +1 h + +query IT +select list_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'FixedSizeList(6, Int64)')), list_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'FixedSizeList(6, Utf8)')); +---- +1 h + +# array_any_value with columns + +query I +select array_any_value(column1) from slices; +---- +2 +11 +21 +31 +NULL +41 +51 + +query I +select array_any_value(arrow_cast(column1, 'LargeList(Int64)')) from slices; +---- +2 +11 +21 +31 +NULL +41 +51 + +query I +select array_any_value(column1) from fixed_slices; +---- +2 +11 +21 +31 +41 +51 + +# array_any_value with columns and scalars + +query II +select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(column1) from slices; +---- +1 2 +1 11 +1 21 +1 31 +1 NULL +1 41 +1 51 + +query II +select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), array_any_value(arrow_cast(column1, 'LargeList(Int64)')) from slices; +---- +1 2 +1 11 +1 21 +1 31 +1 NULL +1 41 +1 51 + +query II +select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(column1) from fixed_slices; +---- +1 2 +1 11 +1 21 +1 31 +1 41 +1 51 + +# make_array with nulls +query ??????? 
+select make_array(make_array('a','b'), null), + make_array(make_array('a','b'), null, make_array('c','d')), + make_array(null, make_array('a','b'), null), + make_array(null, make_array('a','b'), null, null, make_array('c','d')), + make_array(['a', 'bc', 'def'], null, make_array('rust')), + make_array([1,2,3], null, make_array(4,5,6,7)), + make_array(null, 1, null, 2, null, 3, null, null, 4, 5); +---- +[[a, b], NULL] [[a, b], NULL, [c, d]] [NULL, [a, b], NULL] [NULL, [a, b], NULL, NULL, [c, d]] [[a, bc, def], NULL, [rust]] [[1, 2, 3], NULL, [4, 5, 6, 7]] [NULL, 1, NULL, 2, NULL, 3, NULL, NULL, 4, 5] + +query ? +select make_array(column5, null, column5) from arrays_values_without_nulls; +---- +[[2, 3], NULL, [2, 3]] +[[4, 5], NULL, [4, 5]] +[[6, 7], NULL, [6, 7]] +[[8, 9], NULL, [8, 9]] + +query ? +select make_array(['a','b'], null); +---- +[[a, b], NULL] + +## array_sort (aliases: `list_sort`) +query ??? +select array_sort(make_array(1, 3, null, 5, NULL, -5)), array_sort(make_array(1, 3, null, 2), 'ASC'), array_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); +---- +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] + +query ??? +select array_sort(arrow_cast(make_array(1, 3, null, 5, NULL, -5), 'LargeList(Int64)')), + array_sort(arrow_cast(make_array(1, 3, null, 2), 'LargeList(Int64)'), 'ASC'), + array_sort(arrow_cast(make_array(1, 3, null, 2), 'LargeList(Int64)'), 'desc', 'NULLS FIRST'); +---- +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] + +query ??? +select array_sort(arrow_cast(make_array(1, 3, null, 5, NULL, -5), 'FixedSizeList(6, Int64)')), + array_sort(arrow_cast(make_array(1, 3, null, 2), 'FixedSizeList(4, Int64)'), 'ASC'), + array_sort(arrow_cast(make_array(1, 3, null, 2), 'FixedSizeList(4, Int64)'), 'desc', 'NULLS FIRST'); +---- +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] + +query ? 
+select array_sort(column1, 'DESC', 'NULLS LAST') from arrays_values; +---- +[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] +[20, 18, 17, 16, 15, 14, 13, 12, 11, NULL] +[30, 29, 28, 27, 26, 25, 23, 22, 21, NULL] +[40, 39, 38, 37, 35, 34, 33, 32, 31, NULL] +NULL +[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] +[60, 59, 58, 57, 56, 55, 54, 52, 51, NULL] +[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] + +query ? +select array_sort(column1, 'ASC', 'NULLS FIRST') from arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[NULL, 11, 12, 13, 14, 15, 16, 17, 18, 20] +[NULL, 21, 22, 23, 25, 26, 27, 28, 29, 30] +[NULL, 31, 32, 33, 34, 35, 37, 38, 39, 40] +NULL +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[NULL, 51, 52, 54, 55, 56, 57, 58, 59, 60] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +# test with empty table +query ? +select array_sort(column1, 'DESC', 'NULLS FIRST') from arrays_values where false; +---- + +# test with empty array +query ? +select array_sort([]); +---- +[] + +# empty-but-non-null string arrays should remain non-null, not become null +query ?B +select array_sort(column1), array_sort(column1) is null +from (values (arrow_cast(make_array('b', 'a'), 'List(Utf8)')), (arrow_cast([], 'List(Utf8)'))) as t(column1); +---- +[a, b] false +[] false + +# test with null arguments +query ? +select array_sort(NULL); +---- +NULL + +query ? +select array_sort(column1, NULL) from arrays_values; +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +query ?? +select array_sort(column1, 'DESC', NULL), array_sort(column1, 'ASC', NULL) from arrays_values; +---- +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL + +query ?? 
+select array_sort(column1, NULL, 'NULLS FIRST'), array_sort(column1, NULL, 'NULLS LAST') from arrays_values; +---- +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL + +# maintains inner nullability +query ?T +select array_sort(column1), arrow_typeof(array_sort(column1)) +from values + (arrow_cast([], 'List(non-null Int32)')), + (arrow_cast(NULL, 'List(non-null Int32)')), + (arrow_cast([1, 3, 5, -5], 'List(non-null Int32)')) +; +---- +[] List(non-null Int32) +NULL List(non-null Int32) +[-5, 1, 3, 5] List(non-null Int32) + +query ?T +select column1, arrow_typeof(column1) +from values (array_sort(arrow_cast([1, 3, 5, -5], 'LargeList(non-null Int32)'))); +---- +[-5, 1, 3, 5] LargeList(non-null Int32) + +query ?T +select column1, arrow_typeof(column1) +from values (array_sort(arrow_cast([1, 3, 5, -5], 'FixedSizeList(4 x non-null Int32)'))); +---- +[-5, 1, 3, 5] List(non-null Int32) + +# arrays of strings +query ??? +select array_sort(make_array('banana', 'apple', null, 'cherry')), + array_sort(make_array('banana', 'apple', null, 'cherry'), 'DESC', 'NULLS LAST'), + array_sort(make_array('banana', 'apple', null, 'cherry'), 'ASC', 'NULLS LAST'); +---- +[NULL, apple, banana, cherry] [cherry, banana, apple, NULL] [apple, banana, cherry, NULL] + +query ? 
+select array_sort([struct('foo', 3), struct('foo', 1), struct('bar', 1)]) +---- +[{c0: bar, c1: 1}, {c0: foo, c1: 1}, {c0: foo, c1: 3}] + +## test with argument of incorrect types +query error DataFusion error: Execution error: the second parameter of array_sort expects DESC or ASC +select array_sort([1, 3, null, 5, NULL, -5], 1), array_sort([1, 3, null, 5, NULL, -5], 'DESC', 1), array_sort([1, 3, null, 5, NULL, -5], 1, 1); + +# test with empty row, the row that does not match the condition has row count 0 +statement ok +create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2); + +# rowsort is to ensure the order of group by is deterministic, array_sort has no effect here, since the sum() always returns single row. +query ? rowsort +select array_sort([sum(a)]) from t1 where a > 100 group by b; +---- +[102] +[202] + +statement ok +drop table t1; + +# float arrays with NaN and Infinity (NaN sorts after Infinity per IEEE totalOrder) +query ??? +select array_sort(make_array(1.0, 'NaN'::double, -1.0, 'Infinity'::double, '-Infinity'::double, null)), + array_sort(make_array(1.0, 'NaN'::double, -1.0, 'Infinity'::double, '-Infinity'::double, null), 'DESC', 'NULLS LAST'), + array_sort(make_array('NaN'::double, 'NaN'::double, 1.0)); +---- +[NULL, -inf, -1.0, 1.0, inf, NaN] [NaN, inf, 1.0, -1.0, -inf, NULL] [1.0, NaN, NaN] + +# float32 arrays +query ?? +select array_sort(arrow_cast(make_array(3.0, 1.0, 'NaN'::double, null, 2.0), 'List(Float32)')), + array_sort(arrow_cast(make_array(3.0, 1.0, 'NaN'::double, null, 2.0), 'List(Float32)'), 'DESC', 'NULLS LAST'); +---- +[NULL, 1.0, 2.0, 3.0, NaN] [NaN, 3.0, 2.0, 1.0, NULL] + +# element-level nulls with all sort option combinations +query ???? 
+select array_sort(make_array(3, null, 1, null, 2), 'ASC', 'NULLS FIRST'), + array_sort(make_array(3, null, 1, null, 2), 'ASC', 'NULLS LAST'), + array_sort(make_array(3, null, 1, null, 2), 'DESC', 'NULLS FIRST'), + array_sort(make_array(3, null, 1, null, 2), 'DESC', 'NULLS LAST'); +---- +[NULL, NULL, 1, 2, 3] [1, 2, 3, NULL, NULL] [NULL, NULL, 3, 2, 1] [3, 2, 1, NULL, NULL] + +# timestamp arrays +query ?? +select array_sort(make_array(arrow_cast('2024-01-15T10:00:00', 'Timestamp(Nanosecond, None)'), + arrow_cast('2024-01-01T00:00:00', 'Timestamp(Nanosecond, None)'), + null, + arrow_cast('2024-06-15T12:00:00', 'Timestamp(Nanosecond, None)'))), + array_sort(make_array(arrow_cast('2024-01-15T10:00:00', 'Timestamp(Nanosecond, None)'), + arrow_cast('2024-01-01T00:00:00', 'Timestamp(Nanosecond, None)'), + null, + arrow_cast('2024-06-15T12:00:00', 'Timestamp(Nanosecond, None)')), 'DESC', 'NULLS LAST'); +---- +[NULL, 2024-01-01T00:00:00, 2024-01-15T10:00:00, 2024-06-15T12:00:00] [2024-06-15T12:00:00, 2024-01-15T10:00:00, 2024-01-01T00:00:00, NULL] + +# date arrays +query ?? +select array_sort(make_array('2024-03-01'::date, '2024-01-01'::date, null, '2024-02-01'::date)), + array_sort(make_array('2024-03-01'::date, '2024-01-01'::date, null, '2024-02-01'::date), 'DESC', 'NULLS LAST'); +---- +[NULL, 2024-01-01, 2024-02-01, 2024-03-01] [2024-03-01, 2024-02-01, 2024-01-01, NULL] + +# struct arrays with nulls and DESC +query ?? +select array_sort([struct('b', 2), struct('a', 1), null, struct('a', 3)]), + array_sort([struct('b', 2), struct('a', 1), null, struct('a', 3)], 'DESC', 'NULLS LAST'); +---- +[NULL, {c0: a, c1: 1}, {c0: a, c1: 3}, {c0: b, c1: 2}] [{c0: b, c1: 2}, {c0: a, c1: 3}, {c0: a, c1: 1}, NULL] + +# boolean arrays +query ?? +select array_sort(make_array(true, false, null, true, false)), + array_sort(make_array(true, false, null, true, false), 'DESC', 'NULLS LAST'); +---- +[NULL, false, false, true, true] [true, true, false, false, NULL] + +# all-null array +query ? 
+select array_sort(make_array(null, null, null)); +---- +[NULL, NULL, NULL] + +# single-element arrays +query ?? +select array_sort(make_array(42)), array_sort(make_array(null::int)); +---- +[42] [NULL] + +## list_sort (aliases: `array_sort`) +query ??? +select list_sort(make_array(1, 3, null, 5, NULL, -5)), list_sort(make_array(1, 3, null, 2), 'ASC'), list_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); +---- +[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] + + +## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`) + +# array_append with NULLs + +query ? +select array_append(null, 1); +---- +[1] + +query ? +select array_append(null, [2, 3]); +---- +[[2, 3]] + +query ? +select array_append(null, [[4]]); +---- +[[[4]]] + +query ???? +select + array_append(make_array(), 4), + array_append(make_array(), null), + array_append(make_array(1, null, 3), 4), + array_append(make_array(null, null), 1) +; +---- +[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] + +query ???? +select + array_append(arrow_cast(make_array(), 'LargeList(Int64)'), 4), + array_append(arrow_cast(make_array(), 'LargeList(Int64)'), null), + array_append(arrow_cast(make_array(1, null, 3), 'LargeList(Int64)'), 4), + array_append(arrow_cast(make_array(null, null), 'LargeList(Int64)'), 1) +; +---- +[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] + +query ?? +select + array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'), 4), + array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Int64)'), 1) +; +---- +[1, NULL, 3, 4] [NULL, NULL, 1] + +# test invalid (non-null) +query error +select array_append(1, 2); + +query error +select array_append(1, [2]); + +query error +select array_append([1], [2]); + +query ?? +select + array_append(make_array(make_array(1, null, 3)), make_array(null)), + array_append(make_array(make_array(1, null, 3)), null); +---- +[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] + +query ?? 
+select + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), arrow_cast(make_array(null), 'LargeList(Int64)')), + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), null); +---- +[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] + +query ?? +select + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), [null]), + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), null); +---- +[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] + +# array_append scalar function #3 +query ??? +select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'LargeList(Utf8)'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_append(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'FixedSizeList(4, Utf8)'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_append scalar function #4 (element is list) +query ??? +select array_append(make_array([1], [2], [3]), make_array(4)), array_append(make_array([1.0], [2.0], [3.0]), make_array(4.0)), array_append(make_array(['h'], ['e'], ['l'], ['l']), make_array('o')); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? 
+select array_append(arrow_cast(make_array([1], [2], [3]), 'LargeList(LargeList(Int64))'), arrow_cast(make_array(4), 'LargeList(Int64)')), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'LargeList(LargeList(Float64))'), arrow_cast(make_array(4.0), 'LargeList(Float64)')), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'LargeList(LargeList(Utf8))'), arrow_cast(make_array('o'), 'LargeList(Utf8)')); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? +select array_append(arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), [4]), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'FixedSizeList(3, List(Float64))'), [4.0]), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'FixedSizeList(4, List(Utf8))'), ['o']); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +# list_append scalar function #5 (function alias `array_append`) +query ??? +select list_append(make_array(1, 2, 3), 4), list_append(make_array(1.0, 2.0, 3.0), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select list_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), list_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_push_back scalar function #6 (function alias `array_append`) +query ??? +select array_push_back(make_array(1, 2, 3), 4), array_push_back(make_array(1.0, 2.0, 3.0), 4.0), array_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? 
+select array_push_back(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_push_back(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# list_push_back scalar function #7 (function alias `array_append`) +query ??? +select list_push_back(make_array(1, 2, 3), 4), list_push_back(make_array(1.0, 2.0, 3.0), 4.0), list_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select list_push_back(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), list_push_back(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), list_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_append with columns #1 +query ? +select array_append(column1, column2) from arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] +[44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + +query ? +select array_append(column1, column2) from large_arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] +[44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + +query ? 
+select array_append(column1, column2) from fixed_arrays_values; +---- +[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] +[NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + +# array_append with columns #2 (element is list) +query ? +select array_append(column1, column2) from nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] + +query ? +select array_append(column1, column2) from large_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] + +query ? +select array_append(column1, column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] + +# array_append with columns and scalars #1 +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] +[10.1, NULL, 12.2, 100.1] [s, i, t, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] +[100.1] [,, .] +[16.6, 17.7, 18.8, 100.1] [.] + +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from large_arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] +[10.1, NULL, 12.2, 100.1] [s, i, t, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] 
+[100.1] [,, .] +[16.6, 17.7, 18.8, 100.1] [.] + +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from fixed_size_arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] +[10.1, NULL, 12.2, 100.1] [s, i, t, a, b, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, x, .] +[NULL, NULL, NULL, 100.1] [,, a, b, c, d, .] +[16.6, 17.7, 18.8, 100.1] [NULL, NULL, NULL, NULL, NULL, .] + +# array_append with columns and scalars #2 +query ?? +select array_append(column1, make_array(1, 11, 111)), array_append(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), column2) from nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] + +query ?? +select array_append(column1, arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'LargeList(LargeList(Int64))'), column2) from large_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] + +query ?? 
+select array_append(column1, arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))'), column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] + +## array_prepend (aliases: `list_prepend`, `array_push_front`, `list_push_front`) + +# array_prepend with NULLs + +# DuckDB: [4] +# ClickHouse: Null +query ? +select array_prepend(4, NULL); +---- +[4] + +query ? +select array_prepend(4, []); +---- +[4] + +query ? +select array_prepend(4, [null]); +---- +[4, NULL] + +# DuckDB: [null] +# ClickHouse: [null] +query ? +select array_prepend(null, []); +---- +[NULL] + +query ? +select array_prepend(null, [1]); +---- +[NULL, 1] + +query ? +select array_prepend(null, [[1,2,3]]); +---- +[NULL, [1, 2, 3]] + +# DuckDB: [[]] +# ClickHouse: [[]] +# TODO: We may also return [[]] +query ? +select array_prepend([], []); +---- +[[]] + +query ? +select array_prepend(null, null); +---- +[NULL] + +query ? +select array_append([], null); +---- +[NULL] + + +# array_prepend scalar function #3 +query ??? +select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), array_prepend(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), array_prepend('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? 
+select array_prepend(1, arrow_cast([2, 3, 4], 'FixedSizeList(3, Int64)')), array_prepend(1.0, arrow_cast([2.0, 3.0, 4.0], 'FixedSizeList(3, Float64)')), array_prepend('h', arrow_cast(['e', 'l', 'l', 'o'], 'FixedSizeList(4, Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_prepend scalar function #4 (element is list) +query ??? +select array_prepend(make_array(1), make_array(make_array(2), make_array(3), make_array(4))), array_prepend(make_array(1.0), make_array([2.0], [3.0], [4.0])), array_prepend(make_array('h'), make_array(['e'], ['l'], ['l'], ['o'])); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? +select array_prepend(arrow_cast(make_array(1), 'LargeList(Int64)'), arrow_cast(make_array(make_array(2), make_array(3), make_array(4)), 'LargeList(LargeList(Int64))')), + array_prepend(arrow_cast(make_array(1.0), 'LargeList(Float64)'), arrow_cast(make_array([2.0], [3.0], [4.0]), 'LargeList(LargeList(Float64))')), + array_prepend(arrow_cast(make_array('h'), 'LargeList(Utf8)'), arrow_cast(make_array(['e'], ['l'], ['l'], ['o']), 'LargeList(LargeList(Utf8))')); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? +select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, List(Int64))')), + array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, List(Float64))')), + array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, List(Utf8))')); +---- +[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +# list_prepend scalar function #5 (function alias `array_prepend`) +query ??? 
+select list_prepend(1, make_array(2, 3, 4)), list_prepend(1.0, make_array(2.0, 3.0, 4.0)), list_prepend('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select list_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), list_prepend(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), list_prepend('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_push_front scalar function #6 (function alias `array_prepend`) +query ??? +select array_push_front(1, make_array(2, 3, 4)), array_push_front(1.0, make_array(2.0, 3.0, 4.0)), array_push_front('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_push_front(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), array_push_front(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), array_push_front('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# list_push_front scalar function #7 (function alias `array_prepend`) +query ??? +select list_push_front(1, make_array(2, 3, 4)), list_push_front(1.0, make_array(2.0, 3.0, 4.0)), list_push_front('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select list_push_front(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), list_push_front(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), list_push_front('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_prepend scalar function #7 (element is fixed size list) +query ??? 
+select array_prepend(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'), make_array(arrow_cast(make_array(2), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(3), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(4), 'FixedSizeList(1, Int64)'))), + array_prepend(arrow_cast(make_array(1.0), 'FixedSizeList(1, Float64)'), make_array(arrow_cast([2.0], 'FixedSizeList(1, Float64)'), arrow_cast([3.0], 'FixedSizeList(1, Float64)'), arrow_cast([4.0], 'FixedSizeList(1, Float64)'))), + array_prepend(arrow_cast(make_array('h'), 'FixedSizeList(1, Utf8)'), make_array(arrow_cast(['e'], 'FixedSizeList(1, Utf8)'), arrow_cast(['l'], 'FixedSizeList(1, Utf8)'), arrow_cast(['l'], 'FixedSizeList(1, Utf8)'), arrow_cast(['o'], 'FixedSizeList(1, Utf8)'))); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? +select array_prepend(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(make_array(2), make_array(3), make_array(4)), 'LargeList(FixedSizeList(1, Int64))')), + array_prepend(arrow_cast(make_array(1.0), 'FixedSizeList(1, Float64)'), arrow_cast(make_array([2.0], [3.0], [4.0]), 'LargeList(FixedSizeList(1, Float64))')), + array_prepend(arrow_cast(make_array('h'), 'FixedSizeList(1, Utf8)'), arrow_cast(make_array(['e'], ['l'], ['l'], ['o']), 'LargeList(FixedSizeList(1, Utf8))')); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +query ??? 
+select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, FixedSizeList(1, Int64))')), + array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, FixedSizeList(1, Float64))')), + array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, FixedSizeList(1, Utf8))')); +---- +[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + +# array_prepend with columns #1 +query ? +select array_prepend(column2, column1) from arrays_values; +---- +[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] +[44] +[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +query ? +select array_prepend(column2, column1) from large_arrays_values; +---- +[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] +[44] +[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +query ? +select array_prepend(column2, column1) from fixed_arrays_values; +---- +[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] +[44, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] +[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +# array_prepend with columns #2 (element is list) +query ? 
+select array_prepend(column2, column1) from nested_arrays; +---- +[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] + +query ? +select array_prepend(column2, column1) from large_nested_arrays; +---- +[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] + +query ? +select array_prepend(column2, column1) from fixed_size_nested_arrays; +---- +[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] + +# array_prepend with columns and scalars #1 +query ?? +select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] +[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] +[100.1, 10.1, NULL, 12.2] [., s, i, t] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] +[100.1] [., ,] +[100.1, 16.6, 17.7, 18.8] [.] + +query ?? +select array_prepend(100.1, column2), array_prepend('.', column3) from large_arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] +[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] +[100.1, 10.1, NULL, 12.2] [., s, i, t] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] +[100.1] [., ,] +[100.1, 16.6, 17.7, 18.8] [.] + +query ?? 
+select array_prepend(100.1, column2), array_prepend('.', column3) from fixed_size_arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] +[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] +[100.1, 10.1, NULL, 12.2] [., s, i, t, a, b] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t, x] +[100.1, NULL, NULL, NULL] [., ,, a, b, c, d] +[100.1, 16.6, 17.7, 18.8] [., NULL, NULL, NULL, NULL, NULL] + +# array_prepend with columns and scalars #2 (element is list) +query ?? +select array_prepend(make_array(1, 11, 111), column1), array_prepend(column2, make_array(make_array(1, 2, 3), make_array(11, 12, 13))) from nested_arrays; +---- +[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] +[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] + +query ?? +select array_prepend(arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'LargeList(LargeList(Int64))')) from large_nested_arrays; +---- +[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] +[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] + +query ?? 
+select array_prepend(arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))')) from fixed_size_nested_arrays; +---- +[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] +[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] + +## array_repeat (aliases: `list_repeat`) + +# array_repeat scalar function #1 +query ???????? +select + array_repeat(1, 5), + array_repeat(3.14, 3), + array_repeat('l', 4), + array_repeat(null, 2), + list_repeat(-1, 5), + list_repeat(-3.14, 0), + list_repeat('rust', 4), + list_repeat(null, 0); +---- +[1, 1, 1, 1, 1] [3.14, 3.14, 3.14] [l, l, l, l] [NULL, NULL] [-1, -1, -1, -1, -1] [] [rust, rust, rust, rust] [] + +# array_repeat scalar function #2 (element as list) +query ???? +select + array_repeat([1], 5), + array_repeat([1.1, 2.2, 3.3], 3), + array_repeat([null, null], 3), + array_repeat([[1, 2], [3, 4]], 2); +---- +[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] + +query ???? +select + array_repeat(arrow_cast([1], 'LargeList(Int64)'), 5), + array_repeat(arrow_cast([1.1, 2.2, 3.3], 'LargeList(Float64)'), 3), + array_repeat(arrow_cast([null, null], 'LargeList(Int64)'), 3), + array_repeat(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2); +---- +[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] + +# array_repeat scalar function with count of different integer types +query ???????? 
+Select + array_repeat(1, arrow_cast(2,'Int8')), + array_repeat(2, arrow_cast(2,'Int16')), + array_repeat(3, arrow_cast(2,'Int32')), + array_repeat(4, arrow_cast(2,'Int64')), + array_repeat(1, arrow_cast(2,'UInt8')), + array_repeat(2, arrow_cast(2,'UInt16')), + array_repeat(3, arrow_cast(2,'UInt32')), + array_repeat(4, arrow_cast(2,'UInt64')); +---- +[1, 1] [2, 2] [3, 3] [4, 4] [1, 1] [2, 2] [3, 3] [4, 4] + +# array_repeat scalar function with count of negative integer types +query ???? +Select + array_repeat(1, arrow_cast(-2,'Int8')), + array_repeat(2, arrow_cast(-2,'Int16')), + array_repeat(3, arrow_cast(-2,'Int32')), + array_repeat(4, arrow_cast(-2,'Int64')); +---- +[] [] [] [] + +# array_repeat with columns #1 + +statement ok +CREATE TABLE array_repeat_table +AS VALUES + (1, 1, 1.1, 'a', make_array(4, 5, 6)), + (2, null, null, null, null), + (3, 2, 2.2, 'rust', make_array(7)), + (0, 3, 3.3, 'datafusion', make_array(8, 9)); + +statement ok +CREATE TABLE large_array_repeat_table +AS SELECT + column1, + column2, + column3, + column4, + arrow_cast(column5, 'LargeList(Int64)') as column5 +FROM array_repeat_table; + +query ?????? +select + array_repeat(column2, column1), + array_repeat(column3, column1), + array_repeat(column4, column1), + array_repeat(column5, column1), + array_repeat(column2, 3), + array_repeat(make_array(1), column1) +from array_repeat_table; +---- +[1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] +[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] +[2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] +[] [] [] [] [3, 3, 3] [] + +query ?????? 
+select + array_repeat(column2, column1), + array_repeat(column3, column1), + array_repeat(column4, column1), + array_repeat(column5, column1), + array_repeat(column2, 3), + array_repeat(make_array(1), column1) +from large_array_repeat_table; +---- +[1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] +[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] +[2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] +[] [] [] [] [3, 3, 3] [] + +statement ok +drop table array_repeat_table; + +statement ok +drop table large_array_repeat_table; + +# array_repeat: arrays with NULL counts +statement ok +create table array_repeat_null_count_table +as values +(1, 2), +(2, null), +(3, 1), +(4, -1), +(null, null); + +query I? +select column1, array_repeat(column1, column2) from array_repeat_null_count_table; +---- +1 [1, 1] +2 NULL +3 [3] +4 [] +NULL NULL + +statement ok +drop table array_repeat_null_count_table + +# array_repeat: nested arrays with NULL counts +statement ok +create table array_repeat_nested_null_count_table +as values +([[1, 2], [3, 4]], 2), +([[5, 6], [7, 8]], null), +([[null, null], [9, 10]], 1), +(null, 3), +([[11, 12]], -1); + +query ?? +select column1, array_repeat(column1, column2) from array_repeat_nested_null_count_table; +---- +[[1, 2], [3, 4]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] +[[5, 6], [7, 8]] NULL +[[NULL, NULL], [9, 10]] [[[NULL, NULL], [9, 10]]] +NULL [NULL, NULL, NULL] +[[11, 12]] [] + +statement ok +drop table array_repeat_nested_null_count_table + +# array_repeat edge cases: empty arrays +query ??? +select array_repeat([], 3), array_repeat([], 0), array_repeat([], null); +---- +[[], [], []] [] NULL + +query ?? 
+select array_repeat(null::int, 0), array_repeat(null::int, null); +---- +[] NULL + +# array_repeat LargeList with NULL count +statement ok +create table array_repeat_large_list_null_table +as values +(arrow_cast([1, 2, 3], 'LargeList(Int64)'), 2), +(arrow_cast([4, 5], 'LargeList(Int64)'), null), +(arrow_cast(null, 'LargeList(Int64)'), 3); + +query ?? +select column1, array_repeat(column1, column2) from array_repeat_large_list_null_table; +---- +[1, 2, 3] [[1, 2, 3], [1, 2, 3]] +[4, 5] NULL +NULL [NULL, NULL, NULL] + +statement ok +drop table array_repeat_large_list_null_table + +# array_repeat edge cases: LargeList nested with NULL count +statement ok +create table array_repeat_large_nested_null_table +as values +(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2), +(arrow_cast([[5, 6], [7, 8]], 'LargeList(List(Int64))'), null), +(arrow_cast([[null, null]], 'LargeList(List(Int64))'), 1), +(null, 3); + +query ?? +select column1, array_repeat(column1, column2) from array_repeat_large_nested_null_table; +---- +[[1, 2], [3, 4]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] +[[5, 6], [7, 8]] NULL +[[NULL, NULL]] [[[NULL, NULL]]] +NULL [NULL, NULL, NULL] + +statement ok +drop table array_repeat_large_nested_null_table + +## array_concat (aliases: `array_cat`, `list_concat`, `list_cat`) + +# test with empty array +query ? +select array_concat([]); +---- +[] + +# test with NULL array +query ? +select array_concat(NULL::integer[]); +---- +NULL + +# test with multiple NULL arrays +query ? +select array_concat(NULL::integer[], NULL::integer[]); +---- +NULL + +# test with NULL LargeList +query ? +select array_concat(arrow_cast(NULL::string[], 'LargeList(Utf8)')); +---- +NULL + +# test with NULL FixedSizeList +query ? +select array_concat(arrow_cast(NULL::string[], 'FixedSizeList(2, Utf8)')); +---- +NULL + +# test with mix of NULL and empty arrays +query ? +select array_concat(NULL::integer[], []); +---- +[] + +# test with mix of NULL and non-empty arrays +query ? 
+select array_concat(NULL::integer[], [1, 2, 3]); +---- +[1, 2, 3] + +# Concatenating strings arrays +query ? +select array_concat( + ['1', '2'], + ['3'] +); +---- +[1, 2, 3] + +query ? +select array_concat( + arrow_cast(['1', '2'], 'LargeList(Utf8)'), + arrow_cast(['3'], 'LargeList(Utf8)') +); +---- +[1, 2, 3] + +query ? +select array_concat( + arrow_cast(['1', '2'], 'FixedSizeList(2, Utf8)'), + arrow_cast(['3'], 'FixedSizeList(1, Utf8)') +); +---- +[1, 2, 3] + +# Concatenating string arrays +query ? +select array_concat( + [arrow_cast('1', 'LargeUtf8'), arrow_cast('2', 'LargeUtf8')], + [arrow_cast('3', 'LargeUtf8')] +); +---- +[1, 2, 3] + +# Concatenating stringview +query ? +select array_concat( + [arrow_cast('1', 'Utf8View'), arrow_cast('2', 'Utf8View')], + [arrow_cast('3', 'Utf8View')] +); +---- +[1, 2, 3] + +# Concatenating Mixed types +query ? +select array_concat( + [arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], + [arrow_cast('3', 'LargeUtf8')] +); +---- +[1, 2, 3] + +# Concatenating Mixed types +query ?T +select + array_concat([arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], [arrow_cast('3', 'Utf8View')]), + arrow_typeof(array_concat([arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], [arrow_cast('3', 'Utf8View')])); +---- +[1, 2, 3] List(Utf8View) + +# array_concat with NULL elements inside arrays +query ? +select array_concat([1, NULL, 3], [NULL, 5]); +---- +[1, NULL, 3, NULL, 5] + +query ? +select array_concat([NULL, NULL], [1, 2], [NULL]); +---- +[NULL, NULL, 1, 2, NULL] + +query ? +select array_concat([NULL, NULL], [NULL, NULL]); +---- +[NULL, NULL, NULL, NULL] + +# array_concat error +query error DataFusion error: Error during planning: Execution error: Function 'array_concat' user-defined coercion failed with: Error during planning: array_concat does not support type Int64 +select array_concat(1, 2); + +# array_concat scalar function #1 +query ?? 
+select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# array_concat scalar function #2 +query ? +select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array(5, 6), make_array(7, 8))); +---- +[[1, 2], [3, 4], [5, 6], [7, 8]] + +# array_concat scalar function #3 +query ? +select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), make_array([7], [8], [9])); +---- +[[1], [2], [3], [4], [5], [6], [7], [8], [9]] + +# array_concat scalar function #4 +query ? +select array_concat(make_array([[1]]), make_array([[2]])); +---- +[[[1]], [[2]]] + +# array_concat scalar function #5 +query ? +select array_concat(make_array(2, 3), make_array()); +---- +[2, 3] + +# array_concat scalar function #6 +query ? +select array_concat(make_array(), make_array(2, 3)); +---- +[2, 3] + +# array_concat scalar function #7 (with empty arrays) +query ? +select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array())); +---- +[[1, 2], [3, 4], []] + +# array_concat scalar function #8 (with empty arrays) +query ? +select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array()), make_array(make_array(), make_array()), make_array(make_array(5, 6), make_array(7, 8))); +---- +[[1, 2], [3, 4], [], [], [], [5, 6], [7, 8]] + +# array_concat scalar function #9 (with empty arrays) +query ? +select array_concat(make_array(make_array()), make_array(make_array(1, 2), make_array(3, 4))); +---- +[[], [1, 2], [3, 4]] + +# array_cat scalar function #10 (function alias `array_concat`) +query ?? +select array_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_cat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# list_concat scalar function #11 (function alias `array_concat`) +query ?? 
+select list_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_concat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# list_cat scalar function #12 (function alias `array_concat`) +query ?? +select list_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_cat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# array_concat with different dimensions #1 (2D + 1D) +query ? +select array_concat(make_array([1,2], [3,4]), make_array(5, 6)); +---- +[[1, 2], [3, 4], [5, 6]] + +# array_concat with different dimensions #2 (1D + 2D) +query ? +select array_concat(make_array(5, 6), make_array([1,2], [3,4])); +---- +[[5, 6], [1, 2], [3, 4]] + +# array_concat with different dimensions #3 (2D + 1D + 1D) +query ? +select array_concat(make_array([1,2], [3,4]), make_array(5, 6), make_array(7,8)); +---- +[[1, 2], [3, 4], [5, 6], [7, 8]] + +# array_concat with different dimensions #4 (1D + 2D + 3D) +query ? +select array_concat(make_array(10, 20), make_array([30, 40]), make_array([[50, 60]])); +---- +[[[10, 20]], [[30, 40]], [[50, 60]]] + +# array_concat with different dimensions #5 (2D + 1D + 3D) +query ? +select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]])); +---- +[[[30, 40]], [[10, 20]], [[50, 60]]] + +# array_concat with different dimensions #6 (2D + 1D + 3D + 4D + 3D) +query ? +select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]]), make_array([[[70, 80]]]), make_array([[80, 40]])); +---- +[[[[30, 40]]], [[[10, 20]]], [[[50, 60]]], [[[70, 80]]], [[[80, 40]]]] + +# array_concat column-wise #1 +query ? 
+select array_concat(column1, make_array(0)) from arrays_values_without_nulls; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0] +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0] +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0] +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40, 0] + +# array_concat column-wise #2 +query ? +select array_concat(column1, column1) from arrays_values_without_nulls; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40, 31, 32, 33, 34, 35, 26, 37, 38, 39, 40] + +# array_concat column-wise #3 +query ? +select array_concat(make_array(column2), make_array(column3)) from arrays_values_without_nulls; +---- +[1, 1] +[12, 2] +[23, 3] +[34, 4] + +# array_concat column-wise #4 +query ? +select array_concat(make_array(column2), make_array(0)) from arrays_values; +---- +[1, 0] +[12, 0] +[23, 0] +[34, 0] +[44, 0] +[NULL, 0] +[55, 0] +[66, 0] + +# array_concat column-wise #5 +query ??? +select array_concat(column1, column1), array_concat(column2, column2), array_concat(column3, column3) from arrays; +---- +[[NULL, 2], [3, NULL], [NULL, 2], [3, NULL]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] [L, o, r, e, m, L, o, r, e, m] +[[3, 4], [5, 6], [3, 4], [5, 6]] [NULL, 5.5, 6.6, NULL, 5.5, 6.6] [i, p, NULL, u, m, i, p, NULL, u, m] +[[5, 6], [7, 8], [5, 6], [7, 8]] [7.7, 8.8, 9.9, 7.7, 8.8, 9.9] [d, NULL, l, o, r, d, NULL, l, o, r] +[[7, NULL], [9, 10], [7, NULL], [9, 10]] [10.1, NULL, 12.2, 10.1, NULL, 12.2] [s, i, t, s, i, t] +NULL [13.3, 14.4, 15.5, 13.3, 14.4, 15.5] [a, m, e, t, a, m, e, t] +[[11, 12], [13, 14], [11, 12], [13, 14]] NULL [,, ,] +[[15, 16], [NULL, 18], [15, 16], [NULL, 18]] [16.6, 17.7, 18.8, 16.6, 17.7, 18.8] NULL + +# array_concat column-wise #6 +query ?? 
+select array_concat(column1, make_array(make_array(1, 2), make_array(3, 4))), array_concat(column2, make_array(1.1, 2.2, 3.3)) from arrays; +---- +[[NULL, 2], [3, NULL], [1, 2], [3, 4]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] +[[3, 4], [5, 6], [1, 2], [3, 4]] [NULL, 5.5, 6.6, 1.1, 2.2, 3.3] +[[5, 6], [7, 8], [1, 2], [3, 4]] [7.7, 8.8, 9.9, 1.1, 2.2, 3.3] +[[7, NULL], [9, 10], [1, 2], [3, 4]] [10.1, NULL, 12.2, 1.1, 2.2, 3.3] +[[1, 2], [3, 4]] [13.3, 14.4, 15.5, 1.1, 2.2, 3.3] +[[11, 12], [13, 14], [1, 2], [3, 4]] [1.1, 2.2, 3.3] +[[15, 16], [NULL, 18], [1, 2], [3, 4]] [16.6, 17.7, 18.8, 1.1, 2.2, 3.3] + +# array_concat column-wise #7 +query ? +select array_concat(column3, make_array('.', '.', '.')) from arrays; +---- +[L, o, r, e, m, ., ., .] +[i, p, NULL, u, m, ., ., .] +[d, NULL, l, o, r, ., ., .] +[s, i, t, ., ., .] +[a, m, e, t, ., ., .] +[,, ., ., .] +[., ., .] + +# query ??I? +# select column1, column2, column3, column4 from arrays_values_v2; +# ---- +# [NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] +# NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] +# [9, NULL, 10] NULL 14 [[70, NULL, NULL]] +# [NULL, 1] [NULL, 21] NULL NULL +# [11, 12] NULL NULL NULL +# NULL NULL NULL NULL + + +# array_concat column-wise #8 (1D + 1D) +query ? +select array_concat(column1, column2) from arrays_values_v2; +---- +[NULL, 2, 3, 4, 5, NULL] +[7, NULL, 8] +[9, NULL, 10] +[NULL, 1, NULL, 21] +[11, 12] +NULL + +# array_concat column-wise #9 (2D + 1D) +query ? +select array_concat(column4, make_array(column3)) from arrays_values_v2; +---- +[[30, 40, 50], [12]] +[[NULL, NULL, 60], [13]] +[[70, NULL, NULL], [14]] +[[NULL]] +[[NULL]] +[[NULL]] + +# array_concat column-wise #10 (3D + 2D + 1D) +query ? 
+select array_concat(column4, column1, column2) from nested_arrays; +---- +[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]], [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]], [[7, 8, 9]]] +[[[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]], [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]], [[10, 11, 12]]] + +# array_concat column-wise #11 (2D + 1D) +query ? +select array_concat(column4, column1) from arrays_values_v2; +---- +[[30, 40, 50], [NULL, 2, 3]] +[[NULL, NULL, 60], NULL] +[[70, NULL, NULL], [9, NULL, 10]] +[[NULL, 1]] +[[11, 12]] +[NULL] + +# array_concat column-wise #12 (1D + 1D + 1D) +query ? +select array_concat(make_array(column3), column1, column2) from arrays_values_v2; +---- +[12, NULL, 2, 3, 4, 5, NULL] +[13, 7, NULL, 8] +[14, 9, NULL, 10] +[NULL, NULL, 1, NULL, 21] +[NULL, 11, 12] +[NULL] + +## array_position (aliases: `list_position`, `array_indexof`, `list_indexof`) + +## array_position with NULL (follow PostgreSQL) +query II +select array_position([1, 2, 3, 4, 5], arrow_cast(NULL, 'Int64')), array_position(arrow_cast(NULL, 'List(Int64)'), 1); +---- +NULL NULL + +# array_position with no match (incl. 
empty array) returns NULL +query II +select array_position([], 1), array_position([2], 1); +---- +NULL NULL + +# array_position scalar function #1 +query III +select array_position(['h', 'e', 'l', 'l', 'o'], 'l'), array_position([1, 2, 3, 4, 5], 5), array_position([1, 1, 1], 1); +---- +3 5 1 + +query III +select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +3 5 1 + +query III +select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), array_position(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), array_position(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); +---- +3 5 1 + +# array_position scalar function #2 (with optional argument) +query III +select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, 5, 4, 5], 5, 4), array_position([1, 1, 1], 1, 2); +---- +4 5 2 + +query III +select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l', 4), array_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5, 4), array_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1, 2); +---- +4 5 2 + +query III +select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l', 4), array_position(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5, 4), array_position(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1, 2); +---- +4 5 2 + +# array_position scalar function #3 (element is list) +query II +select array_position(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_position(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); +---- +2 2 + +# array_position scalar function #4 (element in list; with optional argument) +query II +select array_position(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 
[4, 5, 6], 3), array_position(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4], 3); +---- +4 3 + +query II +select array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), array_position(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); +---- +2 2 + +query I +SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5) +---- +1 + +query I +SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5, 2) +---- +5 + +query I +SELECT array_position(arrow_cast([1, 1, 100, 1, 1], 'LargeList(Int32)'), 100) +---- +3 + +query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_position' function: coercion from +SELECT array_position([1, 2, 3], 'foo') + +query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_position' function: coercion from +SELECT array_position([1, 2, 3], 'foo', 2) + +# list_position scalar function #5 (function alias `array_position`) +query III +select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1); +---- +3 5 1 + +query III +select list_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +3 5 1 + +# array_indexof scalar function #6 (function alias `array_position`) +query III +select array_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), array_indexof([1, 2, 3, 4, 5], 5), array_indexof([1, 1, 1], 1); +---- +3 5 1 + +query III +select array_indexof(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_indexof(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_indexof(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +3 5 1 + +# 
list_indexof scalar function #7 (function alias `array_position`) +query III +select list_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), list_indexof([1, 2, 3, 4, 5], 5), list_indexof([1, 1, 1], 1); +---- +3 5 1 + +query III +select list_indexof(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_indexof(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_indexof(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +3 5 1 + +# array_position with columns #1 +query II +select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls; +---- +1 1 +2 2 +3 3 +4 4 + +query II +select array_position(column1, column2), array_position(column1, column2, column3) from large_arrays_values_without_nulls; +---- +1 1 +2 2 +3 3 +4 4 + +# array_position with columns #2 (element is list) +query II +select array_position(column1, column2), array_position(column1, column2, column3) from nested_arrays; +---- +3 3 +2 5 + +query II +select array_position(column1, column2), array_position(column1, column2, column3) from nested_arrays; +---- +3 3 +2 5 + +# array_position with columns and scalars #1 +query III +select array_position(make_array(1, 2, 3, 4, 5), column2), array_position(column1, 3), array_position(column1, 3, 5) from arrays_values_without_nulls; +---- +1 3 NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL + +query III +select array_position(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2), array_position(column1, 3), array_position(column1, 3, 5) from large_arrays_values_without_nulls; +---- +1 3 NULL +NULL NULL NULL +NULL NULL NULL +NULL NULL NULL + +# array_position with columns and scalars #2 (element is list) +query III +select array_position(make_array([1, 2, 3], [4, 5, 6], [11, 12, 13]), column2), array_position(column1, make_array(4, 5, 6)), array_position(column1, make_array(1, 2, 3), 2) from nested_arrays; +---- +NULL 6 4 +NULL 1 NULL + +query III +select 
array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [11, 12, 13]), 'LargeList(LargeList(Int64))'), column2), array_position(column1, arrow_cast(make_array(4, 5, 6), 'LargeList(Int64)')), array_position(column1, arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2) from large_nested_arrays; +---- +NULL 6 4 +NULL 1 NULL + +# array_position with NULL element in haystack array (NULL = NULL semantics) +query III +select array_position([1, NULL, 3], arrow_cast(NULL, 'Int64')), array_position([NULL, 2, 3], arrow_cast(NULL, 'Int64')), array_position([1, 2, NULL], arrow_cast(NULL, 'Int64')); +---- +2 1 3 + +query I +select array_position(arrow_cast([1, NULL, 3], 'LargeList(Int64)'), arrow_cast(NULL, 'Int64')); +---- +2 + +# array_position with NULL element in array and start_from +query II +select array_position([NULL, 1, NULL, 2], arrow_cast(NULL, 'Int64'), 2), array_position([NULL, 1, NULL, 2], arrow_cast(NULL, 'Int64'), 1); +---- +3 1 + +# array_position with column array and scalar element +query IIII +select array_position(column1, 3), array_position(column1, 10), array_position(column1, 20), array_position(column1, 999) from arrays_values_without_nulls; +---- +3 10 NULL NULL +NULL NULL 10 NULL +NULL NULL NULL NULL +NULL NULL NULL NULL + +query II +select array_position(column1, 3), array_position(column1, 20) from large_arrays_values_without_nulls; +---- +3 NULL +NULL 10 +NULL NULL +NULL NULL + +query II +select array_position(column1, 3), array_position(column1, 20) from fixed_size_arrays_values_without_nulls; +---- +3 NULL +NULL 10 +NULL NULL +NULL NULL + +# array_position with column array, scalar element, and scalar start_from +query II +select array_position(column1, 3, 1), array_position(column1, 3, 4) from arrays_values_without_nulls; +---- +3 NULL +NULL NULL +NULL NULL +NULL NULL + +query II +select array_position(column1, 3, 1), array_position(column1, 3, 4) from large_arrays_values_without_nulls; +---- +3 NULL +NULL NULL +NULL NULL +NULL NULL + +# 
array_position with column array, scalar element, and column start_from +query I +select array_position(column1, 3, column3) from arrays_values_without_nulls; +---- +3 +NULL +NULL +NULL + +# array_position with scalar haystack, scalar element, and column start_from +query I +select array_position([1, 2, 1, 2], 2, column3) from arrays_values_without_nulls; +---- +2 +2 +4 +4 + +# array_position start_from boundary cases +query IIII +select array_position([1, 2, 3], 3, 3), array_position([1, 2, 3], 1, 2), array_position([1, 2, 3], 1, 1), array_position([1, 2, 3], 3, 4); +---- +3 NULL 1 NULL + +query II +select array_position([1, 2, 3], 3, 4), array_position([1], 1, 2); +---- +NULL NULL + +# array_position with empty array in various contexts +query II +select array_position(arrow_cast(make_array(), 'List(Int64)'), 1), array_position(arrow_cast(make_array(), 'LargeList(Int64)'), 1); +---- +NULL NULL + +# FixedSizeList with start_from +query II +select array_position(arrow_cast([1, 2, 3, 1, 2], 'FixedSizeList(5, Int64)'), 1, 2), array_position(arrow_cast([1, 2, 3, 1, 2], 'FixedSizeList(5, Int64)'), 2, 4); +---- +4 5 + +query I +select array_position(arrow_cast(['a', 'b', 'c', 'b'], 'FixedSizeList(4, Utf8)'), 'b', 3); +---- +4 + +## array_positions (aliases: `list_positions`) + +# array_positions with empty array +query ? +select array_positions(arrow_cast(make_array(), 'List(Int64)'), 1); +---- +[] + +query ? +select array_positions([1, 2, 3, 4, 5], null); +---- +[] + +#TODO: https://github.com/apache/datafusion/issues/7142 +# array_positions with NULL (follow PostgreSQL) +#query ? +#select array_positions(null, 1); +#---- +#NULL + +# array_positions scalar function #1 +query ??? +select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); +---- +[3, 4] [5] [1, 2, 3] + +query ??? 
+select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_positions(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_positions(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + +query ??? +select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), array_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), array_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + +# array_positions scalar function #2 (element is list) +query ? +select array_positions(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), [2, 1, 3]); +---- +[2, 4] + +query ? +select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), 'LargeList(List(Int64))'), [2, 1, 3]); +---- +[2, 4] + +query ? +select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), 'FixedSizeList(5, List(Int64))'), [2, 1, 3]); +---- +[2, 4] + +# list_positions scalar function #3 (function alias `array_positions`) +query ??? +select list_positions(['h', 'e', 'l', 'l', 'o'], 'l'), list_positions([1, 2, 3, 4, 5], 5), list_positions([1, 1, 1], 1); +---- +[3, 4] [5] [1, 2, 3] + +query ??? +select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_positions(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_positions(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + +query ??? +select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), + list_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), + list_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); +---- +[3, 4] [5] [1, 2, 3] + +# array_positions with columns #1 +query ? +select array_positions(column1, column2) from arrays_values_without_nulls; +---- +[1] +[2] +[3] +[4] + +query ? 
+select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from arrays_values_without_nulls; +---- +[1] +[2] +[3] +[4] + +query ? +select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from fixed_size_arrays_values_without_nulls; +---- +[1] +[2] +[3] +[4] + +# array_positions with columns #2 (element is list) +query ? +select array_positions(column1, column2) from nested_arrays; +---- +[3] +[2, 5] + +query ? +select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), column2) from nested_arrays; +---- +[3] +[2, 5] + +query ? +select array_positions(column1, column2) from fixed_size_nested_arrays; +---- +[3] +[2, 5] + +# array_positions with columns and scalars #1 +query ?? +select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; +---- +[4] [1] +[] [] +[] [3] +[] [] + +query ?? +select array_positions(arrow_cast(column1, 'LargeList(Int64)'), 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; +---- +[4] [1] +[] [] +[] [3] +[] [] + +query ?? +select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from fixed_size_arrays_values_without_nulls; +---- +[4] [1] +[] [] +[] [3] +[] [] + +# array_positions with columns and scalars #2 (element is list) +query ?? +select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from nested_arrays; +---- +[6] [] +[1] [] + +query ?? +select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(4, 5, 6)), array_positions(arrow_cast(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), 'LargeList(List(Int64))'), column2) from nested_arrays; +---- +[6] [] +[1] [] + +query ?? 
+select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from fixed_size_nested_arrays; +---- +[6] [] +[1] [] + +## array_replace (aliases: `list_replace`) + +# array_replace scalar function #1 +query ??? +select + array_replace(make_array(1, 2, 3, 4), 2, 3), + array_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), + array_replace(make_array(1, 2, 3), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + array_replace(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), + array_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), + array_replace(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + array_replace(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3), + array_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0), + array_replace(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +# array_replace scalar function #2 (element is list) +query ?? +select + array_replace( + make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), + [4, 5, 6], + [1, 1, 1] + ), + array_replace( + make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? 
+select + array_replace( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), + [4, 5, 6], + [1, 1, 1] + ), + array_replace( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select + array_replace( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [4, 5, 6], + [1, 1, 1] + ), + array_replace( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +# list_replace scalar function #3 (function alias `array_replace`) +query ??? +select list_replace( + make_array(1, 2, 3, 4), 2, 3), + list_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), + list_replace(make_array(1, 2, 3), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select list_replace( + arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), + list_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), + list_replace(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] + +# array_replace scalar function #4 (null input) +query ? +select array_replace(make_array(1, 2, 3, 4, 5), NULL, NULL); +---- +[1, 2, 3, 4, 5] + +query ? +select array_replace(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL); +---- +[1, 2, 3, 4, 5] + +# array_replace scalar function with columns #1 +query ?
+select array_replace(column1, column2, column3) from arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 2, 2, 1, 3, 2, 3] +[7, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[10, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[13, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ? +select array_replace(column1, column2, column3) from large_arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 2, 2, 1, 3, 2, 3] +[7, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[10, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[13, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_replace scalar function with columns #2 (element is list) +query ? +select array_replace(column1, column2, column3) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[19, 20, 21], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[28, 29, 30], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ? +select array_replace(column1, column2, column3) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[19, 20, 21], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[28, 29, 30], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +# array_replace scalar function with columns and scalars #1 +query ??? 
+select + array_replace(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3), + array_replace(column1, 1, column3), + array_replace(column1, column2, 4) +from arrays_with_repeating_elements; +---- +[1, 4, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 1, 3, 2, 2, 1, 3, 2, 3] [1, 4, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 7, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ??? +select + array_replace(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3), + array_replace(column1, 1, column3), + array_replace(column1, column2, 4) +from large_arrays_with_repeating_elements; +---- +[1, 4, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 1, 3, 2, 2, 1, 3, 2, 3] [1, 4, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 7, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_replace scalar function with columns and scalars #2 (element is list) +query ??? 
+select + array_replace( + make_array( + [1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), + column2, + column3 + ), + array_replace(column1, make_array(1, 2, 3), column3), + array_replace(column1, column2, make_array(11, 12, 13)) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 
33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ??? +select + array_replace( + arrow_cast(make_array( + [1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]),'LargeList(List(Int64))'), + column2, + column3 + ), + array_replace(column1, make_array(1, 2, 3), column3), + array_replace(column1, column2, make_array(11, 12, 13)) +from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 
24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## array_replace_n (aliases: `list_replace_n`) + +# array_replace_n scalar function #1 +query ??? +select + array_replace_n(make_array(1, 2, 3, 4), 2, 3, 2), + array_replace_n(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0, 2), + array_replace_n(make_array(1, 2, 3), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + array_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3, 2), + array_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0, 2), + array_replace_n(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + array_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3, 2), + array_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0, 2), + array_replace_n(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +# array_replace_n scalar function #2 (element is list) +query ?? +select + array_replace_n( + make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), + [4, 5, 6], + [1, 1, 1], + 2 + ), + array_replace_n( + make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), + [2, 3, 4], + [3, 1, 4], + 2 + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +query ?? 
+select + array_replace_n( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), + [4, 5, 6], + [1, 1, 1], + 2 + ), + array_replace_n( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), + [2, 3, 4], + [3, 1, 4], + 2 + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select + array_replace_n( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [4, 5, 6], + [1, 1, 1], + 2 + ), + array_replace_n( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [2, 3, 4], + [3, 1, 4], + 2 + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +# list_replace_n scalar function #3 (function alias `array_replace_n`) +query ??? +select + list_replace_n(make_array(1, 2, 3, 4), 2, 3, 2), + list_replace_n(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0, 2), + list_replace_n(make_array(1, 2, 3), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +query ??? +select + list_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3, 2), + list_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0, 2), + list_replace_n(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0, 3); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] + +# array_replace_n scalar function #4 (null input) +query ? +select array_replace_n(make_array(1, 2, 3, 4, 5), NULL, NULL, NULL); +---- +[1, 2, 3, 4, 5] + +query ? +select array_replace_n(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL, NULL); +---- +[1, 2, 3, 4, 5] + +# array_replace_n scalar function with columns #1 +query ? 
+select + array_replace_n(column1, column2, column3, column4) +from arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 4, 4, 1, 3, 2, 3] +[7, 7, 5, 5, 6, 5, 5, 5, 4, 4] +[10, 10, 10, 8, 10, 9, 10, 8, 7, 7] +[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] + +query ? +select + array_replace_n(column1, column2, column3, column4) +from large_arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 4, 4, 1, 3, 2, 3] +[7, 7, 5, 5, 6, 5, 5, 5, 4, 4] +[10, 10, 10, 8, 10, 9, 10, 8, 7, 7] +[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] + +# array_replace_n scalar function with columns #2 (element is list) +query ? +select + array_replace_n(column1, column2, column3, column4) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] + +query ? 
+select + array_replace_n(column1, column2, column3, column4) +from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] + + +# array_replace_n scalar function with columns and scalars #1 +query ???? +select + array_replace_n(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3, column4), + array_replace_n(column1, 1, column3, column4), + array_replace_n(column1, column2, 4, column4), + array_replace_n(column1, column2, column3, 2) +from arrays_with_repeating_elements; +---- +[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 2, 3] [1, 4, 1, 3, 4, 2, 1, 3, 2, 3] +[1, 2, 2, 7, 5, 7, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [7, 7, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 7, 7] [10, 10, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] [13, 11, 12, 13, 11, 12, 10, 11, 12, 10] + +query ???? 
+select + array_replace_n(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3, column4), + array_replace_n(column1, 1, column3, column4), + array_replace_n(column1, column2, 4, column4), + array_replace_n(column1, column2, column3, 2) +from large_arrays_with_repeating_elements; +---- +[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 2, 3] [1, 4, 1, 3, 4, 2, 1, 3, 2, 3] +[1, 2, 2, 7, 5, 7, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [7, 7, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 7, 7] [10, 10, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] [13, 11, 12, 13, 11, 12, 10, 11, 12, 10] + +# array_replace_n scalar function with columns and scalars #2 (element is list) +query ???? +select + array_replace_n( + make_array( + [7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]), + column2, + column3, + column4 + ), + array_replace_n(column1, make_array(1, 2, 3), column3, column4), + array_replace_n(column1, column2, make_array(11, 12, 13), column4), + array_replace_n(column1, column2, column3, 2) +from nested_arrays_with_repeating_elements; +---- +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [10, 11, 12]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [19, 20, 21], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 
15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[28, 29, 30], [28, 29, 30], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] [[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ???? 
+select + array_replace_n( + arrow_cast(make_array( + [7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]), 'LargeList(List(Int64))'), + column2, + column3, + column4 + ), + array_replace_n(column1, make_array(1, 2, 3), column3, column4), + array_replace_n(column1, column2, make_array(11, 12, 13), column4), + array_replace_n(column1, column2, column3, 2) +from large_nested_arrays_with_repeating_elements; +---- +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [10, 11, 12]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [19, 20, 21], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[28, 29, 30], [28, 29, 30], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[28, 29, 30], [31, 32, 
33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] [[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## array_replace_all (aliases: `list_replace_all`) + +# array_replace_all scalar function #1 +query ??? +select + array_replace_all(make_array(1, 2, 3, 4), 2, 3), + array_replace_all(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), + array_replace_all(make_array(1, 2, 3), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +query ??? +select + array_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), + array_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), + array_replace_all(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +query ??? +select + array_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3), + array_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0), + array_replace_all(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +# array_replace_all scalar function #2 (element is list) +query ?? +select + array_replace_all( + make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), + [4, 5, 6], + [1, 1, 1] + ), + array_replace_all( + make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +query ?? 
+select + array_replace_all( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), + [4, 5, 6], + [1, 1, 1] + ), + array_replace_all( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select + array_replace_all( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [4, 5, 6], + [1, 1, 1] + ), + array_replace_all( + arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), + [2, 3, 4], + [3, 1, 4] + ); +---- +[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] + +# list_replace_all scalar function #3 (function alias `array_replace_all`) +query ??? +select + list_replace_all(make_array(1, 2, 3, 4), 2, 3), + list_replace_all(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), + list_replace_all(make_array(1, 2, 3), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +query ??? +select + list_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), + list_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), + list_replace_all(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); +---- +[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] + +# array_replace_all scalar function #4 (null input) +query ? +select array_replace_all(make_array(1, 2, 3, 4, 5), NULL, NULL); +---- +[1, 2, 3, 4, 5] + +query ? +select array_replace_all(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL); +---- +[1, 2, 3, 4, 5] + +# array_replace_all scalar function with columns #1 +query ? 
+select + array_replace_all(column1, column2, column3) +from arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 4, 4, 1, 3, 4, 3] +[7, 7, 5, 5, 6, 5, 5, 5, 7, 7] +[10, 10, 10, 8, 10, 9, 10, 8, 10, 10] +[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] + +query ? +select + array_replace_all(column1, column2, column3) +from large_arrays_with_repeating_elements; +---- +[1, 4, 1, 3, 4, 4, 1, 3, 4, 3] +[7, 7, 5, 5, 6, 5, 5, 5, 7, 7] +[10, 10, 10, 8, 10, 9, 10, 8, 10, 10] +[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] + +# array_replace_all scalar function with columns #2 (element is list) +query ? +select + array_replace_all(column1, column2, column3) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [7, 8, 9]] +[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [19, 20, 21], [19, 20, 21]] +[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [28, 29, 30], [28, 29, 30]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] + +query ? +select + array_replace_all(column1, column2, column3) +from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [7, 8, 9]] +[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [19, 20, 21], [19, 20, 21]] +[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [28, 29, 30], [28, 29, 30]] +[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] + +# array_replace_all scalar function with columns and scalars #1 +query ??? 
+select + array_replace_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3), + array_replace_all(column1, 1, column3), + array_replace_all(column1, column2, 4) +from arrays_with_repeating_elements; +---- +[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 4, 3] +[1, 2, 2, 7, 5, 7, 7, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] + +query ??? +select + array_replace_all(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3), + array_replace_all(column1, 1, column3), + array_replace_all(column1, column2, 4) +from large_arrays_with_repeating_elements; +---- +[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 4, 3] +[1, 2, 2, 7, 5, 7, 7, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] + +# array_replace_all scalar function with columns and scalars #2 (element is list) +query ??? 
+select + array_replace_all( + make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), + column2, + column3 + ), + array_replace_all(column1, make_array(1, 2, 3), column3), + array_replace_all(column1, column2, make_array(11, 12, 13)) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [10, 11, 12], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [19, 20, 21], [19, 20, 21], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [11, 12, 13], [11, 12, 13]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [11, 12, 13], [11, 12, 13]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] 
[[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] + +query ??? +select + array_replace_all( + arrow_cast(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), 'LargeList(List(Int64))'), + column2, + column3 + ), + array_replace_all(column1, make_array(1, 2, 3), column3), + array_replace_all(column1, column2, make_array(11, 12, 13)) +from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [10, 11, 12], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [19, 20, 21], [19, 20, 21], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [11, 12, 13], [11, 12, 13]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [11, 12, 13], [11, 12, 13]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 
20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] + +# array_replace with null handling + +statement ok +create table t as values + (make_array(3, 1, NULL, 3), 3, 4, 2), + (make_array(3, 1, NULL, 3), NULL, 5, 2), + (NULL, 3, 2, 1), + (make_array(3, 1, 3), 3, NULL, 1) +; + + +# ([3, 1, NULL, 3], 3, 4, 2) => [4, 1, NULL, 4] NULL not matched +# ([3, 1, NULL, 3], NULL, 5, 2) => [3, 1, 5, 3] NULL is replaced with 5 +# (NULL, 3, 2, 1) => NULL +# ([3, 1, 3], 3, NULL, 1) => [NULL, 1, 3] + +query ?III? +select column1, column2, column3, column4, array_replace_n(column1, column2, column3, column4) from t; +---- +[3, 1, NULL, 3] 3 4 2 [4, 1, NULL, 4] +[3, 1, NULL, 3] NULL 5 2 [3, 1, 5, 3] +NULL 3 2 1 NULL +[3, 1, 3] 3 NULL 1 [NULL, 1, 3] + + + +statement ok +drop table t; + + + +## array_to_string (aliases: `list_to_string`, `array_join`, `list_join`) + +# array_to_string scalar function #1 +query TTT +select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_to_string scalar function #2 +query TTT +select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_repeat(array_repeat(array_repeat(3, 2), 2), 3), '/\'); +---- +11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3 + +# array_to_string scalar function #3 +query T +select array_to_string(make_array(), ',') +---- +(empty) + +# array to string dictionary +statement ok +CREATE TABLE table1 AS VALUES + (1, 'foo'), + (3, 'bar'), + (1, 'foo'), + (2, NULL), + (NULL, 'baz') + ; + +# expect 1-3-1-2 (dictionary values should be repeated) +query T +SELECT
array_to_string(array_agg(column1),'-') +FROM ( + SELECT arrow_cast(column1, 'Dictionary(Int32, Int32)') as column1 + FROM table1 +); +---- +1-3-1-2 + +# expect foo,bar,foo,baz (dictionary values should be repeated) +query T +SELECT array_to_string(array_agg(column2),',') +FROM ( + SELECT arrow_cast(column2, 'Dictionary(Int64, Utf8)') as column2 + FROM table1 +); +---- +foo,bar,foo,baz + +# Expect only values that are in the group +query I?T +SELECT column1, array_agg(column2), array_to_string(array_agg(column2),',') +FROM ( + SELECT column1, arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2 + FROM table1 +) +GROUP BY column1 +ORDER BY column1; +---- +1 [foo, foo] foo,foo +2 [NULL] (empty) +3 [bar] bar +NULL [baz] baz + +# verify make_array does force to Utf8View +query T +SELECT arrow_typeof(make_array(arrow_cast('a', 'Utf8View'), 'b', 'c', 'd')); +---- +List(Utf8View) + +# expect a,b,c,d. make_array forces all types to be of a common type (see above) +query T +SELECT array_to_string(make_array(arrow_cast('a', 'Utf8View'), 'b', 'c', 'd'), ','); +---- +a,b,c,d + +# array_to_string using largeutf8 for second arg +query TTT +select array_to_string(['h', 'e', 'l', 'l', 'o'], arrow_cast(',', 'LargeUtf8')), array_to_string([1, 2, 3, 4, 5], arrow_cast('-', 'LargeUtf8')), array_to_string([1.0, 2.0, 3.0], arrow_cast('|', 'LargeUtf8')); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_to_string using utf8view for second arg +query TTT +select array_to_string(['h', 'e', 'l', 'l', 'o'], arrow_cast(',', 'Utf8View')), array_to_string([1, 2, 3, 4, 5], arrow_cast('-', 'Utf8View')), array_to_string([1.0, 2.0, 3.0], arrow_cast('|', 'Utf8View')); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +statement ok +drop table table1; + + +## array_union (aliases: `list_union`) + +# array_union scalar function #1 +query ? +select array_union([1, 2, 3, 4], [5, 6, 3, 4]); +---- +[1, 2, 3, 4, 5, 6] + +query ? 
+select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 3, 4], 'LargeList(Int64)')); +---- +[1, 2, 3, 4, 5, 6] + +query ? +select array_union(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6, 3, 4], 'FixedSizeList(4, Int64)')); +---- +[1, 2, 3, 4, 5, 6] + +query ? +select array_union(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6], 'FixedSizeList(2, Int64)')); +---- +[1, 2, 3, 4, 5, 6] + +# array_union scalar function #2 +query ? +select array_union([1, 2, 3, 4], [5, 6, 7, 8]); +---- +[1, 2, 3, 4, 5, 6, 7, 8] + +query ? +select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 7, 8], 'LargeList(Int64)')); +---- +[1, 2, 3, 4, 5, 6, 7, 8] + +# array_union scalar function #3 +query ? +select array_union([1,2,3], []); +---- +[1, 2, 3] + +query ? +select array_union(arrow_cast([1,2,3], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); +---- +[1, 2, 3] + +# array_union scalar function #4 +query ? +select array_union([1, 2, 3, 4], [5, 4]); +---- +[1, 2, 3, 4, 5] + +query ? +select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 4], 'LargeList(Int64)')); +---- +[1, 2, 3, 4, 5] + +# array_union scalar function #5 +statement ok +CREATE TABLE arrays_with_repeating_elements_for_union +AS VALUES + ([0, 1, 1], []), + ([1, 1], [2]), + ([2, 3], [3]), + ([3], [3, 4]) +; + +query ? +select array_union(column1, column2) from arrays_with_repeating_elements_for_union; +---- +[0, 1] +[1, 2] +[2, 3] +[3, 4] + +query ? +select array_union(arrow_cast(column1, 'LargeList(Int64)'), arrow_cast(column2, 'LargeList(Int64)')) from arrays_with_repeating_elements_for_union; +---- +[0, 1] +[1, 2] +[2, 3] +[3, 4] + +statement ok +drop table arrays_with_repeating_elements_for_union; + +# array_union scalar function #6 +query ? +select array_union([], []); +---- +[] + +query ? 
+select array_union(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); +---- +[] + +# array_union scalar function #7 +# re-enable when https://github.com/apache/arrow-rs/issues/9227 is fixed +# query ? +# select array_union([[null]], []); +# ---- +# [[]] + +query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_union' function: +select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([], 'LargeList(Int64)')); + +# array_union scalar function #8 +query ? +select array_union([null], [null]); +---- +[NULL] + +query ? +select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([[null]], 'LargeList(List(Int64))')); +---- +[[NULL]] + +# array_union scalar function #9 +query ? +select array_union(null, []); +---- +NULL + +query ? +select array_union(null, arrow_cast([], 'LargeList(Int64)')); +---- +NULL + +# array_union scalar function #10 +query ? +select array_union(null, null); +---- +NULL + +# array_union scalar function #11 +query ? +select array_union([1, 1, 2, 2, 3, 3], null); +---- +NULL + +query ? +select array_union(arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)'), null); +---- +NULL + +# array_union scalar function #12 +query ? +select array_union(null, [1, 1, 2, 2, 3, 3]); +---- +NULL + +query ? +select array_union(null, arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)')); +---- +NULL + +# array_union scalar function #13 +query ? +select array_union([1.2, 3.0], [1.2, 3.0, 5.7]); +---- +[1.2, 3.0, 5.7] + +query ? +select array_union(arrow_cast([1.2, 3.0], 'LargeList(Float64)'), arrow_cast([1.2, 3.0, 5.7], 'LargeList(Float64)')); +---- +[1.2, 3.0, 5.7] + +# array_union scalar function #14 +query ? +select array_union(['hello'], ['hello','datafusion']); +---- +[hello, datafusion] + +query ? +select array_union(arrow_cast(['hello'], 'LargeList(Utf8)'), arrow_cast(['hello','datafusion'], 'LargeList(Utf8)')); +---- +[hello, datafusion] + +query ? 
+select array_union(column1, column2) +from array_intersect_table_1D_NULL; +---- +[1, 2, 3, 4] +[2, 3] +[3, 4] +NULL +NULL +NULL + +query ? +select array_union(arrow_cast(null, 'List(Int64)'), [1, 2]); +---- +NULL + +query ? +select array_union([1, 2], arrow_cast(null, 'List(Int64)')); +---- +NULL + +query ? +select array_intersect(arrow_cast(null, 'List(Int64)'), [1, 2]); +---- +NULL + +query ? +select array_intersect([1, 2], arrow_cast(null, 'List(Int64)')); +---- +NULL + +query ? +select array_except(arrow_cast(null, 'List(Int64)'), [1, 2]); +---- +NULL + +query ? +select array_except([1, 2], arrow_cast(null, 'List(Int64)')); +---- +NULL + +# list_to_string scalar function #4 (function alias `array_to_string`) +query TTT +select list_to_string(['h', 'e', 'l', 'l', 'o'], ','), list_to_string([1, 2, 3, 4, 5], '-'), list_to_string([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +query TTT +select list_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_join scalar function #5 (function alias `array_to_string`) +query TTT +select array_join(['h', 'e', 'l', 'l', 'o'], ','), array_join([1, 2, 3, 4, 5], '-'), array_join([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +query TTT +select array_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# list_join scalar function #6 (function alias `array_to_string`) +query TTT +select list_join(['h', 'e', 'l', 'l', 'o'], ','), list_join([1, 2, 3, 4, 5], '-'), list_join([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +query TTT +select list_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','),
list_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_to_string scalar function with nulls #1 +query TTT +select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|'); +---- +h,l,o 1-3-5 2|3 + +query TTT +select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +query TTT +select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'FixedSizeList(3, Float64)'), '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_to_string scalar function with nulls #2 +query TTT +select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0'); +---- +h,-,-,-,o nil-2-nil-4-5 1|0|3 + +query TTT +select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'LargeList(Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'LargeList(Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'LargeList(Float64)'), '|', '0'); +---- +h,-,-,-,o nil-2-nil-4-5 1|0|3 + +query TTT +select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'FixedSizeList(5, Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'FixedSizeList(5, Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'FixedSizeList(3, Float64)'), '|', '0'); +---- +h,-,-,-,o nil-2-nil-4-5 1|0|3 + +# 
array_to_string float formatting: special values and longer decimals +query TTT +select + array_to_string(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), '|'), + array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'LargeList(Float64)'), '|'), + array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'FixedSizeList(5, Float64)'), '|'); +---- +NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 + +# array_to_string float formatting: scientific-notation inputs +query T +select array_to_string( + make_array( + CAST('1E20' AS DOUBLE), + CAST('-1e+20' AS DOUBLE), + CAST('6.02214076e23' AS DOUBLE), + CAST('1.2345e6' AS DOUBLE), + CAST('1e-5' AS DOUBLE), + CAST('-1e-5' AS DOUBLE), + CAST('9.1093837015e-31' AS DOUBLE), + CAST('-2.5e-4' AS DOUBLE) + ), + '|' +); +---- +100000000000000000000|-100000000000000000000|602214076000000000000000|1234500|0.00001|-0.00001|0.00000000000000000000000000000091093837015|-0.00025 + +query T +select array_to_string(arrow_cast([arrow_cast([NULL, 'a'], 'FixedSizeList(2, Utf8)'), NULL], 'FixedSizeList(2, FixedSizeList(2, Utf8))'), ',', '-'); +---- +-,a,- + +# array_to_string with columns #1 + +# For reference +# select column1, column4 from arrays_values; +# ---- +# [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] , +# [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] . 
+# [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] - +# [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] ok +# NULL @ +# [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] $ +# [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] ^ +# [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] NULL + +query T +select array_to_string(column1, column4) from arrays_values; +---- +2,3,4,5,6,7,8,9,10 +11.12.13.14.15.16.17.18.20 +21-22-23-25-26-27-28-29-30 +31ok32ok33ok34ok35ok37ok38ok39ok40 +NULL +41$42$43$44$45$46$47$48$49$50 +51^52^54^55^56^57^58^59^60 +NULL + +query T +select array_to_string(column1, column4) from large_arrays_values; +---- +2,3,4,5,6,7,8,9,10 +11.12.13.14.15.16.17.18.20 +21-22-23-25-26-27-28-29-30 +31ok32ok33ok34ok35ok37ok38ok39ok40 +NULL +41$42$43$44$45$46$47$48$49$50 +51^52^54^55^56^57^58^59^60 +NULL + +query TT +select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from arrays_values; +---- +2_3_4_5_6_7_8_9_10 1/2/3 +11_12_13_14_15_16_17_18_20 1/2/3 +21_22_23_25_26_27_28_29_30 1/2/3 +31_32_33_34_35_37_38_39_40 1/2/3 +NULL 1/2/3 +41_42_43_44_45_46_47_48_49_50 1/2/3 +51_52_54_55_56_57_58_59_60 1/2/3 +61_62_63_64_65_66_67_68_69_70 1/2/3 + +query TT +select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from large_arrays_values; +---- +2_3_4_5_6_7_8_9_10 1/2/3 +11_12_13_14_15_16_17_18_20 1/2/3 +21_22_23_25_26_27_28_29_30 1/2/3 +31_32_33_34_35_37_38_39_40 1/2/3 +NULL 1/2/3 +41_42_43_44_45_46_47_48_49_50 1/2/3 +51_52_54_55_56_57_58_59_60 1/2/3 +61_62_63_64_65_66_67_68_69_70 1/2/3 + +query TT +select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values; +---- +*_2_3_4_5_6_7_8_9_10 1.2.3 +11_12_13_14_15_16_17_18_*_20 1.2.3 +21_22_23_*_25_26_27_28_29_30 1.2.3 +31_32_33_34_35_*_37_38_39_40 1.2.3 +NULL 1.2.3 +41_42_43_44_45_46_47_48_49_50 1.2.3 +51_52_*_54_55_56_57_58_59_60 1.2.3 +61_62_63_64_65_66_67_68_69_70 1.2.3 + +query TT +select array_to_string(column1, '_', '*'), 
array_to_string(make_array(make_array(1,2,3)), '.') from large_arrays_values; +---- +*_2_3_4_5_6_7_8_9_10 1.2.3 +11_12_13_14_15_16_17_18_*_20 1.2.3 +21_22_23_*_25_26_27_28_29_30 1.2.3 +31_32_33_34_35_*_37_38_39_40 1.2.3 +NULL 1.2.3 +41_42_43_44_45_46_47_48_49_50 1.2.3 +51_52_*_54_55_56_57_58_59_60 1.2.3 +61_62_63_64_65_66_67_68_69_70 1.2.3 + +# array_to_string with per-row null_string column +statement ok +CREATE TABLE test_null_str_col AS VALUES + (make_array(1, NULL, 3), ',', 'N/A'), + (make_array(NULL, 5, NULL), ',', 'MISSING'), + (make_array(10, NULL, 12), '-', 'X'), + (make_array(20, NULL, 21), '-', NULL); + +query T +SELECT array_to_string(column1, column2, column3) FROM test_null_str_col; +---- +1,N/A,3 +MISSING,5,MISSING +10-X-12 +20-21 + +statement ok +DROP TABLE test_null_str_col; + +# array_to_string with decimal values +query T +select array_to_string(arrow_cast(make_array(1.5, NULL, 3.14), 'List(Decimal128(10, 2))'), ',', 'N'); +---- +1.50,N,3.14 + +# array_to_string with date values +query T +select array_to_string(arrow_cast(make_array('2024-01-15', '2024-06-30', '2024-12-25'), 'List(Date32)'), ','); +---- +2024-01-15,2024-06-30,2024-12-25 + +query T +select array_to_string(arrow_cast(make_array('2024-01-15', NULL, '2024-12-25'), 'List(Date32)'), ',', 'N'); +---- +2024-01-15,N,2024-12-25 + +# array_to_string with timestamp values +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Second, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Second, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Millisecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Millisecond, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Microsecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Microsecond, 
None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Nanosecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Nanosecond, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +# array_to_string with time values +query T +select array_to_string(make_array(arrow_cast('10:30:00', 'Time32(Second)'), arrow_cast('15:45:00', 'Time32(Second)')), ','); +---- +10:30:00,15:45:00 + +query T +select array_to_string(make_array(arrow_cast('10:30:00', 'Time64(Microsecond)'), arrow_cast('15:45:00', 'Time64(Microsecond)')), ','); +---- +10:30:00,15:45:00 + +# array_to_string with interval values +query T +select array_to_string(make_array(interval '1 year 2 months', interval '3 days 4 hours'), ','); +---- +14 mons,3 days 4 hours + +# array_to_string with duration values +query T +select array_to_string(make_array(arrow_cast(1000, 'Duration(Millisecond)'), arrow_cast(2000, 'Duration(Millisecond)')), ','); +---- +PT1S,PT2S + + +## cardinality + +# cardinality scalar function +query III +select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinality(make_array('h', 'e', 'l', 'l', 'o')); +---- +5 3 5 + +query III +select cardinality(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), cardinality(arrow_cast([1, 3, 5], 'LargeList(Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); +---- +5 3 5 + +query III +select cardinality(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)')), cardinality(arrow_cast([1, 3, 5], 'FixedSizeList(3, Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +5 3 5 + +# cardinality scalar function #2 +query II +select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_repeat(array_repeat(array_repeat(3, 3), 2), 3)); +---- +6 18 + +query I +select cardinality(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 
'LargeList(List(Int64))')); +---- +6 + +query I +select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(Int64))')); +---- +6 + +# cardinality scalar function #3 +query II +select cardinality(make_array()), cardinality(make_array(make_array())) +---- +0 0 + +query II +select cardinality([]), cardinality([]::int[]) as with_cast +---- +0 0 + +query II +select cardinality(arrow_cast(make_array(), 'LargeList(Int64)')), cardinality(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) +---- +0 0 + +#TODO +#https://github.com/apache/datafusion/issues/9158 +#query II +#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Int64))')) +#---- +#NULL 0 + +# cardinality of NULL arrays should return NULL +query II +select cardinality(NULL), cardinality(arrow_cast(NULL, 'LargeList(Int64)')) +---- +NULL NULL + +# cardinality with columns +query III +select cardinality(column1), cardinality(column2), cardinality(column3) from arrays; +---- +4 3 5 +4 3 5 +4 3 5 +4 3 3 +NULL 3 4 +4 NULL 1 +4 3 NULL + +query III +select cardinality(column1), cardinality(column2), cardinality(column3) from large_arrays; +---- +4 3 5 +4 3 5 +4 3 5 +4 3 3 +NULL 3 4 +4 NULL 1 +4 3 NULL + +query III +select cardinality(column1), cardinality(column2), cardinality(column3) from fixed_size_arrays; +---- +4 3 5 +4 3 5 +4 3 5 +4 3 5 +NULL 3 5 +4 NULL 5 +4 3 NULL + +## array_remove (aliases: `list_remove`) + +# array_remove scalar function #1 +query ??? +select array_remove(make_array(1, 2, 2, 1, 1), 2), array_remove(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), array_remove(make_array('h', 'e', 'l', 'l', 'o'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + +query ??? 
+select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), + array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + +query ??? +select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), + array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), + array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + +query ??? +select + array_remove(make_array(1, null, 2, 3), 2), + array_remove(make_array(1.1, null, 2.2, 3.3), 1.1), + array_remove(make_array('a', null, 'bc'), 'a'); +---- +[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] + +query ??? +select + array_remove(arrow_cast(make_array(1, null, 2, 3), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'LargeList(Float64)'), 1.1), + array_remove(arrow_cast(make_array('a', null, 'bc'), 'LargeList(Utf8)'), 'a'); +---- +[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] + +query ??? +select + array_remove(arrow_cast(make_array(1, null, 2, 3), 'FixedSizeList(4, Int64)'), 2), + array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'FixedSizeList(4, Float64)'), 1.1), + array_remove(arrow_cast(make_array('a', null, 'bc'), 'FixedSizeList(3, Utf8)'), 'a'); +---- +[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] + +#TODO: https://github.com/apache/datafusion/issues/7142 +# follow PostgreSQL behavior +#query ? +#select +# array_remove(NULL, 1) +#---- +#NULL + +query ?? +select + array_remove(make_array(1, null, 2), null), + array_remove(make_array(1, null, 2, null), null); +---- +NULL NULL + +query ?? 
+select + array_remove(arrow_cast(make_array(1, null, 2), 'LargeList(Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'LargeList(Int64)'), null); +---- +NULL NULL + +query ?? +select + array_remove(arrow_cast(make_array(1, null, 2), 'FixedSizeList(3, Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'FixedSizeList(4, Int64)'), null); +---- +NULL NULL + +# array_remove with null element from column +query ? +select array_remove(column1, column2) from (values + (make_array(1, 2, 3), 2), + (make_array(4, 5, 6), null), + (make_array(7, 8, 9), 8), + (null, 1) +) as t(column1, column2); +---- +[1, 3] +NULL +[7, 9] +NULL + +# array_remove with null element from column (LargeList) +query ? +select array_remove(column1, column2) from (values + (arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2), + (arrow_cast(make_array(4, 5, 6), 'LargeList(Int64)'), null), + (arrow_cast(make_array(7, 8, 9), 'LargeList(Int64)'), 8) +) as t(column1, column2); +---- +[1, 3] +NULL +[7, 9] + +# array_remove scalar function #2 (element is list) +query ?? +select array_remove(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? 
+select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +# list_remove scalar function #3 (function alias `array_remove`) +query ??? +select list_remove(make_array(1, 2, 2, 1, 1), 2), list_remove(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove(make_array('h', 'e', 'l', 'l', 'o'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + +query ?? +select list_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + list_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + +# array_remove scalar function with columns #1 +query ? +select array_remove(column1, column2) from arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ? +select array_remove(column1, column2) from large_arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ? 
+select array_remove(column1, column2) from fixed_arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_remove scalar function with columns #2 (element is list) +query ? +select array_remove(column1, column2) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ? +select array_remove(column1, column2) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ? 
+select array_remove(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +# array_remove scalar function with columns and scalars #1 +query ?? +select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ?? +select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from large_arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ?? 
+select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from fixed_arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_remove scalar function with columns and scalars #2 (element is list) +query ?? +select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ?? 
+select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ?? 
+select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## array_remove_n (aliases: `list_remove_n`) + +# array_remove_n with null element scalar +query ?? +select array_remove_n(make_array(1, 2, 2, 1, 1), NULL, 2), + array_remove_n(make_array(1, 2, 2, 1, 1), 2, 2); +---- +NULL [1, 1, 1] + +# array_remove_n with null element scalar (LargeList) +query ?? +select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), NULL, 2), + array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2, 2); +---- +NULL [1, 1, 1] + +# array_remove_n with null element from column +query ? 
+select array_remove_n(column1, column2, column3) from (values + (make_array(1, 2, 2, 1, 1), 2, 2), + (make_array(3, 4, 4, 3, 3), null, 2), + (make_array(5, 6, 6, 5, 5), 6, 1), + (null, 1, 1) +) as t(column1, column2, column3); +---- +[1, 1, 1] +NULL +[5, 6, 5, 5] +NULL + +# array_remove_n with null element from column (LargeList) +query ? +select array_remove_n(column1, column2, column3) from (values + (arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2, 2), + (arrow_cast(make_array(3, 4, 4, 3, 3), 'LargeList(Int64)'), null, 2), + (arrow_cast(make_array(5, 6, 6, 5, 5), 'LargeList(Int64)'), 6, 1) +) as t(column1, column2, column3); +---- +[1, 1, 1] +NULL +[5, 6, 5, 5] + +# array_remove_n scalar function #1 +query ??? +select array_remove_n(make_array(1, 2, 2, 1, 1), 2, 2), array_remove_n(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0, 2), array_remove_n(make_array('h', 'e', 'l', 'l', 'o'), 'l', 3); +---- +[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] + +query ??? +select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int32)'), 2, 2), + array_remove_n(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float32)'), 1.0, 2), + array_remove_n(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l', 3); +---- +[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] + +query ??? +select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int32)'), 2, 2), + array_remove_n(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float32)'), 1.0, 2), + array_remove_n(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l', 3); +---- +[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] + +# array_remove_n scalar function #2 (element is list) +query ?? 
+select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6], 2), array_remove_n(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4], 2); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove_n(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6], 2), + array_remove_n(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4], 2); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove_n(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6], 2), + array_remove_n(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4], 2); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +# list_remove_n scalar function #3 (function alias `array_remove_n`) +query ??? +select list_remove_n(make_array(1, 2, 2, 1, 1), 2, 2), list_remove_n(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0, 2), list_remove_n(make_array('h', 'e', 'l', 'l', 'o'), 'l', 3); +---- +[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] + +# array_remove_n scalar function with columns #1 +query ? +select array_remove_n(column1, column2, column4) from arrays_with_repeating_elements; +---- +[1, 1, 3, 1, 3, 2, 3] +[5, 5, 6, 5, 5, 5, 4, 4] +[8, 9, 8, 7, 7] +[11, 12, 11, 12, 11, 12] + +# array_remove_n scalar function with columns #2 (element is list) +query ? 
+select array_remove_n(column1, column2, column4) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[22, 23, 24], [25, 26, 27], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] + +# array_remove_n scalar function with columns and scalars #1 +query ??? +select array_remove_n(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column4), array_remove_n(column1, 1, column4), array_remove_n(column1, column2, 2) from arrays_with_repeating_elements; +---- +[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] [1, 1, 3, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [11, 12, 11, 12, 10, 11, 12, 10] + +# array_remove_n scalar function with columns and scalars #2 (element is list) +query ??? 
+select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2, column4), array_remove_n(column1, make_array(1, 2, 3), column4), array_remove_n(column1, column2, 2) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## array_remove_all (aliases: `list_removes`) + +#TODO: https://github.com/apache/datafusion/issues/7142 +# array_remove_all with NULL elements +#query ?
+#select array_remove_all(NULL, 1); +#---- +#NULL + +query ? +select array_remove_all(make_array(1, 2, 2, 1, 1), NULL); +---- +NULL + +# array_remove_all with null element from column +query ? +select array_remove_all(column1, column2) from (values + (make_array(1, 2, 2, 1, 1), 2), + (make_array(3, 4, 4, 3, 3), null), + (make_array(5, 6, 6, 5, 5), 6), + (null, 1) +) as t(column1, column2); +---- +[1, 1, 1] +NULL +[5, 5, 5] +NULL + +# array_remove_all with null element from column (LargeList) +query ? +select array_remove_all(column1, column2) from (values + (arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), + (arrow_cast(make_array(3, 4, 4, 3, 3), 'LargeList(Int64)'), null), + (arrow_cast(make_array(5, 6, 6, 5, 5), 'LargeList(Int64)'), 6) +) as t(column1, column2); +---- +[1, 1, 1] +NULL +[5, 5, 5] + +# array_remove_all scalar function #1 +query ??? +select array_remove_all(make_array(1, 2, 2, 1, 1), 2), array_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), array_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); +---- +[1, 1, 1] [2.0, 2.0] [h, e, o] + +query ??? +select array_remove_all(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), + array_remove_all(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), + array_remove_all(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); +---- +[1, 1, 1] [2.0, 2.0] [h, e, o] + +query ??? +select array_remove_all(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), array_remove_all(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), array_remove_all(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); +---- +[1, 1, 1] [2.0, 2.0] [h, e, o] + +# array_remove_all scalar function #2 (element is list) +query ?? 
+select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove_all(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + array_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +query ?? +select array_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6]), + array_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +# list_remove_all scalar function #3 (function alias `array_remove_all`) +query ??? +select list_remove_all(make_array(1, 2, 2, 1, 1), 2), list_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); +---- +[1, 1, 1] [2.0, 2.0] [h, e, o] + +query ?? +select list_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), + list_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] + +# array_remove_all scalar function with columns #1 +query ? +select array_remove_all(column1, column2) from arrays_with_repeating_elements; +---- +[1, 1, 3, 1, 3, 3] +[5, 5, 6, 5, 5, 5] +[8, 9, 8] +[11, 12, 11, 12, 11, 12] + +query ? 
+select array_remove_all(column1, column2) from fixed_arrays_with_repeating_elements; +---- +[1, 1, 3, 1, 3, 3] +[5, 5, 6, 5, 5, 5] +[8, 9, 8] +[11, 12, 11, 12, 11, 12] + +# array_remove_all scalar function with columns #2 (element is list) +query ? +select array_remove_all(column1, column2) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [7, 8, 9]] +[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15]] +[[22, 23, 24], [25, 26, 27], [22, 23, 24]] +[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] + +query ? +select array_remove_all(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [7, 8, 9]] +[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15]] +[[22, 23, 24], [25, 26, 27], [22, 23, 24]] +[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] + +# array_remove_all scalar function with columns and scalars #1 +query ?? +select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from arrays_with_repeating_elements; +---- +[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] +[1, 2, 2, 5, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +query ?? 
+select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from fixed_arrays_with_repeating_elements; +---- +[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] +[1, 2, 2, 5, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + +# array_remove_all scalar function with columns and scalars #2 (element is list) +query ?? +select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), array_remove_all(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +query ?? 
+select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove_all(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + +## trim_array (deprecated) + +## array_length (aliases: `list_length`) + +# array_length scalar function #1 +query III +select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6])); +---- +5 3 3 + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))')); +---- +5 3 3 + +# array_length scalar function #2 +query III +select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 
2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1); +---- +5 3 3 + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 1), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 1), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))'), 1); +---- +5 3 3 + +# array_length scalar function #3 +query III +select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2); +---- +NULL NULL 2 + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))'), 2); +---- +NULL NULL 2 + +# array_length scalar function #4 +query II +select array_length(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 1), array_length(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 2); +---- +3 2 + +query II +select array_length(arrow_cast(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 'LargeList(List(List(Int64)))'), 1), array_length(arrow_cast(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 'LargeList(List(List(Int64)))'), 2); +---- +3 2 + +# array_length scalar function #5 +query III +select array_length(make_array()), array_length(make_array(), 1), array_length(make_array(), 2) +---- +0 0 NULL + +# array_length scalar function #6 nested array +query III +select array_length([[1, 2, 3, 4], [5, 6, 7, 8]]), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 1), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 2); +---- +2 2 4 + +# list_length scalar function #7 (function alias `array_length`) +query IIII +select list_length(make_array(1, 2, 3, 4, 5)), list_length(make_array(1, 2, 3)), list_length(make_array([1, 2], [3, 4], [5, 6])), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 3); +---- +5 3 3 NULL + +query III +select 
list_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), list_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))')); +---- +5 3 3 + +# array_length with columns +query I +select array_length(column1, column3) from arrays_values; +---- +10 +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +query I +select array_length(arrow_cast(column1, 'LargeList(Int64)'), column3) from arrays_values; +---- +10 +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +# array_length with columns and scalars +query II +select array_length(array[array[1, 2], array[3, 4]], column3), array_length(column1, 1) from arrays_values; +---- +2 10 +2 10 +NULL 10 +NULL 10 +NULL NULL +NULL 10 +NULL 10 +NULL 10 + +query II +select array_length(arrow_cast(array[array[1, 2], array[3, 4]], 'LargeList(List(Int64))'), column3), array_length(arrow_cast(column1, 'LargeList(Int64)'), 1) from arrays_values; +---- +2 10 +2 10 +NULL 10 +NULL 10 +NULL NULL +NULL 10 +NULL 10 +NULL 10 + +# array_length for fixed sized list + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_length(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))')); +---- +5 3 3 + +query III +select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 1), array_length(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), 1); +---- +5 3 3 + + +query RRR +select array_distance([2], [3]), list_distance([1], [2]), list_distance([1], [-2]); +---- +1 1 3 + +query error +select list_distance([1], [1, 2]); + +query R +select array_distance([[1, 1]], [1, 2]); +---- +1 + +query R +select array_distance([[1, 1]], [[1, 2]]); +---- +1 + +query R +select array_distance([[1, 1]], [[1, 
2]]); +---- +1 + +query RR +select array_distance([1, 1, 0, 0], [2, 2, 1, 1]), list_distance([1, 2, 3], [1, 2, 3]); +---- +2 0 + +query RR +select array_distance([1.0, 1, 0, 0], [2, 2.0, 1, 1]), list_distance([1, 2.0, 3], [1, 2, 3]); +---- +2 0 + +query R +select list_distance([1, 1, NULL, 0], [2, 2, NULL, NULL]); +---- +NULL + +query R +select list_distance([NULL, NULL], [NULL, NULL]); +---- +NULL + +query R +select list_distance([1.0, 2.0, 3.0], [1.0, 2.0, 3.5]) AS distance; +---- +0.5 + +query R +select list_distance([1, 2, 3], [1, 2, 3]) AS distance; +---- +0 + +# array_distance with columns +query RRR +select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from arrays_distance_table; +---- +0 0.374165738677 NULL +5.196152422707 6.063827174318 NULL +10.392304845413 11.778794505381 NULL +15.58845726812 15.935494971917 NULL + +query RRR +select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from large_arrays_distance_table; +---- +0 0.374165738677 NULL +5.196152422707 6.063827174318 NULL +10.392304845413 11.778794505381 NULL +15.58845726812 15.935494971917 NULL + +query RRR +select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from fixed_size_arrays_distance_table; +---- +0 0.374165738677 NULL +5.196152422707 6.063827174318 NULL +10.392304845413 11.778794505381 NULL +15.58845726812 15.935494971917 NULL + + +## array_dims (aliases: `list_dims`) + +# array dims error +query error +select array_dims(1); + +# array_dims scalar function +query ??? +select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]])); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +query ??? 
+select array_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +query ??? +select array_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +# array_dims scalar function #2 +query ?? +select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)), array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2)); +---- +[1, 2, 3] [2, 5, 4] + +query ?? +select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'LargeList(List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'LargeList(List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + +query ?? +select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'FixedSizeList(1, List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'FixedSizeList(2, List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + +# array_dims scalar function #3 +query ?? +select array_dims(make_array()), array_dims(make_array(make_array())) +---- +NULL [1, 0] + +query ?? +select array_dims(arrow_cast(make_array(), 'LargeList(Int64)')), array_dims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) +---- +NULL [1, 0] + +# list_dims scalar function #4 (function alias `array_dims`) +query ??? +select list_dims(make_array(1, 2, 3)), list_dims(make_array([1, 2], [3, 4])), list_dims(make_array([[[[1], [2]]]])); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +query ??? 
+select list_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +query ??? +select list_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + +# array_dims with columns +query ??? +select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; +---- +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [3] +NULL [3] [4] +[2, 2] NULL [1] +[2, 2] [3] NULL + +query ??? +select array_dims(column1), array_dims(column2), array_dims(column3) from large_arrays; +---- +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [3] +NULL [3] [4] +[2, 2] NULL [1] +[2, 2] [3] NULL + +query ??? 
+select array_dims(column1), array_dims(column2), array_dims(column3) from fixed_size_arrays; +---- +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +NULL [3] [5] +[2, 2] NULL [5] +[2, 2] [3] NULL + + +## array_ndims (aliases: `list_ndims`) + +# array_ndims scalar function #1 + +#follow PostgreSQL +query I +select + array_ndims(null); +---- +NULL + +query I +select + array_ndims([2, 3]); +---- +1 + +statement ok +CREATE TABLE array_ndims_table +AS VALUES + ([1], [1, 2, 3], [[7]], [[[[[10]]]]]), + ([2], [4, 5], [[8]], [[[[[10]]]]]), + (NUll, [6, 7], [[9]], [[[[[10]]]]]), + ([3], [6], [[9]], [[[[[10]]]]]) +; + +statement ok +CREATE TABLE large_array_ndims_table +AS SELECT + column1, + arrow_cast(column2, 'LargeList(Int64)') as column2, + arrow_cast(column3, 'LargeList(List(Int64))') as column3, + arrow_cast(column4, 'LargeList(List(List(List(List(Int64)))))') as column4 +FROM array_ndims_table; + +statement ok +CREATE TABLE fixed_array_ndims_table +AS VALUES + (arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'), arrow_cast([[7]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([2], 'FixedSizeList(1, Int64)'), arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)'), arrow_cast([[8]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (null, arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([3], 'FixedSizeList(1, Int64)'), arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')) +; + +query IIII +select + array_ndims(column1), + array_ndims(column2), + array_ndims(column3), + array_ndims(column4) +from array_ndims_table; 
+---- +1 1 2 5 +1 1 2 5 +NULL 1 2 5 +1 1 2 5 + +query IIII +select + array_ndims(column1), + array_ndims(column2), + array_ndims(column3), + array_ndims(column4) +from large_array_ndims_table; +---- +1 1 2 5 +1 1 2 5 +NULL 1 2 5 +1 1 2 5 + +query IIII +select + array_ndims(column1), + array_ndims(column2), + array_ndims(column3), + array_ndims(column4) +from fixed_array_ndims_table; +---- +1 1 2 5 +1 1 2 5 +NULL 1 2 5 +1 1 2 5 + + + +statement ok +drop table array_ndims_table; + +statement ok +drop table large_array_ndims_table + +query I +select array_ndims(arrow_cast([null], 'List(List(List(Int64)))')); +---- +3 + +# array_ndims scalar function #2 +query II +select array_ndims(array_repeat(array_repeat(array_repeat(1, 3), 2), 1)), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); +---- +3 21 + +# array_ndims scalar function #3 +query II +select array_ndims(make_array()), array_ndims(make_array(make_array())) +---- +1 2 + +query II +select array_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), array_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) +---- +1 2 + +# list_ndims scalar function #4 (function alias `array_ndims`) +query III +select list_ndims(make_array(1, 2, 3)), list_ndims(make_array([1, 2], [3, 4])), list_ndims(make_array([[[[1], [2]]]])); +---- +1 2 5 + +query III +select list_ndims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_ndims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_ndims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); +---- +1 2 5 + +query II +select list_ndims(make_array()), list_ndims(make_array(make_array())) +---- +1 2 + +query II +select list_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), list_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) +---- +1 2 + +# array_ndims with columns +query III +select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arrays; +---- +2 1 1 +2 1 1 
+2 1 1 +2 1 1 +NULL 1 1 +2 NULL 1 +2 1 NULL + +query III +select array_ndims(column1), array_ndims(column2), array_ndims(column3) from large_arrays; +---- +2 1 1 +2 1 1 +2 1 1 +2 1 1 +NULL 1 1 +2 NULL 1 +2 1 NULL + +## array_has/array_has_all/array_has_any + +# If lhs is empty, return false +query B +select array_has([], 1); +---- +false + +# If rhs is Null, we return Null +query BBB +select array_has([], null), + array_has([1, 2, 3], null), + array_has([null, 1], null); +---- +NULL NULL NULL + +# Always return false if not contained even if list has null elements +query BB +select array_has([1, null, 2], 3), + array_has([null, null, null], 3); +---- +false false + +#TODO: array_has_all and array_has_any cannot handle NULL +#query BBBB +#select array_has_any([], null), +# array_has_any([1, 2, 3], null), +# array_has_all([], null), +# array_has_all([1, 2, 3], null); +#---- +#false false false false + +query BBBBBBBBBBBB +select array_has(make_array(1,2), 1), + array_has(make_array(1,2,NULL), 1), + array_has(make_array([2,3], [3,4]), make_array(2,3)), + array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1], [2,3])), + array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([4,5], [6])), + array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1])), + array_has(make_array([[[1]]]), make_array([[1]])), + array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[2]])), + array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[1], [2]])), + list_has(make_array(1,2,3), 4), + array_contains(make_array(1,2,3), 3), + list_contains(make_array(1,2,3), 0) +; +---- +true true true true true false true false true false true false + +query BBBBBBBBBBBB +select array_has(arrow_cast(make_array(1,2), 'LargeList(Int64)'), 1), + array_has(arrow_cast(make_array(1,2,NULL), 'LargeList(Int64)'), 1), + array_has(arrow_cast(make_array([2,3], [3,4]), 'LargeList(List(Int64))'), make_array(2,3)), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]),
'LargeList(List(List(Int64)))'), make_array([1], [2,3])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'LargeList(List(List(Int64)))'), make_array([4,5], [6])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'LargeList(List(List(Int64)))'), make_array([1])), + array_has(arrow_cast(make_array([[[1]]]), 'LargeList(List(List(List(Int64))))'), make_array([[1]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'LargeList(List(List(List(Int64))))'), make_array([[2]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'LargeList(List(List(List(Int64))))'), make_array([[1], [2]])), + list_has(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 4), + array_contains(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 3), + list_contains(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 0) +; +---- +true true true true true false true false true false true false + +query BBBBBBBBBBBB +select array_has(arrow_cast(make_array(1,2), 'FixedSizeList(2, Int64)'), 1), + array_has(arrow_cast(make_array(1,2,NULL), 'FixedSizeList(3, Int64)'), 1), + array_has(arrow_cast(make_array([2,3], [3,4]), 'FixedSizeList(2, List(Int64))'), make_array(2,3)), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1], [2,3])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([4,5], [6])), + array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1])), + array_has(arrow_cast(make_array([[[1]]]), 'FixedSizeList(1, List(List(List(Int64))))'), make_array([[1]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[2]])), + array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[1], [2]])), + list_has(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, 
Int64)'), 4), + array_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 3), + list_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 0) +; +---- +true true true true true false true false true false true false + +query BBB +select array_has(column1, column2), + array_has_all(column3, column4), + array_has_any(column5, column6) +from array_has_table_1D; +---- +true true true +false false false + +query BBB +select array_has(arrow_cast(column1, 'LargeList(Int64)'), column2), + array_has_all(arrow_cast(column3, 'LargeList(Int64)'), arrow_cast(column4, 'LargeList(Int64)')), + array_has_any(arrow_cast(column5, 'LargeList(Int64)'), arrow_cast(column6, 'LargeList(Int64)')) +from array_has_table_1D; +---- +true true true +false false false + +query B +select array_has(column1, column2) +from array_has_table_null; +---- +true +true +false +false +false + +# array_has([1, 3, 5], 1) -> true (array contains element) +# array_has([], 1) -> false (empty array, not null) +# array_has(null, 1) -> null (null array) +query BB +select array_has(column1, column2), array_has(null, column2) +from array_has_table_empty; +---- +true NULL +false NULL +NULL NULL + +# Test for issue: array_has should return false for empty arrays, not null +# This test demonstrates the correct behavior with COALESCE to show the distinction +# array_has([1, 3, 5], 1) -> 'true' +# array_has([], 1) -> 'false' (empty array should return false) +# array_has(null, 1) -> 'null' (null array should return null) +query ?T +SELECT column1, COALESCE(CAST(array_has(column1, column2) AS VARCHAR), 'null') +from array_has_table_empty; +---- +[1, 3, 5] true +[] false +NULL null + +query B +select array_has(column1, column2) +from fixed_size_array_has_table_1D; +---- +true +false + +query BB +select array_has_all(column3, column4), + array_has_any(column5, column6) +from fixed_size_array_has_table_1D; +---- +true true +false false + +query BBB +select array_has(column1, column2), + 
array_has_all(column3, column4), + array_has_any(column5, column6) +from array_has_table_1D_Float; +---- +true true false +false false true + +query BBB +select array_has(arrow_cast(column1, 'LargeList(Float64)'), column2), + array_has_all(arrow_cast(column3, 'LargeList(Float64)'), arrow_cast(column4, 'LargeList(Float64)')), + array_has_any(arrow_cast(column5, 'LargeList(Float64)'), arrow_cast(column6, 'LargeList(Float64)')) +from array_has_table_1D_Float; +---- +true true false +false false true + +query B +select array_has(column1, column2) +from fixed_size_array_has_table_1D_Float; +---- +true +false + +query BB +select array_has_all(column3, column4), + array_has_any(column5, column6) +from fixed_size_array_has_table_1D_Float; +---- +true true +false true + +query BBB +select array_has(column1, column2), + array_has_all(column3, column4), + array_has_any(column5, column6) +from array_has_table_1D_Boolean; +---- +false true true +true true true + +query BBB +select array_has(arrow_cast(column1, 'LargeList(Boolean)'), column2), + array_has_all(arrow_cast(column3, 'LargeList(Boolean)'), arrow_cast(column4, 'LargeList(Boolean)')), + array_has_any(arrow_cast(column5, 'LargeList(Boolean)'), arrow_cast(column6, 'LargeList(Boolean)')) +from array_has_table_1D_Boolean; +---- +false true true +true true true + +query B +select array_has(column1, column2) +from fixed_size_array_has_table_1D_Boolean; +---- +false +true + +query BB +select array_has_all(column3, column4), + array_has_any(column5, column6) +from fixed_size_array_has_table_1D_Boolean; +---- +true true +true true + +query BBBBBBBB +select array_has_all(column3, arrow_cast(column4,'LargeList(Boolean)')), + array_has_any(column5, arrow_cast(column6,'LargeList(Boolean)')), + array_has_all(column3, arrow_cast(column4,'List(Boolean)')), + array_has_any(column5, arrow_cast(column6,'List(Boolean)')), + array_has_all(arrow_cast(column3, 'LargeList(Boolean)'), column4), + array_has_any(arrow_cast(column5, 
'LargeList(Boolean)'), column6), + array_has_all(arrow_cast(column3, 'List(Boolean)'), column4), + array_has_any(arrow_cast(column5, 'List(Boolean)'), column6) +from fixed_size_array_has_table_1D_Boolean; +---- +true true true true true true true true +true true true true true true true true + +query BBB +select array_has(column1, column2), + array_has_all(column3, column4), + array_has_any(column5, column6) +from array_has_table_1D_UTF8; +---- +true true false +false false true + +query BBB +select array_has(arrow_cast(column1, 'LargeList(Utf8)'), column2), + array_has_all(arrow_cast(column3, 'LargeList(Utf8)'), arrow_cast(column4, 'LargeList(Utf8)')), + array_has_any(arrow_cast(column5, 'LargeList(Utf8)'), arrow_cast(column6, 'LargeList(Utf8)')) +from array_has_table_1D_UTF8; +---- +true true false +false false true + +query B +select array_has(column1, column2) +from fixed_size_array_has_table_1D_UTF8; +---- +true +false + +query BB +select array_has(column1, column2), + array_has_all(column3, column4) +from array_has_table_2D; +---- +false true +true false + +query BB +select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), column2), + array_has_all(arrow_cast(column3, 'LargeList(List(Int64))'), arrow_cast(column4, 'LargeList(List(Int64))')) +from array_has_table_2D; +---- +false true +true false + +query B +select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), column2) +from fixed_size_array_has_table_2D; +---- +false +false + +query B +select array_has_all(arrow_cast(column3, 'LargeList(List(Int64))'), arrow_cast(column4, 'LargeList(List(Int64))')) +from fixed_size_array_has_table_2D; +---- +true +false + +query B +select array_has_all(column1, column2) +from array_has_table_2D_float; +---- +true +false + +query B +select array_has_all(arrow_cast(column1, 'LargeList(List(Float64))'), arrow_cast(column2, 'LargeList(List(Float64))')) +from array_has_table_2D_float; +---- +true +false + +query B +select array_has_all(column1, column2) +from 
fixed_size_array_has_table_2D_float; +---- +false +false + +query B +select array_has(column1, column2) from array_has_table_3D; +---- +false +true +false +false +true +false +true + +query B +select array_has(arrow_cast(column1, 'LargeList(List(List(Int64)))'), column2) from array_has_table_3D; +---- +false +true +false +false +true +false +true + +query B +select array_has(column1, column2) from fixed_size_array_has_table_3D; +---- +false +false +false +false +true +true +true + +query BBBB +select array_has(column1, make_array(5, 6)), + array_has(column1, make_array(7, NULL)), + array_has(column2, 5.5), + array_has(column3, 'o') +from arrays; +---- +false false false true +true false true false +true false false true +false true false false +NULL NULL false false +false false NULL false +false false false NULL + +query BBBB +select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(5, 6)), + array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(7, NULL)), + array_has(arrow_cast(column2, 'LargeList(Float64)'), 5.5), + array_has(arrow_cast(column3, 'LargeList(Utf8)'), 'o') +from arrays; +---- +false false false true +true false true false +true false false true +false true false false +NULL NULL false false +false false NULL false +false false false NULL + +# Row 1: [[NULL,2],[3,NULL]], [1.1,2.2,3.3], ['L','o','r','e','m'] +# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m'] +# Row 3: [[5,6],[7,8]], [7.7,8.8,9.9], ['d',NULL,'l','o','r'] +# Row 4: [[7,NULL],[9,10]], [10.1,NULL,12.2], ['s','i','t','a','b'] +# Row 5: NULL, [13.3,14.4,15.5], ['a','m','e','t','x'] +# Row 6: [[11,12],[13,14]], NULL, [',','a','b','c','d'] +# Row 7: [[15,16],[NULL,18]], [16.6,17.7,18.8], NULL +query BBBB +select array_has(column1, make_array(5, 6)), + array_has(column1, make_array(7, NULL)), + array_has(column2, 5.5), + array_has(column3, 'o') +from fixed_size_arrays; +---- +false false false true +true false true false +true false false true 
+false true false false +NULL NULL false false +false false NULL false +false false false NULL + +query BBBB +select array_has_all(make_array(1,2,3), []), + array_has_any(make_array(1,2,3), []), + array_has_all(make_array('aa','bb','cc'), []), + array_has_any(make_array('aa','bb','cc'), []) +; +---- +true false true false + +query BBBBBBBBBBBBB +select array_has_all(make_array(1,2,3), make_array(1,3)), + array_has_all(make_array(1,2,3), make_array(1,4)), + array_has_all(make_array([1,2], [3,4]), make_array([1,2])), + array_has_all(make_array([1,2], [3,4]), make_array([1,3])), + array_has_all(make_array([1,2], [3,4]), make_array([1,2], [3,4], [5,6])), + array_has_all(make_array([[1,2,3]]), make_array([[1]])), + array_has_all(make_array([[1,2,3]]), make_array([[1,2,3]])), + array_has_any(make_array(1,2,3), make_array(1,10,100)), + array_has_any(make_array(1,2,3), make_array(10,100)), + array_has_any(make_array([1,2], [3,4]), make_array([1,10], [10,4])), + array_has_any(make_array([1,2], [3,4]), make_array([10,20], [3,4])), + array_has_any(make_array([[1,2,3]]), make_array([[1,2,3], [4,5,6]])), + array_has_any(make_array([[1,2,3]]), make_array([[1,2,3]], [[4,5,6]])) +; +---- +true false true false false false true true false false true false true + +query BBBBBBBBBBBBB +select array_has_all(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(1,3), 'LargeList(Int64)')), + array_has_all(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,4), 'LargeList(Int64)')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2]), 'LargeList(List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,3]), 'LargeList(List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'LargeList(List(Int64))')), + 
array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1]]), 'LargeList(List(List(Int64)))')), + array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))')), + array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,10,100), 'LargeList(Int64)')), + array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(10,100),'LargeList(Int64)')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'LargeList(List(Int64))')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'LargeList(List(Int64))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'LargeList(List(List(Int64)))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'LargeList(List(List(Int64)))')) +; +---- +true false true false false false true true false false true false true + +query BBBBBBBBBBBBB +select array_has_all(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3), 'FixedSizeList(2, Int64)')), + array_has_all(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 4), 'FixedSizeList(2, Int64)')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2]), 'FixedSizeList(1, List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,3]), 'FixedSizeList(1, List(Int64))')), + array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'FixedSizeList(3, List(Int64))')), + 
array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1]]), 'FixedSizeList(1, List(List(Int64)))')), + array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))')), + array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1,10,100), 'FixedSizeList(3, Int64)')), + array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(10, 100),'FixedSizeList(2, Int64)')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'FixedSizeList(2, List(Int64))')), + array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'FixedSizeList(2, List(Int64))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'FixedSizeList(1, List(List(Int64)))')), + array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'FixedSizeList(2, List(List(Int64)))')) +; +---- +true false true false false false true true false false true false true + +# rewrite various array_has operations to InList where the haystack is a literal list +# NB that `col in (a, b, c)` is simplified to OR if there are <= 3 elements, so we make 4-element haystack lists + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE needle IN ('7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'); +---- +1 + +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE needle IN ('7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'); +---- 
+logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE needle = ANY(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c']); +---- +1 + +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE needle = ANY(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c']); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan 
+01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], needle); +---- +1 + +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], needle); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: 
partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'LargeList(Utf8View)'), needle); +---- +1 + +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'LargeList(Utf8View)'), needle); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'FixedSizeList(4, Utf8View)'), needle); +---- +1 + +query TT +explain with test AS (SELECT 
substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'FixedSizeList(4, Utf8View)'), needle); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection: +06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +query I +with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has([needle], needle); +---- +100000 + +# The optimizer does not currently eliminate the filter; +# Instead, it's rewritten as `IS NOT NULL OR NULL` due to SQL null semantics +query TT +explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) +select count(*) from test WHERE array_has([needle], needle); +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: test +04)------SubqueryAlias: t +05)--------Projection:
+06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IS NOT NULL OR Boolean(NULL) +07)------------TableScan: generate_series() projection=[value] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IS NOT NULL OR NULL, projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] + +# any operator +query ? +select column3 from arrays where 'L'=any(column3); +---- +[L, o, r, e, m] + +query I +select count(*) from arrays where 'L'=any(column3); +---- +1 + +query I +select count(*) from arrays where 'X'=any(column3); +---- +0 + +# any operator with comparison operators +# Use inline arrays so the test data is visible and the needle (5) +# falls within the range of some arrays but not others. +statement ok +CREATE TABLE any_op_test AS VALUES + (1, make_array(1, 2, 3)), + (2, make_array(4, 5, 6)), + (3, make_array(7, 8, 9)), + (4, make_array(3, 5, 7)); + +# 5 > ANY(arr): true when array_min < 5 +# row1: min=1 < 5 ✓, row2: min=4 < 5 ✓, row3: min=7 < 5 ✗, row4: min=3 < 5 ✓ +query I? +select column1, column2 from any_op_test where 5 > any(column2) order by column1; +---- +1 [1, 2, 3] +2 [4, 5, 6] +4 [3, 5, 7] + +# 5 >= ANY(arr): true when array_min <= 5 +# row1: min=1 <= 5 ✓, row2: min=4 <= 5 ✓, row3: min=7 <= 5 ✗, row4: min=3 <= 5 ✓ +query I? +select column1, column2 from any_op_test where 5 >= any(column2) order by column1; +---- +1 [1, 2, 3] +2 [4, 5, 6] +4 [3, 5, 7] + +# 5 < ANY(arr): true when array_max > 5 +# row1: max=3 > 5 ✗, row2: max=6 > 5 ✓, row3: max=9 > 5 ✓, row4: max=7 > 5 ✓ +query I? 
+select column1, column2 from any_op_test where 5 < any(column2) order by column1; +---- +2 [4, 5, 6] +3 [7, 8, 9] +4 [3, 5, 7] + +# 5 <= ANY(arr): true when array_max >= 5 +# row1: max=3 >= 5 ✗, row2: max=6 >= 5 ✓, row3: max=9 >= 5 ✓, row4: max=7 >= 5 ✓ +query I? +select column1, column2 from any_op_test where 5 <= any(column2) order by column1; +---- +2 [4, 5, 6] +3 [7, 8, 9] +4 [3, 5, 7] + +# 5 <> ANY(arr): true when array_min != 5 OR array_max != 5 +# row1: [1,2,3] min=1!=5 ✓, row2: [4,5,6] min=4!=5 ✓, row3: [7,8,9] min=7!=5 ✓, row4: [3,5,7] min=3!=5 ✓ +query I? +select column1, column2 from any_op_test where 5 <> any(column2) order by column1; +---- +1 [1, 2, 3] +2 [4, 5, 6] +3 [7, 8, 9] +4 [3, 5, 7] + +# For a single-element array where the element equals the needle, <> should return false +query B +select 5 <> any(make_array(5)); +---- +false + +# For a uniform array [5,5,5], <> should also return false +query B +select 5 <> any(make_array(5, 5, 5)); +---- +false + +# Empty array: all operators should return false (no elements satisfy the condition) +query B +select 5 = any(make_array()); +---- +false + +query B +select 5 <> any(make_array()); +---- +false + +query B +select 5 > any(make_array()); +---- +false + +query B +select 5 < any(make_array()); +---- +false + +query B +select 5 >= any(make_array()); +---- +false + +query B +select 5 <= any(make_array()); +---- +false + +# Mixed NULL + non-NULL array where no non-NULL element satisfies the condition +# These return false (NULLs are skipped by array_min/array_max) +query B +select 5 > any(make_array(6, NULL)); +---- +false + +query B +select 5 < any(make_array(3, NULL)); +---- +false + +query B +select 5 >= any(make_array(6, NULL)); +---- +false + +query B +select 5 <= any(make_array(3, NULL)); +---- +false + +# Mixed NULL + non-NULL array where a non-NULL element satisfies the condition +query B +select 5 > any(make_array(3, NULL)); +---- +true + +query B +select 5 < any(make_array(6, NULL)); +---- 
+true + +query B +select 5 >= any(make_array(5, NULL)); +---- +true + +query B +select 5 <= any(make_array(5, NULL)); +---- +true + +query B +select 5 <> any(make_array(3, NULL)); +---- +true + +query B +select 5 <> any(make_array(5, NULL)); +---- +false + +# All-NULL array: all operators should return false +query B +select 5 > any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 < any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 >= any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 <= any(make_array(NULL::INT, NULL::INT)); +---- +false + +query B +select 5 <> any(make_array(NULL::INT, NULL::INT)); +---- +false + +# NULL left operand: should return NULL for non-empty arrays +query B +select NULL > any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL < any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL >= any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL <= any(make_array(1, 2, 3)); +---- +NULL + +query B +select NULL <> any(make_array(1, 2, 3)); +---- +NULL + +# NULL left operand with empty array: should return false +query B +select NULL > any(make_array()); +---- +false + +# NULL array: should return NULL +query B +select 5 > any(NULL::INT[]); +---- +NULL + +query B +select 5 < any(NULL::INT[]); +---- +NULL + +query B +select 5 >= any(NULL::INT[]); +---- +NULL + +query B +select 5 <= any(NULL::INT[]); +---- +NULL + +query B +select 5 <> any(NULL::INT[]); +---- +NULL + +statement ok +DROP TABLE any_op_test; + +## array_distinct + +#TODO: https://github.com/apache/datafusion/issues/7142 +#query ? +#select array_distinct(null); +#---- +#NULL + +# test with empty row, the row that does not match the condition has row count 0 +statement ok +create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2); + +# rowsort ensures the order of the group by output is deterministic; array_sort has no effect here, since sum() always returns a single row. +query ?
rowsort +select array_distinct([sum(a)]) from t1 where a > 100 group by b; +---- +[102] +[202] + +statement ok +drop table t1; + +query ? +select array_distinct(a) from values ([1, 2, 3]), (null), ([1, 3, 1]) as X(a); +---- +[1, 2, 3] +NULL +[1, 3] + +query ? +select array_distinct(arrow_cast(null, 'LargeList(Int64)')); +---- +NULL + +query ? +select array_distinct([]); +---- +[] + +query ? +select array_distinct([[], []]); +---- +[[]] + +query ? +select array_distinct(column1) +from array_distinct_table_1D; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_UTF8; +---- +[a, bc, def] +[a, bc, def, defg] +[defg] + +query ? +select array_distinct(column1) +from array_distinct_table_2D; +---- +[[1, 2], [3, 4], [5, 6]] +[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] +[[5, 6], NULL] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_large; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_fixed; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_UTF8_fixed; +---- +[a, bc, def] +[a, bc, def, defg] +[defg] + +query ? +select array_distinct(column1) +from array_distinct_table_2D_fixed; +---- +[[1, 2], [3, 4], [5, 6]] +[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] +[[5, 6], NULL] + +## arrays_zip (aliases: `list_zip`) + +# Spark example: arrays_zip(array(1, 2, 3), array(2, 3, 4)) +query ? +select arrays_zip([1, 2, 3], [2, 3, 4]); +---- +[{1: 1, 2: 2}, {1: 2, 2: 3}, {1: 3, 2: 4}] + +# Spark example: arrays_zip(array(1, 2), array(2, 3), array(3, 4)) +query ? +select arrays_zip([1, 2], [2, 3], [3, 4]); +---- +[{1: 1, 2: 2, 3: 3}, {1: 2, 2: 3, 3: 4}] + +# basic: two integer arrays of equal length +query ? +select arrays_zip([1, 2, 3], [10, 20, 30]); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] + +# basic: two arrays with different element types (int + string) +query ? 
+select arrays_zip([1, 2, 3], ['a', 'b', 'c']); +---- +[{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}] + +# three arrays of equal length +query ? +select arrays_zip([1, 2, 3], [10, 20, 30], [100, 200, 300]); +---- +[{1: 1, 2: 10, 3: 100}, {1: 2, 2: 20, 3: 200}, {1: 3, 2: 30, 3: 300}] + +# four arrays of equal length +query ? +select arrays_zip([1], [2], [3], [4]); +---- +[{1: 1, 2: 2, 3: 3, 4: 4}] + +# mixed element types: float + boolean +query ? +select arrays_zip([1.5, 2.5], [true, false]); +---- +[{1: 1.5, 2: true}, {1: 2.5, 2: false}] + +# different length arrays: shorter array padded with NULLs +query ? +select arrays_zip([1, 2], [3, 4, 5]); +---- +[{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}] + +# different length arrays: first longer +query ? +select arrays_zip([1, 2, 3], [10]); +---- +[{1: 1, 2: 10}, {1: 2, 2: NULL}, {1: 3, 2: NULL}] + +# different length: one single element, other three elements +query ? +select arrays_zip([1], ['a', 'b', 'c']); +---- +[{1: 1, 2: a}, {1: NULL, 2: b}, {1: NULL, 2: c}] + +# empty arrays +query ? +select arrays_zip([], []); +---- +[] + +# one empty, one non-empty +query ? +select arrays_zip([], [1, 2, 3]); +---- +[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] + +# NULL elements inside arrays +query ? +select arrays_zip([1, NULL, 3], ['a', 'b', 'c']); +---- +[{1: 1, 2: a}, {1: NULL, 2: b}, {1: 3, 2: c}] + +# all NULL elements +query ? +select arrays_zip([NULL::int, NULL, NULL], [NULL::text, NULL, NULL]); +---- +[{1: NULL, 2: NULL}, {1: NULL, 2: NULL}, {1: NULL, 2: NULL}] + +# both args are NULL (entire list null) +query ? +select arrays_zip(NULL::int[], NULL::int[]); +---- +NULL + +# one arg is NULL list, other is real array +query ? +select arrays_zip(NULL::int[], [1, 2, 3]); +---- +[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] + +# real array + NULL list +query ? +select arrays_zip([1, 2], NULL::text[]); +---- +[{1: 1, 2: NULL}, {1: 2, 2: NULL}] + +# column-level test with multiple rows +query ? 
+select arrays_zip(a, b) from (values ([1, 2], [10, 20]), ([3, 4, 5], [30]), ([6], [60, 70])) as t(a, b); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}] +[{1: 3, 2: 30}, {1: 4, 2: NULL}, {1: 5, 2: NULL}] +[{1: 6, 2: 60}, {1: NULL, 2: 70}] + +# column-level test with NULL rows +query ? +select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}] +[{1: NULL, 2: 30}, {1: NULL, 2: 40}] +[{1: 5, 2: NULL}, {1: 6, 2: NULL}] + +# column-level test with single argument +query ? +select arrays_zip(a) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 1}, {1: 2}] +NULL +[{1: 5}, {1: 6}] + +query ? +select arrays_zip(b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 10}, {1: 20}] +[{1: 30}, {1: 40}] +NULL + +# No input +query error Error during planning: 'arrays_zip' does not support zero arguments +select arrays_zip(); + +# Non-array input +query error DataFusion error: Execution error: arrays_zip expects array arguments, got Int64 +select arrays_zip(1, 2); + +# null input +query ? +select arrays_zip(null) +---- +NULL + +# single empty array +query ? +select arrays_zip([]) +---- +[] + + +# single array of null +query ? +select arrays_zip([null]) +---- +[{1: NULL}] + +query ? +select arrays_zip([NULL::int]) +---- +[{1: NULL}] + +query ? +select arrays_zip([NULL::int[]]) +---- +[{1: NULL}] + +# alias: list_zip +query ? +select list_zip([1, 2], [3, 4]); +---- +[{1: 1, 2: 3}, {1: 2, 2: 4}] + +# column test: total values equal (3 each) but per-row lengths differ +# a: [1] b: [10, 20] → row 0: a has 1, b has 2 +# a: [2, 3] b: [30] → row 1: a has 2, b has 1 +# total a values = 3, total b values = 3 (same!) but rows are misaligned +query ? +select arrays_zip(a, b) from (values ([1], [10, 20]), ([2, 3], [30])) as t(a, b); +---- +[{1: 1, 2: 10}, {1: NULL, 2: 20}] +[{1: 2, 2: 30}, {1: 3, 2: NULL}] + +# single element arrays +query ? 
+select arrays_zip([42], ['hello']); +---- +[{1: 42, 2: hello}] + +# single argument +query ? +select arrays_zip([1, 2, 3]); +---- +[{1: 1}, {1: 2}, {1: 3}] + +# arrays_zip with LargeList inputs +query ? +select arrays_zip( + arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), + arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] + +# arrays_zip with LargeList different lengths (padding) +query ? +select arrays_zip( + arrow_cast(make_array(1, 2), 'LargeList(Int64)'), + arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}] + +# single argument from LargeList +query ? +select arrays_zip(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')); +---- +[{1: 1}, {1: 2}, {1: 3}] + +# arrays_zip with FixedSizeList inputs +query ? +select arrays_zip( + arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), + arrow_cast(make_array(10, 20, 30), 'FixedSizeList(3, Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] + +# single argument from FixedSizeList +query ? +select arrays_zip(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')); +---- +[{1: 1}, {1: 2}, {1: 3}] + +# arrays_zip mixing List and LargeList +query ? +select arrays_zip( + [1, 2, 3], + arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] + +# arrays_zip mixing List and FixedSizeList with different lengths (padding) +query ? +select arrays_zip( + [1, 2, 3], + arrow_cast(make_array(10, 20), 'FixedSizeList(2, Int64)') +); +---- +[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: NULL}] + +# arrays_zip with LargeList and FixedSizeList mixed types +query ? +select arrays_zip( + arrow_cast(make_array(1, 2), 'LargeList(Int64)'), + arrow_cast(make_array('a', 'b'), 'FixedSizeList(2, Utf8)') +); +---- +[{1: 1, 2: a}, {1: 2, 2: b}] + +query ??? 
+select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from array_intersect_table_1D; +---- +[1] [1, 3] [1, 3] +[11] [11, 33] [11, 33] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from large_array_intersect_table_1D; +---- +[1] [1, 3] [1, 3] +[11] [11, 33] [11, 33] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from array_intersect_table_1D_Float; +---- +[1.0] [1.0, 3.0] [] +[] [2.0] [1.11] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from array_intersect_table_1D_Boolean; +---- +[] [true, false] [false] +[false] [true] [true] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from large_array_intersect_table_1D_Boolean; +---- +[] [true, false] [false] +[false] [true] [true] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from array_intersect_table_1D_UTF8; +---- +[bc] [rust, arrow] [] +[] [datafusion, rust, arrow] [rust, arrow] + +query ??? +select array_intersect(column1, column2), + array_intersect(column3, column4), + array_intersect(column5, column6) +from large_array_intersect_table_1D_UTF8; +---- +[bc] [rust, arrow] [] +[] [datafusion, rust, arrow] [rust, arrow] + +query ? +select array_intersect(column1, column2) +from array_intersect_table_1D_NULL; +---- +[2, 3] +[3] +[3] +NULL +NULL +NULL + +query ?? +select array_intersect(column1, column2), + array_intersect(column3, column4) +from array_intersect_table_2D; +---- +[] [[4, 5], [6, 7]] +[[3, 4]] [[5, 6, 7], [8, 9, 10]] + +query ?? 
+select array_intersect(column1, column2), + array_intersect(column3, column4) +from large_array_intersect_table_2D; +---- +[] [[4, 5], [6, 7]] +[[3, 4]] [[5, 6, 7], [8, 9, 10]] + + +query ? +select array_intersect(column1, column2) +from array_intersect_table_2D_float; +---- +[[1.1, 2.2], [3.3]] +[[1.1, 2.2], [3.3]] + +query ? +select array_intersect(column1, column2) +from large_array_intersect_table_2D_float; +---- +[[1.1, 2.2], [3.3]] +[[1.1, 2.2], [3.3]] + +query ? +select array_intersect(column1, column2) +from array_intersect_table_3D; +---- +[] +[[[1, 2]]] + +query ? +select array_intersect(column1, column2) +from large_array_intersect_table_3D; +---- +[] +[[[1, 2]]] + +query ?????? +SELECT array_intersect(make_array(1,2,3), make_array(2,3,4)), + array_intersect(make_array(1,3,5), make_array(2,4,6)), + array_intersect(make_array('aa','bb','cc'), make_array('cc','aa','dd')), + array_intersect(make_array(true, false), make_array(true)), + array_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)), + array_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], [3, 3], [4, 4])) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query ?????? 
+SELECT array_intersect(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(2,3,4), 'LargeList(Int64)')), + array_intersect(arrow_cast(make_array(1,3,5), 'LargeList(Int64)'), arrow_cast(make_array(2,4,6), 'LargeList(Int64)')), + array_intersect(arrow_cast(make_array('aa','bb','cc'), 'LargeList(Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'LargeList(Utf8)')), + array_intersect(arrow_cast(make_array(true, false), 'LargeList(Boolean)'), arrow_cast(make_array(true), 'LargeList(Boolean)')), + array_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'LargeList(Float64)')), + array_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'LargeList(List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'LargeList(List(Int64))')) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query ?????? +SELECT array_intersect(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2,3,4), 'FixedSizeList(3, Int64)')), + array_intersect(arrow_cast(make_array(1,3,5), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2,4,6), 'FixedSizeList(3, Int64)')), + array_intersect(arrow_cast(make_array('aa','bb','cc'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'FixedSizeList(3, Utf8)')), + array_intersect(arrow_cast(make_array(true, false), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(true), 'FixedSizeList(1, Boolean)')), + array_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'FixedSizeList(3, Float64)')), + array_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'FixedSizeList(3, List(Int64))')) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query ? +select array_intersect([], []); +---- +[] + +query ? 
+select array_intersect(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); +---- +[] + +query ? +select array_intersect([1, 1, 2, 2, 3, 3], null); +---- +NULL + +query ? +select array_intersect(arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)'), null); +---- +NULL + +query ? +select array_intersect(null, [1, 1, 2, 2, 3, 3]); +---- +NULL + +query ? +select array_intersect(null, arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)')); +---- +NULL + +query ? +select array_intersect([], null); +---- +NULL + +query ? +select array_intersect([[1,2,3]], [[]]); +---- +[] + +query ? +select array_intersect([[null]], [[]]); +---- +[] + +query ? +select array_intersect(arrow_cast([], 'LargeList(Int64)'), null); +---- +NULL + +query ? +select array_intersect(null, []); +---- +NULL + +query ? +select array_intersect(null, arrow_cast([], 'LargeList(Int64)')); +---- +NULL + +query ? +select array_intersect(null, null); +---- +NULL + +query ?????? +SELECT list_intersect(make_array(1,2,3), make_array(2,3,4)), + list_intersect(make_array(1,3,5), make_array(2,4,6)), + list_intersect(make_array('aa','bb','cc'), make_array('cc','aa','dd')), + list_intersect(make_array(true, false), make_array(true)), + list_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)), + list_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], [3, 3], [4, 4])) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query ?????? 
+SELECT list_intersect(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(2,3,4), 'LargeList(Int64)')), + list_intersect(arrow_cast(make_array(1,3,5), 'LargeList(Int64)'), arrow_cast(make_array(2,4,6), 'LargeList(Int64)')), + list_intersect(arrow_cast(make_array('aa','bb','cc'), 'LargeList(Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'LargeList(Utf8)')), + list_intersect(arrow_cast(make_array(true, false), 'LargeList(Boolean)'), arrow_cast(make_array(true), 'LargeList(Boolean)')), + list_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'LargeList(Float64)')), + list_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'LargeList(List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'LargeList(List(Int64))')) +; +---- +[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] + +query BBBB +select list_has_all(make_array(1,2,3), make_array(4,5,6)), + list_has_all(make_array(1,2,3), make_array(1,2)), + list_has_any(make_array(1,2,3), make_array(4,5,6)), + list_has_any(make_array(1,2,3), make_array(1,2,4)) +; +---- +false true false true + +query BBBB +select arrays_overlap(make_array(1,2,3), make_array(4,5,6)), + arrays_overlap(make_array(1,2,3), make_array(1,2,4)), + arrays_overlap(make_array(['aa']), make_array(['aa'],['bb'])), + arrays_overlap(make_array('aa',NULL), make_array('bb',NULL)) +; +---- +false true true true + +query ??? +select range(column2), + range(column1, column2), + range(column1, column2, column3) +from arrays_range; +---- +[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] [3, 4, 5, 6, 7, 8, 9] [3, 5, 7, 9] +[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 7, 10] + +query ??????????? 
+select range(5), + range(2, 5), + range(2, 10, 3), + range(10, 2, -3), + range(1, 5, -1), + range(1, -5, 1), + range(1, -5, -1), + range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH), + range(DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), + range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR), + range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) +; +---- +[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [10, 7, 4] [] [] [1, 0, -1, -2, -3, -4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02] [1989-04-01, 1990-04-01, 1991-04-01] [] + +# Ensure can coerce from other valid types +query ??????????? 
+select range(5), + range(2, 5), + range(2, 10, 3), + range(10, 2, -3), + range(arrow_cast(1, 'Int8'), 5, -1), + range(arrow_cast(1, 'Int16'), arrow_cast(-5, 'Int8'), 1), + range(arrow_cast(1, 'Int32'), arrow_cast(-5, 'Int16'), arrow_cast(-1, 'Int8')), + range(DATE '1992-09-01', DATE '1993-03-01', arrow_cast('1 MONTH', 'Interval(YearMonth)')), + range(DATE '1993-02-01', arrow_cast(DATE '1993-01-01', 'Date64'), INTERVAL '-1' DAY), + range(arrow_cast(DATE '1989-04-01', 'Date64'), DATE '1993-03-01', INTERVAL '1' YEAR), + range(arrow_cast(DATE '1993-03-01', 'Date64'), arrow_cast(DATE '1989-04-01', 'Date64'), INTERVAL '1' YEAR) +; +---- +[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [10, 7, 4] [] [] [1, 0, -1, -2, -3, -4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02] [1989-04-01, 1990-04-01, 1991-04-01] [] + +# Test range with zero step +query error DataFusion error: Execution error: step can't be 0 for function range\(start \[, stop, step\]\) +select range(1, 1, 0); + +# Test range with big steps +query ???? +select + range(-9223372036854775808, -9223372036854775808, -9223372036854775808) as c1, + range(9223372036854775807, 9223372036854775807, 9223372036854775807) as c2, + range(0, -9223372036854775808, -9223372036854775808) as c3, + range(0, 9223372036854775807, 9223372036854775807) as c4; +---- +[] [] [0] [0] + +# Test range for other edge cases +query ???????? 
+select + range(9223372036854775807, 9223372036854775807, -1) as c1, + range(9223372036854775807, 9223372036854775806, -1) as c2, + range(9223372036854775807, 9223372036854775807, 1) as c3, + range(9223372036854775806, 9223372036854775807, 1) as c4, + range(-9223372036854775808, -9223372036854775808, -1) as c5, + range(-9223372036854775807, -9223372036854775808, -1) as c6, + range(-9223372036854775808, -9223372036854775808, 1) as c7, + range(-9223372036854775808, -9223372036854775807, 1) as c8; +---- +[] [9223372036854775807] [] [9223372036854775806] [] [-9223372036854775807] [] [-9223372036854775808] + +# Test range(start, stop, step) with NULL values +query ? +select range(start, stop, step) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop), + (values (3), (NULL)) as step_values(step) +where start is null or stop is null or step is null +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +# Test range(start, stop) with NULL values +query ? +select range(start, stop) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop) +where start is null or stop is null +---- +NULL +NULL +NULL + +# Test range(stop) with NULL value +query ? +select range(NULL) +---- +NULL + +## should return NULL +query ? +select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + +## should return NULL +query ? +select range(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + +query ? +select range(DATE '1992-09-01', DATE '1993-03-01', NULL); +---- +NULL + +query ? +select range(TIMESTAMP '1992-09-01', TIMESTAMP '1993-03-01', NULL); +---- +NULL + +query ? +select range(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(NULL::Date, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(DATE '1993-03-01', NULL::Date, INTERVAL '1' YEAR); +---- +NULL + +query ? 
+select generate_series(DATE '1993-02-01', DATE '1993-03-01', NULL::Interval); +---- +NULL + +query ? +select range(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select range(NULL, NULL, NULL); +---- +NULL + +query ? +select range(NULL::timestamp, NULL::timestamp, NULL); +---- +NULL + +query ? +select range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + +query ? +select range(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + +query ? +select range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) +---- +[] + +query ? +select range(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) +---- +[] + +query error DataFusion error: Execution error: Cannot generate date range less than 1 day\. +select range(DATE '1993-03-01', DATE '1993-03-01', INTERVAL '1' HOUR) + +query ? +select range(TIMESTAMP '1993-03-01', TIMESTAMP '1993-03-01', INTERVAL '1' HOUR) +---- +[] + +query ????????? +select generate_series(5), + generate_series(2, 5), + generate_series(2, 10, 3), + generate_series(1, 5, 1), + generate_series(5, 1, -1), + generate_series(10, 2, -3), + generate_series(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH), + generate_series(DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), + generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR) +; +---- +[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] 
+ +query ? +select generate_series('2021-01-01'::timestamp, '2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] + +# Other timestamp types are coerced to nanosecond +query ? +select generate_series(arrow_cast('2021-01-01'::timestamp, 'Timestamp(s)'), '2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] + +query ? +select generate_series('2021-01-01'::timestamp, arrow_cast('2021-01-01T15:00:00'::timestamp, 'Timestamp(µs)'), INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] + +query ? 
+select generate_series('2021-01-01T00:00:00EST'::timestamp, '2021-01-01T15:00:00-12:00'::timestamp, INTERVAL '1' HOUR); +---- +[2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00, 2021-01-01T16:00:00, 2021-01-01T17:00:00, 2021-01-01T18:00:00, 2021-01-01T19:00:00, 2021-01-01T20:00:00, 2021-01-01T21:00:00, 2021-01-01T22:00:00, 2021-01-01T23:00:00, 2021-01-02T00:00:00, 2021-01-02T01:00:00, 2021-01-02T02:00:00, 2021-01-02T03:00:00] + +query ? +select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T15:00:00', 'Timestamp(Nanosecond, Some("+05:00"))'), INTERVAL '1' HOUR); +---- +[2021-01-01T00:00:00-05:00, 2021-01-01T01:00:00-05:00, 2021-01-01T02:00:00-05:00, 2021-01-01T03:00:00-05:00, 2021-01-01T04:00:00-05:00, 2021-01-01T05:00:00-05:00] + +## -5500000000 ns is -5.5 sec +query ? 
+select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T06:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), INTERVAL '1 HOUR 30 MINUTE -5500000000 NANOSECOND'); +---- +[2021-01-01T00:00:00-05:00, 2021-01-01T01:29:54.500-05:00, 2021-01-01T02:59:49-05:00, 2021-01-01T04:29:43.500-05:00, 2021-01-01T05:59:38-05:00] + +## mixing types for timestamps is not supported +query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature +select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), DATE '2021-01-02', INTERVAL '1' HOUR); + +## mixing types not allowed even if an argument is null +query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature +select generate_series(TIMESTAMP '1992-09-01', DATE '1993-03-01', NULL); + +query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature +select generate_series(1, '2024-01-01', '2025-01-02'); + +query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature +select generate_series('2024-01-01'::timestamp, '2025-01-02', interval '1 day'); + +## should return NULL +query ? +select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + +## should return NULL +query ? +select generate_series(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(DATE '1992-09-01', DATE '1993-03-01', NULL); +---- +NULL + +query ? +select generate_series(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(NULL::Date, DATE '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(DATE '1993-03-01', NULL::Date, INTERVAL '1' YEAR); +---- +NULL + +query ? 
+select generate_series(DATE '1993-02-01', DATE '1993-03-01', NULL::Interval); +---- +NULL + +query ? +select generate_series(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); +---- +NULL + +query ? +select generate_series(NULL, NULL, NULL); +---- +NULL + +query ? +select generate_series(NULL::timestamp, NULL::timestamp, NULL); +---- +NULL + +query ? +select generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + +query ? +select generate_series(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) +---- +[] + +query ? +select generate_series(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) +---- +[] + +query ? +select generate_series(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) +---- +[] + +query error DataFusion error: Execution error: Cannot generate date range less than 1 day. +select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '1' HOUR) + +query error DataFusion error: Execution error: Cannot generate date range less than 1 day. +select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '-1' HOUR) + +query ? +select generate_series(TIMESTAMP '2000-01-01', TIMESTAMP '2000-01-02', INTERVAL '1' HOUR) +---- +[2000-01-01T00:00:00, 2000-01-01T01:00:00, 2000-01-01T02:00:00, 2000-01-01T03:00:00, 2000-01-01T04:00:00, 2000-01-01T05:00:00, 2000-01-01T06:00:00, 2000-01-01T07:00:00, 2000-01-01T08:00:00, 2000-01-01T09:00:00, 2000-01-01T10:00:00, 2000-01-01T11:00:00, 2000-01-01T12:00:00, 2000-01-01T13:00:00, 2000-01-01T14:00:00, 2000-01-01T15:00:00, 2000-01-01T16:00:00, 2000-01-01T17:00:00, 2000-01-01T18:00:00, 2000-01-01T19:00:00, 2000-01-01T20:00:00, 2000-01-01T21:00:00, 2000-01-01T22:00:00, 2000-01-01T23:00:00, 2000-01-02T00:00:00] + +query ? 
+select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '-1' HOUR) +---- +[2000-01-02T00:00:00, 2000-01-01T23:00:00, 2000-01-01T22:00:00, 2000-01-01T21:00:00, 2000-01-01T20:00:00, 2000-01-01T19:00:00, 2000-01-01T18:00:00, 2000-01-01T17:00:00, 2000-01-01T16:00:00, 2000-01-01T15:00:00, 2000-01-01T14:00:00, 2000-01-01T13:00:00, 2000-01-01T12:00:00, 2000-01-01T11:00:00, 2000-01-01T10:00:00, 2000-01-01T09:00:00, 2000-01-01T08:00:00, 2000-01-01T07:00:00, 2000-01-01T06:00:00, 2000-01-01T05:00:00, 2000-01-01T04:00:00, 2000-01-01T03:00:00, 2000-01-01T02:00:00, 2000-01-01T01:00:00, 2000-01-01T00:00:00] + +# Test generate_series with small intervals +query ? +select generate_series('2000-01-01T00:00:00.000000001Z'::timestamp, '2000-01-01T00:00:00.00000001Z'::timestamp, INTERVAL '1' NANOSECONDS) +---- +[2000-01-01T00:00:00.000000001, 2000-01-01T00:00:00.000000002, 2000-01-01T00:00:00.000000003, 2000-01-01T00:00:00.000000004, 2000-01-01T00:00:00.000000005, 2000-01-01T00:00:00.000000006, 2000-01-01T00:00:00.000000007, 2000-01-01T00:00:00.000000008, 2000-01-01T00:00:00.000000009, 2000-01-01T00:00:00.000000010] + +# Test generate_series with zero step +query error DataFusion error: Execution error: step can't be 0 for function generate_series\(start \[, stop, step\]\) +select generate_series(1, 1, 0); + +# Test generate_series with zero step +query error DataFusion error: Execution error: Interval argument to generate_series must not be 0 +select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '0' MINUTE); + +# Test generate_series with big steps +query ???? 
+select + generate_series(-9223372036854775808, -9223372036854775808, -9223372036854775808) as c1, + generate_series(9223372036854775807, 9223372036854775807, 9223372036854775807) as c2, + generate_series(0, -9223372036854775808, -9223372036854775808) as c3, + generate_series(0, 9223372036854775807, 9223372036854775807) as c4; +---- +[-9223372036854775808] [9223372036854775807] [0, -9223372036854775808] [0, 9223372036854775807] + + +# Test generate_series for other edge cases +query ???? +select + generate_series(9223372036854775807, 9223372036854775807, -1) as c1, + generate_series(9223372036854775807, 9223372036854775807, 1) as c2, + generate_series(-9223372036854775808, -9223372036854775808, -1) as c3, + generate_series(-9223372036854775808, -9223372036854775808, 1) as c4; +---- +[9223372036854775807] [9223372036854775807] [-9223372036854775808] [-9223372036854775808] + +# Test generate_series(start, stop, step) with NULL values +query ? +select generate_series(start, stop, step) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop), + (values (3), (NULL)) as step_values(step) +where start is null or stop is null or step is null +---- +NULL +NULL +NULL +NULL +NULL +NULL +NULL + +# Test generate_series(start, stop) with NULL values +query ? +select generate_series(start, stop) from + (values (1), (NULL)) as start_values(start), + (values (10), (NULL)) as stop_values(stop) +where start is null or stop is null +---- +NULL +NULL +NULL + +# Test generate_series(stop) with NULL value +query ? +select generate_series(NULL) +---- +NULL + +# Test generate_series with a table of date values +statement ok +CREATE TABLE date_table( + start DATE, + stop DATE, + step INTERVAL +) AS VALUES + (DATE '1992-01-01', DATE '1993-01-02', INTERVAL '1' MONTH), + (DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), + (DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR); + +query ? 
+select generate_series(start, stop, step) from date_table; +---- +[1992-01-01, 1992-02-01, 1992-03-01, 1992-04-01, 1992-05-01, 1992-06-01, 1992-07-01, 1992-08-01, 1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01] +[1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] + +query ? +select generate_series(start, stop, INTERVAL '1 year') from date_table; +---- +[1992-01-01, 1993-01-01] +[] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] + +query ? +select generate_series(start, '1993-03-01'::date, INTERVAL '1 year') from date_table; +---- +[1992-01-01, 1993-01-01] +[1993-02-01] +[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] + +# Test generate_series with a table of timestamp values +statement ok +CREATE TABLE timestamp_table( + start TIMESTAMP, + stop TIMESTAMP, + step INTERVAL +) AS VALUES + (TIMESTAMP '1992-01-01T00:00:00', TIMESTAMP '1993-01-02T00:00:00', INTERVAL '1' MONTH), + (TIMESTAMP '1993-02-01T00:00:00', TIMESTAMP '1993-01-01T00:00:00', INTERVAL '-1' DAY), + (TIMESTAMP '1989-04-01T00:00:00', TIMESTAMP '1993-03-01T00:00:00', INTERVAL '1' YEAR); + +query ? 
+select generate_series(start, stop, step) from timestamp_table; +---- +[1992-01-01T00:00:00, 1992-02-01T00:00:00, 1992-03-01T00:00:00, 1992-04-01T00:00:00, 1992-05-01T00:00:00, 1992-06-01T00:00:00, 1992-07-01T00:00:00, 1992-08-01T00:00:00, 1992-09-01T00:00:00, 1992-10-01T00:00:00, 1992-11-01T00:00:00, 1992-12-01T00:00:00, 1993-01-01T00:00:00] +[1993-02-01T00:00:00, 1993-01-31T00:00:00, 1993-01-30T00:00:00, 1993-01-29T00:00:00, 1993-01-28T00:00:00, 1993-01-27T00:00:00, 1993-01-26T00:00:00, 1993-01-25T00:00:00, 1993-01-24T00:00:00, 1993-01-23T00:00:00, 1993-01-22T00:00:00, 1993-01-21T00:00:00, 1993-01-20T00:00:00, 1993-01-19T00:00:00, 1993-01-18T00:00:00, 1993-01-17T00:00:00, 1993-01-16T00:00:00, 1993-01-15T00:00:00, 1993-01-14T00:00:00, 1993-01-13T00:00:00, 1993-01-12T00:00:00, 1993-01-11T00:00:00, 1993-01-10T00:00:00, 1993-01-09T00:00:00, 1993-01-08T00:00:00, 1993-01-07T00:00:00, 1993-01-06T00:00:00, 1993-01-05T00:00:00, 1993-01-04T00:00:00, 1993-01-03T00:00:00, 1993-01-02T00:00:00, 1993-01-01T00:00:00] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +query ? +select generate_series(start, stop, INTERVAL '1 year') from timestamp_table; +---- +[1992-01-01T00:00:00, 1993-01-01T00:00:00] +[] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +query ? +select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; +---- +[1992-01-01T00:00:00, 1993-01-01T00:00:00] +[1993-02-01T00:00:00] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +# https://github.com/apache/datafusion/issues/11922 +query ? 
+select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; +---- +[1992-01-01T00:00:00, 1993-01-01T00:00:00] +[1993-02-01T00:00:00] +[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] + +## array_except + +statement ok +CREATE TABLE array_except_table +AS VALUES + ([1, 2, 2, 3], [2, 3, 4]), + ([2, 3, 3], [3]), + ([3], [3, 3, 4]), + (null, [3, 4]), + ([1, 2], null), + (null, null) +; + +query ? +select array_except(column1, column2) from array_except_table; +---- +[1] +[2] +[] +NULL +NULL +NULL + +statement ok +drop table array_except_table; + +statement ok +CREATE TABLE array_except_nested_list_table +AS VALUES + ([[1, 2], [3]], [[2], [3], [4, 5]]), + ([[1, 2], [3]], [[2], [1, 2]]), + ([[1, 2], [3]], null), + (null, [[1], [2, 3], [4, 5, 6]]), + ([[1], [2, 3], [4, 5, 6]], [[2, 3], [4, 5, 6], [1]]) +; + +query ? +select array_except(column1, column2) from array_except_nested_list_table; +---- +[[1, 2]] +[[3]] +NULL +NULL +[] + +statement ok +drop table array_except_nested_list_table; + +statement ok +CREATE TABLE array_except_table_float +AS VALUES + ([1.1, 2.2, 3.3], [2.2]), + ([1.1, 2.2, 3.3], [4.4]), + ([1.1, 2.2, 3.3], [3.3, 2.2, 1.1]) +; + +query ? +select array_except(column1, column2) from array_except_table_float; +---- +[1.1, 3.3] +[1.1, 2.2, 3.3] +[] + +statement ok +drop table array_except_table_float; + +statement ok +CREATE TABLE array_except_table_ut8 +AS VALUES + (['a', 'b', 'c'], ['a']), + (['a', 'bc', 'def'], ['g', 'def']), + (['a', 'bc', 'def'], null), + (null, ['a']) +; + +query ? +select array_except(column1, column2) from array_except_table_ut8; +---- +[b, c] +[a, bc] +NULL +NULL + +statement ok +drop table array_except_table_ut8; + +statement ok +CREATE TABLE array_except_table_bool +AS VALUES + ([true, false, false], [false]), + ([true, true, true], [false]), + ([false, false, false], [true]), + ([true, false], null), + (null, [true, false]) +; + +query ? 
+select array_except(column1, column2) from array_except_table_bool; +---- +[true] +[true] +[false] +NULL +NULL + +statement ok +drop table array_except_table_bool; + +query ? +select array_except([], null); +---- +NULL + +query ? +select array_except([], []); +---- +[] + +query ? +select array_except(null, []); +---- +NULL + +query ? +select array_except(null, null) +---- +NULL + +query ? +select array_except(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 3, 4], 'LargeList(Int64)')); +---- +[1, 2] + +query ? +select array_except(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6, 3, 4], 'FixedSizeList(4, Int64)')); +---- +[1, 2] + +### Array operators tests + + +## array concatenate operator + +# array concatenate operator with scalars #1 (like array_concat scalar function) +query ?? +select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# array concatenate operator with scalars #2 (like array_append scalar function) +query ??? +select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o'; +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array concatenate operator with scalars #3 (like array_prepend scalar function) +query ??? +select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array concatenate operator with scalars #4 (mixed) +query ? +select 0 || [1,2,3] || 4 || [5] || [6,7]; +---- +[0, 1, 2, 3, 4, 5, 6, 7] + +# array concatenate operator with nd-list #5 (mixed) +query ? 
+select 0 || [1,2,3] || [[4,5]] || [[6,7,8]] || [9,10]; +---- +[[0, 1, 2, 3], [4, 5], [6, 7, 8], [9, 10]] + +# array concatenate operator non-valid cases +## concat 2D with scalar is not valid +query error +select 0 || [1,2,3] || [[4,5]] || [[6,7,8]] || [9,10] || 11; + +## concat scalar with 2D is not valid +query error +select 0 || [[1,2,3]]; + +# array concatenate operator with column + +statement ok +CREATE TABLE array_concat_operator_table +AS VALUES + (0, [1, 2, 2, 3], 4, [5, 6, 5]), + (-1, [4, 5, 6], 7, [8, 1, 1]) +; + +query ? +select column1 || column2 || column3 || column4 from array_concat_operator_table; +---- +[0, 1, 2, 2, 3, 4, 5, 6, 5] +[-1, 4, 5, 6, 7, 8, 1, 1] + +statement ok +drop table array_concat_operator_table; + +## array containment operator + +# array containment operator with scalars #1 (at arrow) +query BBBBBBB +select make_array(1,2,3) @> make_array(1,3), + make_array(1,2,3) @> make_array(1,4), + make_array([1,2], [3,4]) @> make_array([1,2]), + make_array([1,2], [3,4]) @> make_array([1,3]), + make_array([1,2], [3,4]) @> make_array([1,2], [3,4], [5,6]), + make_array([[1,2,3]]) @> make_array([[1]]), + make_array([[1,2,3]]) @> make_array([[1,2,3]]); +---- +true false true false false false true + +# Make sure it is rewritten to function array_has_all() +query TT +explain select [1,2,3] @> [1,3]; +---- +logical_plan +01)Projection: Boolean(true) AS array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3))) +02)--EmptyRelation: rows=1 +physical_plan +01)ProjectionExec: expr=[true as array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3)))] +02)--PlaceholderRowExec + +# array containment operator with scalars #2 (arrow at) +query BBBBBBB +select make_array(1,3) <@ make_array(1,2,3), + make_array(1,4) <@ make_array(1,2,3), + make_array([1,2]) <@ make_array([1,2], [3,4]), + make_array([1,3]) <@ make_array([1,2], [3,4]), + make_array([1,2], [3,4], [5,6]) <@ make_array([1,2], [3,4]), + 
make_array([[1]]) <@ make_array([[1,2,3]]), + make_array([[1,2,3]]) <@ make_array([[1,2,3]]); +---- +true false true false false false true + +# Make sure it is rewritten to function array_has_all() +query TT +explain select [1,3] <@ [1,2,3]; +---- +logical_plan +01)Projection: Boolean(true) AS array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3))) +02)--EmptyRelation: rows=1 +physical_plan +01)ProjectionExec: expr=[true as array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3)))] +02)--PlaceholderRowExec + +### Array casting tests + + +## make_array + +# make_array scalar function #1 +query ? +select make_array(1, 2.0) +---- +[1.0, 2.0] + +# make_array scalar function #2 +query ? +select make_array(null, 1.0) +---- +[NULL, 1.0] + +# make_array scalar function #3 +query ? +select make_array(1, 2.0, null, 3) +---- +[1.0, 2.0, NULL, 3.0] + +# make_array scalar function #4 +query ? +select make_array(1.0, '2', null) +---- +[1.0, 2.0, NULL] + +### FixedSizeListArray + +statement ok +CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION '../core/tests/data/fixed_size_list_array.parquet'; + +query T +select arrow_typeof(f0) from fixed_size_list_array; +---- +FixedSizeList(2 x Int64) +FixedSizeList(2 x Int64) + +query ? +select * from fixed_size_list_array; +---- +[1, 2] +[3, 4] + +query ? +select f0 from fixed_size_list_array; +---- +[1, 2] +[3, 4] + +query ? +select arrow_cast(f0, 'List(Int64)') from fixed_size_list_array; +---- +[1, 2] +[3, 4] + +query ? +select make_array(arrow_cast(f0, 'List(Int64)')) from fixed_size_list_array +---- +[[1, 2]] +[[3, 4]] + +query T +select arrow_typeof(make_array(arrow_cast(f0, 'List(Int64)'))) from fixed_size_list_array +---- +List(List(Int64)) +List(List(Int64)) + +query ? 
+select make_array(f0) from fixed_size_list_array +---- +[[1, 2]] +[[3, 4]] + +query T +select arrow_typeof(make_array(f0)) from fixed_size_list_array +---- +List(FixedSizeList(2 x Int64)) +List(FixedSizeList(2 x Int64)) + +query ? +select array_concat(column1, [7]) from arrays_values_v2; +---- +[NULL, 2, 3, 7] +[7] +[9, NULL, 10, 7] +[NULL, 1, 7] +[11, 12, 7] +[7] + +# flatten + +query ? +select flatten(NULL); +---- +NULL + +# flatten with scalar values #1 +query ??? +select flatten(make_array(1, 2, 1, 3, 2)), + flatten(make_array([1], [2, 3], [null], make_array(4, null, 5))), + flatten(make_array([[1.1]], [[2.2]], [[3.3], [4.4]])); +---- +[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] + +query ??? +select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'LargeList(Int64)')), + flatten(arrow_cast(make_array([1], null, [2, 3], [null], make_array(4, null, 5)), 'LargeList(LargeList(Int64))')), + flatten(arrow_cast(make_array([[1.1]], [[2.2]], [[3.3], [4.4]]), 'LargeList(LargeList(LargeList(Float64)))')); +---- +[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] + +query ??? 
+select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5, Int64)')), + flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, List(Int64))')), + flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'FixedSizeList(2, List(List(Float64)))')); +---- +[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] + +query ??TT +select flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))')), + flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))')), + arrow_typeof(flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))'))), + arrow_typeof(flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))'))); +---- +[1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] LargeList(Int64) LargeList(FixedSizeList(1 x Float64)) + +# flatten with column values +query ???? +select flatten(column1), + flatten(column2), + flatten(column3), + flatten(column4) +from flatten_table; +---- +[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] +[1, 2, 3, 4, 5, 6] [[8]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + +query ???? +select flatten(column1), + flatten(column2), + flatten(column3), + flatten(column4) +from large_flatten_table; +---- +[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] +[1, 2, 3, 4, 5, 6] [[8]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + +query ???? +select flatten(column1), + flatten(column2), + flatten(column3), + flatten(column4) +from fixed_size_flatten_table; +---- +[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] +[1, 2, 3, 4, 5, 6] [[8], [9, 10], [11, 12, 13]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + +# flatten with different inner list type +query ?????? 
+select flatten(arrow_cast(make_array([1, 2], [3, 4]), 'List(FixedSizeList(2, Int64))')), + flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'List(FixedSizeList(1, List(Int64)))')), + flatten(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), + flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'LargeList(List(List(Int64)))')), + flatten(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(FixedSizeList(2, Int64))')), + flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'LargeList(FixedSizeList(1, List(Int64)))')) +---- +[1, 2, 3, 4] [[1, 2], [3, 4]] [1, 2, 3, 4] [[1, 2], [3, 4]] [1, 2, 3, 4] [[1, 2], [3, 4]] + +## empty (aliases: `array_empty`, `list_empty`) +# empty scalar function #1 +query B +select empty(make_array(1)); +---- +false + +query B +select empty(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +false + +query B +select empty(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +false + +# empty scalar function #2 +query B +select empty(make_array()); +---- +true + +query B +select empty(arrow_cast(make_array(), 'LargeList(Int64)')); +---- +true + +#TODO: https://github.com/apache/datafusion/issues/9158 +#query B +#select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)')); +#---- +#true + +# empty scalar function #3 +query B +select empty(make_array(NULL)); +---- +false + +query B +select empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); +---- +false + +query B +select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)')); +---- +false + +#TODO: https://github.com/apache/datafusion/issues/7142 +# empty scalar function #4 +#query B +#select empty(NULL); +#---- +#NULL + +# empty scalar function #5 +query B +select empty(column1) from arrays; +---- +false +false +false +false +NULL +false +false + +query B +select empty(arrow_cast(column1, 'LargeList(List(Int64))')) from arrays; +---- +false +false +false +false +NULL +false +false + +query B +select empty(column1) from fixed_size_arrays; +---- +false 
+false +false +false +NULL +false +false + +## array_empty (aliases: `empty`, `list_empty`) +# array_empty scalar function #1 +query B +select array_empty(make_array(1)); +---- +false + +query B +select array_empty(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +false + +# array_empty scalar function #2 +query B +select array_empty(make_array()); +---- +true + +query B +select array_empty(arrow_cast(make_array(), 'LargeList(Int64)')); +---- +true + +# array_empty scalar function #3 +query B +select array_empty(make_array(NULL)); +---- +false + +query B +select array_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); +---- +false + +## list_empty (aliases: `empty`, `array_empty`) +# list_empty scalar function #1 +query B +select list_empty(make_array(1)); +---- +false + +query B +select list_empty(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +false + +# list_empty scalar function #2 +query B +select list_empty(make_array()); +---- +true + +query B +select list_empty(arrow_cast(make_array(), 'LargeList(Int64)')); +---- +true + +# list_empty scalar function #3 +query B +select list_empty(make_array(NULL)); +---- +false + +query B +select list_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); +---- +false + +# string_to_array scalar function +query ? +SELECT string_to_array('abcxxxdef', 'xxx') +---- +[abc, def] + +query I +SELECT cardinality(string_to_array('', ',')) +---- +0 + +query I +SELECT cardinality(string_to_array('', '')) +---- +0 + +query I +SELECT cardinality(string_to_array('', ',', 'x')) +---- +0 + +query I +SELECT cardinality(string_to_array('', '', 'x')) +---- +0 + +query ? +SELECT string_to_array('abc', '') +---- +[abc] + +query ? +SELECT string_to_array('abc', NULL) +---- +[a, b, c] + +query ? +SELECT string_to_array('abc def', ' ', 'def') +---- +[abc, NULL] + +query ? 
+select string_to_array(e, ',') from values; +---- +[Lorem] +[ipsum] +[dolor] +[sit] +[amet] +[, ] +[consectetur] +[adipiscing] +NULL + +# large string tests for string_to_array + +# string_to_array scalar function +query ? +SELECT string_to_array(arrow_cast('abcxxxdef', 'LargeUtf8'), 'xxx') +---- +[abc, def] + +# string_to_array scalar function +query ? +SELECT string_to_array(arrow_cast('abcxxxdef', 'LargeUtf8'), arrow_cast('xxx', 'LargeUtf8')) +---- +[abc, def] + +query ? +SELECT string_to_array(arrow_cast('abc', 'LargeUtf8'), NULL) +---- +[a, b, c] + +query ? +select string_to_array(arrow_cast(e, 'LargeUtf8'), ',') from values; +---- +[Lorem] +[ipsum] +[dolor] +[sit] +[amet] +[, ] +[consectetur] +[adipiscing] +NULL + +query ? +select string_to_array(arrow_cast(e, 'LargeUtf8'), ',', arrow_cast('Lorem', 'LargeUtf8')) from values; +---- +[NULL] +[ipsum] +[dolor] +[sit] +[amet] +[, ] +[consectetur] +[adipiscing] +NULL + +# string view tests for string_to_array + +# string_to_array scalar function +query ? +SELECT string_to_array(arrow_cast('abcxxxdef', 'Utf8View'), 'xxx') +---- +[abc, def] + +query ? +SELECT string_to_array(arrow_cast('abc', 'Utf8View'), NULL) +---- +[a, b, c] + +query ? +select string_to_array(arrow_cast(e, 'Utf8View'), ',') from values; +---- +[Lorem] +[ipsum] +[dolor] +[sit] +[amet] +[, ] +[consectetur] +[adipiscing] +NULL + +# test string_to_array aliases + +query ? +select string_to_list(e, 'm') from values; +---- +[Lore, ] +[ipsu, ] +[dolor] +[sit] +[a, et] +[,] +[consectetur] +[adipiscing] +NULL + +# string_to_array: single-char delimiter producing multiple elements +query ? +SELECT string_to_array('a,b,c', ',') +---- +[a, b, c] + +# string_to_array: delimiter not found in input +query ? +SELECT string_to_array('abc', ',') +---- +[abc] + +# string_to_array: empty string input +query ? +SELECT string_to_array('', ',') +---- +[] + +# string_to_array: null_str matching multiple elements +query ?
+SELECT string_to_array('a,NULL,b,NULL,c', ',', 'NULL') +---- +[a, NULL, b, NULL, c] + +# string_to_array: null_str matching all elements +query ? +SELECT string_to_array('x,x,x', ',', 'x') +---- +[NULL, NULL, NULL] + +# string_to_array: null_str with empty-string delimiter +query ? +SELECT string_to_array('abc', '', 'abc') +---- +[NULL] + +# string_to_array: NULL string input +query ? +SELECT string_to_array(NULL, ',') +---- +NULL + +# string_to_array: columnar delimiter +query ?? +SELECT string_to_array('a,b,c', col1), string_to_array('a::b::c', col2) + FROM (VALUES (',', '::')) AS t(col1, col2) +---- +[a, b, c] [a, b, c] + +# string_to_array: columnar null_str +query ? +SELECT string_to_array('a,NULL,b', ',', col1) + FROM (VALUES ('NULL')) AS t(col1) +---- +[a, NULL, b] + +# string_to_array: adjacent delimiters produce empty strings +query ? +SELECT string_to_array('a,,b', ',') +---- +[a, , b] + +# string_to_array: delimiter at start and end +query ? +SELECT string_to_array(',a,b,', ',') +---- +[, a, b, ] + +# array_resize scalar function #1 +query ? +select array_resize(make_array(1, 2, 3), 1); +---- +[1] + +query ? +select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 1); +---- +[1] + +query ? +select array_resize(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1); +---- +[1] + +# array_resize scalar function #2 +query ? +select array_resize(make_array(1, 2, 3), 5); +---- +[1, 2, 3, NULL, NULL] + +query ? +select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5); +---- +[1, 2, 3, NULL, NULL] + +query ? +select array_resize(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 5); +---- +[1, 2, 3, NULL, NULL] + +# array_resize scalar function #3 +query ? +select array_resize(make_array(1, 2, 3), 5, 4); +---- +[1, 2, 3, 4, 4] + +query ? 
+select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5, 4); +---- +[1, 2, 3, 4, 4] + +# array_resize scalar function #4 +query error +select array_resize(make_array(1, 2, 3), -5, 2); + +# array_resize scalar function #5 +query ? +select array_resize(make_array(1.1, 2.2, 3.3), 10, 9.9); +---- +[1.1, 2.2, 3.3, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9] + +query ? +select array_resize(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), 10, 9.9); +---- +[1.1, 2.2, 3.3, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9] + +# array_resize scalar function #5 +query ? +select array_resize(column1, column2, column3) from arrays_values; +---- +[NULL] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] +NULL +[] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] + +query ? 
+select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from arrays_values; +---- +[NULL] +[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] +[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] +[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] +NULL +[] +[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] + +# array_resize scalar function #5 +query ? +select array_resize([[1], [2], [3]], 10, [5]); +---- +[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] + +query ? +select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [5]); +---- +[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] + +# array_resize null value +query ? +select array_resize(arrow_cast(NULL, 'List(Int8)'), 1); +---- +NULL + +statement ok +CREATE TABLE array_resize_values +AS VALUES + (make_array(1, NULL, 3, 4, 5, 6, 7, 8, 9, 10), 2, 1), + (make_array(11, 12, NULL, 14, 15, 16, 17, 18, 19, 20), 5, 2), + (make_array(21, 22, 23, 24, NULL, 26, 27, 28, 29, 30), 8, 3), + (make_array(31, 32, 33, 34, 35, 36, NULL, 38, 39, 40), 12, 4), + (NULL, 3, 0), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), + (make_array(51, 52, 53, 54, 55, NULL, 57, 58, 59, 60), 13, NULL), + (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 15, 7) +; + +# array_resize columnar test #1 +query ? 
+select array_resize(column1, column2, column3) from array_resize_values; +---- +[1, NULL] +[11, 12, NULL, 14, 15] +[21, 22, 23, 24, NULL, 26, 27, 28] +[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] +NULL +[] +[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] + +# array_resize columnar test #2 +query ? +select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from array_resize_values; +---- +[1, NULL] +[11, 12, NULL, 14, 15] +[21, 22, 23, 24, NULL, 26, 27, 28] +[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] +NULL +[] +[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] + +## array_reverse +query ?? +select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1)); +---- +[3, 2, 1] [1] + +query ?? +select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +[3, 2, 1] [1] + +query ???? +select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), + array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')), + array_reverse(arrow_cast(make_array(1, NULL, 3), 'FixedSizeList(3, Int64)')), + array_reverse(arrow_cast(make_array(NULL, NULL, NULL), 'FixedSizeList(3, Int64)')); +---- +[3, 2, 1] [1] [3, NULL, 1] [NULL, NULL, NULL] + +query ?? +select array_reverse(NULL), array_reverse([]); +---- +NULL [] + +query ?? 
+select array_reverse(column1), column1 from arrays_values; +---- +[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[20, NULL, 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] +[30, 29, 28, 27, 26, 25, NULL, 23, 22, 21] [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] +[40, 39, 38, 37, NULL, 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] +NULL NULL +[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[60, 59, 58, 57, 56, 55, 54, NULL, 52, 51] [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] +[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +statement ok +CREATE TABLE test_reverse_fixed_size AS VALUES + (arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)')), + (arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)')), + (arrow_cast([NULL, 8, 9], 'FixedSizeList(3, Int64)')), + (NULL); + +query ? +SELECT array_reverse(column1) FROM test_reverse_fixed_size; +---- +[3, 2, 1] +[6, 5, 4] +[9, 8, NULL] +NULL + +statement ok +DROP TABLE test_reverse_fixed_size; + +# Test defining a table with array columns +statement ok +create table test_create_array_table( + a int[], + b text[], + -- two-dimensional array + c int[][], + d int +); + +query I +insert into test_create_array_table values + ([1, 2, 3], ['a', 'b', 'c'], [[4,6], [6,7,8]], 1); +---- +1 + +query ???I +select * from test_create_array_table; +---- +[1, 2, 3] [a, b, c] [[4, 6], [6, 7, 8]] 1 + +query T +select arrow_typeof(a) from test_create_array_table; +---- +List(Int32) + +query T +select arrow_typeof(c) from test_create_array_table; +---- +List(List(Int32)) + +# Test casting to array types +# issue: https://github.com/apache/datafusion/issues/9440 +query ??T +select [1,2,3]::int[], [['1']]::int[][], arrow_typeof([]::text[]); +---- +[1, 2, 3] [[1]] List(Utf8View) + +# test empty arrays return length +# issue: https://github.com/apache/datafusion/pull/12459 +statement ok +create table values_all_empty (a 
int[]) as values ([]), ([]); + +query B +select array_has(a, 1) from values_all_empty; +---- +false +false + +# Test create table with fixed sized array +statement ok +create table fixed_size_col_table (a int[3]) as values ([1,2,3]), ([4,5,6]); + +query T +select arrow_typeof(a) from fixed_size_col_table; +---- +FixedSizeList(3 x Int32) +FixedSizeList(3 x Int32) + +query ? rowsort +SELECT DISTINCT a FROM fixed_size_col_table +---- +[1, 2, 3] +[4, 5, 6] + +query ?I rowsort +SELECT a, count(*) FROM fixed_size_col_table GROUP BY a +---- +[1, 2, 3] 1 +[4, 5, 6] 1 + +statement error Cast error: Cannot cast to FixedSizeList\(3\): value at index 0 has length 2 +create table varying_fixed_size_col_table (a int[3]) as values ([1,2,3]), ([4,5]); + +# https://github.com/apache/datafusion/issues/16187 +# should be NULL in case of out of bounds for Null Type +query ? +select [named_struct('a', 1, 'b', null)][-2]; +---- +NULL + +statement ok +COPY (select [[true, false], [false, true]] a, [false, true] b union select [[null, null]], null) to 'test_files/scratch/array/array_has/single_file.parquet' stored as parquet; + +statement ok +CREATE EXTERNAL TABLE array_has STORED AS PARQUET location 'test_files/scratch/array/array_has/single_file.parquet'; + +query B +select array_contains(a, b) from array_has order by 1 nulls last; +---- +true +NULL + +# Expected output (once supported): +# ---- +# [5, 4, 3, 2, 1] +query error +select array_reverse(arrow_cast(make_array(1, 2, 3, 4, 5), 'ListView(Int64)')); + +### Delete tables + +statement ok +drop table values; + +statement ok +drop table values_without_nulls; + +statement ok +drop table nested_arrays; + +statement ok +drop table large_nested_arrays; + +statement ok +drop table fixed_size_nested_arrays; + +statement ok +drop table arrays; + +statement ok +drop table large_arrays; + +statement ok +drop table fixed_size_arrays; + +statement ok +drop table slices; + +statement ok +drop table fixed_slices; + +statement ok +drop table 
arrayspop; + +statement ok +drop table large_arrayspop; + +statement ok +drop table arrays_values; + +statement ok +drop table arrays_values_v2; + +statement ok +drop table large_arrays_values_v2; + +statement ok +drop table array_has_table_1D; + +statement ok +drop table array_has_table_1D_Float; + +statement ok +drop table array_has_table_1D_Boolean; + +statement ok +drop table array_has_table_1D_UTF8; + +statement ok +drop table array_has_table_2D; + +statement ok +drop table array_has_table_2D_float; + +statement ok +drop table array_has_table_3D; + +statement ok +drop table array_intersect_table_1D; + +statement ok +drop table large_array_intersect_table_1D; + +statement ok +drop table array_intersect_table_1D_Float; + +statement ok +drop table large_array_intersect_table_1D_Float; + +statement ok +drop table array_intersect_table_1D_Boolean; + +statement ok +drop table large_array_intersect_table_1D_Boolean; + +statement ok +drop table array_intersect_table_1D_UTF8; + +statement ok +drop table large_array_intersect_table_1D_UTF8; + +statement ok +drop table array_intersect_table_2D; + +statement ok +drop table large_array_intersect_table_2D; + +statement ok +drop table array_intersect_table_2D_float; + +statement ok +drop table large_array_intersect_table_2D_float; + +statement ok +drop table array_intersect_table_3D; + +statement ok +drop table large_array_intersect_table_3D; + +statement ok +drop table fixed_size_array_has_table_1D; + +statement ok +drop table fixed_size_array_has_table_1D_Float; + +statement ok +drop table fixed_size_array_has_table_1D_Boolean; + +statement ok +drop table fixed_size_array_has_table_1D_UTF8; + +statement ok +drop table fixed_size_array_has_table_2D; + +statement ok +drop table fixed_size_array_has_table_2D_float; + +statement ok +drop table fixed_size_array_has_table_3D; + +statement ok +drop table arrays_range; + +statement ok +drop table arrays_with_repeating_elements; + +statement ok +drop table 
large_arrays_with_repeating_elements; + +statement ok +drop table fixed_arrays_with_repeating_elements; + +statement ok +drop table nested_arrays_with_repeating_elements; + +statement ok +drop table large_nested_arrays_with_repeating_elements; + +statement ok +drop table fixed_size_nested_arrays_with_repeating_elements; + +statement ok +drop table flatten_table; + +statement ok +drop table large_flatten_table; + +statement ok +drop table fixed_size_flatten_table; + +statement ok +drop table arrays_values_without_nulls; + +statement ok +drop table large_arrays_values_without_nulls; + +statement ok +drop table fixed_size_arrays_values_without_nulls; + +statement ok +drop table test_create_array_table; + +statement ok +drop table values_all_empty; + +statement ok +drop table fixed_size_col_table; + +statement ok +drop table array_has; From df3fff39424eacdd1addeb29f1f7ff58f9c51c11 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 00:46:31 +0200 Subject: [PATCH 32/83] Register cache from cachemanager at listing table --- datafusion/core/src/execution/context/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 767e52005f52c..9604ee639e014 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1762,7 +1762,9 @@ impl SessionContext { let config = ListingTableConfig::new(table_path) .with_listing_options(options) .with_schema(resolved_schema); - let table = ListingTable::try_new(config)?.with_definition(sql_definition); + let table = ListingTable::try_new(config)?.with_definition(sql_definition).with_cache( + self.runtime_env().cache_manager.get_file_statistic_cache() + ); self.register_table(table_ref, Arc::new(table))?; Ok(()) } From d1b3a05dc587fb9056b4cc018a7758bea9f269aa Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 01:44:45 +0200 Subject: [PATCH 33/83] 
Revert slt --- datafusion/sqllogictest/test_files/array.slt | 4 ---- datafusion/sqllogictest/test_files/encrypted_parquet.slt | 4 ---- .../sqllogictest/test_files/parquet_filter_pushdown.slt | 3 --- .../sqllogictest/test_files/parquet_sorted_statistics.slt | 4 ---- 4 files changed, 15 deletions(-) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 45cf02700c39a..81d5c8f91a5bc 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -689,10 +689,6 @@ AS FROM arrays_distance_table ; -# Disable file statistics cache because file statistics have been previously created -statement ok -set datafusion.runtime.file_statistics_cache_limit = "0K"; - # Array literal diff --git a/datafusion/sqllogictest/test_files/encrypted_parquet.slt b/datafusion/sqllogictest/test_files/encrypted_parquet.slt index fd375778b7a53..d580b7d1ad2b8 100644 --- a/datafusion/sqllogictest/test_files/encrypted_parquet.slt +++ b/datafusion/sqllogictest/test_files/encrypted_parquet.slt @@ -77,10 +77,6 @@ ORDER BY double_field 3 4 5 6 -# Disable file statistics cache because file statistics have been previously created -statement ok -set datafusion.runtime.file_statistics_cache_limit = "0K"; - statement count 0 CREATE EXTERNAL TABLE parquet_table ( diff --git a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt index 80a1a838cb7e9..85f9549357138 100644 --- a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt +++ b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt @@ -37,9 +37,6 @@ COPY ( ) TO 'test_files/scratch/parquet_filter_pushdown/parquet_table/2.parquet' STORED AS PARQUET; -statement ok -set datafusion.runtime.file_statistics_cache_limit = "0K"; - ## Create table without filter pushdown ## (pushdown setting is part of the table, but is copied from the session settings) diff --git 
a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index 53ec7e72d9f16..a4a613e383ec8 100644 --- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -177,10 +177,6 @@ physical_plan statement ok DROP TABLE test_table; -# Disable file statistics cache because file statistics have been previously created -statement ok -set datafusion.runtime.file_statistics_cache_limit = "0K"; - statement ok CREATE EXTERNAL TABLE test_table ( partition_col TEXT NOT NULL, From a6a4b2f3d7e3b1a4e9597efaaec112780345caf3 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 18 Feb 2026 14:10:14 +0100 Subject: [PATCH 34/83] Add tablescoping for file stats cache --- datafusion/catalog-listing/src/helpers.rs | 3 +- datafusion/catalog-listing/src/table.rs | 6 +- datafusion/core/src/execution/context/mod.rs | 4 +- datafusion/datasource/src/mod.rs | 14 +- .../execution/src/cache/cache_manager.rs | 5 +- datafusion/execution/src/cache/cache_unit.rs | 141 ++++++++++++------ 6 files changed, 121 insertions(+), 52 deletions(-) diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index c6305c30008ce..b0118fdbb556d 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -382,7 +382,8 @@ fn try_into_partitioned_file( let mut pf: PartitionedFile = object_meta.into(); pf.partition_values = partition_values; - + pf.table_reference = table_path.get_table_ref().clone(); + Ok(Some(pf)) } diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 0ed2b452bc626..84d2b467524d8 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -799,12 +799,12 @@ impl ListingTable { ) -> datafusion_common::Result<(Arc, Option)> { use 
datafusion_execution::cache::cache_manager::CachedFileMetadata; - let path = &part_file.object_meta.location; + let path = TableScopedPath { table: part_file.table_reference.clone(), path : part_file.object_meta.location.clone()}; let meta = &part_file.object_meta; // Check cache first - if we have valid cached statistics and ordering if let Some(cache) = &self.collected_statistics - && let Some(cached) = cache.get(path) + && let Some(cached) = cache.get(&path) && cached.is_valid_for(meta) { // Return cached statistics and ordering @@ -823,7 +823,7 @@ impl ListingTable { // Store in cache if let Some(cache) = &self.collected_statistics { cache.put( - path, + &path, CachedFileMetadata::new( meta.clone(), Arc::clone(&statistics), diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 9604ee639e014..a998f9243fb6c 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1428,8 +1428,10 @@ impl SessionContext { schema.deregister_table(&table)?; if table_type == TableType::Base && let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() + && let Some(fsc) = self.runtime_env().cache_manager.get_file_statistic_cache() { - lfc.drop_table_entries(&Some(table_ref))?; + lfc.drop_table_entries(&Some(table_ref.clone()))?; + fsc.drop_table_entries(&Some(table_ref.clone()))?; } return Ok(true); } diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index a9600271c28ce..575ffbb66fa3e 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -56,7 +56,7 @@ pub use self::url::ListingTableUrl; use crate::file_groups::FileGroup; use chrono::TimeZone; use datafusion_common::stats::Precision; -use datafusion_common::{ColumnStatistics, Result, exec_datafusion_err}; +use datafusion_common::{ColumnStatistics, Result, exec_datafusion_err, TableReference}; use datafusion_common::{ScalarValue, Statistics}; use 
datafusion_physical_expr::LexOrdering; use futures::{Stream, StreamExt}; @@ -152,6 +152,7 @@ pub struct PartitionedFile { pub extensions: Option>, /// The estimated size of the parquet metadata, in bytes pub metadata_size_hint: Option, + pub table_reference: Option, } impl PartitionedFile { @@ -171,6 +172,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, + table_reference: None } } @@ -184,6 +186,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, + table_reference: None } } @@ -203,6 +206,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, + table_reference: None } .with_range(start, end) } @@ -214,6 +218,12 @@ impl PartitionedFile { self } + pub fn with_table_reference(mut self, table_reference: Option) -> Self { + self.table_reference = table_reference; + self + } + + /// Size of the file to be scanned (taking into account the range, if present). pub fn effective_size(&self) -> u64 { if let Some(range) = &self.range { @@ -340,6 +350,7 @@ impl From for PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, + table_reference: None, } } } @@ -537,6 +548,7 @@ pub fn generate_test_files(num_files: usize, overlap_factor: f64) -> Vec { +pub trait FileStatisticsCache: CacheAccessor { /// Cache memory limit in bytes. fn cache_limit(&self) -> usize; @@ -104,6 +104,9 @@ pub trait FileStatisticsCache: CacheAccessor { /// Retrieves the information about the entries currently cached. 
fn list_entries(&self) -> HashMap; + + fn drop_table_entries(&self, table_ref: &Option) -> Result<()>; + } impl DFHeapSize for CachedFileMetadata { diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 1c1b6b9e6e692..7a5ddcdfedbfc 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::cache::CacheAccessor; +use crate::cache::{CacheAccessor, TableScopedPath}; use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; @@ -26,6 +26,7 @@ use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; use crate::cache::lru_queue::LruQueue; use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::TableReference; /// Default implementation of [`FileStatisticsCache`] /// @@ -65,7 +66,7 @@ impl DefaultFileStatisticsCache { } struct DefaultFileStatisticsCacheState { - lru_queue: LruQueue, + lru_queue: LruQueue, memory_limit: usize, memory_used: usize, } @@ -90,16 +91,16 @@ impl DefaultFileStatisticsCacheState { memory_used: 0, } } - fn get(&mut self, key: &Path) -> Option { + fn get(&mut self, key: &TableScopedPath) -> Option { self.lru_queue.get(key).cloned() } fn put( &mut self, - key: &Path, + key: &TableScopedPath, value: CachedFileMetadata, ) -> Option { - let key_size = key.as_ref().heap_size(); + let key_size = key.path.as_ref().heap_size(); let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { @@ -114,7 +115,7 @@ impl DefaultFileStatisticsCacheState { if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); } else { - self.memory_used += key.as_ref().heap_size(); + self.memory_used += key.path.as_ref().heap_size(); } self.evict_entries(); @@ -122,9 +123,9 @@ impl DefaultFileStatisticsCacheState { old_value } - fn remove(&mut self, 
k: &Path) -> Option { + fn remove(&mut self, k: &TableScopedPath) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { - self.memory_used -= k.as_ref().heap_size(); + self.memory_used -= k.path.as_ref().heap_size(); self.memory_used -= old_entry.heap_size(); Some(old_entry) } else { @@ -132,7 +133,7 @@ impl DefaultFileStatisticsCacheState { } } - fn contains_key(&self, k: &Path) -> bool { + fn contains_key(&self, k: &TableScopedPath) -> bool { self.lru_queue.contains_key(k) } @@ -148,7 +149,7 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { - self.memory_used -= removed.0.as_ref().heap_size(); + self.memory_used -= removed.0.path.as_ref().heap_size(); self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen @@ -168,23 +169,23 @@ impl DefaultFileStatisticsCacheState { } } } -impl CacheAccessor for DefaultFileStatisticsCache { - fn get(&self, key: &Path) -> Option { +impl CacheAccessor for DefaultFileStatisticsCache { + fn get(&self, key: &TableScopedPath) -> Option { let mut state = self.state.lock().unwrap(); state.get(key) } - fn put(&self, key: &Path, value: CachedFileMetadata) -> Option { + fn put(&self, key: &TableScopedPath, value: CachedFileMetadata) -> Option { let mut state = self.state.lock().unwrap(); state.put(key, value) } - fn remove(&self, key: &Path) -> Option { + fn remove(&self, key: &TableScopedPath) -> Option { let mut state = self.state.lock().unwrap(); state.remove(key) } - fn contains_key(&self, k: &Path) -> bool { + fn contains_key(&self, k: &TableScopedPath) -> bool { let state = self.state.lock().unwrap(); state.contains_key(k) } @@ -222,7 +223,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { let path = entry.0.clone(); let cached = entry.1.clone(); entries.insert( - path, + path.path, FileStatisticsCacheEntry { object_meta: 
cached.meta.clone(), num_rows: cached.statistics.num_rows, @@ -236,6 +237,20 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { entries } + + fn drop_table_entries(&self, table_ref: &Option) -> datafusion_common::Result<()> { + let mut state = self.state.lock().unwrap(); + let mut table_paths = vec![]; + for (path, _) in state.lru_queue.list_entries() { + if path.table == *table_ref { + table_paths.push(path.clone()); + } + } + for path in table_paths { + state.remove(&path); + } + Ok(()) + } } #[cfg(test)] @@ -279,8 +294,13 @@ mod tests { false, )]); + let path = TableScopedPath{ + path: meta.location.clone(), + table: None, + }; + // Cache miss - assert!(cache.get(&meta.location).is_none()); + assert!(cache.get(&path).is_none()); // Put a value let cached_value = CachedFileMetadata::new( @@ -288,17 +308,24 @@ mod tests { Arc::new(Statistics::new_unknown(&schema)), None, ); - cache.put(&meta.location, cached_value); + cache.put(&path, cached_value); // Cache hit - let result = cache.get(&meta.location); + let result = cache.get(&path); assert!(result.is_some()); let cached = result.unwrap(); assert!(cached.is_valid_for(&meta)); + // File size changed - validation should fail let meta2 = create_test_meta("test", 2048); - let cached = cache.get(&meta2.location).unwrap(); + + let path_2 = TableScopedPath{ + path: meta2.location.clone(), + table: None, + }; + + let cached = cache.get(&path_2).unwrap(); assert!(!cached.is_valid_for(&meta2)); // Update with new value @@ -307,12 +334,18 @@ mod tests { Arc::new(Statistics::new_unknown(&schema)), None, ); - cache.put(&meta2.location, cached_value2); + cache.put(&path_2, cached_value2); // Test list_entries let entries = cache.list_entries(); assert_eq!(entries.len(), 1); - let entry = entries.get(&Path::from("test")).unwrap(); + + let path_3 = TableScopedPath{ + path: Path::from("test"), + table: None, + }; + + let entry = entries.get(&path_3.path).unwrap(); assert_eq!(entry.object_meta.size, 2048); // Should be 
updated value } @@ -379,31 +412,37 @@ mod tests { Arc::new(Statistics::new_unknown(&schema)), None, // No ordering yet ); - cache.put(&meta.location, cached_value); - let result = cache.get(&meta.location).unwrap(); + let path = TableScopedPath { + path: meta.location.clone(), + table: None, + }; + + cache.put(&path, cached_value); + + let result = cache.get(&path).unwrap(); assert!(result.ordering.is_none()); // Update to add ordering - let mut cached = cache.get(&meta.location).unwrap(); + let mut cached = cache.get(&path).unwrap(); if cached.is_valid_for(&meta) && cached.ordering.is_none() { cached.ordering = Some(ordering()); } - cache.put(&meta.location, cached); + cache.put(&path, cached); - let result2 = cache.get(&meta.location).unwrap(); + let result2 = cache.get(&path).unwrap(); assert!(result2.ordering.is_some()); // Verify list_entries shows has_ordering = true let entries = cache.list_entries(); assert_eq!(entries.len(), 1); - assert!(entries.get(&meta.location).unwrap().has_ordering); + assert!(entries.get(&path.path).unwrap().has_ordering); } #[test] fn test_cache_invalidation_on_file_modification() { let cache = DefaultFileStatisticsCache::default(); - let path = Path::from("test.parquet"); + let path = TableScopedPath { table: None, path : Path::from("test.parquet"), }; let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); let meta_v1 = create_test_meta("test.parquet", 100); @@ -439,12 +478,12 @@ mod tests { #[test] fn test_ordering_cache_invalidation_on_file_modification() { let cache = DefaultFileStatisticsCache::default(); - let path = Path::from("test.parquet"); + let path = TableScopedPath { path: Path::from("test.parquet"), table: None }; let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); // Cache with original metadata and ordering let meta_v1 = ObjectMeta { - location: path.clone(), + location: path.path.clone(), last_modified: DateTime::parse_from_rfc3339("2022-09-27T22:36:00+02:00") .unwrap() 
.into(), @@ -467,7 +506,7 @@ mod tests { // File modified (size changed) let meta_v2 = ObjectMeta { - location: path.clone(), + location: path.path.clone(), last_modified: DateTime::parse_from_rfc3339("2022-09-28T10:00:00+02:00") .unwrap() .into(), @@ -510,14 +549,20 @@ mod tests { Arc::new(Statistics::new_unknown(&schema)), None, ); - cache.put(&meta1.location, cached_value); + + let path_1 = TableScopedPath { path: meta1.location.clone(), table: None }; + + cache.put(&path_1, cached_value); let meta2 = create_test_meta("test2.parquet", 200); let cached_value = CachedFileMetadata::new( meta2.clone(), Arc::new(Statistics::new_unknown(&schema)), Some(ordering()), ); - cache.put(&meta2.location, cached_value); + + let path_2 = TableScopedPath { path: meta2.location.clone(), table: None }; + + cache.put(&path_2, cached_value); let entries = cache.list_entries(); assert_eq!( @@ -562,33 +607,37 @@ mod tests { // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); - - cache.put(&meta_1.location, value_1.clone()); - cache.put(&meta_2.location, value_2.clone()); + let path_1 = TableScopedPath { path: meta_1.location.clone(), table: None }; + let path_2 = TableScopedPath { path: meta_2.location.clone(), table: None }; + cache.put(&path_1, value_1.clone()); + cache.put(&path_2, value_2.clone()); assert_eq!(cache.len(), 2); assert_eq!(cache.memory_used(), limit_for_2_entries); - let result_1 = cache.get(&meta_1.location); - let result_2 = cache.get(&meta_2.location); + let result_1 = cache.get(&path_1); + let result_2 = cache.get(&path_2); assert_eq!(result_1.unwrap(), value_1); assert_eq!(result_2.unwrap(), value_2); + let path_3 = TableScopedPath { path: meta_3.location.clone(), table: None }; + + // adding the third entry evicts the first entry - cache.put(&meta_3.location, value_3.clone()); + cache.put(&path_3, value_3.clone()); assert_eq!(cache.len(), 2); assert_eq!(cache.memory_used(), 
limit_for_2_entries); - let result_1 = cache.get(&meta_1.location); + let result_1 = cache.get(&path_1); assert!(result_1.is_none()); - let result_2 = cache.get(&meta_2.location); - let result_3 = cache.get(&meta_3.location); + let result_2 = cache.get(&path_2); + let result_3 = cache.get(&path_3); assert_eq!(result_2.unwrap(), value_2); assert_eq!(result_3.unwrap(), value_3); - cache.remove(&meta_2.location); + cache.remove(&path_2); assert_eq!(cache.len(), 1); assert_eq!( cache.memory_used(), @@ -609,7 +658,9 @@ mod tests { // create a cache with a size less than the entry let cache = DefaultFileStatisticsCache::new(limit_less_than_the_entry); - cache.put(&meta.location, value); + let path_1 = TableScopedPath { path: meta.location.clone(), table: None }; + + cache.put(&path_1, value); assert_eq!(cache.len(), 0); assert_eq!(cache.memory_used(), 0); From 8874a7654597d98804b3cfb12bc0141a8ac5f3fc Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 11:35:48 +0200 Subject: [PATCH 35/83] Adapt slt --- datafusion/sqllogictest/test_files/encrypted_parquet.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/encrypted_parquet.slt b/datafusion/sqllogictest/test_files/encrypted_parquet.slt index d580b7d1ad2b8..f51d84f0c88eb 100644 --- a/datafusion/sqllogictest/test_files/encrypted_parquet.slt +++ b/datafusion/sqllogictest/test_files/encrypted_parquet.slt @@ -85,5 +85,5 @@ float_field float ) STORED AS PARQUET LOCATION 'test_files/scratch/encrypted_parquet/' -query error DataFusion error: Parquet error: Parquet error: Parquet file has an encrypted footer but decryption properties were not provided +query error Parquet error: Parquet error: Parquet file has an encrypted footer but decryption properties were not provided SELECT * FROM parquet_table From d37e7a73ff570c6abc423e67ad2bb26839ed9d97 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 12:35:07 +0200 Subject: [PATCH 36/83] Fix linter --- 
datafusion-cli/src/main.rs | 5 +- datafusion/catalog-listing/src/helpers.rs | 2 +- datafusion/catalog-listing/src/table.rs | 5 +- datafusion/core/src/execution/context/mod.rs | 11 ++-- datafusion/datasource/src/mod.rs | 14 ++-- .../execution/src/cache/cache_manager.rs | 5 +- datafusion/execution/src/cache/cache_unit.rs | 66 ++++++++++++++----- 7 files changed, 72 insertions(+), 36 deletions(-) diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 1909ed392afd3..a2ec8445e9437 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -443,10 +443,7 @@ mod tests { use super::*; use datafusion::{ common::test_util::batches_to_string, - execution::cache::{ - DefaultListFilesCache, cache_manager::CacheManagerConfig, - cache_unit::DefaultFileStatisticsCache, - }, + execution::cache::{DefaultListFilesCache, cache_manager::CacheManagerConfig}, prelude::{ParquetReadOptions, col, lit, split_part}, }; use insta::assert_snapshot; diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index b0118fdbb556d..110bdcb6f9fc0 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -383,7 +383,7 @@ fn try_into_partitioned_file( let mut pf: PartitionedFile = object_meta.into(); pf.partition_values = partition_values; pf.table_reference = table_path.get_table_ref().clone(); - + Ok(Some(pf)) } diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 84d2b467524d8..7ee743a6abe71 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -799,7 +799,10 @@ impl ListingTable { ) -> datafusion_common::Result<(Arc, Option)> { use datafusion_execution::cache::cache_manager::CachedFileMetadata; - let path = TableScopedPath { table: part_file.table_reference.clone(), path : part_file.object_meta.location.clone()}; + let path = TableScopedPath { + table: 
part_file.table_reference.clone(), + path: part_file.object_meta.location.clone(), + }; let meta = &part_file.object_meta; // Check cache first - if we have valid cached statistics and ordering diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index a998f9243fb6c..f3521c3c93ab9 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1185,7 +1185,7 @@ impl SessionContext { builder.with_object_list_cache_ttl(Some(duration)) } "file_statistics_cache_limit" => { - let limit = Self::parse_memory_limit(value)?; + let limit = Self::parse_capacity_limit(variable, value)?; builder.with_file_statistics_cache_limit(limit) } _ => return plan_err!("Unknown runtime configuration: {variable}"), @@ -1428,7 +1428,8 @@ impl SessionContext { schema.deregister_table(&table)?; if table_type == TableType::Base && let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() - && let Some(fsc) = self.runtime_env().cache_manager.get_file_statistic_cache() + && let Some(fsc) = + self.runtime_env().cache_manager.get_file_statistic_cache() { lfc.drop_table_entries(&Some(table_ref.clone()))?; fsc.drop_table_entries(&Some(table_ref.clone()))?; @@ -1764,9 +1765,9 @@ impl SessionContext { let config = ListingTableConfig::new(table_path) .with_listing_options(options) .with_schema(resolved_schema); - let table = ListingTable::try_new(config)?.with_definition(sql_definition).with_cache( - self.runtime_env().cache_manager.get_file_statistic_cache() - ); + let table = ListingTable::try_new(config)? 
+ .with_definition(sql_definition) + .with_cache(self.runtime_env().cache_manager.get_file_statistic_cache()); self.register_table(table_ref, Arc::new(table))?; Ok(()) } diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index 575ffbb66fa3e..d971762782258 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -56,7 +56,7 @@ pub use self::url::ListingTableUrl; use crate::file_groups::FileGroup; use chrono::TimeZone; use datafusion_common::stats::Precision; -use datafusion_common::{ColumnStatistics, Result, exec_datafusion_err, TableReference}; +use datafusion_common::{ColumnStatistics, Result, TableReference, exec_datafusion_err}; use datafusion_common::{ScalarValue, Statistics}; use datafusion_physical_expr::LexOrdering; use futures::{Stream, StreamExt}; @@ -172,7 +172,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, - table_reference: None + table_reference: None, } } @@ -186,7 +186,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, - table_reference: None + table_reference: None, } } @@ -206,7 +206,7 @@ impl PartitionedFile { ordering: None, extensions: None, metadata_size_hint: None, - table_reference: None + table_reference: None, } .with_range(start, end) } @@ -218,12 +218,14 @@ impl PartitionedFile { self } - pub fn with_table_reference(mut self, table_reference: Option) -> Self { + pub fn with_table_reference( + mut self, + table_reference: Option, + ) -> Self { self.table_reference = table_reference; self } - /// Size of the file to be scanned (taking into account the range, if present). 
pub fn effective_size(&self) -> u64 { if let Some(range) = &self.range { diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 32f2efcaeebbb..89c9d6a314be4 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -95,7 +95,9 @@ impl CachedFileMetadata { /// 3. If invalid or missing, compute new value and call `put(path, new_value)` /// /// See [`crate::runtime_env::RuntimeEnv`] for more details -pub trait FileStatisticsCache: CacheAccessor { +pub trait FileStatisticsCache: + CacheAccessor +{ /// Cache memory limit in bytes. fn cache_limit(&self) -> usize; @@ -106,7 +108,6 @@ pub trait FileStatisticsCache: CacheAccessor HashMap; fn drop_table_entries(&self, table_ref: &Option) -> Result<()>; - } impl DFHeapSize for CachedFileMetadata { diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 7a5ddcdfedbfc..0d3cba519ad92 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -15,18 +15,18 @@ // specific language governing permissions and limitations // under the License. 
-use crate::cache::{CacheAccessor, TableScopedPath}; use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; +use crate::cache::{CacheAccessor, TableScopedPath}; use object_store::path::Path; use std::collections::HashMap; use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; use crate::cache::lru_queue::LruQueue; -use datafusion_common::heap_size::DFHeapSize; use datafusion_common::TableReference; +use datafusion_common::heap_size::DFHeapSize; /// Default implementation of [`FileStatisticsCache`] /// @@ -175,7 +175,11 @@ impl CacheAccessor for DefaultFileStatistic state.get(key) } - fn put(&self, key: &TableScopedPath, value: CachedFileMetadata) -> Option { + fn put( + &self, + key: &TableScopedPath, + value: CachedFileMetadata, + ) -> Option { let mut state = self.state.lock().unwrap(); state.put(key, value) } @@ -238,7 +242,10 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { entries } - fn drop_table_entries(&self, table_ref: &Option) -> datafusion_common::Result<()> { + fn drop_table_entries( + &self, + table_ref: &Option, + ) -> datafusion_common::Result<()> { let mut state = self.state.lock().unwrap(); let mut table_paths = vec![]; for (path, _) in state.lru_queue.list_entries() { @@ -294,7 +301,7 @@ mod tests { false, )]); - let path = TableScopedPath{ + let path = TableScopedPath { path: meta.location.clone(), table: None, }; @@ -313,14 +320,14 @@ mod tests { // Cache hit let result = cache.get(&path); assert!(result.is_some()); + let cached = result.unwrap(); assert!(cached.is_valid_for(&meta)); - // File size changed - validation should fail let meta2 = create_test_meta("test", 2048); - let path_2 = TableScopedPath{ + let path_2 = TableScopedPath { path: meta2.location.clone(), table: None, }; @@ -340,7 +347,7 @@ mod tests { let entries = cache.list_entries(); assert_eq!(entries.len(), 1); - let path_3 = TableScopedPath{ + let path_3 = TableScopedPath { path: Path::from("test"), 
table: None, }; @@ -442,7 +449,10 @@ mod tests { #[test] fn test_cache_invalidation_on_file_modification() { let cache = DefaultFileStatisticsCache::default(); - let path = TableScopedPath { table: None, path : Path::from("test.parquet"), }; + let path = TableScopedPath { + path: Path::from("test.parquet"), + table: None, + }; let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); let meta_v1 = create_test_meta("test.parquet", 100); @@ -478,7 +488,10 @@ mod tests { #[test] fn test_ordering_cache_invalidation_on_file_modification() { let cache = DefaultFileStatisticsCache::default(); - let path = TableScopedPath { path: Path::from("test.parquet"), table: None }; + let path = TableScopedPath { + path: Path::from("test.parquet"), + table: None, + }; let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); // Cache with original metadata and ordering @@ -550,7 +563,10 @@ mod tests { None, ); - let path_1 = TableScopedPath { path: meta1.location.clone(), table: None }; + let path_1 = TableScopedPath { + path: meta1.location.clone(), + table: None, + }; cache.put(&path_1, cached_value); let meta2 = create_test_meta("test2.parquet", 200); @@ -560,7 +576,10 @@ mod tests { Some(ordering()), ); - let path_2 = TableScopedPath { path: meta2.location.clone(), table: None }; + let path_2 = TableScopedPath { + path: meta2.location.clone(), + table: None, + }; cache.put(&path_2, cached_value); @@ -607,8 +626,16 @@ mod tests { // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); - let path_1 = TableScopedPath { path: meta_1.location.clone(), table: None }; - let path_2 = TableScopedPath { path: meta_2.location.clone(), table: None }; + let path_1 = TableScopedPath { + path: meta_1.location.clone(), + table: None, + }; + + let path_2 = TableScopedPath { + path: meta_2.location.clone(), + table: None, + }; + cache.put(&path_1, value_1.clone()); cache.put(&path_2, 
value_2.clone()); @@ -620,8 +647,10 @@ mod tests { assert_eq!(result_1.unwrap(), value_1); assert_eq!(result_2.unwrap(), value_2); - let path_3 = TableScopedPath { path: meta_3.location.clone(), table: None }; - + let path_3 = TableScopedPath { + path: meta_3.location.clone(), + table: None, + }; // adding the third entry evicts the first entry cache.put(&path_3, value_3.clone()); @@ -658,7 +687,10 @@ mod tests { // create a cache with a size less than the entry let cache = DefaultFileStatisticsCache::new(limit_less_than_the_entry); - let path_1 = TableScopedPath { path: meta.location.clone(), table: None }; + let path_1 = TableScopedPath { + path: meta.location.clone(), + table: None, + }; cache.put(&path_1, value); From 9f054c406d14f6bb9f32a912733bb8aabff7d79d Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 12:40:03 +0200 Subject: [PATCH 37/83] Remove uneeded clone --- datafusion/execution/src/cache/cache_unit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 0d3cba519ad92..d008a626170a4 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -225,7 +225,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { let mut entries = HashMap::::new(); for entry in self.state.lock().unwrap().lru_queue.list_entries() { let path = entry.0.clone(); - let cached = entry.1.clone(); + let cached = entry.1; entries.insert( path.path, FileStatisticsCacheEntry { From 376bd4495293b2f2f22c111d2ceeb54044f48f11 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 12:44:35 +0200 Subject: [PATCH 38/83] Rename cache_unit to file_statistics_cache --- datafusion/core/src/datasource/listing_table_factory.rs | 2 +- datafusion/core/src/execution/context/mod.rs | 2 +- datafusion/core/tests/parquet/file_statistics.rs | 2 +- datafusion/core/tests/sql/runtime_config.rs | 2 +- 
datafusion/execution/src/cache/cache_manager.rs | 2 +- .../src/cache/{cache_unit.rs => file_statistics_cache.rs} | 0 datafusion/execution/src/cache/mod.rs | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename datafusion/execution/src/cache/{cache_unit.rs => file_statistics_cache.rs} (100%) diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index ce0f214c06d26..0e8bf6a40e973 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -231,7 +231,7 @@ mod tests { }; use datafusion_execution::cache::CacheAccessor; use datafusion_execution::cache::cache_manager::CacheManagerConfig; - use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; + use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use glob::Pattern; diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index f3521c3c93ab9..a9d91a2e6af71 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -102,7 +102,7 @@ use datafusion_session::SessionStore; use async_trait::async_trait; use chrono::{DateTime, Utc}; -use datafusion_execution::cache::cache_unit::DEFAULT_FILE_STATISTICS_MEMORY_LIMIT; +use datafusion_execution::cache::file_statistics_cache::DEFAULT_FILE_STATISTICS_MEMORY_LIMIT; use object_store::ObjectStore; use parking_lot::RwLock; use url::Url; diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index da89b89cee116..4dcdf543b929b 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -31,7 +31,7 @@ use datafusion_common::DFSchema; use 
datafusion_common::stats::Precision; use datafusion_execution::cache::DefaultListFilesCache; use datafusion_execution::cache::cache_manager::CacheManagerConfig; -use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; +use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::{Expr, col, lit}; diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index 5998148c42d0b..407d7f95106bb 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -25,7 +25,7 @@ use datafusion::execution::context::TaskContext; use datafusion::prelude::SessionConfig; use datafusion_execution::cache::DefaultListFilesCache; use datafusion_execution::cache::cache_manager::CacheManagerConfig; -use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; +use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_physical_plan::common::collect; diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 89c9d6a314be4..d8f46ff40ee4f 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -17,7 +17,7 @@ use crate::cache::CacheAccessor; use crate::cache::DefaultListFilesCache; -use crate::cache::cache_unit::{ +use crate::cache::file_statistics_cache::{ DEFAULT_FILE_STATISTICS_MEMORY_LIMIT, DefaultFileStatisticsCache, DefaultFilesMetadataCache, }; diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/file_statistics_cache.rs similarity index 100% rename from datafusion/execution/src/cache/cache_unit.rs rename to datafusion/execution/src/cache/file_statistics_cache.rs diff 
--git a/datafusion/execution/src/cache/mod.rs b/datafusion/execution/src/cache/mod.rs index 0380e50c0935c..76bd660e6c7d5 100644 --- a/datafusion/execution/src/cache/mod.rs +++ b/datafusion/execution/src/cache/mod.rs @@ -16,7 +16,7 @@ // under the License. pub mod cache_manager; -pub mod cache_unit; +pub mod file_statistics_cache; pub mod lru_queue; mod file_metadata_cache; From 8177a7b86a771dacccaeb35c6d4b11224c75a5b3 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 9 Apr 2026 13:01:12 +0200 Subject: [PATCH 39/83] Simplify heap size accounting --- datafusion/execution/src/cache/file_statistics_cache.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index d008a626170a4..f0529579db2ee 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -111,11 +111,10 @@ impl DefaultFileStatisticsCacheState { let old_value = self.lru_queue.put(key.clone(), value); self.memory_used += entry_size; + self.memory_used += key.path.as_ref().heap_size(); if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); - } else { - self.memory_used += key.path.as_ref().heap_size(); } self.evict_entries(); From a8d2c539fe10271447fd8cb9d656918bcbd93ee9 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Fri, 10 Apr 2026 12:18:14 +0200 Subject: [PATCH 40/83] Adapt comments in test --- datafusion-cli/src/main.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index a2ec8445e9437..6e99969aa763e 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -653,8 +653,6 @@ mod tests { Ok(()) } - /// Shows that the statistics cache is not enabled by default yet - /// See https://github.com/apache/datafusion/issues/19217 #[tokio::test] async fn test_statistics_cache_default() -> Result<(), 
DataFusionError> { let ctx = SessionContext::new(); @@ -684,8 +682,6 @@ mod tests { .await?; } - // When the cache manager creates a StatisticsCache by default, - // the contents will show up here let sql = "SELECT split_part(path, '/', -1) as filename, file_size_bytes, num_rows, num_columns, table_size_bytes from statistics_cache() order by filename"; let df = ctx.sql(sql).await?; let rbs = df.collect().await?; From 58da87f19beecc4fe1009501f9acb790a05fb83b Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Fri, 10 Apr 2026 15:06:30 +0200 Subject: [PATCH 41/83] Seperate drop table clean-ups --- datafusion/core/src/execution/context/mod.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index a9d91a2e6af71..a28b59f83c82c 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1426,17 +1426,16 @@ impl SessionContext { && table_provider.table_type() == table_type { schema.deregister_table(&table)?; - if table_type == TableType::Base - && let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() - && let Some(fsc) = - self.runtime_env().cache_manager.get_file_statistic_cache() - { - lfc.drop_table_entries(&Some(table_ref.clone()))?; - fsc.drop_table_entries(&Some(table_ref.clone()))?; + if table_type == TableType::Base { + if let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() { + lfc.drop_table_entries(&Some(table_ref.clone()))?; + } + if let Some(fsc) = self.runtime_env().cache_manager.get_file_statistic_cache() { + fsc.drop_table_entries(&Some(table_ref.clone()))?; + } + return Ok(true); } - return Ok(true); } - Ok(false) } From 66938190349235a8a5c10a16db9f732c5cfbad19 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Fri, 10 Apr 2026 20:10:15 +0200 Subject: [PATCH 42/83] fixup! 
Seperate drop table clean-ups --- datafusion/core/src/execution/context/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index a28b59f83c82c..7a37e0b7dd1fb 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1427,10 +1427,13 @@ impl SessionContext { { schema.deregister_table(&table)?; if table_type == TableType::Base { - if let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() { + if let Some(lfc) = self.runtime_env().cache_manager.get_list_files_cache() + { lfc.drop_table_entries(&Some(table_ref.clone()))?; } - if let Some(fsc) = self.runtime_env().cache_manager.get_file_statistic_cache() { + if let Some(fsc) = + self.runtime_env().cache_manager.get_file_statistic_cache() + { fsc.drop_table_entries(&Some(table_ref.clone()))?; } return Ok(true); From 1f3812ba030bf15bedaf2ce6929a61b244d55c67 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 10:33:46 +0200 Subject: [PATCH 43/83] Increase default limit to 10 mb --- datafusion/execution/src/cache/file_statistics_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index f0529579db2ee..8c45763c12538 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -71,7 +71,7 @@ struct DefaultFileStatisticsCacheState { memory_used: usize, } -pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 1024 * 1024; // 1MiB +pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 10 * 1024 * 1024; // 10MiB impl Default for DefaultFileStatisticsCacheState { fn default() -> Self { From 2034e620f76c5f6197e8076c62553ae0b9be5739 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 10:49:35 +0200 
Subject: [PATCH 44/83] Increase default limit to 20 mb --- datafusion/execution/src/cache/file_statistics_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 8c45763c12538..46f08ebf884e1 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -71,7 +71,7 @@ struct DefaultFileStatisticsCacheState { memory_used: usize, } -pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 10 * 1024 * 1024; // 10MiB +pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 20 * 1024 * 1024; // 10MiB impl Default for DefaultFileStatisticsCacheState { fn default() -> Self { From f48d223cf34b70aefe38557f11394b8d5857becd Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 11:41:30 +0200 Subject: [PATCH 45/83] Fix comment --- datafusion/execution/src/cache/file_statistics_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 46f08ebf884e1..4d3f87724b941 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -71,7 +71,7 @@ struct DefaultFileStatisticsCacheState { memory_used: usize, } -pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 20 * 1024 * 1024; // 10MiB +pub const DEFAULT_FILE_STATISTICS_MEMORY_LIMIT: usize = 20 * 1024 * 1024; // 20MiB impl Default for DefaultFileStatisticsCacheState { fn default() -> Self { From ab39f39aa11b200b7da461581e7bbc330f7fa3ed Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 11:59:10 +0200 Subject: [PATCH 46/83] Fix deregister logic --- datafusion/core/src/execution/context/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 7a37e0b7dd1fb..5674ae4cfa9b0 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1436,8 +1436,8 @@ impl SessionContext { { fsc.drop_table_entries(&Some(table_ref.clone()))?; } - return Ok(true); } + return Ok(true); } Ok(false) } From 9bb35db51d9ed113a95393d04c53e4c4d985b172 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 12:49:48 +0200 Subject: [PATCH 47/83] Fix slt --- datafusion/sqllogictest/test_files/information_schema.slt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index b4faa414e3acb..b7593e13c7296 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -331,8 +331,8 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.subset_repartition_threshold 4 datafusion.optimizer.top_down_join_key_reordering true -datafusion.runtime.file_statistics_cache_limit 1M datafusion.optimizer.use_statistics_registry false +datafusion.runtime.file_statistics_cache_limit 20M datafusion.runtime.list_files_cache_limit 1M datafusion.runtime.list_files_cache_ttl NULL datafusion.runtime.max_temp_directory_size 100G @@ -479,8 +479,8 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.subset_repartition_threshold 4 Partition count threshold for subset satisfaction optimization. 
When the current partition count is >= this threshold, DataFusion will skip repartitioning if the required partitioning expression is a subset of the current partition expression such as Hash(a) satisfies Hash(a, b). When the current partition count is < this threshold, DataFusion will repartition to increase parallelism even when subset satisfaction applies. Set to 0 to always repartition (disable subset satisfaction optimization). Set to a high value to always use subset satisfaction. Example (subset_repartition_threshold = 4): ```text Hash([a]) satisfies Hash([a, b]) because (Hash([a, b]) is subset of Hash([a]) If current partitions (3) < threshold (4), repartition: AggregateExec: mode=FinalPartitioned, gby=[a, b], aggr=[SUM(x)] RepartitionExec: partitioning=Hash([a, b], 8), input_partitions=3 AggregateExec: mode=Partial, gby=[a, b], aggr=[SUM(x)] DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 3) If current partitions (8) >= threshold (4), use subset satisfaction: AggregateExec: mode=SinglePartitioned, gby=[a, b], aggr=[SUM(x)] DataSourceExec: file_groups={...}, output_partitioning=Hash([a], 8) ``` datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys -datafusion.runtime.file_statistics_cache_limit 1M Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. datafusion.optimizer.use_statistics_registry false When set to true, the physical plan optimizer uses the pluggable `StatisticsRegistry` for statistics propagation across operators. This enables more accurate cardinality estimates compared to each operator's built-in `partition_statistics`. +datafusion.runtime.file_statistics_cache_limit 20M Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. 
datafusion.runtime.list_files_cache_limit 1M Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. datafusion.runtime.list_files_cache_ttl NULL TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. datafusion.runtime.max_temp_directory_size 100G Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. From 8b543e5292811d211ad1b5da2a629c2ea937a7eb Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 14:07:04 +0200 Subject: [PATCH 48/83] Add table reference to FileStatisticsCacheEntry --- datafusion/execution/src/cache/cache_manager.rs | 2 ++ .../execution/src/cache/file_statistics_cache.rs | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index d8f46ff40ee4f..0f80ae1ad1371 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -137,6 +137,8 @@ pub struct FileStatisticsCacheEntry { pub statistics_size_bytes: usize, /// Whether ordering information is cached for this file. pub has_ordering: bool, + /// Reference to the table associated with this statistics entry. + pub table_reference: Option } /// Cached file listing. 
diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 4d3f87724b941..142f269ff4c66 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -115,6 +115,7 @@ impl DefaultFileStatisticsCacheState { if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); + self.memory_used -= key.path.as_ref().heap_size(); } self.evict_entries(); @@ -234,6 +235,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { table_size_bytes: cached.statistics.total_byte_size, statistics_size_bytes: cached.statistics.heap_size(), has_ordering: cached.ordering.is_some(), + table_reference: path.table }, ); } @@ -595,6 +597,7 @@ mod tests { table_size_bytes: Precision::Absent, statistics_size_bytes: 304, has_ordering: false, + table_reference: None, } ), ( @@ -606,6 +609,7 @@ mod tests { table_size_bytes: Precision::Absent, statistics_size_bytes: 304, has_ordering: true, + table_reference: None, } ), ]) @@ -665,6 +669,12 @@ mod tests { assert_eq!(result_2.unwrap(), value_2); assert_eq!(result_3.unwrap(), value_3); + // add the third entry again, making sure memory usage remains the same + cache.put(&path_3, value_3.clone()); + assert_eq!(cache.memory_used(), limit_for_2_entries); + cache.put(&path_3, value_3.clone()); + assert_eq!(cache.memory_used(), limit_for_2_entries); + cache.remove(&path_2); assert_eq!(cache.len(), 1); assert_eq!( From 5144ef576eaf9476091d66612a336020dbb30942 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 14:13:52 +0200 Subject: [PATCH 49/83] fixup! 
Add table reference to FileStatisticsCacheEntry --- datafusion/execution/src/cache/cache_manager.rs | 2 +- datafusion/execution/src/cache/file_statistics_cache.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 0f80ae1ad1371..5ad5ea0293073 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -138,7 +138,7 @@ pub struct FileStatisticsCacheEntry { /// Whether ordering information is cached for this file. pub has_ordering: bool, /// Reference to the table associated with this statistics entry. - pub table_reference: Option + pub table_reference: Option, } /// Cached file listing. diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 142f269ff4c66..1faeff4fa7a93 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -235,7 +235,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { table_size_bytes: cached.statistics.total_byte_size, statistics_size_bytes: cached.statistics.heap_size(), has_ordering: cached.ordering.is_some(), - table_reference: path.table + table_reference: path.table, }, ); } From ac06906d0070cf5bde6e324712f34b8e39c26f52 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 15 Apr 2026 22:54:53 +0200 Subject: [PATCH 50/83] Fix comment --- datafusion/execution/src/cache/cache_manager.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 5ad5ea0293073..066876945f995 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -458,7 +458,7 @@ pub struct CacheManagerConfig { /// Enabling the cache avoids repeatedly reading file 
statistics in a DataFusion session. /// Default is enabled with 1MiB. Currently only Parquet files are supported. pub file_statistics_cache: Option>, - /// Limit of the file statistics cache, in bytes. Default: 1MiB. + /// Limit of the file statistics cache, in bytes. Default: 20MiB. pub file_statistics_cache_limit: usize, /// Enable caching of file metadata when listing files. /// Enabling the cache avoids repeat list and object metadata fetch operations, which may be From 3c834b3543438c222c490ab4247c5c0f3efd2a13 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 20 Apr 2026 01:53:41 +0200 Subject: [PATCH 51/83] Fix runtime_env entry --- datafusion/execution/src/runtime_env.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index e393a7a127873..0fbfc26d6d6b8 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -535,7 +535,7 @@ impl RuntimeEnvBuilder { Some("50M".to_owned()), Some("1M".to_owned()), None, - Some("1M".to_owned()), + Some("20M".to_owned()), ) } From ae34df2af28bd70544b42102618174739f76bb9a Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 21 Apr 2026 10:38:46 +0200 Subject: [PATCH 52/83] Add cache for all benchmark runs --- benchmarks/src/bin/external_aggr.rs | 4 +++- benchmarks/src/imdb/run.rs | 4 +++- benchmarks/src/sort_pushdown.rs | 4 +++- benchmarks/src/sort_tpch.rs | 4 +++- benchmarks/src/tpcds/run.rs | 4 +++- benchmarks/src/tpch/run.rs | 4 +++- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/benchmarks/src/bin/external_aggr.rs b/benchmarks/src/bin/external_aggr.rs index ee604ec7365a1..a6e322c7fabc0 100644 --- a/benchmarks/src/bin/external_aggr.rs +++ b/benchmarks/src/bin/external_aggr.rs @@ -326,7 +326,9 @@ impl ExternalAggrConfig { let config = ListingTableConfig::new(table_path).with_listing_options(options); let config = config.infer_schema(&state).await?; - 
Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/imdb/run.rs b/benchmarks/src/imdb/run.rs index ca9710a920517..6d3b5c6bafb40 100644 --- a/benchmarks/src/imdb/run.rs +++ b/benchmarks/src/imdb/run.rs @@ -470,7 +470,9 @@ impl RunOpt { _ => unreachable!(), }; - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/sort_pushdown.rs b/benchmarks/src/sort_pushdown.rs index e7fce1921e7a8..8e34706ac140a 100644 --- a/benchmarks/src/sort_pushdown.rs +++ b/benchmarks/src/sort_pushdown.rs @@ -273,7 +273,9 @@ impl RunOpt { .with_listing_options(options) .with_schema(schema); - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/sort_tpch.rs b/benchmarks/src/sort_tpch.rs index 95c90d826de20..206911c45adde 100644 --- a/benchmarks/src/sort_tpch.rs +++ b/benchmarks/src/sort_tpch.rs @@ -351,7 +351,9 @@ impl RunOpt { .with_listing_options(options) .with_schema(schema); - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/tpcds/run.rs b/benchmarks/src/tpcds/run.rs index f7ef6991515da..58821340034da 100644 --- a/benchmarks/src/tpcds/run.rs +++ b/benchmarks/src/tpcds/run.rs @@ -347,7 +347,9 @@ impl RunOpt { .with_listing_options(options) .with_schema(schema); - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + 
ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index ec7aa8c554a28..75983ee141d93 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -342,7 +342,9 @@ impl RunOpt { .with_listing_options(options) .with_schema(schema); - Ok(Arc::new(ListingTable::try_new(config)?)) + Ok(Arc::new(ListingTable::try_new(config)?.with_cache( + ctx.runtime_env().cache_manager.get_file_statistic_cache(), + ))) } fn iterations(&self) -> usize { From c60fb8c622e5a83c0d71455bbfc1d6844e4e60d9 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 21 Apr 2026 12:58:18 +0200 Subject: [PATCH 53/83] Add cache to listing table creation --- datafusion/core/tests/parquet/file_statistics.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 4dcdf543b929b..98ffa119d5a3d 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -30,7 +30,7 @@ use datafusion::prelude::SessionContext; use datafusion_common::DFSchema; use datafusion_common::stats::Precision; use datafusion_execution::cache::DefaultListFilesCache; -use datafusion_execution::cache::cache_manager::CacheManagerConfig; +use datafusion_execution::cache::cache_manager::{CacheManagerConfig, FileStatisticsCache}; use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; @@ -236,7 +236,7 @@ async fn list_files_with_session_level_cache() { async fn get_listing_table( table_path: &ListingTableUrl, - static_cache: Option>, + static_cache: Option>, opt: &ListingOptions, ) -> ListingTable { let schema = opt @@ -249,12 +249,7 @@ async fn get_listing_table( let config1 = 
ListingTableConfig::new(table_path.clone()) .with_listing_options(opt.clone()) .with_schema(schema); - let table = ListingTable::try_new(config1).unwrap(); - if let Some(c) = static_cache { - table.with_cache(Some(c)) - } else { - table - } + ListingTable::try_new(config1).unwrap().with_cache(static_cache) } fn get_cache_runtime_state() -> ( From 71fd2dd03681b37a585bfb3a99e46782a0f7866d Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 21 Apr 2026 20:19:14 +0200 Subject: [PATCH 54/83] fixup! Add cache to listing table creation --- datafusion/core/tests/parquet/file_statistics.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 98ffa119d5a3d..3e3b90a348b04 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -30,7 +30,9 @@ use datafusion::prelude::SessionContext; use datafusion_common::DFSchema; use datafusion_common::stats::Precision; use datafusion_execution::cache::DefaultListFilesCache; -use datafusion_execution::cache::cache_manager::{CacheManagerConfig, FileStatisticsCache}; +use datafusion_execution::cache::cache_manager::{ + CacheManagerConfig, FileStatisticsCache, +}; use datafusion_execution::cache::file_statistics_cache::DefaultFileStatisticsCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; @@ -249,7 +251,9 @@ async fn get_listing_table( let config1 = ListingTableConfig::new(table_path.clone()) .with_listing_options(opt.clone()) .with_schema(schema); - ListingTable::try_new(config1).unwrap().with_cache(static_cache) + ListingTable::try_new(config1) + .unwrap() + .with_cache(static_cache) } fn get_cache_runtime_state() -> ( From 3fede3bed2ebc3c13da3a3902811fcf085ab1960 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 07:04:50 +0200 Subject: [PATCH 55/83] Adapt limit to 20M 
in configs.md --- docs/source/user-guide/configs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 6a8014ddf1d8f..4a3f3cc6d7bc9 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -230,8 +230,8 @@ SET datafusion.runtime.memory_limit = '2G'; The following runtime configuration settings are available: | key | default | description | -| ---------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| datafusion.runtime.file_statistics_cache_limit | 1M | Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | +| ---------------------------------------------- |---------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| datafusion.runtime.file_statistics_cache_limit | 20M | Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | | datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | | datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. | | datafusion.runtime.max_temp_directory_size | 100G | Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. 
| From 274e0552853d8fb2e8c1b80901dbb23ff1f814ed Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 07:20:17 +0200 Subject: [PATCH 56/83] fixup! Adapt limit to 20M in configs.md --- docs/source/user-guide/configs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 4a3f3cc6d7bc9..8b486bcf6cdc9 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -230,7 +230,7 @@ SET datafusion.runtime.memory_limit = '2G'; The following runtime configuration settings are available: | key | default | description | -| ---------------------------------------------- |---------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ---------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | datafusion.runtime.file_statistics_cache_limit | 20M | Maximum memory to use for file statistics cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | | datafusion.runtime.list_files_cache_limit | 1M | Maximum memory to use for list files cache. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes. | | datafusion.runtime.list_files_cache_ttl | NULL | TTL (time-to-live) of the entries in the list file cache. Supports units m (minutes), and s (seconds). Example: '2m' for 2 minutes. 
| From 8b13a1a66c05c14feecd7faccf5480e3187677b0 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 09:23:17 +0200 Subject: [PATCH 57/83] Fix linter --- datafusion/catalog-listing/src/helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 110bdcb6f9fc0..a25eabff6bd9e 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -382,7 +382,7 @@ fn try_into_partitioned_file( let mut pf: PartitionedFile = object_meta.into(); pf.partition_values = partition_values; - pf.table_reference = table_path.get_table_ref().clone(); + pf.table_reference.clone_from(table_path.get_table_ref()); Ok(Some(pf)) } From 66f8a5bb07221652b6d0ec7614d4183ea231c8ca Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 09:24:12 +0200 Subject: [PATCH 58/83] Add cache to listing table in _read_type() --- datafusion/core/src/execution/context/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 5674ae4cfa9b0..0ec36e6152c42 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1672,7 +1672,8 @@ impl SessionContext { let config = ListingTableConfig::new_with_multi_paths(table_paths) .with_listing_options(listing_options) .with_schema(resolved_schema); - let provider = ListingTable::try_new(config)?; + let provider = ListingTable::try_new(config)? 
+ .with_cache(self.runtime_env().cache_manager.get_file_statistic_cache()); self.read_table(Arc::new(provider)) } From aa0350da913bd990a6448196f98899ac99b1013a Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 13:07:55 +0200 Subject: [PATCH 59/83] Add ListView and LargeListView to heapsize --- datafusion/common/src/heap_size.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 1acc3486eb51c..78cba7bd41e26 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -17,9 +17,7 @@ use crate::stats::Precision; use crate::{ColumnStatistics, ScalarValue, Statistics}; -use arrow::array::{ - Array, FixedSizeListArray, LargeListArray, ListArray, MapArray, StructArray, -}; +use arrow::array::{Array, FixedSizeListArray, LargeListArray, LargeListViewArray, ListArray, ListViewArray, MapArray, StructArray}; use arrow::datatypes::{ DataType, Field, Fields, IntervalDayTime, IntervalMonthDayNano, IntervalUnit, TimeUnit, UnionFields, UnionMode, i256, @@ -122,6 +120,8 @@ impl DFHeapSize for ScalarValue { Union(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), Dictionary(a, b) => a.heap_size() + b.heap_size(), RunEndEncoded(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), + ListView(a) => a.heap_size(), + LargeListView(a) => a.heap_size(), } } } @@ -258,12 +258,24 @@ impl DFHeapSize for LargeListArray { } } +impl DFHeapSize for LargeListViewArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + impl DFHeapSize for ListArray { fn heap_size(&self) -> usize { self.get_array_memory_size() } } +impl DFHeapSize for ListViewArray { + fn heap_size(&self) -> usize { + self.get_array_memory_size() + } +} + impl DFHeapSize for FixedSizeListArray { fn heap_size(&self) -> usize { self.get_array_memory_size() From d072b7d78d3184cef5f597339f7b4d6e73a73594 Mon Sep 17 00:00:00 2001 From: Michael 
Kleen Date: Wed, 22 Apr 2026 13:14:22 +0200 Subject: [PATCH 60/83] fixup! Add ListView and LargeListView to heapsize --- datafusion/common/src/heap_size.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 78cba7bd41e26..b744225681450 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -17,7 +17,10 @@ use crate::stats::Precision; use crate::{ColumnStatistics, ScalarValue, Statistics}; -use arrow::array::{Array, FixedSizeListArray, LargeListArray, LargeListViewArray, ListArray, ListViewArray, MapArray, StructArray}; +use arrow::array::{ + Array, FixedSizeListArray, LargeListArray, LargeListViewArray, ListArray, + ListViewArray, MapArray, StructArray, +}; use arrow::datatypes::{ DataType, Field, Fields, IntervalDayTime, IntervalMonthDayNano, IntervalUnit, TimeUnit, UnionFields, UnionMode, i256, From fbd1d5519e9b7abe8a5c817ee6b0d7b3bea41d63 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 22 Apr 2026 14:31:19 +0200 Subject: [PATCH 61/83] Remove array.slt --- datafusion/sqllogictest/test_files/array.slt | 9945 ------------------ 1 file changed, 9945 deletions(-) delete mode 100644 datafusion/sqllogictest/test_files/array.slt diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt deleted file mode 100644 index 81d5c8f91a5bc..0000000000000 --- a/datafusion/sqllogictest/test_files/array.slt +++ /dev/null @@ -1,9945 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -############# -## Array Expressions Tests -############# - -### Tables - -statement ok -CREATE TABLE values( - a INT, - b INT, - c INT, - d FLOAT, - e VARCHAR, - f VARCHAR -) AS VALUES - (1, 1, 2, 1.1, 'Lorem', 'A'), - (2, 3, 4, 2.2, 'ipsum', ''), - (3, 5, 6, 3.3, 'dolor', 'BB'), - (4, 7, 8, 4.4, 'sit', NULL), - (NULL, 9, 10, 5.5, 'amet', 'CCC'), - (5, NULL, 12, 6.6, ',', 'DD'), - (6, 11, NULL, 7.7, 'consectetur', 'E'), - (7, 13, 14, NULL, 'adipiscing', 'F'), - (8, 15, 16, 8.8, NULL, '') -; - -statement ok -CREATE TABLE values_without_nulls -AS VALUES - (1, 1, 2, 1.1, 'Lorem', 'A'), - (2, 3, 4, 2.2, 'ipsum', ''), - (3, 5, 6, 3.3, 'dolor', 'BB'), - (4, 7, 8, 4.4, 'sit', NULL), - (5, 9, 10, 5.5, 'amet', 'CCC'), - (6, 11, 12, 6.6, ',', 'DD'), - (7, 13, 14, 7.7, 'consectetur', 'E'), - (8, 15, 16, 8.8, 'adipiscing', 'F'), - (9, 17, 18, 9.9, 'elit', '') -; - -statement ok -CREATE TABLE arrays -AS VALUES - (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2, 3.3), make_array('L', 'o', 'r', 'e', 'm')), - (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6), make_array('i', 'p', NULL, 'u', 'm')), - (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), make_array('d', NULL, 'l', 'o', 'r')), - (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 12.2), make_array('s', 'i', 't')), - (NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')), - (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')), - (make_array(make_array(15, 16),make_array(NULL, 18)), 
make_array(16.6, 17.7, 18.8), NULL) -; - -statement ok -CREATE TABLE large_arrays -AS - SELECT - arrow_cast(column1, 'LargeList(List(Int64))') AS column1, - arrow_cast(column2, 'LargeList(Float64)') AS column2, - arrow_cast(column3, 'LargeList(Utf8)') AS column3 - FROM arrays -; - -statement ok -CREATE TABLE fixed_size_arrays -AS VALUES - (arrow_cast(make_array(make_array(NULL, 2),make_array(3, NULL)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('L', 'o', 'r', 'e', 'm'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(3, 4),make_array(5, 6)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(NULL, 5.5, 6.6), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('i', 'p', NULL, 'u', 'm'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(5, 6),make_array(7, 8)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(7.7, 8.8, 9.9), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('d', NULL, 'l', 'o', 'r'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(7, NULL),make_array(9, 10)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(10.1, NULL, 12.2), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('s', 'i', 't', 'a', 'b'), 'FixedSizeList(5, Utf8)')), - (NULL, arrow_cast(make_array(13.3, 14.4, 15.5), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('a', 'm', 'e', 't', 'x'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(11, 12),make_array(13, 14)), 'FixedSizeList(2, List(Int64))'), NULL, arrow_cast(make_array(',','a','b','c','d'), 'FixedSizeList(5, Utf8)')), - (arrow_cast(make_array(make_array(15, 16),make_array(NULL, 18)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(16.6, 17.7, 18.8), 'FixedSizeList(3, Float64)'), NULL) -; - -statement ok -CREATE TABLE slices -AS VALUES - (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1), - (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 2, -4), - 
(make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 0, 0), - (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), -4, -7), - (NULL, 4, 5), - (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), - (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 5, NULL) -; - -statement ok -CREATE TABLE fixed_slices -AS VALUES - (arrow_cast(make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 'FixedSizeList(10, Int64)'), 1, 1), - (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 'FixedSizeList(10, Int64)'), 2, -4), - (arrow_cast(make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 0, 0), - (arrow_cast(make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), -4, -7), - (arrow_cast(make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), 'FixedSizeList(10, Int64)'), NULL, 6), - (arrow_cast(make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60),'FixedSizeList(10, Int64)'), 5, NULL) -; - -statement ok -CREATE TABLE arrayspop -AS VALUES - (make_array(1, 2, NULL)), - (make_array(3, 4, 5, NULL)), - (make_array(6, 7, 8, NULL, 9)), - (make_array(NULL, NULL, 100)), - (NULL), - (make_array(NULL, 10, 11, 12)) -; - -statement ok -CREATE TABLE large_arrayspop -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1 -FROM arrayspop -; - -statement ok -CREATE TABLE nested_arrays -AS VALUES - (make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), make_array(7, 8, 9), 2, make_array([[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]), make_array(11, 12, 13)), - (make_array(make_array(4, 5, 6), make_array(10, 11, 12), make_array(4, 9, 8), make_array(7, 8, 9), make_array(10, 11, 12), make_array(1, 8, 7)), make_array(10, 11, 12), 3, make_array([[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]), make_array(121, 131, 141)) -; - -statement ok -CREATE TABLE large_nested_arrays -AS - SELECT - arrow_cast(column1, 
'LargeList(LargeList(Int64))') AS column1, - arrow_cast(column2, 'LargeList(Int64)') AS column2, - column3, - arrow_cast(column4, 'LargeList(LargeList(List(Int64)))') AS column4, - arrow_cast(column5, 'LargeList(Int64)') AS column5 - FROM nested_arrays -; - -statement ok -CREATE TABLE fixed_size_nested_arrays -AS VALUES - (arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(7, 8, 9), 'FixedSizeList(3, Int64)'), 2, arrow_cast(make_array([[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(11, 12, 13), 'FixedSizeList(3, Int64)')), - (arrow_cast(make_array(make_array(4, 5, 6), make_array(10, 11, 12), make_array(4, 9, 8), make_array(7, 8, 9), make_array(10, 11, 12), make_array(1, 8, 7)), 'FixedSizeList(6, List(Int64))'), arrow_cast(make_array(10, 11, 12), 'FixedSizeList(3, Int64)'), 3, arrow_cast(make_array([[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array(121, 131, 141), 'FixedSizeList(3, Int64)')) -; - -statement ok -CREATE TABLE arrays_values -AS VALUES - (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','), - (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'), - (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'), - (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'), - (NULL, 44, 5, '@'), - (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'), - (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'), - (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL) -; - -statement ok -CREATE TABLE large_arrays_values -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - column2, - column3, - column4 -FROM arrays_values -; - -statement ok -CREATE TABLE fixed_arrays_values -AS SELECT - 
arrow_cast(column1, 'FixedSizeList(10, Int64)') AS column1, - column2, - column3, - column4 -FROM arrays_values -; - -statement ok -CREATE TABLE arrays_values_v2 -AS VALUES - (make_array(NULL, 2, 3), make_array(4, 5, NULL), 12, make_array([30, 40, 50])), - (NULL, make_array(7, NULL, 8), 13, make_array(make_array(NULL,NULL,60))), - (make_array(9, NULL, 10), NULL, 14, make_array(make_array(70,NULL,NULL))), - (make_array(NULL, 1), make_array(NULL, 21), NULL, NULL), - (make_array(11, 12), NULL, NULL, NULL), - (NULL, NULL, NULL, NULL) -; - -statement ok -CREATE TABLE large_arrays_values_v2 -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - arrow_cast(column2, 'LargeList(Int64)') AS column2, - column3, - arrow_cast(column4, 'LargeList(LargeList(Int64))') AS column4 -FROM arrays_values_v2 -; - -statement ok -CREATE TABLE flatten_table -AS VALUES - (make_array([1], [2], [3]), make_array([[1, 2, 3]], [[4, 5]], [[6]]), make_array([[[1]]], [[[2, 3]]]), make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4])), - (make_array([1, 2], [3, 4], [5, 6]), make_array([[8]]), make_array([[[1,2]]], [[[3]]]), make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0])) -; - -statement ok -CREATE TABLE large_flatten_table -AS - SELECT - arrow_cast(column1, 'LargeList(LargeList(Int64))') AS column1, - arrow_cast(column2, 'LargeList(LargeList(LargeList(Int64)))') AS column2, - arrow_cast(column3, 'LargeList(LargeList(LargeList(LargeList(Int64))))') AS column3, - arrow_cast(column4, 'LargeList(LargeList(Float64))') AS column4 - FROM flatten_table -; - -statement ok -CREATE TABLE fixed_size_flatten_table -AS VALUES - (arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), - arrow_cast(make_array([[1, 2, 3]], [[4, 5]], [[6]]), 'FixedSizeList(3, List(List(Int64)))'), - arrow_cast(make_array([[[1]]], [[[2, 3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), - arrow_cast(make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4]), 'FixedSizeList(3, List(Float64))') - ), - ( - 
arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), - arrow_cast(make_array([[8]], [[9, 10]], [[11, 12, 13]]), 'FixedSizeList(3, List(List(Int64)))'), - arrow_cast(make_array([[[1,2]]], [[[3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), - arrow_cast(make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]), 'FixedSizeList(3, List(Float64))') - ) -; - -statement ok -CREATE TABLE array_has_table_1D -AS VALUES - (make_array(1, 2), 1, make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3,5)), - (make_array(3, 4, 5), 2, make_array(1,2,3,4), make_array(2,5), make_array(2,4,6), make_array(1,3,5)) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_1D -AS VALUES - (arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(1,3,5), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2, 4, 6, 8, 1, 3, 5), 'FixedSizeList(7, Int64)')), - (arrow_cast(make_array(3, 4, 5), 'FixedSizeList(3, Int64)'), 2, arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), arrow_cast(make_array(2,5), 'FixedSizeList(2, Int64)'), arrow_cast(make_array(2,4,6), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3, 5, 7, 9, 11, 13), 'FixedSizeList(7, Int64)')) -; - -statement ok -CREATE TABLE array_has_table_1D_Float -AS VALUES - (make_array(1.0, 2.0), 1.0, make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)), - (make_array(3.0, 4.0, 5.0), 2.0, make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33)) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_1D_Float -AS VALUES - (arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 1.0, arrow_cast(make_array(1.0, 2.0, 3.0, 4.0), 'FixedSizeList(4, Float64)'), arrow_cast(make_array(1.0,3.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 2.22), 
'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 3.33), 'FixedSizeList(2, Float64)')), - (arrow_cast(make_array(3.0, 4.0, 5.0), 'FixedSizeList(3, Float64)'), 2.0, arrow_cast(make_array(1.0, 2.0, 3.0, 4.0), 'FixedSizeList(4, Float64)'), arrow_cast(make_array(2.0,5.0), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(2.22, 1.11), 'FixedSizeList(2, Float64)'), arrow_cast(make_array(1.11, 3.33), 'FixedSizeList(2, Float64)')) -; - -statement ok -CREATE TABLE array_has_table_1D_Boolean -AS VALUES - (make_array(true, true, true), false, make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)), - (make_array(false, false, false), false, make_array(true, false, true), make_array(true, true), make_array(true, true), make_array(false,false,true)) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_1D_Boolean -AS VALUES - (arrow_cast(make_array(true, true, true), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, true, false, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(false, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(true, false, true), 'FixedSizeList(3, Boolean)')), - (arrow_cast(make_array(false, false, false), 'FixedSizeList(3, Boolean)'), false, arrow_cast(make_array(true, false, true, true, false), 'FixedSizeList(5, Boolean)'), arrow_cast(make_array(true, true, false), 'FixedSizeList(3, Boolean)'), arrow_cast(make_array(true, true), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(false,false,true), 'FixedSizeList(3, Boolean)')) -; - -statement ok -CREATE TABLE array_has_table_1D_UTF8 -AS VALUES - (make_array('a', 'bc', 'def'), 'bc', make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 'python'), make_array('data')), - (make_array('a', 'bc', 'def'), 'defg', make_array('datafusion', 'rust', 'arrow'), 
make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow')) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_1D_UTF8 -AS VALUES - (arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'bc', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'datafusion', 'rust'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('data', 'fusion', 'rust'), 'FixedSizeList(3, Utf8)')), - (arrow_cast(make_array('a', 'bc', 'def'), 'FixedSizeList(3, Utf8)'), 'defg', arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow', 'python'), 'FixedSizeList(4, Utf8)'), arrow_cast(make_array('rust', 'arrow', 'python'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('datafusion', 'rust', 'arrow'), 'FixedSizeList(3, Utf8)')) -; - -statement ok -CREATE TABLE array_has_table_2D -AS VALUES - (make_array([1,2]), make_array(1,3), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])), - (make_array([3,4], [5]), make_array(5), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10])) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_2D -AS VALUES - (arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1,3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3], [4,5], [6,7]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([4,5], [6,7], [1,2,3]), 'FixedSizeList(3, List(Int64))')), - (arrow_cast(make_array([3,4], [5]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(5, 3), 'FixedSizeList(2, Int64)'), arrow_cast(make_array([1,2,3,4], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([1,2,3], [5,6,7], [8,9,10]), 'FixedSizeList(3, List(Int64))')) -; - -statement ok -CREATE 
TABLE array_has_table_2D_float -AS VALUES - (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])), - (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3])) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_2D_Float -AS VALUES - (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.1, 2.2], [3.3], [4.4]), 'FixedSizeList(3, List(Float64))')), - (arrow_cast(make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))'), arrow_cast(make_array([1.0], [1.1, 2.2], [3.3]), 'FixedSizeList(3, List(Float64))')) -; - -statement ok -CREATE TABLE array_has_table_3D -AS VALUES - (make_array([[1,2]]), make_array([1])), - (make_array([[1,2]]), make_array([1,2])), - (make_array([[1,2]]), make_array([1,2,3])), - (make_array([[1], [2]]), make_array([2])), - (make_array([[1], [2]]), make_array([1], [2])), - (make_array([[1], [2]], [[2], [3]]), make_array([1], [2], [3])), - (make_array([[1], [2]], [[2], [3]]), make_array([1], [2])) -; - -statement ok -CREATE TABLE fixed_size_array_has_table_3D -AS VALUES - (arrow_cast(make_array([[1,2]], [[3, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2], [3, 4]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2,3], [1]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([2], [3]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, 
List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')), - (arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')) -; - -statement ok -CREATE TABLE array_has_table_null -AS VALUES - (make_array(1, 2), 1), - (make_array(1, NULL), 1), - (make_array(3, 4, 5), 2), - (make_array(3, NULL, 5), 2), - (make_array(NULL, NULL, NULL), 2) -; - -statement ok -CREATE TABLE array_has_table_empty -AS VALUES - (make_array(1, 3, 5), 1), - (make_array(), 1), - (NULL, 1) -; - -statement ok -CREATE TABLE array_distinct_table_1D -AS VALUES - (make_array(1, 1, 2, 2, 3)), - (make_array(1, 2, 3, 4, 5)), - (make_array(3, 5, 3, 3, 3)) -; - -statement ok -CREATE TABLE array_distinct_table_1D_UTF8 -AS VALUES - (make_array('a', 'a', 'bc', 'bc', 'def')), - (make_array('a', 'bc', 'def', 'defg', 'defg')), - (make_array('defg', 'defg', 'defg', 'defg', 'defg')) -; - -statement ok -CREATE TABLE array_distinct_table_2D -AS VALUES - (make_array([1,2], [1,2], [3,4], [3,4], [5,6])), - (make_array([1,2], [3,4], [5,6], [7,8], [9,10])), - (make_array([5,6], [5,6], NULL)) -; - -statement ok -CREATE TABLE array_distinct_table_1D_large -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1 -FROM array_distinct_table_1D -; - -statement ok -CREATE TABLE array_distinct_table_1D_fixed -AS SELECT - arrow_cast(column1, 'FixedSizeList(5, Int64)') AS column1 -FROM array_distinct_table_1D -; - -statement ok -CREATE TABLE array_distinct_table_1D_UTF8_fixed -AS SELECT - arrow_cast(column1, 'FixedSizeList(5, Utf8)') AS column1 -FROM array_distinct_table_1D_UTF8 -; - -statement ok -CREATE TABLE array_distinct_table_2D_fixed -AS VALUES - (arrow_cast(make_array([1,2], [1,2], [3,4], [3,4], [5,6]), 'FixedSizeList(5, List(Int64))')), - (arrow_cast(make_array([1,2], [3,4], [5,6], [7,8], [9,10]), 'FixedSizeList(5, List(Int64))')), - (arrow_cast(make_array([5,6], [5,6], NULL, NULL, NULL), 
'FixedSizeList(5, List(Int64))')) -; - -statement ok -CREATE TABLE array_intersect_table_1D -AS VALUES - (make_array(1, 2), make_array(1), make_array(1,2,3), make_array(1,3), make_array(1,3,5), make_array(2,4,6,8,1,3)), - (make_array(11, 22), make_array(11), make_array(11,22,33), make_array(11,33), make_array(11,33,55), make_array(22,44,66,88,11,33)) -; - -statement ok -CREATE TABLE large_array_intersect_table_1D -AS - SELECT - arrow_cast(column1, 'LargeList(Int64)') as column1, - arrow_cast(column2, 'LargeList(Int64)') as column2, - arrow_cast(column3, 'LargeList(Int64)') as column3, - arrow_cast(column4, 'LargeList(Int64)') as column4, - arrow_cast(column5, 'LargeList(Int64)') as column5, - arrow_cast(column6, 'LargeList(Int64)') as column6 -FROM array_intersect_table_1D -; - -statement ok -CREATE TABLE array_intersect_table_1D_Float -AS VALUES - (make_array(1.0, 2.0), make_array(1.0), make_array(1.0,2.0,3.0), make_array(1.0,3.0), make_array(1.11), make_array(2.22, 3.33)), - (make_array(3.0, 4.0, 5.0), make_array(2.0), make_array(1.0,2.0,3.0,4.0), make_array(2.0,5.0), make_array(2.22, 1.11), make_array(1.11, 3.33)) -; - -statement ok -CREATE TABLE large_array_intersect_table_1D_Float -AS - SELECT - arrow_cast(column1, 'LargeList(Float64)') as column1, - arrow_cast(column2, 'LargeList(Float64)') as column2, - arrow_cast(column3, 'LargeList(Float64)') as column3, - arrow_cast(column4, 'LargeList(Float64)') as column4, - arrow_cast(column5, 'LargeList(Float64)') as column5, - arrow_cast(column6, 'LargeList(Float64)') as column6 -FROM array_intersect_table_1D_Float -; - -statement ok -CREATE TABLE array_intersect_table_1D_Boolean -AS VALUES - (make_array(true, true, true), make_array(false), make_array(true, true, false, true, false), make_array(true, false, true), make_array(false), make_array(true, false)), - (make_array(false, false, false), make_array(false), make_array(true, false, true), make_array(true, true), make_array(true, true), 
make_array(false,false,true)) -; - -statement ok -CREATE TABLE large_array_intersect_table_1D_Boolean -AS - SELECT - arrow_cast(column1, 'LargeList(Boolean)') as column1, - arrow_cast(column2, 'LargeList(Boolean)') as column2, - arrow_cast(column3, 'LargeList(Boolean)') as column3, - arrow_cast(column4, 'LargeList(Boolean)') as column4, - arrow_cast(column5, 'LargeList(Boolean)') as column5, - arrow_cast(column6, 'LargeList(Boolean)') as column6 -FROM array_intersect_table_1D_Boolean -; - -statement ok -CREATE TABLE array_intersect_table_1D_UTF8 -AS VALUES - (make_array('a', 'bc', 'def'), make_array('bc'), make_array('datafusion', 'rust', 'arrow'), make_array('rust', 'arrow'), make_array('rust', 'arrow', 'python'), make_array('data')), - (make_array('a', 'bc', 'def'), make_array('defg'), make_array('datafusion', 'rust', 'arrow'), make_array('datafusion', 'rust', 'arrow', 'python'), make_array('rust', 'arrow'), make_array('datafusion', 'rust', 'arrow')) -; - -statement ok -CREATE TABLE large_array_intersect_table_1D_UTF8 -AS - SELECT - arrow_cast(column1, 'LargeList(Utf8)') as column1, - arrow_cast(column2, 'LargeList(Utf8)') as column2, - arrow_cast(column3, 'LargeList(Utf8)') as column3, - arrow_cast(column4, 'LargeList(Utf8)') as column4, - arrow_cast(column5, 'LargeList(Utf8)') as column5, - arrow_cast(column6, 'LargeList(Utf8)') as column6 -FROM array_intersect_table_1D_UTF8 -; - -statement ok -CREATE TABLE array_intersect_table_1D_NULL -AS VALUES - ([1, 2, 2, 3], [2, 3, 4]), - ([2, 3, 3], [3]), - ([3], [3, 3, 4]), - (null, [3, 4]), - ([1, 2], null), - (null, null) -; - -statement ok -CREATE TABLE array_intersect_table_2D -AS VALUES - (make_array([1,2]), make_array([1,3]), make_array([1,2,3], [4,5], [6,7]), make_array([4,5], [6,7])), - (make_array([3,4], [5]), make_array([3,4]), make_array([1,2,3,4], [5,6,7], [8,9,10]), make_array([1,2,3], [5,6,7], [8,9,10])) -; - -statement ok -CREATE TABLE large_array_intersect_table_2D -AS - SELECT - arrow_cast(column1, 
'LargeList(List(Int64))') as column1, - arrow_cast(column2, 'LargeList(List(Int64))') as column2, - arrow_cast(column3, 'LargeList(List(Int64))') as column3, - arrow_cast(column4, 'LargeList(List(Int64))') as column4 -FROM array_intersect_table_2D -; - -statement ok -CREATE TABLE array_intersect_table_2D_float -AS VALUES - (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.1, 2.2], [3.3])), - (make_array([1.0, 2.0, 3.0], [1.1, 2.2], [3.3]), make_array([1.0], [1.1, 2.2], [3.3])) -; - -statement ok -CREATE TABLE large_array_intersect_table_2D_Float -AS - SELECT - arrow_cast(column1, 'LargeList(List(Float64))') as column1, - arrow_cast(column2, 'LargeList(List(Float64))') as column2 -FROM array_intersect_table_2D_Float -; - -statement ok -CREATE TABLE array_intersect_table_3D -AS VALUES - (make_array([[1,2]]), make_array([[1]])), - (make_array([[1,2]]), make_array([[1,2]])) -; - -statement ok -CREATE TABLE large_array_intersect_table_3D -AS - SELECT - arrow_cast(column1, 'LargeList(List(List(Int64)))') as column1, - arrow_cast(column2, 'LargeList(List(List(Int64)))') as column2 -FROM array_intersect_table_3D -; - -statement ok -CREATE TABLE arrays_values_without_nulls -AS VALUES - (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ',', [2,3]), - (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.', [4,5]), - (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-', [6,7]), - (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok', [8,9]) -; - -statement ok -CREATE TABLE large_arrays_values_without_nulls -AS SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - column2, - column3, - column4, - arrow_cast(column5, 'LargeList(Int64)') AS column5 -FROM arrays_values_without_nulls -; - -statement ok -CREATE TABLE fixed_size_arrays_values_without_nulls -AS VALUES - (arrow_cast(make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 'FixedSizeList(10, Int64)'), 1, 1, ',', [2,3]), - (arrow_cast(make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 
20), 'FixedSizeList(10, Int64)'), 12, 2, '.', [4,5]), - (arrow_cast(make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 'FixedSizeList(10, Int64)'), 23, 3, '-', [6,7]), - (arrow_cast(make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 'FixedSizeList(10, Int64)'), 34, 4, 'ok', [8,9]) -; - -statement ok -CREATE TABLE arrays_range -AS VALUES - (3, 10, 2), - (4, 13, 3) -; - -statement ok -CREATE TABLE arrays_with_repeating_elements -AS VALUES - (make_array(1, 2, 1, 3, 2, 2, 1, 3, 2, 3), 2, 4, 3), - (make_array(4, 4, 5, 5, 6, 5, 5, 5, 4, 4), 4, 7, 2), - (make_array(7, 7, 7, 8, 7, 9, 7, 8, 7, 7), 7, 10, 5), - (make_array(10, 11, 12, 10, 11, 12, 10, 11, 12, 10), 10, 13, 10) -; - -statement ok -CREATE TABLE large_arrays_with_repeating_elements -AS - SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - column2, - column3, - column4 - FROM arrays_with_repeating_elements -; - -statement ok -CREATE TABLE fixed_arrays_with_repeating_elements -AS VALUES - (arrow_cast(make_array(1, 2, 1, 3, 2, 2, 1, 3, 2, 3), 'FixedSizeList(10, Int64)'), 2, 4, 3), - (arrow_cast(make_array(4, 4, 5, 5, 6, 5, 5, 5, 4, 4), 'FixedSizeList(10, Int64)'), 4, 7, 2), - (arrow_cast(make_array(7, 7, 7, 8, 7, 9, 7, 8, 7, 7), 'FixedSizeList(10, Int64)'), 7, 10, 5), - (arrow_cast(make_array(10, 11, 12, 10, 11, 12, 10, 11, 12, 10), 'FixedSizeList(10, Int64)'), 10, 13, 10) -; - -statement ok -CREATE TABLE nested_arrays_with_repeating_elements -AS VALUES - (make_array([1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]), [4, 5, 6], [10, 11, 12], 3), - (make_array([10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]), [10, 11, 12], [19, 20, 21], 2), - (make_array([19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]), [19, 20, 21], [28, 29, 30], 5), - (make_array([28, 29, 30], [31, 
32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), [28, 29, 30], [37, 38, 39], 10) -; - -statement ok -CREATE TABLE large_nested_arrays_with_repeating_elements -AS - SELECT - arrow_cast(column1, 'LargeList(List(Int64))') AS column1, - column2, - column3, - column4 - FROM nested_arrays_with_repeating_elements -; - -statement ok -CREATE TABLE fixed_size_nested_arrays_with_repeating_elements -AS VALUES - (arrow_cast(make_array([1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(10, List(Int64))'), [4, 5, 6], [10, 11, 12], 3), - (arrow_cast(make_array([10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]), 'FixedSizeList(10, List(Int64))'), [10, 11, 12], [19, 20, 21], 2), - (arrow_cast(make_array([19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]), 'FixedSizeList(10, List(Int64))'), [19, 20, 21], [28, 29, 30], 5), - (arrow_cast(make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), 'FixedSizeList(10, List(Int64))'), [28, 29, 30], [28, 29, 30], 10) -; - -statement ok -CREATE TABLE arrays_distance_table -AS VALUES - (make_array(1, 2, 3), make_array(1, 2, 3), make_array(1.1, 2.2, 3.3) , make_array(1.1, NULL, 3.3)), - (make_array(1, 2, 3), make_array(4, 5, 6), make_array(4.4, 5.5, 6.6), make_array(4.4, NULL, 6.6)), - (make_array(1, 2, 3), make_array(7, 8, 9), make_array(7.7, 8.8, 9.9), make_array(7.7, NULL, 9.9)), - (make_array(1, 2, 3), make_array(10, 11, 12), make_array(10.1, 11.2, 12.3), make_array(10.1, NULL, 12.3)) -; - -statement ok -CREATE TABLE large_arrays_distance_table -AS - SELECT - arrow_cast(column1, 'LargeList(Int64)') AS column1, - 
arrow_cast(column2, 'LargeList(Int64)') AS column2, - arrow_cast(column3, 'LargeList(Float64)') AS column3, - arrow_cast(column4, 'LargeList(Float64)') AS column4 -FROM arrays_distance_table -; - -statement ok -CREATE TABLE fixed_size_arrays_distance_table -AS - SELECT - arrow_cast(column1, 'FixedSizeList(3, Int64)') AS column1, - arrow_cast(column2, 'FixedSizeList(3, Int64)') AS column2, - arrow_cast(column3, 'FixedSizeList(3, Float64)') AS column3, - arrow_cast(column4, 'FixedSizeList(3, Float64)') AS column4 -FROM arrays_distance_table -; - - -# Array literal - -## boolean coercion is not supported -query error -select [1, true, null] - -## wrapped in array_length to get deterministic results -query I -SELECT array_length([now()]) ----- -1 - -## array literal with functions -query ? -select [abs(-1.2), sin(-1), log(2), ceil(3.141)] ----- -[1.2, -0.8414709848078965, 0.30102999566398114, 4.0] - -## array literal with nested types -query ??? -select - [struct('foo', 1)], - [struct('foo', [1,2,3])], - [struct('foo', [struct(3, 'x')])] -; ----- -[{c0: foo, c1: 1}] [{c0: foo, c1: [1, 2, 3]}] [{c0: foo, c1: [{c0: 3, c1: x}]}] - -query TTT -select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) from arrays; ----- -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) -List(List(Int64)) List(Float64) List(Utf8) - -# arrays table -query ??? 
-select column1, column2, column3 from arrays; ----- -[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] -[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] -[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] -[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] -NULL [13.3, 14.4, 15.5] [a, m, e, t] -[[11, 12], [13, 14]] NULL [,] -[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL - -# nested_arrays table -query ??I?? -select column1, column2, column3, column4, column5 from nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [7, 8, 9] 2 [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]] [11, 12, 13] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [10, 11, 12] 3 [[[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]]] [121, 131, 141] - -# values table -query IIIRT -select a, b, c, d, e from values; ----- -1 1 2 1.1 Lorem -2 3 4 2.2 ipsum -3 5 6 3.3 dolor -4 7 8 4.4 sit -NULL 9 10 5.5 amet -5 NULL 12 6.6 , -6 11 NULL 7.7 consectetur -7 13 14 NULL adipiscing -8 15 16 8.8 NULL - -# arrays_values table -query ?IIT -select column1, column2, column3, column4 from arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 12 2 . -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 23 3 - -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] 34 4 ok -NULL 44 5 @ -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $ -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 55 NULL ^ -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL - -# slices table -query ?II -select column1, column2, column3 from slices; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] 2 -4 -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] 0 0 -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -4 -7 -NULL 4 5 -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] 5 NULL - -query ??I? 
-select column1, column2, column3, column4 from arrays_values_v2; ----- -[NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] -NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] -[9, NULL, 10] NULL 14 [[70, NULL, NULL]] -[NULL, 1] [NULL, 21] NULL NULL -[11, 12] NULL NULL NULL -NULL NULL NULL NULL - -# arrays_values_without_nulls table -query ?IIT -select column1, column2, column3, column4 from arrays_values_without_nulls; ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , -[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2 . -[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3 - -[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4 ok - -# arrays_with_repeating_elements table -query ?III -select column1, column2, column3, column4 from arrays_with_repeating_elements; ----- -[1, 2, 1, 3, 2, 2, 1, 3, 2, 3] 2 4 3 -[4, 4, 5, 5, 6, 5, 5, 5, 4, 4] 4 7 2 -[7, 7, 7, 8, 7, 9, 7, 8, 7, 7] 7 10 5 -[10, 11, 12, 10, 11, 12, 10, 11, 12, 10] 10 13 10 - -# nested_arrays_with_repeating_elements table -query ???I -select column1, column2, column3, column4 from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [4, 5, 6] [10, 11, 12] 3 -[[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [10, 11, 12] [19, 20, 21] 2 -[[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [19, 20, 21] [28, 29, 30] 5 -[[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [28, 29, 30] [37, 38, 39] 10 - - -### Array index - - -## array[i] - -# single index with scalars #1 (positive index) -query IRT -select make_array(1, 2, 3)[1], make_array(1.0, 2.0, 3.0)[2], make_array('h', 'e', 'l', 'l', 'o')[3]; ----- -1 2 l - -# single index with scalars #2 (zero index) -query I -select make_array(1, 
2, 3)[0]; ----- -NULL - -# single index with scalars #3 (negative index) -query IRT -select make_array(1, 2, 3)[-1], make_array(1.0, 2.0, 3.0)[-2], make_array('h', 'e', 'l', 'l', 'o')[-3]; ----- -3 2 l - -# single index with scalars #4 (complex index) -query IRT -select make_array(1, 2, 3)[1 + 2 - 1], make_array(1.0, 2.0, 3.0)[2 * 1 * 0 - 2], make_array('h', 'e', 'l', 'l', 'o')[2 - 3]; ----- -2 2 o - -# single index with columns #1 (positive index) -query ?RT -select column1[2], column2[3], column3[1] from arrays; ----- -[3, NULL] 3.3 L -[5, 6] 6.6 i -[7, 8] 9.9 d -[9, 10] 12.2 s -NULL 15.5 a -[13, 14] NULL , -[NULL, 18] 18.8 NULL - -# single index with columns #2 (zero index) -query ?RT -select column1[0], column2[0], column3[0] from arrays; ----- -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL - -# single index with columns #3 (negative index) -query ?RT -select column1[-2], column2[-3], column3[-1] from arrays; ----- -[NULL, 2] 1.1 m -[3, 4] NULL m -[5, 6] 7.7 r -[7, NULL] 10.1 t -NULL 13.3 t -[11, 12] NULL , -[15, 16] 16.6 NULL - -# single index with columns #4 (complex index) -query ?RT -select column1[9 - 7], column2[2 * 0], column3[1 - 3] from arrays; ----- -[3, NULL] NULL e -[5, 6] NULL u -[7, 8] NULL o -[9, 10] NULL i -NULL NULL e -[13, 14] NULL NULL -[NULL, 18] NULL NULL - -# TODO: support index as column -# single index with columns #5 (index as column) -# query ? -# select make_array(1, 2, 3, 4, 5)[column2] from arrays_with_repeating_elements; -# ---- - -# TODO: support argument and index as columns -# single index with columns #6 (argument and index as columns) -# query I -# select column1[column2] from arrays_with_repeating_elements; -# ---- - -## array[i:j] - -# multiple index with columns #1 (positive index) -query ??? -select make_array(1, 2, 3)[1:2], make_array(1.0, 2.0, 3.0)[2:3], make_array('h', 'e', 'l', 'l', 'o')[2:4]; ----- -[1, 2] [2.0, 3.0] [e, l, l] - -query ??? 
-select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1:2], - arrow_cast([1.0, 2.0, 3.0], 'LargeList(Int64)')[2:3], - arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)')[2:4] -; ----- -[1, 2] [2, 3] [e, l, l] - -# multiple index with columns #2 (zero index) -query ??? -select make_array(1, 2, 3)[0:0], make_array(1.0, 2.0, 3.0)[0:2], make_array('h', 'e', 'l', 'l', 'o')[0:6]; ----- -[] [1.0, 2.0] [h, e, l, l, o] - -query ??? -select arrow_cast([1, 2, 3], 'LargeList(Int64)')[0:0], - arrow_cast([1.0, 2.0, 3.0], 'LargeList(Int64)')[0:2], - arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)')[0:6] -; ----- -[] [1, 2] [h, e, l, l, o] - -query I -select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1]; ----- -1 - -# TODO: support multiple negative index -# multiple index with columns #3 (negative index) -# query II -# select make_array(1, 2, 3)[-3:-1], make_array(1.0, 2.0, 3.0)[-3:-1], make_array('h', 'e', 'l', 'l', 'o')[-2:0]; -# ---- - -# TODO: support complex index -# multiple index with columns #4 (complex index) -# query III -# select make_array(1, 2, 3)[2 + 1 - 1:10], make_array(1.0, 2.0, 3.0)[2 | 2:10], make_array('h', 'e', 'l', 'l', 'o')[6 ^ 6:10]; -# ---- - -# multiple index with columns #1 (positive index) -query ??? -select column1[2:4], column2[1:4], column3[3:4] from arrays; ----- -[[3, NULL]] [1.1, 2.2, 3.3] [r, e] -[[5, 6]] [NULL, 5.5, 6.6] [NULL, u] -[[7, 8]] [7.7, 8.8, 9.9] [l, o] -[[9, 10]] [10.1, NULL, 12.2] [t] -NULL [13.3, 14.4, 15.5] [e, t] -[[13, 14]] NULL [] -[[NULL, 18]] [16.6, 17.7, 18.8] NULL - -# multiple index with columns #2 (zero index) -query ??? 
-select column1[0:5], column2[0:3], column3[0:9] from arrays; ----- -[[NULL, 2], [3, NULL]] [1.1, 2.2, 3.3] [L, o, r, e, m] -[[3, 4], [5, 6]] [NULL, 5.5, 6.6] [i, p, NULL, u, m] -[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r] -[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t] -NULL [13.3, 14.4, 15.5] [a, m, e, t] -[[11, 12], [13, 14]] NULL [,] -[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL - -# TODO: support negative index -# multiple index with columns #3 (negative index) -# query ?RT -# select column1[-2:-4], column2[-3:-5], column3[-1:-4] from arrays; -# ---- -# [NULL, 2] 1.1 m - -# TODO: support complex index -# multiple index with columns #4 (complex index) -# query ?RT -# select column1[9 - 7:2 + 2], column2[1 * 0:2 * 3], column3[1 + 1 - 0:5 % 3] from arrays; -# ---- - -# TODO: support first index as column -# multiple index with columns #5 (first index as column) -# query ? -# select make_array(1, 2, 3, 4, 5)[column2:4] from arrays_with_repeating_elements -# ---- - -# TODO: support last index as column -# multiple index with columns #6 (last index as column) -# query ?RT -# select make_array(1, 2, 3, 4, 5)[2:column3] from arrays_with_repeating_elements; -# ---- - -# TODO: support argument and indices as column -# multiple index with columns #7 (argument and indices as column) -# query ?RT -# select column1[column2:column3] from arrays_with_repeating_elements; -# ---- - -# array[i:j:k] - -# multiple index with columns #1 (positive index) -query ??? -select make_array(1, 2, 3)[1:2:2], make_array(1.0, 2.0, 3.0)[2:3:2], make_array('h', 'e', 'l', 'l', 'o')[2:4:2]; ----- -[1] [2.0] [e, l] - -# multiple index with columns #2 (zero index) -query ??? -select make_array(1, 2, 3)[0:0:2], make_array(1.0, 2.0, 3.0)[0:2:2], make_array('h', 'e', 'l', 'l', 'o')[0:6:2]; ----- -[] [1.0] [h, l, o] - -#TODO: sqlparser does not support negative index -## multiple index with columns #3 (negative index) -#query ??? 
-#select make_array(1, 2, 3)[-1:-2:-2], make_array(1.0, 2.0, 3.0)[-2:-3:-2], make_array('h', 'e', 'l', 'l', 'o')[-2:-4:-2]; -#---- -#[1] [2.0] [e, l] - -# multiple index with columns #1 (positive index) -query ??? -select column1[2:4:2], column2[1:4:2], column3[3:4:2] from arrays; ----- -[[3, NULL]] [1.1, 3.3] [r] -[[5, 6]] [NULL, 6.6] [NULL] -[[7, 8]] [7.7, 9.9] [l] -[[9, 10]] [10.1, 12.2] [t] -NULL [13.3, 15.5] [e] -[[13, 14]] NULL [] -[[NULL, 18]] [16.6, 18.8] NULL - -# multiple index with columns #2 (zero index) -query ??? -select column1[0:5:2], column2[0:3:2], column3[0:9:2] from arrays; ----- -[[NULL, 2]] [1.1, 3.3] [L, r, m] -[[3, 4]] [NULL, 6.6] [i, NULL, m] -[[5, 6]] [7.7, 9.9] [d, l, r] -[[7, NULL]] [10.1, 12.2] [s, t] -NULL [13.3, 15.5] [a, e] -[[11, 12]] NULL [,] -[[15, 16]] [16.6, 18.8] NULL - - -### Array function tests - - -## make_array (aliases: `make_list`) - -# make_array scalar function #1 -query ??? -select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o'); ----- -[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] - -# make_array scalar function #2 -query ??? -select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), make_array([[[[1], [2]]]]); ----- -[1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]] - -# make_array scalar function #3 -query ?? -select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 4]], [[5, 6], [7, 8]]); ----- -[[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] - -# make_array scalar function #4 -query ?? -select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o'); ----- -[[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o] - -# make_array scalar function #5 -query ? -select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12)))) ----- -[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]] - -# make_array scalar function #6 -query ? 
-select make_array() ----- -[] - -# make_array scalar function #7 -query ?? -select make_array(make_array()), make_array(make_array(make_array())) ----- -[[]] [[[]]] - -# make_list scalar function #8 (function alias: `make_array`) -query ??? -select make_list(1, 2, 3), make_list(1.0, 2.0, 3.0), make_list('h', 'e', 'l', 'l', 'o'); ----- -[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] - -# make_array scalar function with nulls -query ??? -select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', NULL, 'l', NULL, 'o'); ----- -[1, NULL, 3] [NULL, 2.0, NULL] [h, NULL, l, NULL, o] - -# make_array scalar function with nulls #2 -query ?? -select make_array(1, 2, NULL), make_array(make_array(NULL, 2), make_array(NULL, 3)); ----- -[1, 2, NULL] [[NULL, 2], [NULL, 3]] - -# make_array scalar function with nulls #3 -query ??? -select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NULL, NULL), make_array(NULL, NULL)); ----- -[NULL] [NULL, NULL, NULL] [[NULL, NULL], [NULL, NULL]] - -# make_array with 1 columns -query ??? -select make_array(a), make_array(d), make_array(e) from values; ----- -[1] [1.1] [Lorem] -[2] [2.2] [ipsum] -[3] [3.3] [dolor] -[4] [4.4] [sit] -[NULL] [5.5] [amet] -[5] [6.6] [,] -[6] [7.7] [consectetur] -[7] [NULL] [adipiscing] -[8] [8.8] [NULL] - -# make_array with 2 columns #1 -query ?? -select make_array(b, c), make_array(e, f) from values; ----- -[1, 2] [Lorem, A] -[3, 4] [ipsum, ] -[5, 6] [dolor, BB] -[7, 8] [sit, NULL] -[9, 10] [amet, CCC] -[NULL, 12] [,, DD] -[11, NULL] [consectetur, E] -[13, 14] [adipiscing, F] -[15, 16] [NULL, ] - -# make_array with 4 columns -query ? -select make_array(a, b, c, d) from values; ----- -[1.0, 1.0, 2.0, 1.1] -[2.0, 3.0, 4.0, 2.2] -[3.0, 5.0, 6.0, 3.3] -[4.0, 7.0, 8.0, 4.4] -[NULL, 9.0, 10.0, 5.5] -[5.0, NULL, 12.0, 6.6] -[6.0, 11.0, NULL, 7.7] -[7.0, 13.0, 14.0, NULL] -[8.0, 15.0, 16.0, 8.8] - -# make_array with column of list -query ?? 
-select column1, column5 from arrays_values_without_nulls; ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 3] -[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] [4, 5] -[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] [6, 7] -[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] [8, 9] - -# make array with arrays of different types -query ? -select make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)')) ----- -[[1], [-1]] - -query T -select arrow_typeof(make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)'))); ----- -List(LargeList(Int64)) - - -query ??? -select make_array(column1), - make_array(column1, column5), - make_array(column1, make_array(50,51,52)) -from arrays_values_without_nulls; ----- -[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [2, 3]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [50, 51, 52]] -[[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [4, 5]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [50, 51, 52]] -[[21, 22, 23, 24, 25, 26, 27, 28, 29, 30]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [6, 7]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [50, 51, 52]] -[[31, 32, 33, 34, 35, 26, 37, 38, 39, 40]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [8, 9]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [50, 51, 52]] - -## array_element (aliases: array_extract, list_extract, list_element) - -# Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: 'array_element' does not support zero arguments -select array_element(); - -# array_element error -query error -select array_element(1, 2); - -# array_element with null -query I -select array_element([1, 2], NULL); ----- -NULL - -query ? 
-select array_element(NULL, 2); ----- -NULL - -# array_element scalar function #1 (with positive index) -query IT -select array_element(make_array(1, 2, 3, 4, 5), 2), array_element(make_array('h', 'e', 'l', 'l', 'o'), 3); ----- -2 l - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); ----- -2 l - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); ----- -2 l - -# array_element scalar function #2 (with positive index; out of bounds) -query IT -select array_element(make_array(1, 2, 3, 4, 5), 7), array_element(make_array('h', 'e', 'l', 'l', 'o'), 11); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 11); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 11); ----- -NULL NULL - -# array_element scalar function #3 (with zero) -query IT -select array_element(make_array(1, 2, 3, 4, 5), 0), array_element(make_array('h', 'e', 'l', 'l', 'o'), 0); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 0), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 0); ----- -NULL NULL - -# array_element scalar function #4 (with NULL) -query IT -select array_element(make_array(1, 2, 3, 4, 5), NULL), 
array_element(make_array('h', 'e', 'l', 'l', 'o'), NULL); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), NULL), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), NULL); ----- -NULL NULL - -# array_element scalar function #5 (with negative index) -query IT -select array_element(make_array(1, 2, 3, 4, 5), -2), array_element(make_array('h', 'e', 'l', 'l', 'o'), -3); ----- -4 l - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3); ----- -4 l - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -2), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -3); ----- -4 l - -# array_element scalar function #6 (with negative index; out of bounds) -query IT -select array_element(make_array(1, 2, 3, 4, 5), -11), array_element(make_array('h', 'e', 'l', 'l', 'o'), -7); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -11), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -7); ----- -NULL NULL - -query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), -11), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), -7); ----- -NULL NULL - -# array_element scalar function #7 (nested array) -query ? -select array_element(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 1); ----- -[1, 2, 3, 4, 5] - -query ? 
-select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), 1); ----- -[1, 2, 3, 4, 5] - -query ? -select array_element(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'FixedSizeList(2, List(Int64))'), 1); ----- -[1, 2, 3, 4, 5] - -# array_extract scalar function #8 (function alias `array_element`) -query IT -select array_extract(make_array(1, 2, 3, 4, 5), 2), array_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); ----- -2 l - -query IT -select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); ----- -2 l - -query IT -select array_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); ----- -2 l - -# list_element scalar function #9 (function alias `array_element`) -query IT -select list_element(make_array(1, 2, 3, 4, 5), 2), list_element(make_array('h', 'e', 'l', 'l', 'o'), 3); ----- -2 l - -query IT -select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); ----- -2 l - -query IT -select list_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2), list_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); ----- -2 l - -# list_extract scalar function #10 (function alias `array_element`) -query IT -select list_extract(make_array(1, 2, 3, 4, 5), 2), list_extract(make_array('h', 'e', 'l', 'l', 'o'), 3); ----- -2 l - -query IT -select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3); ----- -2 l - -query IT -select list_extract(arrow_cast(make_array(1, 2, 3, 4, 5), 
'FixedSizeList(5, Int64)'), 2), list_extract(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 3); ----- -2 l - -# array_element with columns -query I -select array_element(column1, column2) from slices; ----- -NULL -12 -NULL -37 -NULL -NULL -55 - -query I -select array_element(arrow_cast(column1, 'LargeList(Int64)'), column2) from slices; ----- -NULL -12 -NULL -37 -NULL -NULL -55 - -query I -select array_element(column1, column2) from fixed_slices; ----- -NULL -12 -NULL -37 -NULL -55 - -# array_element with columns and scalars -query II -select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from slices; ----- -1 3 -2 13 -NULL 23 -2 33 -4 NULL -NULL 43 -5 NULL - -query II -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2), array_element(arrow_cast(column1, 'LargeList(Int64)'), 3) from slices; ----- -1 3 -2 13 -NULL 23 -2 33 -4 NULL -NULL 43 -5 NULL - -query II -select array_element(make_array(1, 2, 3, 4, 5), column2), array_element(column1, 3) from fixed_slices; ----- -1 3 -2 13 -NULL 23 -2 33 -NULL 43 -5 NULL - -# array_element of empty array -query T -select coalesce(array_element([], 1), array_element(NULL, 1), 'ok'); ----- -ok - - -## array_max -# array_max scalar function #1 (with positive index) -query I -select array_max(make_array(5, 3, 6, 4)); ----- -6 - -query I -select array_max(make_array(5, 3, 4, NULL, 6, NULL)); ----- -6 - -query ? 
-select array_max(make_array(NULL, NULL)); ----- -NULL - -query T -select array_max(make_array('h', 'e', 'o', 'l', 'l')); ----- -o - -query T -select array_max(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); ----- -o - -query B -select array_max(make_array(false, true, false, true)); ----- -true - -query B -select array_max(make_array(false, true, NULL, false, true)); ----- -true - -query D -select array_max(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); ----- -1999-05-01 - -query D -select array_max(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); ----- -1999-05-01 - -query P -select array_max(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); ----- -1995-06-01T00:00:00 - -query P -select array_max(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); ----- -1996-10-01T00:00:00 - -query R -select array_max(make_array(5.1, -3.2, 6.3, 4.9)); ----- -6.3 - -query ?I -select input, array_max(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) ----- -[-1, 0, 1] 1 -[9, 10, 11] 11 -[19, 20, 21] 21 -[29, 30, 31] 31 -[NULL, NULL, NULL] NULL - -query II -select array_max(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_max(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -3 1 - -query II -select array_max(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_max(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -3 1 - -query ? 
-select array_max(make_array()); ----- -NULL - -# Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: 'array_max' does not support zero arguments -select array_max(); - -# array_max over multiple rows (exercises the offsets-based iteration) -query I -select array_max(column1) from (values - (make_array(1, 5, 3)), - (make_array(10, 2, 8)), - (NULL), - (make_array(NULL, 7, NULL)), - (make_array(100)) -) as t(column1); ----- -5 -10 -NULL -7 -100 - -# array_max with NaN values (NaN should not be returned as max) -query R -select array_max(make_array(1.0, 'NaN'::double, 3.0)); ----- -NaN - -query R -select array_max(make_array('NaN'::double, 'NaN'::double)); ----- -NaN - -query R -select array_max(make_array('NaN'::double, NULL)); ----- -NaN - -# array_max with Int32 (exercises a different primitive type than Int64) -query I -select array_max(arrow_cast(make_array(10, -5, 3), 'List(Int32)')); ----- -10 - -## array_min - -query I -select array_min(make_array(5, 3, 6, 4)); ----- -3 - -query I -select array_min(make_array(5, 3, 4, NULL, 6, NULL)); ----- -3 - -query ? 
-select array_min(make_array(NULL, NULL)); ----- -NULL - -query T -select array_min(make_array('h', 'e', 'o', 'l', 'l')); ----- -e - -query T -select array_min(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); ----- -e - -query B -select array_min(make_array(false, true, false, true)); ----- -false - -query B -select array_min(make_array(false, true, NULL, false, true)); ----- -false - -query D -select array_min(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); ----- -1985-11-01 - -query D -select array_min(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); ----- -1993-03-01 - -query P -select array_min(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); ----- -1984-10-01T00:00:00 - -query P -select array_min(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); ----- -1995-06-01T00:00:00 - -query R -select array_min(make_array(5.1, -3.2, 6.3, 4.9)); ----- --3.2 - -query ?I -select input, array_min(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) ----- -[-1, 0, 1] -1 -[9, 10, 11] 9 -[19, 20, 21] 19 -[29, 30, 31] 29 -[NULL, NULL, NULL] NULL - -query II -select array_min(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_min(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -1 1 - -query II -select array_min(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_min(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -1 1 - -query ? 
-select array_min(make_array()); ----- -NULL - -# Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: 'array_min' does not support zero arguments -select array_min(); - -# array_min over multiple rows (exercises the offsets-based iteration) -query I -select array_min(column1) from (values - (make_array(1, 5, 3)), - (make_array(10, 2, 8)), - (NULL), - (make_array(NULL, 7, NULL)), - (make_array(100)) -) as t(column1); ----- -1 -2 -NULL -7 -100 - -# array_min with NaN values (NaN should not be returned as min) -query R -select array_min(make_array(1.0, 'NaN'::double, 3.0)); ----- -1 - -query R -select array_min(make_array('NaN'::double, 'NaN'::double)); ----- -NaN - -query R -select array_min(make_array('NaN'::double, NULL)); ----- -NaN - -# array_min with Int32 (exercises a different primitive type than Int64) -query I -select array_min(arrow_cast(make_array(10, -5, 3), 'List(Int32)')); ----- --5 - -# array_min/array_max preserve parameterized primitive metadata -query PPTT -select - array_min(ts_list), - array_max(ts_list), - arrow_typeof(array_min(ts_list)), - arrow_typeof(array_max(ts_list)) -from ( - select arrow_cast( - make_array( - arrow_cast(20, 'Timestamp(Nanosecond, Some("UTC"))'), - arrow_cast(10, 'Timestamp(Nanosecond, Some("UTC"))'), - arrow_cast(30, 'Timestamp(Nanosecond, Some("UTC"))') - ), - 'List(Timestamp(Nanosecond, Some("UTC")))' - ) as ts_list -) t; ----- -1970-01-01T00:00:00.000000010Z 1970-01-01T00:00:00.000000030Z Timestamp(ns, "UTC") Timestamp(ns, "UTC") - -query RRTT -select - array_min(dec_list), - array_max(dec_list), - arrow_typeof(array_min(dec_list)), - arrow_typeof(array_max(dec_list)) -from ( - select arrow_cast( - make_array( - arrow_cast(200, 'Decimal128(20, 4)'), - arrow_cast(100, 'Decimal128(20, 4)'), - arrow_cast(300, 'Decimal128(20, 4)') - ), - 'List(Decimal128(20, 4))' - ) as dec_list -) t; ----- -100 300 Decimal128(20, 4) Decimal128(20, 4) - - -## array_pop_back 
(aliases: `list_pop_back`) - -# array_pop_back scalar function with null -#TODO: https://github.com/apache/datafusion/issues/7142 -# follow clickhouse and duckdb -#query ? -#select array_pop_back(null); -#---- -#NULL - -# array_pop_back scalar function #1 -query ?? -select array_pop_back(make_array(1, 2, 3, 4, 5)), array_pop_back(make_array('h', 'e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [h, e, l, l] - -query ?? -select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [h, e, l, l] - -query ?? -select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); ----- -[1, 2, 3, 4] [h, e, l, l] - -# array_pop_back scalar function #2 (after array_pop_back, array is empty) -query ? -select array_pop_back(make_array(1)); ----- -[] - -query ? -select array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -[] - -query ? -select array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -[] - -# array_pop_back scalar function #3 (array_pop_back the empty array) -query ? -select array_pop_back(array_pop_back(make_array(1))); ----- -[] - -query ? -select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)'))); ----- -[] - -query ? -select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); ----- -[] - -# array_pop_back scalar function #4 (array_pop_back the arrays which have NULL) -query ?? -select array_pop_back(make_array(1, 2, 3, 4, NULL)), array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o')); ----- -[1, 2, 3, 4] [NULL, e, l, NULL] - -query ?? 
-select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [NULL, e, l, NULL] - -query ?? -select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'FixedSizeList(5, Utf8)')); ----- -[1, 2, 3, 4] [NULL, e, l, NULL] - -# array_pop_back scalar function #5 (array_pop_back the nested arrays) -query ? -select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'LargeList(List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -# array_pop_back scalar function #6 (array_pop_back the nested arrays with NULL) -query ? -select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL)); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'LargeList(List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? 
-select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'FixedSizeList(6, List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -# array_pop_back scalar function #7 (array_pop_back the nested arrays with NULL) -query ? -select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4))); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'LargeList(List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'FixedSizeList(5, List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], NULL] - -# array_pop_back scalar function #8 (after array_pop_back, nested array is empty) -query ? -select array_pop_back(make_array(make_array(1, 2, 3))); ----- -[] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(List(Int64))')); ----- -[] - -query ? -select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); ----- -[] - -# array_pop_back with columns -query ? -select array_pop_back(column1) from arrayspop; ----- -[1, 2] -[3, 4, 5] -[6, 7, 8, NULL] -[NULL, NULL] -NULL -[NULL, 10, 11] - -query ? -select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop; ----- -[1, 2] -[3, 4, 5] -[6, 7, 8, NULL] -[NULL, NULL] -NULL -[NULL, 10, 11] - -query ? -select array_pop_back(column1) from large_arrayspop; ----- -[1, 2] -[3, 4, 5] -[6, 7, 8, NULL] -[NULL, NULL] -NULL -[NULL, 10, 11] - -query ? 
-select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrayspop; ----- -[1, 2] -[3, 4, 5] -[6, 7, 8, NULL] -[NULL, NULL] -NULL -[NULL, 10, 11] - -## array_pop_front (aliases: `list_pop_front`) - -#TODO:https://github.com/apache/datafusion/issues/7142 -# array_pop_front scalar function with null -# follow clickhouse and duckdb -#query ? -#select array_pop_front(null); -#---- -#NULL - -# array_pop_front scalar function #1 -query ?? -select array_pop_front(make_array(1, 2, 3, 4, 5)), array_pop_front(make_array('h', 'e', 'l', 'l', 'o')); ----- -[2, 3, 4, 5] [e, l, l, o] - -query ?? -select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[2, 3, 4, 5] [e, l, l, o] - -query ?? -select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); ----- -[2, 3, 4, 5] [e, l, l, o] - -# array_pop_front scalar function #2 (after array_pop_front, array is empty) -query ? -select array_pop_front(make_array(1)); ----- -[] - -query ? -select array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -[] - -query ? -select array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -[] - -# array_pop_front scalar function #3 (array_pop_front the empty array) -query ? -select array_pop_front(array_pop_front(make_array(1))); ----- -[] - -query ? -select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)'))); ----- -[] - -query ? -select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); ----- -[] - -# array_pop_front scalar function #5 (array_pop_front the nested arrays) -query ? 
-select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); ----- -[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] - -query ? -select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'LargeList(List(Int64))')); ----- -[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] - -query ? -select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); ----- -[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] - -# array_pop_front scalar function #6 (array_pop_front the nested arrays with NULL) -query ? -select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4))); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'LargeList(List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -query ? -select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'FixedSizeList(6, List(Int64))')); ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] - -# array_pop_front scalar function #8 (after array_pop_front, nested array is empty) -query ? -select array_pop_front(make_array(make_array(1, 2, 3))); ----- -[] - -query ? -select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(List(Int64))')); ----- -[] - -query ? 
-select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); ----- -[] - -## array_slice (aliases: list_slice) - -# array_slice scalar function #1 (with positive indexes) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 2); ----- -[2, 3, 4] [h, e] - -query ???? -select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 2), - array_slice(make_array(1, 2, 3, 4, 5), 0, 5, 2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 5, 2); ----- -[1, 3, 5] [h, l, o] [1, 3, 5] [h, l, o] - -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, -1); ----- -[] [] - -query error Execution error: array_slice got invalid stride: 0, it cannot be 0 -select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, 0), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, 0); - -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 5, 1, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 5, 1, -2); ----- -[5, 3, 1] [o, l, h] - -# Test NULL stride -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 1, 5, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 5, NULL); ----- -NULL NULL - -# Test NULL stride -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 1, 5, NULL), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 5, NULL); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 2); ----- -[2, 3, 4] [h, e] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 2, 4), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 1, 2); ----- -[2, 3, 4] [h, e] - -# array_slice scalar function #2 (with positive indexes; full array) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 0, 6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 5); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 5); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -# TODO make error message nicer: https://github.com/apache/datafusion/issues/19004 -# Expected output (once supported): -# ---- -# [1, 2, 3, 4, 5] [h, e, l, l, o] -query error Failed to coerce arguments to satisfy a call to 'array_slice' function: -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'ListView(Int64)'), 0, 6), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'ListView(Utf8)'), 0, 5); - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 0, 6), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 0, 5); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -# array_slice scalar function #3 (with positive indexes; first index = second index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 4, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, 3); ----- -[4] [l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 4, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, 3); ----- -[4] [l] - -# array_slice scalar function #4 (with positive indexes; first index > second_index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, 1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 4, 1); ----- -[] [] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 4, 1); ----- -[] [] - -# array_slice scalar function #5 (with positive indexes; out of bounds) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, 6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, 7); ----- -[2, 3, 4, 5] [l, l, o] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, 7); ----- -[2, 3, 4, 5] [l, l, o] - -# TODO: Enable once array_slice supports LargeListView types. -# Expected output (once supported): -# ---- -# [2, 3, 4, 5] [l, l, o] -query error Failed to coerce arguments to satisfy a call to 'array_slice' function: -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeListView(Int64)'), 2, 6), - array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeListView(Utf8)'), 3, 7); - - -# array_slice scalar function #6 (with positive indexes; nested array) -query ? -select array_slice(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 1, 1); ----- -[[1, 2, 3, 4, 5]] - -query ? -select array_slice(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), 1, 1); ----- -[[1, 2, 3, 4, 5]] - -# array_slice scalar function #7 (with zero and positive number) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 0, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 3); ----- -[1, 2, 3, 4] [h, e, l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 3); ----- -[1, 2, 3, 4] [h, e, l] - -# array_slice scalar function #8 (with NULL and positive number) -query ?? 
-select array_slice(make_array(1, 2, 3, 4, 5), NULL, 4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL, 3); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL, 3); ----- -NULL NULL - -# array_slice scalar function #9 (with positive number and NULL) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 3, NULL); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 3, NULL); ----- -NULL NULL - -# array_slice scalar function #10 (with zero-zero) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 0, 0), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, 0); ----- -[] [] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, 0), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, 0); ----- -[] [] - -# array_slice scalar function #11 (with NULL-NULL) -query error -select array_slice(make_array(1, 2, 3, 4, 5), NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL); - -query error -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL); - -# array_slice scalar function #12 (with zero and negative number) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 0, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 0, -3); ----- -[1, 2] [h, e, l] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 0, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 0, -3); ----- -[1, 2] [h, e, l] - -# array_slice scalar function #13 (with negative number and NULL) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -2, NULL), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, NULL); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2, NULL), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, NULL); ----- -NULL NULL - -# array_slice scalar function #14 (with NULL and negative number) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), NULL, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), NULL, -3); ----- -NULL NULL - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), NULL, -3); ----- -NULL NULL - -# array_slice scalar function #15 (with negative indexes) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -4, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -1); ----- -[2, 3, 4, 5] [l, l, o] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -1); ----- -[2, 3, 4, 5] [l, l, o] - -# array_slice scalar function #16 (with negative indexes; almost full array (only with negative indices cannot return full array)) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -5, -1), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -5, -1); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -5, -1), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -5, -1); ----- -[1, 2, 3, 4, 5] [h, e, l, l, o] - -# array_slice scalar function #17 (with negative indexes; first index = second index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -4, -4), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -3); ----- -[2] [l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -3); ----- -[2] [l] - -# array_slice scalar function #18 (with negative indexes; first index > second_index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -4, -6), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, -6); ----- -[] [] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4, -6), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, -6); ----- -[] [] - -# array_slice scalar function #19 (with negative indexes; out of bounds) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -7, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -7, -3); ----- -[1, 2, 3, 4] [h, e, l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -7, -2), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -7, -3); ----- -[1, 2, 3, 4] [h, e, l] - -# array_slice scalar function #20 (with negative indexes; nested array) -query ?? -select array_slice(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), -2, -1), array_slice(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), -1, -1); ----- -[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] - -query ?? 
-select array_slice(arrow_cast(make_array(make_array(1, 2, 3, 4, 5), make_array(6, 7, 8, 9, 10)), 'LargeList(List(Int64))'), -2, -1), array_slice(arrow_cast(make_array(make_array(1, 2, 3), make_array(6, 7, 8)), 'LargeList(List(Int64))'), -1, -1); ----- -[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] [[6, 7, 8]] - - -# array_slice scalar function #21 (with first positive index and last negative index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, -3), array_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, -2); ----- -[2, 3] [e, l, l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, -3), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, -2); ----- -[2, 3] [e, l, l] - -# array_slice scalar function #22 (with first negative index and last positive index) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -2, 5), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -3, 4); ----- -[4, 5] [l, l] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -2, 5), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -3, 4); ----- -[4, 5] [l, l] - -# list_slice scalar function #23 (function alias `array_slice`) -query ?? -select list_slice(make_array(1, 2, 3, 4, 5), 2, 4), list_slice(make_array('h', 'e', 'l', 'l', 'o'), 1, 2); ----- -[2, 3, 4] [h, e] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 4), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 1, 2); ----- -[2, 3, 4] [h, e] - -# array_slice scalar function #24 (with first negative index larger than len) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), -2147483648, 1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), -2147483648, 1); ----- -[1] [h] - -query ?? 
-select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -9223372036854775808, 1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -9223372036854775808, 1); ----- -[1] [h] - -# array_slice scalar function #25 (with negative step and equal indexes) -query ?? -select array_slice(make_array(1, 2, 3, 4, 5), 2, 2, -1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, 2, -1); ----- -[2] [e] - -query ?? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 2, -1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, 2, -1); ----- -[2] [e] - -# array_slice with columns -query ? -select array_slice(column1, column2, column3) from slices; ----- -[NULL] -[12, 13, 14, 15, 16, 17] -[] -[] -NULL -NULL -NULL - -query ? -select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from slices; ----- -[NULL] -[12, 13, 14, 15, 16, 17] -[] -[] -NULL -NULL -NULL - -# TODO: support NULLS in output instead of `[]` -# array_slice with columns and scalars -query ??? -select array_slice(make_array(1, 2, 3, 4, 5), column2, column3), array_slice(column1, 3, column3), array_slice(column1, column2, 5) from slices; ----- -[1] [] [NULL, 2, 3, 4, 5] -[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] -[] [] [21, 22, 23, NULL, 25] -[] [33, 34] [] -[4, 5] NULL NULL -NULL [43, 44, 45, 46] NULL -NULL NULL [55] - -query ??? -select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), 3, column3), array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, 5) from slices; ----- -[1] [] [NULL, 2, 3, 4, 5] -[2] [13, 14, 15, 16, 17] [12, 13, 14, 15] -[] [] [21, 22, 23, NULL, 25] -[] [33, 34] [] -[4, 5] NULL NULL -NULL [43, 44, 45, 46] NULL -NULL NULL [55] - -# Test issue: https://github.com/apache/datafusion/issues/10425 -# `from` may be larger than `to` and `stride` is positive -query ???? 
-select array_slice(a, -1, 2, 1), array_slice(a, -1, 2), - array_slice(a, 3, 2, 1), array_slice(a, 3, 2) - from (values ([1.0, 2.0, 3.0, 3.0]), ([4.0, 5.0, 3.0]), ([6.0])) t(a); ----- -[] [] [] [] -[] [] [] [] -[6.0] [6.0] [] [] - -# array_slice with overlapping nulls across multiple inputs -query ? -select array_slice(column1, column2, column3) from ( - values - (make_array(1, 2, 3), NULL, NULL), - (NULL, NULL, 3), - (NULL, 1, NULL), - (make_array(4, 5, 6), 1, 3) -) as t(column1, column2, column3); ----- -NULL -NULL -NULL -[4, 5, 6] - -query ? -select array_slice(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from ( - values - (make_array(1, 2, 3), NULL, NULL), - (NULL, NULL, 3), - (NULL, 1, NULL), - (make_array(4, 5, 6), 1, 3) -) as t(column1, column2, column3); ----- -NULL -NULL -NULL -[4, 5, 6] - -# array_slice with overlapping nulls including stride -query ? -select array_slice(column1, column2, column3, column4) from ( - values - (make_array(1, 2, 3, 4, 5), 1, 5, NULL), - (NULL, NULL, 3, 2), - (make_array(1, 2, 3, 4, 5), NULL, NULL, NULL), - (make_array(1, 2, 3, 4, 5), 1, 5, 2) -) as t(column1, column2, column3, column4); ----- -NULL -NULL -NULL -[1, 3, 5] - -# Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: 'array_slice' does not support zero arguments -select array_slice(); - -query error Failed to coerce arguments -select array_slice(3.5, NULL, NULL); - -## array_any_value (aliases: list_any_value) - -# Testing with empty arguments should result in an error -query error -select array_any_value(); - -# Testing with non-array arguments should result in an error -query error -select array_any_value(1), array_any_value('a'), array_any_value(NULL); - -# array_any_value scalar function #1 (with null and non-null elements) - -query IT?I -select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(make_array(NULL, 'h', 'e', 'l', 'l', 'o')), array_any_value(make_array(NULL, 
NULL)), array_any_value(make_array(NULL, NULL, 1, 2, 3)); ----- -1 h NULL 1 - -query ITITI -select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), array_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'LargeList(Int64)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'LargeList(Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL, 1, 2, 3), 'LargeList(Int64)'));; ----- -1 h NULL NULL 1 - -query ITITI -select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'FixedSizeList(6, Int64)')), array_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'FixedSizeList(6, Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Int64)')), array_any_value(arrow_cast(make_array(NULL, NULL), 'FixedSizeList(2, Utf8)')), array_any_value(arrow_cast(make_array(NULL, NULL, 1, 2, 3, 4), 'FixedSizeList(6, Int64)')); ----- -1 h NULL NULL 1 - -# array_any_value scalar function #2 (with nested array) - -query ? -select array_any_value(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10))); ----- -[NULL, 1, 2, 3, 4, 5] - -query ? -select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'LargeList(List(Int64))')); ----- -[NULL, 1, 2, 3, 4, 5] - -query ? 
-select array_any_value(arrow_cast(make_array(NULL, make_array(NULL, 1, 2, 3, 4, 5), make_array(NULL, 6, 7, 8, 9, 10)), 'FixedSizeList(3, List(Int64))')); ----- -[NULL, 1, 2, 3, 4, 5] - -# array_any_value scalar function #3 (using function alias `list_any_value`) -query IT -select list_any_value(make_array(NULL, 1, 2, 3, 4, 5)), list_any_value(make_array(NULL, 'h', 'e', 'l', 'l', 'o')); ----- -1 h - -query IT -select list_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), list_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -1 h - -query IT -select list_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'FixedSizeList(6, Int64)')), list_any_value(arrow_cast(make_array(NULL, 'h', 'e', 'l', 'l', 'o'), 'FixedSizeList(6, Utf8)')); ----- -1 h - -# array_any_value with columns - -query I -select array_any_value(column1) from slices; ----- -2 -11 -21 -31 -NULL -41 -51 - -query I -select array_any_value(arrow_cast(column1, 'LargeList(Int64)')) from slices; ----- -2 -11 -21 -31 -NULL -41 -51 - -query I -select array_any_value(column1) from fixed_slices; ----- -2 -11 -21 -31 -41 -51 - -# array_any_value with columns and scalars - -query II -select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(column1) from slices; ----- -1 2 -1 11 -1 21 -1 31 -1 NULL -1 41 -1 51 - -query II -select array_any_value(arrow_cast(make_array(NULL, 1, 2, 3, 4, 5), 'LargeList(Int64)')), array_any_value(arrow_cast(column1, 'LargeList(Int64)')) from slices; ----- -1 2 -1 11 -1 21 -1 31 -1 NULL -1 41 -1 51 - -query II -select array_any_value(make_array(NULL, 1, 2, 3, 4, 5)), array_any_value(column1) from fixed_slices; ----- -1 2 -1 11 -1 21 -1 31 -1 41 -1 51 - -# make_array with nulls -query ??????? 
-select make_array(make_array('a','b'), null), - make_array(make_array('a','b'), null, make_array('c','d')), - make_array(null, make_array('a','b'), null), - make_array(null, make_array('a','b'), null, null, make_array('c','d')), - make_array(['a', 'bc', 'def'], null, make_array('rust')), - make_array([1,2,3], null, make_array(4,5,6,7)), - make_array(null, 1, null, 2, null, 3, null, null, 4, 5); ----- -[[a, b], NULL] [[a, b], NULL, [c, d]] [NULL, [a, b], NULL] [NULL, [a, b], NULL, NULL, [c, d]] [[a, bc, def], NULL, [rust]] [[1, 2, 3], NULL, [4, 5, 6, 7]] [NULL, 1, NULL, 2, NULL, 3, NULL, NULL, 4, 5] - -query ? -select make_array(column5, null, column5) from arrays_values_without_nulls; ----- -[[2, 3], NULL, [2, 3]] -[[4, 5], NULL, [4, 5]] -[[6, 7], NULL, [6, 7]] -[[8, 9], NULL, [8, 9]] - -query ? -select make_array(['a','b'], null); ----- -[[a, b], NULL] - -## array_sort (aliases: `list_sort`) -query ??? -select array_sort(make_array(1, 3, null, 5, NULL, -5)), array_sort(make_array(1, 3, null, 2), 'ASC'), array_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); ----- -[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] - -query ??? -select array_sort(arrow_cast(make_array(1, 3, null, 5, NULL, -5), 'LargeList(Int64)')), - array_sort(arrow_cast(make_array(1, 3, null, 2), 'LargeList(Int64)'), 'ASC'), - array_sort(arrow_cast(make_array(1, 3, null, 2), 'LargeList(Int64)'), 'desc', 'NULLS FIRST'); ----- -[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] - -query ??? -select array_sort(arrow_cast(make_array(1, 3, null, 5, NULL, -5), 'FixedSizeList(6, Int64)')), - array_sort(arrow_cast(make_array(1, 3, null, 2), 'FixedSizeList(4, Int64)'), 'ASC'), - array_sort(arrow_cast(make_array(1, 3, null, 2), 'FixedSizeList(4, Int64)'), 'desc', 'NULLS FIRST'); ----- -[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] - -query ? 
-select array_sort(column1, 'DESC', 'NULLS LAST') from arrays_values; ----- -[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] -[20, 18, 17, 16, 15, 14, 13, 12, 11, NULL] -[30, 29, 28, 27, 26, 25, 23, 22, 21, NULL] -[40, 39, 38, 37, 35, 34, 33, 32, 31, NULL] -NULL -[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] -[60, 59, 58, 57, 56, 55, 54, 52, 51, NULL] -[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] - -query ? -select array_sort(column1, 'ASC', 'NULLS FIRST') from arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[NULL, 11, 12, 13, 14, 15, 16, 17, 18, 20] -[NULL, 21, 22, 23, 25, 26, 27, 28, 29, 30] -[NULL, 31, 32, 33, 34, 35, 37, 38, 39, 40] -NULL -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[NULL, 51, 52, 54, 55, 56, 57, 58, 59, 60] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -# test with empty table -query ? -select array_sort(column1, 'DESC', 'NULLS FIRST') from arrays_values where false; ----- - -# test with empty array -query ? -select array_sort([]); ----- -[] - -# empty-but-non-null string arrays should remain non-null, not become null -query ?B -select array_sort(column1), array_sort(column1) is null -from (values (arrow_cast(make_array('b', 'a'), 'List(Utf8)')), (arrow_cast([], 'List(Utf8)'))) as t(column1); ----- -[a, b] false -[] false - -# test with null arguments -query ? -select array_sort(NULL); ----- -NULL - -query ? -select array_sort(column1, NULL) from arrays_values; ----- -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -query ?? -select array_sort(column1, 'DESC', NULL), array_sort(column1, 'ASC', NULL) from arrays_values; ----- -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL - -query ?? 
-select array_sort(column1, NULL, 'NULLS FIRST'), array_sort(column1, NULL, 'NULLS LAST') from arrays_values; ----- -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL -NULL NULL - -# maintains inner nullability -query ?T -select array_sort(column1), arrow_typeof(array_sort(column1)) -from values - (arrow_cast([], 'List(non-null Int32)')), - (arrow_cast(NULL, 'List(non-null Int32)')), - (arrow_cast([1, 3, 5, -5], 'List(non-null Int32)')) -; ----- -[] List(non-null Int32) -NULL List(non-null Int32) -[-5, 1, 3, 5] List(non-null Int32) - -query ?T -select column1, arrow_typeof(column1) -from values (array_sort(arrow_cast([1, 3, 5, -5], 'LargeList(non-null Int32)'))); ----- -[-5, 1, 3, 5] LargeList(non-null Int32) - -query ?T -select column1, arrow_typeof(column1) -from values (array_sort(arrow_cast([1, 3, 5, -5], 'FixedSizeList(4 x non-null Int32)'))); ----- -[-5, 1, 3, 5] List(non-null Int32) - -# arrays of strings -query ??? -select array_sort(make_array('banana', 'apple', null, 'cherry')), - array_sort(make_array('banana', 'apple', null, 'cherry'), 'DESC', 'NULLS LAST'), - array_sort(make_array('banana', 'apple', null, 'cherry'), 'ASC', 'NULLS LAST'); ----- -[NULL, apple, banana, cherry] [cherry, banana, apple, NULL] [apple, banana, cherry, NULL] - -query ? 
-select array_sort([struct('foo', 3), struct('foo', 1), struct('bar', 1)]) ----- -[{c0: bar, c1: 1}, {c0: foo, c1: 1}, {c0: foo, c1: 3}] - -## test with argument of incorrect types -query error DataFusion error: Execution error: the second parameter of array_sort expects DESC or ASC -select array_sort([1, 3, null, 5, NULL, -5], 1), array_sort([1, 3, null, 5, NULL, -5], 'DESC', 1), array_sort([1, 3, null, 5, NULL, -5], 1, 1); - -# test with empty row, the row that does not match the condition has row count 0 -statement ok -create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2); - -# rowsort is to ensure the order of group by is deterministic, array_sort has no effect here, since the sum() always returns single row. -query ? rowsort -select array_sort([sum(a)]) from t1 where a > 100 group by b; ----- -[102] -[202] - -statement ok -drop table t1; - -# float arrays with NaN and Infinity (NaN sorts after Infinity per IEEE totalOrder) -query ??? -select array_sort(make_array(1.0, 'NaN'::double, -1.0, 'Infinity'::double, '-Infinity'::double, null)), - array_sort(make_array(1.0, 'NaN'::double, -1.0, 'Infinity'::double, '-Infinity'::double, null), 'DESC', 'NULLS LAST'), - array_sort(make_array('NaN'::double, 'NaN'::double, 1.0)); ----- -[NULL, -inf, -1.0, 1.0, inf, NaN] [NaN, inf, 1.0, -1.0, -inf, NULL] [1.0, NaN, NaN] - -# float32 arrays -query ?? -select array_sort(arrow_cast(make_array(3.0, 1.0, 'NaN'::double, null, 2.0), 'List(Float32)')), - array_sort(arrow_cast(make_array(3.0, 1.0, 'NaN'::double, null, 2.0), 'List(Float32)'), 'DESC', 'NULLS LAST'); ----- -[NULL, 1.0, 2.0, 3.0, NaN] [NaN, 3.0, 2.0, 1.0, NULL] - -# element-level nulls with all sort option combinations -query ???? 
-select array_sort(make_array(3, null, 1, null, 2), 'ASC', 'NULLS FIRST'), - array_sort(make_array(3, null, 1, null, 2), 'ASC', 'NULLS LAST'), - array_sort(make_array(3, null, 1, null, 2), 'DESC', 'NULLS FIRST'), - array_sort(make_array(3, null, 1, null, 2), 'DESC', 'NULLS LAST'); ----- -[NULL, NULL, 1, 2, 3] [1, 2, 3, NULL, NULL] [NULL, NULL, 3, 2, 1] [3, 2, 1, NULL, NULL] - -# timestamp arrays -query ?? -select array_sort(make_array(arrow_cast('2024-01-15T10:00:00', 'Timestamp(Nanosecond, None)'), - arrow_cast('2024-01-01T00:00:00', 'Timestamp(Nanosecond, None)'), - null, - arrow_cast('2024-06-15T12:00:00', 'Timestamp(Nanosecond, None)'))), - array_sort(make_array(arrow_cast('2024-01-15T10:00:00', 'Timestamp(Nanosecond, None)'), - arrow_cast('2024-01-01T00:00:00', 'Timestamp(Nanosecond, None)'), - null, - arrow_cast('2024-06-15T12:00:00', 'Timestamp(Nanosecond, None)')), 'DESC', 'NULLS LAST'); ----- -[NULL, 2024-01-01T00:00:00, 2024-01-15T10:00:00, 2024-06-15T12:00:00] [2024-06-15T12:00:00, 2024-01-15T10:00:00, 2024-01-01T00:00:00, NULL] - -# date arrays -query ?? -select array_sort(make_array('2024-03-01'::date, '2024-01-01'::date, null, '2024-02-01'::date)), - array_sort(make_array('2024-03-01'::date, '2024-01-01'::date, null, '2024-02-01'::date), 'DESC', 'NULLS LAST'); ----- -[NULL, 2024-01-01, 2024-02-01, 2024-03-01] [2024-03-01, 2024-02-01, 2024-01-01, NULL] - -# struct arrays with nulls and DESC -query ?? -select array_sort([struct('b', 2), struct('a', 1), null, struct('a', 3)]), - array_sort([struct('b', 2), struct('a', 1), null, struct('a', 3)], 'DESC', 'NULLS LAST'); ----- -[NULL, {c0: a, c1: 1}, {c0: a, c1: 3}, {c0: b, c1: 2}] [{c0: b, c1: 2}, {c0: a, c1: 3}, {c0: a, c1: 1}, NULL] - -# boolean arrays -query ?? -select array_sort(make_array(true, false, null, true, false)), - array_sort(make_array(true, false, null, true, false), 'DESC', 'NULLS LAST'); ----- -[NULL, false, false, true, true] [true, true, false, false, NULL] - -# all-null array -query ? 
-select array_sort(make_array(null, null, null)); ----- -[NULL, NULL, NULL] - -# single-element arrays -query ?? -select array_sort(make_array(42)), array_sort(make_array(null::int)); ----- -[42] [NULL] - -## list_sort (aliases: `array_sort`) -query ??? -select list_sort(make_array(1, 3, null, 5, NULL, -5)), list_sort(make_array(1, 3, null, 2), 'ASC'), list_sort(make_array(1, 3, null, 2), 'desc', 'NULLS FIRST'); ----- -[NULL, NULL, -5, 1, 3, 5] [NULL, 1, 2, 3] [NULL, 3, 2, 1] - - -## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`) - -# array_append with NULLs - -query ? -select array_append(null, 1); ----- -[1] - -query ? -select array_append(null, [2, 3]); ----- -[[2, 3]] - -query ? -select array_append(null, [[4]]); ----- -[[[4]]] - -query ???? -select - array_append(make_array(), 4), - array_append(make_array(), null), - array_append(make_array(1, null, 3), 4), - array_append(make_array(null, null), 1) -; ----- -[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] - -query ???? -select - array_append(arrow_cast(make_array(), 'LargeList(Int64)'), 4), - array_append(arrow_cast(make_array(), 'LargeList(Int64)'), null), - array_append(arrow_cast(make_array(1, null, 3), 'LargeList(Int64)'), 4), - array_append(arrow_cast(make_array(null, null), 'LargeList(Int64)'), 1) -; ----- -[4] [NULL] [1, NULL, 3, 4] [NULL, NULL, 1] - -query ?? -select - array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'), 4), - array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Int64)'), 1) -; ----- -[1, NULL, 3, 4] [NULL, NULL, 1] - -# test invalid (non-null) -query error -select array_append(1, 2); - -query error -select array_append(1, [2]); - -query error -select array_append([1], [2]); - -query ?? -select - array_append(make_array(make_array(1, null, 3)), make_array(null)), - array_append(make_array(make_array(1, null, 3)), null); ----- -[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] - -query ?? 
-select - array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), arrow_cast(make_array(null), 'LargeList(Int64)')), - array_append(arrow_cast(make_array(make_array(1, null, 3)), 'LargeList(LargeList(Int64))'), null); ----- -[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] - -query ?? -select - array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), [null]), - array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), null); ----- -[[1, NULL, 3], [NULL]] [[1, NULL, 3], NULL] - -# array_append scalar function #3 -query ??? -select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'LargeList(Utf8)'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select array_append(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'FixedSizeList(4, Utf8)'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_append scalar function #4 (element is list) -query ??? -select array_append(make_array([1], [2], [3]), make_array(4)), array_append(make_array([1.0], [2.0], [3.0]), make_array(4.0)), array_append(make_array(['h'], ['e'], ['l'], ['l']), make_array('o')); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? 
-select array_append(arrow_cast(make_array([1], [2], [3]), 'LargeList(LargeList(Int64))'), arrow_cast(make_array(4), 'LargeList(Int64)')), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'LargeList(LargeList(Float64))'), arrow_cast(make_array(4.0), 'LargeList(Float64)')), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'LargeList(LargeList(Utf8))'), arrow_cast(make_array('o'), 'LargeList(Utf8)')); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? -select array_append(arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), [4]), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'FixedSizeList(3, List(Float64))'), [4.0]), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'FixedSizeList(4, List(Utf8))'), ['o']); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -# list_append scalar function #5 (function alias `array_append`) -query ??? -select list_append(make_array(1, 2, 3), 4), list_append(make_array(1.0, 2.0, 3.0), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select list_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), list_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_push_back scalar function #6 (function alias `array_append`) -query ??? -select array_push_back(make_array(1, 2, 3), 4), array_push_back(make_array(1.0, 2.0, 3.0), 4.0), array_push_back(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? 
-select array_push_back(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_push_back(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_push_back(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# list_push_back scalar function #7 (function alias `array_append`) -query ??? -select list_push_back(make_array(1, 2, 3), 4), list_push_back(make_array(1.0, 2.0, 3.0), 4.0), list_push_back(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select list_push_back(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), list_push_back(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), list_push_back(make_array('h', 'e', 'l', 'l'), 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_append with columns #1 -query ? -select array_append(column1, column2) from arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] -[44] -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] - -query ? -select array_append(column1, column2) from large_arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] -[44] -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] - -query ? 
-select array_append(column1, column2) from fixed_arrays_values; ----- -[NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 12] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 23] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 34] -[NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 44] -[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, NULL] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, 55] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] - -# array_append with columns #2 (element is list) -query ? -select array_append(column1, column2) from nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] - -query ? -select array_append(column1, column2) from large_nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] - -query ? -select array_append(column1, column2) from fixed_size_nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] - -# array_append with columns and scalars #1 -query ?? -select array_append(column2, 100.1), array_append(column3, '.') from arrays; ----- -[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] -[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] -[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] -[10.1, NULL, 12.2, 100.1] [s, i, t, .] -[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] -[100.1] [,, .] -[16.6, 17.7, 18.8, 100.1] [.] - -query ?? -select array_append(column2, 100.1), array_append(column3, '.') from large_arrays; ----- -[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] -[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] -[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] -[10.1, NULL, 12.2, 100.1] [s, i, t, .] -[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] 
-[100.1] [,, .] -[16.6, 17.7, 18.8, 100.1] [.] - -query ?? -select array_append(column2, 100.1), array_append(column3, '.') from fixed_size_arrays; ----- -[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] -[NULL, 5.5, 6.6, 100.1] [i, p, NULL, u, m, .] -[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .] -[10.1, NULL, 12.2, 100.1] [s, i, t, a, b, .] -[13.3, 14.4, 15.5, 100.1] [a, m, e, t, x, .] -[NULL, NULL, NULL, 100.1] [,, a, b, c, d, .] -[16.6, 17.7, 18.8, 100.1] [NULL, NULL, NULL, NULL, NULL, .] - -# array_append with columns and scalars #2 -query ?? -select array_append(column1, make_array(1, 11, 111)), array_append(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), column2) from nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] - -query ?? -select array_append(column1, arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'LargeList(LargeList(Int64))'), column2) from large_nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] - -query ?? 
-select array_append(column1, arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))'), column2) from fixed_size_nested_arrays; ----- -[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] -[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] - -## array_prepend (aliases: `list_prepend`, `array_push_front`, `list_push_front`) - -# array_prepend with NULLs - -# DuckDB: [4] -# ClickHouse: Null -query ? -select array_prepend(4, NULL); ----- -[4] - -query ? -select array_prepend(4, []); ----- -[4] - -query ? -select array_prepend(4, [null]); ----- -[4, NULL] - -# DuckDB: [null] -# ClickHouse: [null] -query ? -select array_prepend(null, []); ----- -[NULL] - -query ? -select array_prepend(null, [1]); ----- -[NULL, 1] - -query ? -select array_prepend(null, [[1,2,3]]); ----- -[NULL, [1, 2, 3]] - -# DuckDB: [[]] -# ClickHouse: [[]] -# TODO: We may also return [[]] -query ? -select array_prepend([], []); ----- -[[]] - -query ? -select array_prepend(null, null); ----- -[NULL] - -query ? -select array_append([], null); ----- -[NULL] - - -# array_prepend scalar function #3 -query ??? -select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select array_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), array_prepend(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), array_prepend('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? 
-select array_prepend(1, arrow_cast([2, 3, 4], 'FixedSizeList(3, Int64)')), array_prepend(1.0, arrow_cast([2.0, 3.0, 4.0], 'FixedSizeList(3, Float64)')), array_prepend('h', arrow_cast(['e', 'l', 'l', 'o'], 'FixedSizeList(4, Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_prepend scalar function #4 (element is list) -query ??? -select array_prepend(make_array(1), make_array(make_array(2), make_array(3), make_array(4))), array_prepend(make_array(1.0), make_array([2.0], [3.0], [4.0])), array_prepend(make_array('h'), make_array(['e'], ['l'], ['l'], ['o'])); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? -select array_prepend(arrow_cast(make_array(1), 'LargeList(Int64)'), arrow_cast(make_array(make_array(2), make_array(3), make_array(4)), 'LargeList(LargeList(Int64))')), - array_prepend(arrow_cast(make_array(1.0), 'LargeList(Float64)'), arrow_cast(make_array([2.0], [3.0], [4.0]), 'LargeList(LargeList(Float64))')), - array_prepend(arrow_cast(make_array('h'), 'LargeList(Utf8)'), arrow_cast(make_array(['e'], ['l'], ['l'], ['o']), 'LargeList(LargeList(Utf8))')); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? -select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, List(Int64))')), - array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, List(Float64))')), - array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, List(Utf8))')); ----- -[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -# list_prepend scalar function #5 (function alias `array_prepend`) -query ??? 
-select list_prepend(1, make_array(2, 3, 4)), list_prepend(1.0, make_array(2.0, 3.0, 4.0)), list_prepend('h', make_array('e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select list_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), list_prepend(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), list_prepend('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_push_front scalar function #6 (function alias `array_prepend`) -query ??? -select array_push_front(1, make_array(2, 3, 4)), array_push_front(1.0, make_array(2.0, 3.0, 4.0)), array_push_front('h', make_array('e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select array_push_front(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), array_push_front(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), array_push_front('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# list_push_front scalar function #7 (function alias `array_prepend`) -query ??? -select list_push_front(1, make_array(2, 3, 4)), list_push_front(1.0, make_array(2.0, 3.0, 4.0)), list_push_front('h', make_array('e', 'l', 'l', 'o')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -query ??? -select list_push_front(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), list_push_front(1.0, arrow_cast(make_array(2.0, 3.0, 4.0), 'LargeList(Float64)')), list_push_front('h', arrow_cast(make_array('e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array_prepend scalar function #7 (element is fixed size list) -query ??? 
-select array_prepend(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'), make_array(arrow_cast(make_array(2), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(3), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(4), 'FixedSizeList(1, Int64)'))), - array_prepend(arrow_cast(make_array(1.0), 'FixedSizeList(1, Float64)'), make_array(arrow_cast([2.0], 'FixedSizeList(1, Float64)'), arrow_cast([3.0], 'FixedSizeList(1, Float64)'), arrow_cast([4.0], 'FixedSizeList(1, Float64)'))), - array_prepend(arrow_cast(make_array('h'), 'FixedSizeList(1, Utf8)'), make_array(arrow_cast(['e'], 'FixedSizeList(1, Utf8)'), arrow_cast(['l'], 'FixedSizeList(1, Utf8)'), arrow_cast(['l'], 'FixedSizeList(1, Utf8)'), arrow_cast(['o'], 'FixedSizeList(1, Utf8)'))); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? -select array_prepend(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'), arrow_cast(make_array(make_array(2), make_array(3), make_array(4)), 'LargeList(FixedSizeList(1, Int64))')), - array_prepend(arrow_cast(make_array(1.0), 'FixedSizeList(1, Float64)'), arrow_cast(make_array([2.0], [3.0], [4.0]), 'LargeList(FixedSizeList(1, Float64))')), - array_prepend(arrow_cast(make_array('h'), 'FixedSizeList(1, Utf8)'), arrow_cast(make_array(['e'], ['l'], ['l'], ['o']), 'LargeList(FixedSizeList(1, Utf8))')); ----- -[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -query ??? 
-select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, FixedSizeList(1, Int64))')), - array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, FixedSizeList(1, Float64))')), - array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, FixedSizeList(1, Utf8))')); ----- -[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] - -# array_prepend with columns #1 -query ? -select array_prepend(column2, column1) from arrays_values; ----- -[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] -[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] -[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -[44] -[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] -[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -query ? -select array_prepend(column2, column1) from large_arrays_values; ----- -[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] -[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] -[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -[44] -[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] -[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -query ? -select array_prepend(column2, column1) from fixed_arrays_values; ----- -[1, NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[12, 11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] -[23, 21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] -[34, 31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -[44, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] -[NULL, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[55, 51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] -[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -# array_prepend with columns #2 (element is list) -query ? 
-select array_prepend(column2, column1) from nested_arrays; ----- -[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] -[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] - -query ? -select array_prepend(column2, column1) from large_nested_arrays; ----- -[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] -[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] - -query ? -select array_prepend(column2, column1) from fixed_size_nested_arrays; ----- -[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] -[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] - -# array_prepend with columns and scalars #1 -query ?? -select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; ----- -[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] -[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] -[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] -[100.1, 10.1, NULL, 12.2] [., s, i, t] -[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] -[100.1] [., ,] -[100.1, 16.6, 17.7, 18.8] [.] - -query ?? -select array_prepend(100.1, column2), array_prepend('.', column3) from large_arrays; ----- -[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] -[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] -[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] -[100.1, 10.1, NULL, 12.2] [., s, i, t] -[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] -[100.1] [., ,] -[100.1, 16.6, 17.7, 18.8] [.] - -query ?? 
-select array_prepend(100.1, column2), array_prepend('.', column3) from fixed_size_arrays; ----- -[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] -[100.1, NULL, 5.5, 6.6] [., i, p, NULL, u, m] -[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r] -[100.1, 10.1, NULL, 12.2] [., s, i, t, a, b] -[100.1, 13.3, 14.4, 15.5] [., a, m, e, t, x] -[100.1, NULL, NULL, NULL] [., ,, a, b, c, d] -[100.1, 16.6, 17.7, 18.8] [., NULL, NULL, NULL, NULL, NULL] - -# array_prepend with columns and scalars #2 (element is list) -query ?? -select array_prepend(make_array(1, 11, 111), column1), array_prepend(column2, make_array(make_array(1, 2, 3), make_array(11, 12, 13))) from nested_arrays; ----- -[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] -[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] - -query ?? -select array_prepend(arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'LargeList(LargeList(Int64))')) from large_nested_arrays; ----- -[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] -[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] - -query ?? 
-select array_prepend(arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))')) from fixed_size_nested_arrays; ----- -[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] -[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] - -## array_repeat (aliases: `list_repeat`) - -# array_repeat scalar function #1 -query ???????? -select - array_repeat(1, 5), - array_repeat(3.14, 3), - array_repeat('l', 4), - array_repeat(null, 2), - list_repeat(-1, 5), - list_repeat(-3.14, 0), - list_repeat('rust', 4), - list_repeat(null, 0); ----- -[1, 1, 1, 1, 1] [3.14, 3.14, 3.14] [l, l, l, l] [NULL, NULL] [-1, -1, -1, -1, -1] [] [rust, rust, rust, rust] [] - -# array_repeat scalar function #2 (element as list) -query ???? -select - array_repeat([1], 5), - array_repeat([1.1, 2.2, 3.3], 3), - array_repeat([null, null], 3), - array_repeat([[1, 2], [3, 4]], 2); ----- -[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] - -query ???? -select - array_repeat(arrow_cast([1], 'LargeList(Int64)'), 5), - array_repeat(arrow_cast([1.1, 2.2, 3.3], 'LargeList(Float64)'), 3), - array_repeat(arrow_cast([null, null], 'LargeList(Int64)'), 3), - array_repeat(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2); ----- -[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] - -# array_repeat scalar function with count of different integer types -query ???????? 
-Select - array_repeat(1, arrow_cast(2,'Int8')), - array_repeat(2, arrow_cast(2,'Int16')), - array_repeat(3, arrow_cast(2,'Int32')), - array_repeat(4, arrow_cast(2,'Int64')), - array_repeat(1, arrow_cast(2,'UInt8')), - array_repeat(2, arrow_cast(2,'UInt16')), - array_repeat(3, arrow_cast(2,'UInt32')), - array_repeat(4, arrow_cast(2,'UInt64')); ----- -[1, 1] [2, 2] [3, 3] [4, 4] [1, 1] [2, 2] [3, 3] [4, 4] - -# array_repeat scalar function with count of negative integer types -query ???? -Select - array_repeat(1, arrow_cast(-2,'Int8')), - array_repeat(2, arrow_cast(-2,'Int16')), - array_repeat(3, arrow_cast(-2,'Int32')), - array_repeat(4, arrow_cast(-2,'Int64')); ----- -[] [] [] [] - -# array_repeat with columns #1 - -statement ok -CREATE TABLE array_repeat_table -AS VALUES - (1, 1, 1.1, 'a', make_array(4, 5, 6)), - (2, null, null, null, null), - (3, 2, 2.2, 'rust', make_array(7)), - (0, 3, 3.3, 'datafusion', make_array(8, 9)); - -statement ok -CREATE TABLE large_array_repeat_table -AS SELECT - column1, - column2, - column3, - column4, - arrow_cast(column5, 'LargeList(Int64)') as column5 -FROM array_repeat_table; - -query ?????? -select - array_repeat(column2, column1), - array_repeat(column3, column1), - array_repeat(column4, column1), - array_repeat(column5, column1), - array_repeat(column2, 3), - array_repeat(make_array(1), column1) -from array_repeat_table; ----- -[1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] -[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] -[2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] -[] [] [] [] [3, 3, 3] [] - -query ?????? 
-select - array_repeat(column2, column1), - array_repeat(column3, column1), - array_repeat(column4, column1), - array_repeat(column5, column1), - array_repeat(column2, 3), - array_repeat(make_array(1), column1) -from large_array_repeat_table; ----- -[1] [1.1] [a] [[4, 5, 6]] [1, 1, 1] [[1]] -[NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL] [NULL, NULL, NULL] [[1], [1]] -[2, 2, 2] [2.2, 2.2, 2.2] [rust, rust, rust] [[7], [7], [7]] [2, 2, 2] [[1], [1], [1]] -[] [] [] [] [3, 3, 3] [] - -statement ok -drop table array_repeat_table; - -statement ok -drop table large_array_repeat_table; - -# array_repeat: arrays with NULL counts -statement ok -create table array_repeat_null_count_table -as values -(1, 2), -(2, null), -(3, 1), -(4, -1), -(null, null); - -query I? -select column1, array_repeat(column1, column2) from array_repeat_null_count_table; ----- -1 [1, 1] -2 NULL -3 [3] -4 [] -NULL NULL - -statement ok -drop table array_repeat_null_count_table - -# array_repeat: nested arrays with NULL counts -statement ok -create table array_repeat_nested_null_count_table -as values -([[1, 2], [3, 4]], 2), -([[5, 6], [7, 8]], null), -([[null, null], [9, 10]], 1), -(null, 3), -([[11, 12]], -1); - -query ?? -select column1, array_repeat(column1, column2) from array_repeat_nested_null_count_table; ----- -[[1, 2], [3, 4]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] -[[5, 6], [7, 8]] NULL -[[NULL, NULL], [9, 10]] [[[NULL, NULL], [9, 10]]] -NULL [NULL, NULL, NULL] -[[11, 12]] [] - -statement ok -drop table array_repeat_nested_null_count_table - -# array_repeat edge cases: empty arrays -query ??? -select array_repeat([], 3), array_repeat([], 0), array_repeat([], null); ----- -[[], [], []] [] NULL - -query ?? 
-select array_repeat(null::int, 0), array_repeat(null::int, null); ----- -[] NULL - -# array_repeat LargeList with NULL count -statement ok -create table array_repeat_large_list_null_table -as values -(arrow_cast([1, 2, 3], 'LargeList(Int64)'), 2), -(arrow_cast([4, 5], 'LargeList(Int64)'), null), -(arrow_cast(null, 'LargeList(Int64)'), 3); - -query ?? -select column1, array_repeat(column1, column2) from array_repeat_large_list_null_table; ----- -[1, 2, 3] [[1, 2, 3], [1, 2, 3]] -[4, 5] NULL -NULL [NULL, NULL, NULL] - -statement ok -drop table array_repeat_large_list_null_table - -# array_repeat edge cases: LargeList nested with NULL count -statement ok -create table array_repeat_large_nested_null_table -as values -(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2), -(arrow_cast([[5, 6], [7, 8]], 'LargeList(List(Int64))'), null), -(arrow_cast([[null, null]], 'LargeList(List(Int64))'), 1), -(null, 3); - -query ?? -select column1, array_repeat(column1, column2) from array_repeat_large_nested_null_table; ----- -[[1, 2], [3, 4]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]] -[[5, 6], [7, 8]] NULL -[[NULL, NULL]] [[[NULL, NULL]]] -NULL [NULL, NULL, NULL] - -statement ok -drop table array_repeat_large_nested_null_table - -## array_concat (aliases: `array_cat`, `list_concat`, `list_cat`) - -# test with empty array -query ? -select array_concat([]); ----- -[] - -# test with NULL array -query ? -select array_concat(NULL::integer[]); ----- -NULL - -# test with multiple NULL arrays -query ? -select array_concat(NULL::integer[], NULL::integer[]); ----- -NULL - -# test with NULL LargeList -query ? -select array_concat(arrow_cast(NULL::string[], 'LargeList(Utf8)')); ----- -NULL - -# test with NULL FixedSizeList -query ? -select array_concat(arrow_cast(NULL::string[], 'FixedSizeList(2, Utf8)')); ----- -NULL - -# test with mix of NULL and empty arrays -query ? -select array_concat(NULL::integer[], []); ----- -[] - -# test with mix of NULL and non-empty arrays -query ? 
-select array_concat(NULL::integer[], [1, 2, 3]); ----- -[1, 2, 3] - -# Concatenating strings arrays -query ? -select array_concat( - ['1', '2'], - ['3'] -); ----- -[1, 2, 3] - -query ? -select array_concat( - arrow_cast(['1', '2'], 'LargeList(Utf8)'), - arrow_cast(['3'], 'LargeList(Utf8)') -); ----- -[1, 2, 3] - -query ? -select array_concat( - arrow_cast(['1', '2'], 'FixedSizeList(2, Utf8)'), - arrow_cast(['3'], 'FixedSizeList(1, Utf8)') -); ----- -[1, 2, 3] - -# Concatenating string arrays -query ? -select array_concat( - [arrow_cast('1', 'LargeUtf8'), arrow_cast('2', 'LargeUtf8')], - [arrow_cast('3', 'LargeUtf8')] -); ----- -[1, 2, 3] - -# Concatenating stringview -query ? -select array_concat( - [arrow_cast('1', 'Utf8View'), arrow_cast('2', 'Utf8View')], - [arrow_cast('3', 'Utf8View')] -); ----- -[1, 2, 3] - -# Concatenating Mixed types -query ? -select array_concat( - [arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], - [arrow_cast('3', 'LargeUtf8')] -); ----- -[1, 2, 3] - -# Concatenating Mixed types -query ?T -select - array_concat([arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], [arrow_cast('3', 'Utf8View')]), - arrow_typeof(array_concat([arrow_cast('1', 'Utf8'), arrow_cast('2', 'Utf8')], [arrow_cast('3', 'Utf8View')])); ----- -[1, 2, 3] List(Utf8View) - -# array_concat with NULL elements inside arrays -query ? -select array_concat([1, NULL, 3], [NULL, 5]); ----- -[1, NULL, 3, NULL, 5] - -query ? -select array_concat([NULL, NULL], [1, 2], [NULL]); ----- -[NULL, NULL, 1, 2, NULL] - -query ? -select array_concat([NULL, NULL], [NULL, NULL]); ----- -[NULL, NULL, NULL, NULL] - -# array_concat error -query error DataFusion error: Error during planning: Execution error: Function 'array_concat' user-defined coercion failed with: Error during planning: array_concat does not support type Int64 -select array_concat(1, 2); - -# array_concat scalar function #1 -query ?? 
-select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4])); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# array_concat scalar function #2 -query ? -select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array(5, 6), make_array(7, 8))); ----- -[[1, 2], [3, 4], [5, 6], [7, 8]] - -# array_concat scalar function #3 -query ? -select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), make_array([7], [8], [9])); ----- -[[1], [2], [3], [4], [5], [6], [7], [8], [9]] - -# array_concat scalar function #4 -query ? -select array_concat(make_array([[1]]), make_array([[2]])); ----- -[[[1]], [[2]]] - -# array_concat scalar function #5 -query ? -select array_concat(make_array(2, 3), make_array()); ----- -[2, 3] - -# array_concat scalar function #6 -query ? -select array_concat(make_array(), make_array(2, 3)); ----- -[2, 3] - -# array_concat scalar function #7 (with empty arrays) -query ? -select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array())); ----- -[[1, 2], [3, 4], []] - -# array_concat scalar function #8 (with empty arrays) -query ? -select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array()), make_array(make_array(), make_array()), make_array(make_array(5, 6), make_array(7, 8))); ----- -[[1, 2], [3, 4], [], [], [], [5, 6], [7, 8]] - -# array_concat scalar function #9 (with empty arrays) -query ? -select array_concat(make_array(make_array()), make_array(make_array(1, 2), make_array(3, 4))); ----- -[[], [1, 2], [3, 4]] - -# array_cat scalar function #10 (function alias `array_concat`) -query ?? -select array_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_cat(make_array([1], [2]), make_array([3], [4])); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# list_concat scalar function #11 (function alias `array_concat`) -query ?? 
-select list_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_concat(make_array([1], [2]), make_array([3], [4])); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# list_cat scalar function #12 (function alias `array_concat`) -query ?? -select list_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_cat(make_array([1], [2]), make_array([3], [4])); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# array_concat with different dimensions #1 (2D + 1D) -query ? -select array_concat(make_array([1,2], [3,4]), make_array(5, 6)); ----- -[[1, 2], [3, 4], [5, 6]] - -# array_concat with different dimensions #2 (1D + 2D) -query ? -select array_concat(make_array(5, 6), make_array([1,2], [3,4])); ----- -[[5, 6], [1, 2], [3, 4]] - -# array_concat with different dimensions #3 (2D + 1D + 1D) -query ? -select array_concat(make_array([1,2], [3,4]), make_array(5, 6), make_array(7,8)); ----- -[[1, 2], [3, 4], [5, 6], [7, 8]] - -# array_concat with different dimensions #4 (1D + 2D + 3D) -query ? -select array_concat(make_array(10, 20), make_array([30, 40]), make_array([[50, 60]])); ----- -[[[10, 20]], [[30, 40]], [[50, 60]]] - -# array_concat with different dimensions #5 (2D + 1D + 3D) -query ? -select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]])); ----- -[[[30, 40]], [[10, 20]], [[50, 60]]] - -# array_concat with different dimensions #6 (2D + 1D + 3D + 4D + 3D) -query ? -select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]]), make_array([[[70, 80]]]), make_array([[80, 40]])); ----- -[[[[30, 40]]], [[[10, 20]]], [[[50, 60]]], [[[70, 80]]], [[[80, 40]]]] - -# array_concat column-wise #1 -query ? 
-select array_concat(column1, make_array(0)) from arrays_values_without_nulls; ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0] -[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0] -[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0] -[31, 32, 33, 34, 35, 26, 37, 38, 39, 40, 0] - -# array_concat column-wise #2 -query ? -select array_concat(column1, column1) from arrays_values_without_nulls; ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] -[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] -[31, 32, 33, 34, 35, 26, 37, 38, 39, 40, 31, 32, 33, 34, 35, 26, 37, 38, 39, 40] - -# array_concat column-wise #3 -query ? -select array_concat(make_array(column2), make_array(column3)) from arrays_values_without_nulls; ----- -[1, 1] -[12, 2] -[23, 3] -[34, 4] - -# array_concat column-wise #4 -query ? -select array_concat(make_array(column2), make_array(0)) from arrays_values; ----- -[1, 0] -[12, 0] -[23, 0] -[34, 0] -[44, 0] -[NULL, 0] -[55, 0] -[66, 0] - -# array_concat column-wise #5 -query ??? -select array_concat(column1, column1), array_concat(column2, column2), array_concat(column3, column3) from arrays; ----- -[[NULL, 2], [3, NULL], [NULL, 2], [3, NULL]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] [L, o, r, e, m, L, o, r, e, m] -[[3, 4], [5, 6], [3, 4], [5, 6]] [NULL, 5.5, 6.6, NULL, 5.5, 6.6] [i, p, NULL, u, m, i, p, NULL, u, m] -[[5, 6], [7, 8], [5, 6], [7, 8]] [7.7, 8.8, 9.9, 7.7, 8.8, 9.9] [d, NULL, l, o, r, d, NULL, l, o, r] -[[7, NULL], [9, 10], [7, NULL], [9, 10]] [10.1, NULL, 12.2, 10.1, NULL, 12.2] [s, i, t, s, i, t] -NULL [13.3, 14.4, 15.5, 13.3, 14.4, 15.5] [a, m, e, t, a, m, e, t] -[[11, 12], [13, 14], [11, 12], [13, 14]] NULL [,, ,] -[[15, 16], [NULL, 18], [15, 16], [NULL, 18]] [16.6, 17.7, 18.8, 16.6, 17.7, 18.8] NULL - -# array_concat column-wise #6 -query ?? 
-select array_concat(column1, make_array(make_array(1, 2), make_array(3, 4))), array_concat(column2, make_array(1.1, 2.2, 3.3)) from arrays; ----- -[[NULL, 2], [3, NULL], [1, 2], [3, 4]] [1.1, 2.2, 3.3, 1.1, 2.2, 3.3] -[[3, 4], [5, 6], [1, 2], [3, 4]] [NULL, 5.5, 6.6, 1.1, 2.2, 3.3] -[[5, 6], [7, 8], [1, 2], [3, 4]] [7.7, 8.8, 9.9, 1.1, 2.2, 3.3] -[[7, NULL], [9, 10], [1, 2], [3, 4]] [10.1, NULL, 12.2, 1.1, 2.2, 3.3] -[[1, 2], [3, 4]] [13.3, 14.4, 15.5, 1.1, 2.2, 3.3] -[[11, 12], [13, 14], [1, 2], [3, 4]] [1.1, 2.2, 3.3] -[[15, 16], [NULL, 18], [1, 2], [3, 4]] [16.6, 17.7, 18.8, 1.1, 2.2, 3.3] - -# array_concat column-wise #7 -query ? -select array_concat(column3, make_array('.', '.', '.')) from arrays; ----- -[L, o, r, e, m, ., ., .] -[i, p, NULL, u, m, ., ., .] -[d, NULL, l, o, r, ., ., .] -[s, i, t, ., ., .] -[a, m, e, t, ., ., .] -[,, ., ., .] -[., ., .] - -# query ??I? -# select column1, column2, column3, column4 from arrays_values_v2; -# ---- -# [NULL, 2, 3] [4, 5, NULL] 12 [[30, 40, 50]] -# NULL [7, NULL, 8] 13 [[NULL, NULL, 60]] -# [9, NULL, 10] NULL 14 [[70, NULL, NULL]] -# [NULL, 1] [NULL, 21] NULL NULL -# [11, 12] NULL NULL NULL -# NULL NULL NULL NULL - - -# array_concat column-wise #8 (1D + 1D) -query ? -select array_concat(column1, column2) from arrays_values_v2; ----- -[NULL, 2, 3, 4, 5, NULL] -[7, NULL, 8] -[9, NULL, 10] -[NULL, 1, NULL, 21] -[11, 12] -NULL - -# array_concat column-wise #9 (2D + 1D) -query ? -select array_concat(column4, make_array(column3)) from arrays_values_v2; ----- -[[30, 40, 50], [12]] -[[NULL, NULL, 60], [13]] -[[70, NULL, NULL], [14]] -[[NULL]] -[[NULL]] -[[NULL]] - -# array_concat column-wise #10 (3D + 2D + 1D) -query ? 
-select array_concat(column4, column1, column2) from nested_arrays; ----- -[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]], [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]], [[7, 8, 9]]] -[[[11, 12, 13], [14, 15, 16]], [[17, 18, 19], [20, 21, 22]], [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]], [[10, 11, 12]]] - -# array_concat column-wise #11 (2D + 1D) -query ? -select array_concat(column4, column1) from arrays_values_v2; ----- -[[30, 40, 50], [NULL, 2, 3]] -[[NULL, NULL, 60], NULL] -[[70, NULL, NULL], [9, NULL, 10]] -[[NULL, 1]] -[[11, 12]] -[NULL] - -# array_concat column-wise #12 (1D + 1D + 1D) -query ? -select array_concat(make_array(column3), column1, column2) from arrays_values_v2; ----- -[12, NULL, 2, 3, 4, 5, NULL] -[13, 7, NULL, 8] -[14, 9, NULL, 10] -[NULL, NULL, 1, NULL, 21] -[NULL, 11, 12] -[NULL] - -## array_position (aliases: `list_position`, `array_indexof`, `list_indexof`) - -## array_position with NULL (follow PostgreSQL) -query II -select array_position([1, 2, 3, 4, 5], arrow_cast(NULL, 'Int64')), array_position(arrow_cast(NULL, 'List(Int64)'), 1); ----- -NULL NULL - -# array_position with no match (incl. 
empty array) returns NULL -query II -select array_position([], 1), array_position([2], 1); ----- -NULL NULL - -# array_position scalar function #1 -query III -select array_position(['h', 'e', 'l', 'l', 'o'], 'l'), array_position([1, 2, 3, 4, 5], 5), array_position([1, 1, 1], 1); ----- -3 5 1 - -query III -select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -3 5 1 - -query III -select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), array_position(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), array_position(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); ----- -3 5 1 - -# array_position scalar function #2 (with optional argument) -query III -select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, 5, 4, 5], 5, 4), array_position([1, 1, 1], 1, 2); ----- -4 5 2 - -query III -select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l', 4), array_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5, 4), array_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1, 2); ----- -4 5 2 - -query III -select array_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l', 4), array_position(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5, 4), array_position(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1, 2); ----- -4 5 2 - -# array_position scalar function #3 (element is list) -query II -select array_position(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_position(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ----- -2 2 - -# array_position scalar function #4 (element in list; with optional argument) -query II -select array_position(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 
[4, 5, 6], 3), array_position(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4], 3); ----- -4 3 - -query II -select array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), array_position(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); ----- -2 2 - -query I -SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5) ----- -1 - -query I -SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5, 2) ----- -5 - -query I -SELECT array_position(arrow_cast([1, 1, 100, 1, 1], 'LargeList(Int32)'), 100) ----- -3 - -query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_position' function: coercion from -SELECT array_position([1, 2, 3], 'foo') - -query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_position' function: coercion from -SELECT array_position([1, 2, 3], 'foo', 2) - -# list_position scalar function #5 (function alias `array_position`) -query III -select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1); ----- -3 5 1 - -query III -select list_position(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_position(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_position(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -3 5 1 - -# array_indexof scalar function #6 (function alias `array_position`) -query III -select array_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), array_indexof([1, 2, 3, 4, 5], 5), array_indexof([1, 1, 1], 1); ----- -3 5 1 - -query III -select array_indexof(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_indexof(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_indexof(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -3 5 1 - -# 
list_indexof scalar function #7 (function alias `array_position`) -query III -select list_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), list_indexof([1, 2, 3, 4, 5], 5), list_indexof([1, 1, 1], 1); ----- -3 5 1 - -query III -select list_indexof(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_indexof(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_indexof(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -3 5 1 - -# array_position with columns #1 -query II -select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls; ----- -1 1 -2 2 -3 3 -4 4 - -query II -select array_position(column1, column2), array_position(column1, column2, column3) from large_arrays_values_without_nulls; ----- -1 1 -2 2 -3 3 -4 4 - -# array_position with columns #2 (element is list) -query II -select array_position(column1, column2), array_position(column1, column2, column3) from nested_arrays; ----- -3 3 -2 5 - -query II -select array_position(column1, column2), array_position(column1, column2, column3) from nested_arrays; ----- -3 3 -2 5 - -# array_position with columns and scalars #1 -query III -select array_position(make_array(1, 2, 3, 4, 5), column2), array_position(column1, 3), array_position(column1, 3, 5) from arrays_values_without_nulls; ----- -1 3 NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL - -query III -select array_position(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), column2), array_position(column1, 3), array_position(column1, 3, 5) from large_arrays_values_without_nulls; ----- -1 3 NULL -NULL NULL NULL -NULL NULL NULL -NULL NULL NULL - -# array_position with columns and scalars #2 (element is list) -query III -select array_position(make_array([1, 2, 3], [4, 5, 6], [11, 12, 13]), column2), array_position(column1, make_array(4, 5, 6)), array_position(column1, make_array(1, 2, 3), 2) from nested_arrays; ----- -NULL 6 4 -NULL 1 NULL - -query III -select 
array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [11, 12, 13]), 'LargeList(LargeList(Int64))'), column2), array_position(column1, arrow_cast(make_array(4, 5, 6), 'LargeList(Int64)')), array_position(column1, arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2) from large_nested_arrays; ----- -NULL 6 4 -NULL 1 NULL - -# array_position with NULL element in haystack array (NULL = NULL semantics) -query III -select array_position([1, NULL, 3], arrow_cast(NULL, 'Int64')), array_position([NULL, 2, 3], arrow_cast(NULL, 'Int64')), array_position([1, 2, NULL], arrow_cast(NULL, 'Int64')); ----- -2 1 3 - -query I -select array_position(arrow_cast([1, NULL, 3], 'LargeList(Int64)'), arrow_cast(NULL, 'Int64')); ----- -2 - -# array_position with NULL element in array and start_from -query II -select array_position([NULL, 1, NULL, 2], arrow_cast(NULL, 'Int64'), 2), array_position([NULL, 1, NULL, 2], arrow_cast(NULL, 'Int64'), 1); ----- -3 1 - -# array_position with column array and scalar element -query IIII -select array_position(column1, 3), array_position(column1, 10), array_position(column1, 20), array_position(column1, 999) from arrays_values_without_nulls; ----- -3 10 NULL NULL -NULL NULL 10 NULL -NULL NULL NULL NULL -NULL NULL NULL NULL - -query II -select array_position(column1, 3), array_position(column1, 20) from large_arrays_values_without_nulls; ----- -3 NULL -NULL 10 -NULL NULL -NULL NULL - -query II -select array_position(column1, 3), array_position(column1, 20) from fixed_size_arrays_values_without_nulls; ----- -3 NULL -NULL 10 -NULL NULL -NULL NULL - -# array_position with column array, scalar element, and scalar start_from -query II -select array_position(column1, 3, 1), array_position(column1, 3, 4) from arrays_values_without_nulls; ----- -3 NULL -NULL NULL -NULL NULL -NULL NULL - -query II -select array_position(column1, 3, 1), array_position(column1, 3, 4) from large_arrays_values_without_nulls; ----- -3 NULL -NULL NULL -NULL NULL -NULL NULL - -# 
array_position with column array, scalar element, and column start_from -query I -select array_position(column1, 3, column3) from arrays_values_without_nulls; ----- -3 -NULL -NULL -NULL - -# array_position with scalar haystack, scalar element, and column start_from -query I -select array_position([1, 2, 1, 2], 2, column3) from arrays_values_without_nulls; ----- -2 -2 -4 -4 - -# array_position start_from boundary cases -query IIII -select array_position([1, 2, 3], 3, 3), array_position([1, 2, 3], 1, 2), array_position([1, 2, 3], 1, 1), array_position([1, 2, 3], 3, 4); ----- -3 NULL 1 NULL - -query II -select array_position([1, 2, 3], 3, 4), array_position([1], 1, 2); ----- -NULL NULL - -# array_position with empty array in various contexts -query II -select array_position(arrow_cast(make_array(), 'List(Int64)'), 1), array_position(arrow_cast(make_array(), 'LargeList(Int64)'), 1); ----- -NULL NULL - -# FixedSizeList with start_from -query II -select array_position(arrow_cast([1, 2, 3, 1, 2], 'FixedSizeList(5, Int64)'), 1, 2), array_position(arrow_cast([1, 2, 3, 1, 2], 'FixedSizeList(5, Int64)'), 2, 4); ----- -4 5 - -query I -select array_position(arrow_cast(['a', 'b', 'c', 'b'], 'FixedSizeList(4, Utf8)'), 'b', 3); ----- -4 - -## array_positions (aliases: `list_positions`) - -# array_positions with empty array -query ? -select array_positions(arrow_cast(make_array(), 'List(Int64)'), 1); ----- -[] - -query ? -select array_positions([1, 2, 3, 4, 5], null); ----- -[] - -#TODO: https://github.com/apache/datafusion/issues/7142 -# array_positions with NULL (follow PostgreSQL) -#query ? -#select array_positions(null, 1); -#---- -#NULL - -# array_positions scalar function #1 -query ??? -select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); ----- -[3, 4] [5] [1, 2, 3] - -query ??? 
-select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), array_positions(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), array_positions(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -[3, 4] [5] [1, 2, 3] - -query ??? -select array_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), array_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), array_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); ----- -[3, 4] [5] [1, 2, 3] - -# array_positions scalar function #2 (element is list) -query ? -select array_positions(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), [2, 1, 3]); ----- -[2, 4] - -query ? -select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), 'LargeList(List(Int64))'), [2, 1, 3]); ----- -[2, 4] - -query ? -select array_positions(arrow_cast(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), 'FixedSizeList(5, List(Int64))'), [2, 1, 3]); ----- -[2, 4] - -# list_positions scalar function #3 (function alias `array_positions`) -query ??? -select list_positions(['h', 'e', 'l', 'l', 'o'], 'l'), list_positions([1, 2, 3, 4, 5], 5), list_positions([1, 1, 1], 1); ----- -[3, 4] [5] [1, 2, 3] - -query ??? -select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), 'l'), list_positions(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), 5), list_positions(arrow_cast([1, 1, 1], 'LargeList(Int64)'), 1); ----- -[3, 4] [5] [1, 2, 3] - -query ??? -select list_positions(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), 'l'), - list_positions(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), 5), - list_positions(arrow_cast([1, 1, 1], 'FixedSizeList(3, Int64)'), 1); ----- -[3, 4] [5] [1, 2, 3] - -# array_positions with columns #1 -query ? -select array_positions(column1, column2) from arrays_values_without_nulls; ----- -[1] -[2] -[3] -[4] - -query ? 
-select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from arrays_values_without_nulls; ----- -[1] -[2] -[3] -[4] - -query ? -select array_positions(arrow_cast(column1, 'LargeList(Int64)'), column2) from fixed_size_arrays_values_without_nulls; ----- -[1] -[2] -[3] -[4] - -# array_positions with columns #2 (element is list) -query ? -select array_positions(column1, column2) from nested_arrays; ----- -[3] -[2, 5] - -query ? -select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), column2) from nested_arrays; ----- -[3] -[2, 5] - -query ? -select array_positions(column1, column2) from fixed_size_nested_arrays; ----- -[3] -[2, 5] - -# array_positions with columns and scalars #1 -query ?? -select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; ----- -[4] [1] -[] [] -[] [3] -[] [] - -query ?? -select array_positions(arrow_cast(column1, 'LargeList(Int64)'), 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; ----- -[4] [1] -[] [] -[] [3] -[] [] - -query ?? -select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from fixed_size_arrays_values_without_nulls; ----- -[4] [1] -[] [] -[] [3] -[] [] - -# array_positions with columns and scalars #2 (element is list) -query ?? -select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from nested_arrays; ----- -[6] [] -[1] [] - -query ?? -select array_positions(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(4, 5, 6)), array_positions(arrow_cast(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), 'LargeList(List(Int64))'), column2) from nested_arrays; ----- -[6] [] -[1] [] - -query ?? 
-select array_positions(column1, make_array(4, 5, 6)), array_positions(make_array([1, 2, 3], [11, 12, 13], [4, 5, 6]), column2) from fixed_size_nested_arrays; ----- -[6] [] -[1] [] - -## array_replace (aliases: `list_replace`) - -# array_replace scalar function #1 -query ??? -select - array_replace(make_array(1, 2, 3, 4), 2, 3), - array_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), - array_replace(make_array(1, 2, 3), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - array_replace(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), - array_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), - array_replace(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - array_replace(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3), - array_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0), - array_replace(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -# array_replace scalar function #2 (element is list) -query ?? -select - array_replace( - make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), - [4, 5, 6], - [1, 1, 1] - ), - array_replace( - make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? 
-select - array_replace( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), - [4, 5, 6], - [1, 1, 1] - ), - array_replace( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select - array_replace( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [4, 5, 6], - [1, 1, 1] - ), - array_replace( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -# list_replace scalar function #3 (function alias `list_replace`) -query ??? -select list_replace( - make_array(1, 2, 3, 4), 2, 3), - list_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), - list_replace(make_array(1, 2, 3), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select list_replace( - arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), - list_replace(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), - list_replace(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 4, 5, 4, 6, 7] [1, 2, 3] - -# array_replace scalar function #4 (null input) -query ? -select array_replace(make_array(1, 2, 3, 4, 5), NULL, NULL); ----- -[1, 2, 3, 4, 5] - -query ? -select array_replace(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL); ----- -[1, 2, 3, 4, 5] - -# array_replace scalar function with columns #1 -query ? 
-select array_replace(column1, column2, column3) from arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 2, 2, 1, 3, 2, 3] -[7, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[10, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[13, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ? -select array_replace(column1, column2, column3) from large_arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 2, 2, 1, 3, 2, 3] -[7, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[10, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[13, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_replace scalar function with columns #2 (element is list) -query ? -select array_replace(column1, column2, column3) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[19, 20, 21], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[28, 29, 30], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ? -select array_replace(column1, column2, column3) from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[19, 20, 21], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[28, 29, 30], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -# array_replace scalar function with columns and scalars #1 -query ??? 
-select - array_replace(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3), - array_replace(column1, 1, column3), - array_replace(column1, column2, 4) -from arrays_with_repeating_elements; ----- -[1, 4, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 1, 3, 2, 2, 1, 3, 2, 3] [1, 4, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 7, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ??? -select - array_replace(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3), - array_replace(column1, 1, column3), - array_replace(column1, column2, 4) -from large_arrays_with_repeating_elements; ----- -[1, 4, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 1, 3, 2, 2, 1, 3, 2, 3] [1, 4, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 7, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_replace scalar function with columns and scalars #2 (element is list) -query ??? 
-select - array_replace( - make_array( - [1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), - column2, - column3 - ), - array_replace(column1, make_array(1, 2, 3), column3), - array_replace(column1, column2, make_array(11, 12, 13)) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 
33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ??? -select - array_replace( - arrow_cast(make_array( - [1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]),'LargeList(List(Int64))'), - column2, - column3 - ), - array_replace(column1, make_array(1, 2, 3), column3), - array_replace(column1, column2, make_array(11, 12, 13)) -from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 
24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## array_replace_n (aliases: `list_replace_n`) - -# array_replace_n scalar function #1 -query ??? -select - array_replace_n(make_array(1, 2, 3, 4), 2, 3, 2), - array_replace_n(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0, 2), - array_replace_n(make_array(1, 2, 3), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - array_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3, 2), - array_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0, 2), - array_replace_n(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - array_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3, 2), - array_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0, 2), - array_replace_n(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -# array_replace_n scalar function #2 (element is list) -query ?? -select - array_replace_n( - make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), - [4, 5, 6], - [1, 1, 1], - 2 - ), - array_replace_n( - make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), - [2, 3, 4], - [3, 1, 4], - 2 - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -query ?? 
-select - array_replace_n( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), - [4, 5, 6], - [1, 1, 1], - 2 - ), - array_replace_n( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), - [2, 3, 4], - [3, 1, 4], - 2 - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select - array_replace_n( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [4, 5, 6], - [1, 1, 1], - 2 - ), - array_replace_n( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [2, 3, 4], - [3, 1, 4], - 2 - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -# list_replace_n scalar function #3 (function alias `array_replace_n`) -query ??? -select - list_replace_n(make_array(1, 2, 3, 4), 2, 3, 2), - list_replace_n(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0, 2), - list_replace_n(make_array(1, 2, 3), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -query ??? -select - list_replace_n(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3, 2), - list_replace_n(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0, 2), - list_replace_n(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0, 3); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 4, 6, 7] [1, 2, 3] - -# array_replace_n scalar function #4 (null input) -query ? -select array_replace_n(make_array(1, 2, 3, 4, 5), NULL, NULL, NULL); ----- -[1, 2, 3, 4, 5] - -query ? -select array_replace_n(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL, NULL); ----- -[1, 2, 3, 4, 5] - -# array_replace_n scalar function with columns #1 -query ? 
-select - array_replace_n(column1, column2, column3, column4) -from arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 4, 4, 1, 3, 2, 3] -[7, 7, 5, 5, 6, 5, 5, 5, 4, 4] -[10, 10, 10, 8, 10, 9, 10, 8, 7, 7] -[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] - -query ? -select - array_replace_n(column1, column2, column3, column4) -from large_arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 4, 4, 1, 3, 2, 3] -[7, 7, 5, 5, 6, 5, 5, 5, 4, 4] -[10, 10, 10, 8, 10, 9, 10, 8, 7, 7] -[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] - -# array_replace_n scalar function with columns #2 (element is list) -query ? -select - array_replace_n(column1, column2, column3, column4) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] - -query ? 
-select - array_replace_n(column1, column2, column3, column4) -from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] - - -# array_replace_n scalar function with columns and scalars #1 -query ???? -select - array_replace_n(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3, column4), - array_replace_n(column1, 1, column3, column4), - array_replace_n(column1, column2, 4, column4), - array_replace_n(column1, column2, column3, 2) -from arrays_with_repeating_elements; ----- -[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 2, 3] [1, 4, 1, 3, 4, 2, 1, 3, 2, 3] -[1, 2, 2, 7, 5, 7, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [7, 7, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 7, 7] [10, 10, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] [13, 11, 12, 13, 11, 12, 10, 11, 12, 10] - -query ???? 
-select - array_replace_n(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3, column4), - array_replace_n(column1, 1, column3, column4), - array_replace_n(column1, column2, 4, column4), - array_replace_n(column1, column2, column3, 2) -from large_arrays_with_repeating_elements; ----- -[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 2, 3] [1, 4, 1, 3, 4, 2, 1, 3, 2, 3] -[1, 2, 2, 7, 5, 7, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [7, 7, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 7, 7] [10, 10, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] [13, 11, 12, 13, 11, 12, 10, 11, 12, 10] - -# array_replace_n scalar function with columns and scalars #2 (element is list) -query ???? -select - array_replace_n( - make_array( - [7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]), - column2, - column3, - column4 - ), - array_replace_n(column1, make_array(1, 2, 3), column3, column4), - array_replace_n(column1, column2, make_array(11, 12, 13), column4), - array_replace_n(column1, column2, column3, 2) -from nested_arrays_with_repeating_elements; ----- -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [10, 11, 12]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [19, 20, 21], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 
15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[28, 29, 30], [28, 29, 30], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] [[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ???? 
-select - array_replace_n( - arrow_cast(make_array( - [7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]), 'LargeList(List(Int64))'), - column2, - column3, - column4 - ), - array_replace_n(column1, make_array(1, 2, 3), column3, column4), - array_replace_n(column1, column2, make_array(11, 12, 13), column4), - array_replace_n(column1, column2, column3, 2) -from large_nested_arrays_with_repeating_elements; ----- -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [10, 11, 12]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [19, 20, 21], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[28, 29, 30], [28, 29, 30], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[7, 8, 9], [2, 1, 3], [1, 5, 6], [10, 11, 12], [2, 1, 3], [7, 8, 9], [4, 5, 6]] [[28, 29, 30], [31, 32, 
33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] [[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## array_replace_all (aliases: `list_replace_all`) - -# array_replace_all scalar function #1 -query ??? -select - array_replace_all(make_array(1, 2, 3, 4), 2, 3), - array_replace_all(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), - array_replace_all(make_array(1, 2, 3), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -query ??? -select - array_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), - array_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), - array_replace_all(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -query ??? -select - array_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'FixedSizeList(4, Int64)'), 2, 3), - array_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'FixedSizeList(7, Int64)'), 4, 0), - array_replace_all(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -# array_replace_all scalar function #2 (element is list) -query ?? -select - array_replace_all( - make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), - [4, 5, 6], - [1, 1, 1] - ), - array_replace_all( - make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -query ?? 
-select - array_replace_all( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), - [4, 5, 6], - [1, 1, 1] - ), - array_replace_all( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select - array_replace_all( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [4, 5, 6], - [1, 1, 1] - ), - array_replace_all( - arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), - [2, 3, 4], - [3, 1, 4] - ); ----- -[[1, 2, 3], [1, 1, 1], [5, 5, 5], [1, 1, 1], [7, 8, 9]] [[1, 3, 2], [3, 1, 4], [3, 1, 4], [5, 3, 1], [1, 3, 2]] - -# list_replace_all scalar function #3 (function alias `array_replace_all`) -query ??? -select - list_replace_all(make_array(1, 2, 3, 4), 2, 3), - list_replace_all(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), - list_replace_all(make_array(1, 2, 3), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -query ??? -select - list_replace_all(arrow_cast(make_array(1, 2, 3, 4), 'LargeList(Int64)'), 2, 3), - list_replace_all(arrow_cast(make_array(1, 4, 4, 5, 4, 6, 7), 'LargeList(Int64)'), 4, 0), - list_replace_all(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4, 0); ----- -[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] - -# array_replace_all scalar function #4 (null input) -query ? -select array_replace_all(make_array(1, 2, 3, 4, 5), NULL, NULL); ----- -[1, 2, 3, 4, 5] - -query ? -select array_replace_all(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL, NULL); ----- -[1, 2, 3, 4, 5] - -# array_replace_all scalar function with columns #1 -query ? 
-select - array_replace_all(column1, column2, column3) -from arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 4, 4, 1, 3, 4, 3] -[7, 7, 5, 5, 6, 5, 5, 5, 7, 7] -[10, 10, 10, 8, 10, 9, 10, 8, 10, 10] -[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] - -query ? -select - array_replace_all(column1, column2, column3) -from large_arrays_with_repeating_elements; ----- -[1, 4, 1, 3, 4, 4, 1, 3, 4, 3] -[7, 7, 5, 5, 6, 5, 5, 5, 7, 7] -[10, 10, 10, 8, 10, 9, 10, 8, 10, 10] -[13, 11, 12, 13, 11, 12, 13, 11, 12, 13] - -# array_replace_all scalar function with columns #2 (element is list) -query ? -select - array_replace_all(column1, column2, column3) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [7, 8, 9]] -[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [19, 20, 21], [19, 20, 21]] -[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [28, 29, 30], [28, 29, 30]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] - -query ? -select - array_replace_all(column1, column2, column3) -from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [10, 11, 12], [1, 2, 3], [7, 8, 9], [10, 11, 12], [7, 8, 9]] -[[19, 20, 21], [19, 20, 21], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [19, 20, 21], [19, 20, 21]] -[[28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24], [28, 29, 30], [25, 26, 27], [28, 29, 30], [22, 23, 24], [28, 29, 30], [28, 29, 30]] -[[37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39], [31, 32, 33], [34, 35, 36], [37, 38, 39]] - -# array_replace_all scalar function with columns and scalars #1 -query ??? 
-select - array_replace_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column3), - array_replace_all(column1, 1, column3), - array_replace_all(column1, column2, 4) -from arrays_with_repeating_elements; ----- -[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 4, 3] -[1, 2, 2, 7, 5, 7, 7, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] - -query ??? -select - array_replace_all(arrow_cast(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), 'LargeList(Int64)'), column2, column3), - array_replace_all(column1, 1, column3), - array_replace_all(column1, column2, 4) -from large_arrays_with_repeating_elements; ----- -[1, 4, 4, 4, 5, 4, 4, 7, 7, 10, 7, 8] [4, 2, 4, 3, 2, 2, 4, 3, 2, 3] [1, 4, 1, 3, 4, 4, 1, 3, 4, 3] -[1, 2, 2, 7, 5, 7, 7, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 10, 10, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [4, 4, 4, 8, 4, 9, 4, 8, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 13, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [4, 11, 12, 4, 11, 12, 4, 11, 12, 4] - -# array_replace_all scalar function with columns and scalars #2 (element is list) -query ??? 
-select - array_replace_all( - make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), - column2, - column3 - ), - array_replace_all(column1, make_array(1, 2, 3), column3), - array_replace_all(column1, column2, make_array(11, 12, 13)) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [10, 11, 12], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [19, 20, 21], [19, 20, 21], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [11, 12, 13], [11, 12, 13]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [11, 12, 13], [11, 12, 13]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] 
[[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] - -query ??? -select - array_replace_all( - arrow_cast(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), 'LargeList(List(Int64))'), - column2, - column3 - ), - array_replace_all(column1, make_array(1, 2, 3), column3), - array_replace_all(column1, column2, make_array(11, 12, 13)) -from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [10, 11, 12], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [4, 5, 6], [10, 11, 12], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [11, 12, 13], [1, 2, 3], [7, 8, 9], [11, 12, 13], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [19, 20, 21], [13, 14, 15], [19, 20, 21], [19, 20, 21], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[11, 12, 13], [11, 12, 13], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [11, 12, 13], [11, 12, 13]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [28, 29, 30], [28, 29, 30], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[11, 12, 13], [11, 12, 13], [11, 12, 13], [22, 23, 24], [11, 12, 13], [25, 26, 27], [11, 12, 13], [22, 23, 24], [11, 12, 13], [11, 12, 13]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 
20, 21], [37, 38, 39], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13], [31, 32, 33], [34, 35, 36], [11, 12, 13]] - -# array_replace with null handling - -statement ok -create table t as values - (make_array(3, 1, NULL, 3), 3, 4, 2), - (make_array(3, 1, NULL, 3), NULL, 5, 2), - (NULL, 3, 2, 1), - (make_array(3, 1, 3), 3, NULL, 1) -; - - -# ([3, 1, NULL, 3], 3, 4, 2) => [4, 1, NULL, 4] NULL not matched -# ([3, 1, NULL, 3], NULL, 5, 2) => [3, 1, NULL, 3] NULL is replaced with 5 -# ([NULL], 3, 2, 1) => NULL -# ([3, 1, 3], 3, NULL, 1) => [NULL, 1 3] - -query ?III? -select column1, column2, column3, column4, array_replace_n(column1, column2, column3, column4) from t; ----- -[3, 1, NULL, 3] 3 4 2 [4, 1, NULL, 4] -[3, 1, NULL, 3] NULL 5 2 [3, 1, 5, 3] -NULL 3 2 1 NULL -[3, 1, 3] 3 NULL 1 [NULL, 1, 3] - - - -statement ok -drop table t; - - - -## array_to_string (aliases: `list_to_string`, `array_join`, `list_join`) - -# array_to_string scalar function #1 -query TTT -select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_to_string scalar function #2 -query TTT -select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_repeat(array_repeat(array_repeat(3, 2), 2), 3), '/\'); ----- -11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3 - -# array_to_string scalar function #3 -query T -select array_to_string(make_array(), ',') ----- -(empty) - -# array to string dictionary -statement ok -CREATE TABLE table1 AS VALUES - (1, 'foo'), - (3, 'bar'), - (1, 'foo'), - (2, NULL), - (NULL, 'baz') - ; - -# expect 1-3-1-2 (dictionary values should be repeated) -query T -SELECT 
array_to_string(array_agg(column1),'-') -FROM ( - SELECT arrow_cast(column1, 'Dictionary(Int32, Int32)') as column1 - FROM table1 -); ----- -1-3-1-2 - -# expect foo,bar,foo,baz (dictionary values should be repeated) -query T -SELECT array_to_string(array_agg(column2),',') -FROM ( - SELECT arrow_cast(column2, 'Dictionary(Int64, Utf8)') as column2 - FROM table1 -); ----- -foo,bar,foo,baz - -# Expect only values that are in the group -query I?T -SELECT column1, array_agg(column2), array_to_string(array_agg(column2),',') -FROM ( - SELECT column1, arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2 - FROM table1 -) -GROUP BY column1 -ORDER BY column1; ----- -1 [foo, foo] foo,foo -2 [NULL] (empty) -3 [bar] bar -NULL [baz] baz - -# verify make_array does force to Utf8View -query T -SELECT arrow_typeof(make_array(arrow_cast('a', 'Utf8View'), 'b', 'c', 'd')); ----- -List(Utf8View) - -# expect a,b,c,d. make_array forces all types to be of a common type (see above) -query T -SELECT array_to_string(make_array(arrow_cast('a', 'Utf8View'), 'b', 'c', 'd'), ','); ----- -a,b,c,d - -# array_to_string using largeutf8 for second arg -query TTT -select array_to_string(['h', 'e', 'l', 'l', 'o'], arrow_cast(',', 'LargeUtf8')), array_to_string([1, 2, 3, 4, 5], arrow_cast('-', 'LargeUtf8')), array_to_string([1.0, 2.0, 3.0], arrow_cast('|', 'LargeUtf8')); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_to_string using utf8view for second arg -query TTT -select array_to_string(['h', 'e', 'l', 'l', 'o'], arrow_cast(',', 'Utf8View')), array_to_string([1, 2, 3, 4, 5], arrow_cast('-', 'Utf8View')), array_to_string([1.0, 2.0, 3.0], arrow_cast('|', 'Utf8View')); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -statement ok -drop table table1; - - -## array_union (aliases: `list_union`) - -# array_union scalar function #1 -query ? -select array_union([1, 2, 3, 4], [5, 6, 3, 4]); ----- -[1, 2, 3, 4, 5, 6] - -query ? 
-select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 3, 4], 'LargeList(Int64)')); ----- -[1, 2, 3, 4, 5, 6] - -query ? -select array_union(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6, 3, 4], 'FixedSizeList(4, Int64)')); ----- -[1, 2, 3, 4, 5, 6] - -query ? -select array_union(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6], 'FixedSizeList(2, Int64)')); ----- -[1, 2, 3, 4, 5, 6] - -# array_union scalar function #2 -query ? -select array_union([1, 2, 3, 4], [5, 6, 7, 8]); ----- -[1, 2, 3, 4, 5, 6, 7, 8] - -query ? -select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 7, 8], 'LargeList(Int64)')); ----- -[1, 2, 3, 4, 5, 6, 7, 8] - -# array_union scalar function #3 -query ? -select array_union([1,2,3], []); ----- -[1, 2, 3] - -query ? -select array_union(arrow_cast([1,2,3], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); ----- -[1, 2, 3] - -# array_union scalar function #4 -query ? -select array_union([1, 2, 3, 4], [5, 4]); ----- -[1, 2, 3, 4, 5] - -query ? -select array_union(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 4], 'LargeList(Int64)')); ----- -[1, 2, 3, 4, 5] - -# array_union scalar function #5 -statement ok -CREATE TABLE arrays_with_repeating_elements_for_union -AS VALUES - ([0, 1, 1], []), - ([1, 1], [2]), - ([2, 3], [3]), - ([3], [3, 4]) -; - -query ? -select array_union(column1, column2) from arrays_with_repeating_elements_for_union; ----- -[0, 1] -[1, 2] -[2, 3] -[3, 4] - -query ? -select array_union(arrow_cast(column1, 'LargeList(Int64)'), arrow_cast(column2, 'LargeList(Int64)')) from arrays_with_repeating_elements_for_union; ----- -[0, 1] -[1, 2] -[2, 3] -[3, 4] - -statement ok -drop table arrays_with_repeating_elements_for_union; - -# array_union scalar function #6 -query ? -select array_union([], []); ----- -[] - -query ? 
-select array_union(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); ----- -[] - -# array_union scalar function #7 -# re-enable when https://github.com/apache/arrow-rs/issues/9227 is fixed -# query ? -# select array_union([[null]], []); -# ---- -# [[]] - -query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'array_union' function: -select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([], 'LargeList(Int64)')); - -# array_union scalar function #8 -query ? -select array_union([null], [null]); ----- -[NULL] - -query ? -select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([[null]], 'LargeList(List(Int64))')); ----- -[[NULL]] - -# array_union scalar function #9 -query ? -select array_union(null, []); ----- -NULL - -query ? -select array_union(null, arrow_cast([], 'LargeList(Int64)')); ----- -NULL - -# array_union scalar function #10 -query ? -select array_union(null, null); ----- -NULL - -# array_union scalar function #11 -query ? -select array_union([1, 1, 2, 2, 3, 3], null); ----- -NULL - -query ? -select array_union(arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)'), null); ----- -NULL - -# array_union scalar function #12 -query ? -select array_union(null, [1, 1, 2, 2, 3, 3]); ----- -NULL - -query ? -select array_union(null, arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)')); ----- -NULL - -# array_union scalar function #13 -query ? -select array_union([1.2, 3.0], [1.2, 3.0, 5.7]); ----- -[1.2, 3.0, 5.7] - -query ? -select array_union(arrow_cast([1.2, 3.0], 'LargeList(Float64)'), arrow_cast([1.2, 3.0, 5.7], 'LargeList(Float64)')); ----- -[1.2, 3.0, 5.7] - -# array_union scalar function #14 -query ? -select array_union(['hello'], ['hello','datafusion']); ----- -[hello, datafusion] - -query ? -select array_union(arrow_cast(['hello'], 'LargeList(Utf8)'), arrow_cast(['hello','datafusion'], 'LargeList(Utf8)')); ----- -[hello, datafusion] - -query ? 
-select array_union(column1, column2) -from array_intersect_table_1D_NULL; ----- -[1, 2, 3, 4] -[2, 3] -[3, 4] -NULL -NULL -NULL - -query ? -select array_union(arrow_cast(null, 'List(Int64)'), [1, 2]); ----- -NULL - -query ? -select array_union([1, 2], arrow_cast(null, 'List(Int64)')); ----- -NULL - -query ? -select array_intersect(arrow_cast(null, 'List(Int64)'), [1, 2]); ----- -NULL - -query ? -select array_intersect([1, 2], arrow_cast(null, 'List(Int64)')); ----- -NULL - -query ? -select array_except(arrow_cast(null, 'List(Int64)'), [1, 2]); ----- -NULL - -query ? -select array_except([1, 2], arrow_cast(null, 'List(Int64)')); ----- -NULL - -# list_to_string scalar function #4 (function alias `array_to_string`) -query TTT -select list_to_string(['h', 'e', 'l', 'l', 'o'], ','), list_to_string([1, 2, 3, 4, 5], '-'), list_to_string([1.0, 2.0, 3.0], '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -query TTT -select list_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_join scalar function #5 (function alias `array_to_string`) -query TTT -select array_join(['h', 'e', 'l', 'l', 'o'], ','), array_join([1, 2, 3, 4, 5], '-'), array_join([1.0, 2.0, 3.0], '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -query TTT -select array_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# list_join scalar function #6 (function alias `list_join`) -query TTT -select list_join(['h', 'e', 'l', 'l', 'o'], ','), list_join([1, 2, 3, 4, 5], '-'), list_join([1.0, 2.0, 3.0], '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -query TTT -select list_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), 
list_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_to_string scalar function with nulls #1 -query TTT -select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|'); ----- -h,l,o 1-3-5 2|3 - -query TTT -select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -query TTT -select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'FixedSizeList(3, Float64)'), '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 - -# array_to_string scalar function with nulls #2 -query TTT -select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0'); ----- -h,-,-,-,o nil-2-nil-4-5 1|0|3 - -query TTT -select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'LargeList(Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'LargeList(Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'LargeList(Float64)'), '|', '0'); ----- -h,-,-,-,o nil-2-nil-4-5 1|0|3 - -query TTT -select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'FixedSizeList(5, Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'FixedSizeList(5, Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'FixedSizeList(3, Float64)'), '|', '0'); ----- -h,-,-,-,o nil-2-nil-4-5 1|0|3 - -# 
array_to_string float formatting: special values and longer decimals -query TTT -select - array_to_string(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), '|'), - array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'LargeList(Float64)'), '|'), - array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'FixedSizeList(5, Float64)'), '|'); ----- -NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 - -# array_to_string float formatting: scientific-notation inputs -query T -select array_to_string( - make_array( - CAST('1E20' AS DOUBLE), - CAST('-1e+20' AS DOUBLE), - CAST('6.02214076e23' AS DOUBLE), - CAST('1.2345e6' AS DOUBLE), - CAST('1e-5' AS DOUBLE), - CAST('-1e-5' AS DOUBLE), - CAST('9.1093837015e-31' AS DOUBLE), - CAST('-2.5e-4' AS DOUBLE) - ), - '|' -); ----- -100000000000000000000|-100000000000000000000|602214076000000000000000|1234500|0.00001|-0.00001|0.00000000000000000000000000000091093837015|-0.00025 - -query T -select array_to_string(arrow_cast([arrow_cast([NULL, 'a'], 'FixedSizeList(2, Utf8)'), NULL], 'FixedSizeList(2, FixedSizeList(2, Utf8))'), ',', '-'); ----- --,a,- - -# array_to_string with columns #1 - -# For reference -# select column1, column4 from arrays_values; -# ---- -# [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] , -# [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] . 
-# [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] - -# [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] ok -# NULL @ -# [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] $ -# [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] ^ -# [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] NULL - -query T -select array_to_string(column1, column4) from arrays_values; ----- -2,3,4,5,6,7,8,9,10 -11.12.13.14.15.16.17.18.20 -21-22-23-25-26-27-28-29-30 -31ok32ok33ok34ok35ok37ok38ok39ok40 -NULL -41$42$43$44$45$46$47$48$49$50 -51^52^54^55^56^57^58^59^60 -NULL - -query T -select array_to_string(column1, column4) from large_arrays_values; ----- -2,3,4,5,6,7,8,9,10 -11.12.13.14.15.16.17.18.20 -21-22-23-25-26-27-28-29-30 -31ok32ok33ok34ok35ok37ok38ok39ok40 -NULL -41$42$43$44$45$46$47$48$49$50 -51^52^54^55^56^57^58^59^60 -NULL - -query TT -select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from arrays_values; ----- -2_3_4_5_6_7_8_9_10 1/2/3 -11_12_13_14_15_16_17_18_20 1/2/3 -21_22_23_25_26_27_28_29_30 1/2/3 -31_32_33_34_35_37_38_39_40 1/2/3 -NULL 1/2/3 -41_42_43_44_45_46_47_48_49_50 1/2/3 -51_52_54_55_56_57_58_59_60 1/2/3 -61_62_63_64_65_66_67_68_69_70 1/2/3 - -query TT -select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from large_arrays_values; ----- -2_3_4_5_6_7_8_9_10 1/2/3 -11_12_13_14_15_16_17_18_20 1/2/3 -21_22_23_25_26_27_28_29_30 1/2/3 -31_32_33_34_35_37_38_39_40 1/2/3 -NULL 1/2/3 -41_42_43_44_45_46_47_48_49_50 1/2/3 -51_52_54_55_56_57_58_59_60 1/2/3 -61_62_63_64_65_66_67_68_69_70 1/2/3 - -query TT -select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values; ----- -*_2_3_4_5_6_7_8_9_10 1.2.3 -11_12_13_14_15_16_17_18_*_20 1.2.3 -21_22_23_*_25_26_27_28_29_30 1.2.3 -31_32_33_34_35_*_37_38_39_40 1.2.3 -NULL 1.2.3 -41_42_43_44_45_46_47_48_49_50 1.2.3 -51_52_*_54_55_56_57_58_59_60 1.2.3 -61_62_63_64_65_66_67_68_69_70 1.2.3 - -query TT -select array_to_string(column1, '_', '*'), 
array_to_string(make_array(make_array(1,2,3)), '.') from large_arrays_values; ----- -*_2_3_4_5_6_7_8_9_10 1.2.3 -11_12_13_14_15_16_17_18_*_20 1.2.3 -21_22_23_*_25_26_27_28_29_30 1.2.3 -31_32_33_34_35_*_37_38_39_40 1.2.3 -NULL 1.2.3 -41_42_43_44_45_46_47_48_49_50 1.2.3 -51_52_*_54_55_56_57_58_59_60 1.2.3 -61_62_63_64_65_66_67_68_69_70 1.2.3 - -# array_to_string with per-row null_string column -statement ok -CREATE TABLE test_null_str_col AS VALUES - (make_array(1, NULL, 3), ',', 'N/A'), - (make_array(NULL, 5, NULL), ',', 'MISSING'), - (make_array(10, NULL, 12), '-', 'X'), - (make_array(20, NULL, 21), '-', NULL); - -query T -SELECT array_to_string(column1, column2, column3) FROM test_null_str_col; ----- -1,N/A,3 -MISSING,5,MISSING -10-X-12 -20-21 - -statement ok -DROP TABLE test_null_str_col; - -# array_to_string with decimal values -query T -select array_to_string(arrow_cast(make_array(1.5, NULL, 3.14), 'List(Decimal128(10, 2))'), ',', 'N'); ----- -1.50,N,3.14 - -# array_to_string with date values -query T -select array_to_string(arrow_cast(make_array('2024-01-15', '2024-06-30', '2024-12-25'), 'List(Date32)'), ','); ----- -2024-01-15,2024-06-30,2024-12-25 - -query T -select array_to_string(arrow_cast(make_array('2024-01-15', NULL, '2024-12-25'), 'List(Date32)'), ',', 'N'); ----- -2024-01-15,N,2024-12-25 - -# array_to_string with timestamp values -query T -select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Second, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Second, None)')), '|'); ----- -2024-01-15T10:30:00|2024-06-30T15:45:00 - -query T -select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Millisecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Millisecond, None)')), '|'); ----- -2024-01-15T10:30:00|2024-06-30T15:45:00 - -query T -select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Microsecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Microsecond, 
None)')), '|'); ----- -2024-01-15T10:30:00|2024-06-30T15:45:00 - -query T -select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Nanosecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Nanosecond, None)')), '|'); ----- -2024-01-15T10:30:00|2024-06-30T15:45:00 - -# array_to_string with time values -query T -select array_to_string(make_array(arrow_cast('10:30:00', 'Time32(Second)'), arrow_cast('15:45:00', 'Time32(Second)')), ','); ----- -10:30:00,15:45:00 - -query T -select array_to_string(make_array(arrow_cast('10:30:00', 'Time64(Microsecond)'), arrow_cast('15:45:00', 'Time64(Microsecond)')), ','); ----- -10:30:00,15:45:00 - -# array_to_string with interval values -query T -select array_to_string(make_array(interval '1 year 2 months', interval '3 days 4 hours'), ','); ----- -14 mons,3 days 4 hours - -# array_to_string with duration values -query T -select array_to_string(make_array(arrow_cast(1000, 'Duration(Millisecond)'), arrow_cast(2000, 'Duration(Millisecond)')), ','); ----- -PT1S,PT2S - - -## cardinality - -# cardinality scalar function -query III -select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinality(make_array('h', 'e', 'l', 'l', 'o')); ----- -5 3 5 - -query III -select cardinality(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), cardinality(arrow_cast([1, 3, 5], 'LargeList(Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)')); ----- -5 3 5 - -query III -select cardinality(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)')), cardinality(arrow_cast([1, 3, 5], 'FixedSizeList(3, Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); ----- -5 3 5 - -# cardinality scalar function #2 -query II -select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_repeat(array_repeat(array_repeat(3, 3), 2), 3)); ----- -6 18 - -query I -select cardinality(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 
'LargeList(List(Int64))')); ----- -6 - -query I -select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(Int64))')); ----- -6 - -# cardinality scalar function #3 -query II -select cardinality(make_array()), cardinality(make_array(make_array())) ----- -0 0 - -query II -select cardinality([]), cardinality([]::int[]) as with_cast ----- -0 0 - -query II -select cardinality(arrow_cast(make_array(), 'LargeList(Int64)')), cardinality(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) ----- -0 0 - -#TODO -#https://github.com/apache/datafusion/issues/9158 -#query II -#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Int64))')) -#---- -#NULL 0 - -# cardinality of NULL arrays should return NULL -query II -select cardinality(NULL), cardinality(arrow_cast(NULL, 'LargeList(Int64)')) ----- -NULL NULL - -# cardinality with columns -query III -select cardinality(column1), cardinality(column2), cardinality(column3) from arrays; ----- -4 3 5 -4 3 5 -4 3 5 -4 3 3 -NULL 3 4 -4 NULL 1 -4 3 NULL - -query III -select cardinality(column1), cardinality(column2), cardinality(column3) from large_arrays; ----- -4 3 5 -4 3 5 -4 3 5 -4 3 3 -NULL 3 4 -4 NULL 1 -4 3 NULL - -query III -select cardinality(column1), cardinality(column2), cardinality(column3) from fixed_size_arrays; ----- -4 3 5 -4 3 5 -4 3 5 -4 3 5 -NULL 3 5 -4 NULL 5 -4 3 NULL - -## array_remove (aliases: `list_remove`) - -# array_remove scalar function #1 -query ??? -select array_remove(make_array(1, 2, 2, 1, 1), 2), array_remove(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), array_remove(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ----- -[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] - -query ??? 
-select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), - array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), - array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); ----- -[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] - -query ??? -select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), - array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), - array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); ----- -[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] - -query ??? -select - array_remove(make_array(1, null, 2, 3), 2), - array_remove(make_array(1.1, null, 2.2, 3.3), 1.1), - array_remove(make_array('a', null, 'bc'), 'a'); ----- -[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] - -query ??? -select - array_remove(arrow_cast(make_array(1, null, 2, 3), 'LargeList(Int64)'), 2), - array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'LargeList(Float64)'), 1.1), - array_remove(arrow_cast(make_array('a', null, 'bc'), 'LargeList(Utf8)'), 'a'); ----- -[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] - -query ??? -select - array_remove(arrow_cast(make_array(1, null, 2, 3), 'FixedSizeList(4, Int64)'), 2), - array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'FixedSizeList(4, Float64)'), 1.1), - array_remove(arrow_cast(make_array('a', null, 'bc'), 'FixedSizeList(3, Utf8)'), 'a'); ----- -[1, NULL, 3] [NULL, 2.2, 3.3] [NULL, bc] - -#TODO: https://github.com/apache/datafusion/issues/7142 -# follow PostgreSQL behavior -#query ? -#select -# array_remove(NULL, 1) -#---- -#NULL - -query ?? -select - array_remove(make_array(1, null, 2), null), - array_remove(make_array(1, null, 2, null), null); ----- -NULL NULL - -query ?? 
-select - array_remove(arrow_cast(make_array(1, null, 2), 'LargeList(Int64)'), null), - array_remove(arrow_cast(make_array(1, null, 2, null), 'LargeList(Int64)'), null); ----- -NULL NULL - -query ?? -select - array_remove(arrow_cast(make_array(1, null, 2), 'FixedSizeList(3, Int64)'), null), - array_remove(arrow_cast(make_array(1, null, 2, null), 'FixedSizeList(4, Int64)'), null); ----- -NULL NULL - -# array_remove with null element from column -query ? -select array_remove(column1, column2) from (values - (make_array(1, 2, 3), 2), - (make_array(4, 5, 6), null), - (make_array(7, 8, 9), 8), - (null, 1) -) as t(column1, column2); ----- -[1, 3] -NULL -[7, 9] -NULL - -# array_remove with null element from column (LargeList) -query ? -select array_remove(column1, column2) from (values - (arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2), - (arrow_cast(make_array(4, 5, 6), 'LargeList(Int64)'), null), - (arrow_cast(make_array(7, 8, 9), 'LargeList(Int64)'), 8) -) as t(column1, column2); ----- -[1, 3] -NULL -[7, 9] - -# array_remove scalar function #2 (element is list) -query ?? -select array_remove(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), - array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? 
-select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), - array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6]), - array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -# list_remove scalar function #3 (function alias `array_remove`) -query ??? -select list_remove(make_array(1, 2, 2, 1, 1), 2), list_remove(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ----- -[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] - -query ?? -select list_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), - list_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] - -# array_remove scalar function with columns #1 -query ? -select array_remove(column1, column2) from arrays_with_repeating_elements; ----- -[1, 1, 3, 2, 2, 1, 3, 2, 3] -[4, 5, 5, 6, 5, 5, 5, 4, 4] -[7, 7, 8, 7, 9, 7, 8, 7, 7] -[11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ? -select array_remove(column1, column2) from large_arrays_with_repeating_elements; ----- -[1, 1, 3, 2, 2, 1, 3, 2, 3] -[4, 5, 5, 6, 5, 5, 5, 4, 4] -[7, 7, 8, 7, 9, 7, 8, 7, 7] -[11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ? 
-select array_remove(column1, column2) from fixed_arrays_with_repeating_elements; ----- -[1, 1, 3, 2, 2, 1, 3, 2, 3] -[4, 5, 5, 6, 5, 5, 5, 4, 4] -[7, 7, 8, 7, 9, 7, 8, 7, 7] -[11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_remove scalar function with columns #2 (element is list) -query ? -select array_remove(column1, column2) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ? -select array_remove(column1, column2) from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ? 
-select array_remove(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -# array_remove scalar function with columns and scalars #1 -query ?? -select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from arrays_with_repeating_elements; ----- -[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ?? -select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from large_arrays_with_repeating_elements; ----- -[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ?? 
-select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from fixed_arrays_with_repeating_elements; ----- -[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] -[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_remove scalar function with columns and scalars #2 (element is list) -query ?? -select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), - array_remove(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ?? 
-select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), - array_remove(column1, make_array(1, 2, 3)) from large_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ?? 
-select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), - array_remove(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## array_remove_n (aliases: `list_remove_n`) - -# array_remove_n with null element scalar -query ?? -select array_remove_n(make_array(1, 2, 2, 1, 1), NULL, 2), - array_remove_n(make_array(1, 2, 2, 1, 1), 2, 2); ----- -NULL [1, 1, 1] - -# array_remove_n with null element scalar (LargeList) -query ?? -select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), NULL, 2), - array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2, 2); ----- -NULL [1, 1, 1] - -# array_remove_n with null element from column -query ? 
-select array_remove_n(column1, column2, column3) from (values - (make_array(1, 2, 2, 1, 1), 2, 2), - (make_array(3, 4, 4, 3, 3), null, 2), - (make_array(5, 6, 6, 5, 5), 6, 1), - (null, 1, 1) -) as t(column1, column2, column3); ----- -[1, 1, 1] -NULL -[5, 6, 5, 5] -NULL - -# array_remove_n with null element from column (LargeList) -query ? -select array_remove_n(column1, column2, column3) from (values - (arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2, 2), - (arrow_cast(make_array(3, 4, 4, 3, 3), 'LargeList(Int64)'), null, 2), - (arrow_cast(make_array(5, 6, 6, 5, 5), 'LargeList(Int64)'), 6, 1) -) as t(column1, column2, column3); ----- -[1, 1, 1] -NULL -[5, 6, 5, 5] - -# array_remove_n scalar function #1 -query ??? -select array_remove_n(make_array(1, 2, 2, 1, 1), 2, 2), array_remove_n(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0, 2), array_remove_n(make_array('h', 'e', 'l', 'l', 'o'), 'l', 3); ----- -[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] - -query ??? -select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int32)'), 2, 2), - array_remove_n(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float32)'), 1.0, 2), - array_remove_n(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l', 3); ----- -[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] - -query ??? -select array_remove_n(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int32)'), 2, 2), - array_remove_n(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float32)'), 1.0, 2), - array_remove_n(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l', 3); ----- -[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] - -# array_remove_n scalar function #2 (element is list) -query ?? 
-select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6], 2), array_remove_n(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4], 2); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove_n(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6], 2), - array_remove_n(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4], 2); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove_n(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6], 2), - array_remove_n(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4], 2); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -# list_remove_n scalar function #3 (function alias `array_remove_n`) -query ??? -select list_remove_n(make_array(1, 2, 2, 1, 1), 2, 2), list_remove_n(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0, 2), list_remove_n(make_array('h', 'e', 'l', 'l', 'o'), 'l', 3); ----- -[1, 1, 1] [2.0, 2.0, 1.0] [h, e, o] - -# array_remove_n scalar function with columns #1 -query ? -select array_remove_n(column1, column2, column4) from arrays_with_repeating_elements; ----- -[1, 1, 3, 1, 3, 2, 3] -[5, 5, 6, 5, 5, 5, 4, 4] -[8, 9, 8, 7, 7] -[11, 12, 11, 12, 11, 12] - -# array_remove_n scalar function with columns #2 (element is list) -query ? 
-select array_remove_n(column1, column2, column4) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[22, 23, 24], [25, 26, 27], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] - -# array_remove_n scalar function with columns and scalars #1 -query ??? -select array_remove_n(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2, column4), array_remove_n(column1, 1, column4), array_remove_n(column1, column2, 2) from arrays_with_repeating_elements; ----- -[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] [1, 1, 3, 2, 1, 3, 2, 3] -[1, 2, 2, 5, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] [5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] [11, 12, 11, 12, 10, 11, 12, 10] - -# array_remove_n scalar function with columns and scalars #2 (element is list) -query ??? 
-select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2, column4), array_remove_n(column1, make_array(1, 2, 3), column4), array_remove_n(column1, column2, 2) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] [[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] [[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] [[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## array_remove_all (aliases: `list_removes`) - -#TODO: https://github.com/apache/datafusion/issues/7142 -# array_remove_all with NULL elements -#query ? 
-#select array_remove_all(NULL, 1); -#---- -#NULL - -query ? -select array_remove_all(make_array(1, 2, 2, 1, 1), NULL); ----- -NULL - -# array_remove_all with null element from column -query ? -select array_remove_all(column1, column2) from (values - (make_array(1, 2, 2, 1, 1), 2), - (make_array(3, 4, 4, 3, 3), null), - (make_array(5, 6, 6, 5, 5), 6), - (null, 1) -) as t(column1, column2); ----- -[1, 1, 1] -NULL -[5, 5, 5] -NULL - -# array_remove_all with null element from column (LargeList) -query ? -select array_remove_all(column1, column2) from (values - (arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), - (arrow_cast(make_array(3, 4, 4, 3, 3), 'LargeList(Int64)'), null), - (arrow_cast(make_array(5, 6, 6, 5, 5), 'LargeList(Int64)'), 6) -) as t(column1, column2); ----- -[1, 1, 1] -NULL -[5, 5, 5] - -# array_remove_all scalar function #1 -query ??? -select array_remove_all(make_array(1, 2, 2, 1, 1), 2), array_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), array_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ----- -[1, 1, 1] [2.0, 2.0] [h, e, o] - -query ??? -select array_remove_all(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), - array_remove_all(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), - array_remove_all(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); ----- -[1, 1, 1] [2.0, 2.0] [h, e, o] - -query ??? -select array_remove_all(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), array_remove_all(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), array_remove_all(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 'l'); ----- -[1, 1, 1] [2.0, 2.0] [h, e, o] - -# array_remove_all scalar function #2 (element is list) -query ?? 
-select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove_all(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), - array_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -query ?? -select array_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [4, 5, 6]), - array_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, FixedSizeList(3, Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -# list_remove_all scalar function #3 (function alias `array_remove_all`) -query ??? -select list_remove_all(make_array(1, 2, 2, 1, 1), 2), list_remove_all(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 1.0), list_remove_all(make_array('h', 'e', 'l', 'l', 'o'), 'l'); ----- -[1, 1, 1] [2.0, 2.0] [h, e, o] - -query ?? -select list_remove_all(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), - list_remove_all(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); ----- -[[1, 2, 3], [5, 5, 5], [7, 8, 9]] [[1, 3, 2], [5, 3, 1], [1, 3, 2]] - -# array_remove_all scalar function with columns #1 -query ? -select array_remove_all(column1, column2) from arrays_with_repeating_elements; ----- -[1, 1, 3, 1, 3, 3] -[5, 5, 6, 5, 5, 5] -[8, 9, 8] -[11, 12, 11, 12, 11, 12] - -query ? 
-select array_remove_all(column1, column2) from fixed_arrays_with_repeating_elements; ----- -[1, 1, 3, 1, 3, 3] -[5, 5, 6, 5, 5, 5] -[8, 9, 8] -[11, 12, 11, 12, 11, 12] - -# array_remove_all scalar function with columns #2 (element is list) -query ? -select array_remove_all(column1, column2) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [7, 8, 9]] -[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15]] -[[22, 23, 24], [25, 26, 27], [22, 23, 24]] -[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] - -query ? -select array_remove_all(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [1, 2, 3], [7, 8, 9], [1, 2, 3], [7, 8, 9], [7, 8, 9]] -[[13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15]] -[[22, 23, 24], [25, 26, 27], [22, 23, 24]] -[[31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36], [31, 32, 33], [34, 35, 36]] - -# array_remove_all scalar function with columns and scalars #1 -query ?? -select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from arrays_with_repeating_elements; ----- -[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] -[1, 2, 2, 5, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -query ?? 
-select array_remove_all(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove_all(column1, 1) from fixed_arrays_with_repeating_elements; ----- -[1, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 3, 2, 2, 3, 2, 3] -[1, 2, 2, 5, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] -[1, 2, 2, 4, 5, 4, 4, 10, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] -[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] - -# array_remove_all scalar function with columns and scalars #2 (element is list) -query ?? -select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), array_remove_all(column1, make_array(1, 2, 3)) from nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -query ?? 
-select array_remove_all(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), - array_remove_all(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; ----- -[[1, 2, 3], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [7, 8, 9], [4, 5, 6], [4, 5, 6], [7, 8, 9], [4, 5, 6], [7, 8, 9]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [28, 29, 30], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] -[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] - -## trim_array (deprecated) - -## array_length (aliases: `list_length`) - -# array_length scalar function #1 -query III -select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6])); ----- -5 3 3 - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))')); ----- -5 3 3 - -# array_length scalar function #2 -query III -select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 
2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1); ----- -5 3 3 - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 1), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 1), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))'), 1); ----- -5 3 3 - -# array_length scalar function #3 -query III -select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2); ----- -NULL NULL 2 - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2), array_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 2), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))'), 2); ----- -NULL NULL 2 - -# array_length scalar function #4 -query II -select array_length(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 1), array_length(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 2); ----- -3 2 - -query II -select array_length(arrow_cast(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 'LargeList(List(List(Int64)))'), 1), array_length(arrow_cast(array_repeat(array_repeat(array_repeat(3, 5), 2), 3), 'LargeList(List(List(Int64)))'), 2); ----- -3 2 - -# array_length scalar function #5 -query III -select array_length(make_array()), array_length(make_array(), 1), array_length(make_array(), 2) ----- -0 0 NULL - -# array_length scalar function #6 nested array -query III -select array_length([[1, 2, 3, 4], [5, 6, 7, 8]]), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 1), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 2); ----- -2 2 4 - -# list_length scalar function #7 (function alias `array_length`) -query IIII -select list_length(make_array(1, 2, 3, 4, 5)), list_length(make_array(1, 2, 3)), list_length(make_array([1, 2], [3, 4], [5, 6])), array_length([[1, 2, 3, 4], [5, 6, 7, 8]], 3); ----- -5 3 3 NULL - -query III -select 
list_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), list_length(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(List(Int64))')); ----- -5 3 3 - -# array_length with columns -query I -select array_length(column1, column3) from arrays_values; ----- -10 -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -query I -select array_length(arrow_cast(column1, 'LargeList(Int64)'), column3) from arrays_values; ----- -10 -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -# array_length with columns and scalars -query II -select array_length(array[array[1, 2], array[3, 4]], column3), array_length(column1, 1) from arrays_values; ----- -2 10 -2 10 -NULL 10 -NULL 10 -NULL NULL -NULL 10 -NULL 10 -NULL 10 - -query II -select array_length(arrow_cast(array[array[1, 2], array[3, 4]], 'LargeList(List(Int64))'), column3), array_length(arrow_cast(column1, 'LargeList(Int64)'), 1) from arrays_values; ----- -2 10 -2 10 -NULL 10 -NULL 10 -NULL NULL -NULL 10 -NULL 10 -NULL 10 - -# array_length for fixed sized list - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_length(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))')); ----- -5 3 3 - -query III -select array_length(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 1), array_length(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1), array_length(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), 1); ----- -5 3 3 - - -query RRR -select array_distance([2], [3]), list_distance([1], [2]), list_distance([1], [-2]); ----- -1 1 3 - -query error -select list_distance([1], [1, 2]); - -query R -select array_distance([[1, 1]], [1, 2]); ----- -1 - -query R -select array_distance([[1, 1]], [[1, 2]]); ----- -1 - -query R -select array_distance([[1, 1]], [[1, 
2]]); ----- -1 - -query RR -select array_distance([1, 1, 0, 0], [2, 2, 1, 1]), list_distance([1, 2, 3], [1, 2, 3]); ----- -2 0 - -query RR -select array_distance([1.0, 1, 0, 0], [2, 2.0, 1, 1]), list_distance([1, 2.0, 3], [1, 2, 3]); ----- -2 0 - -query R -select list_distance([1, 1, NULL, 0], [2, 2, NULL, NULL]); ----- -NULL - -query R -select list_distance([NULL, NULL], [NULL, NULL]); ----- -NULL - -query R -select list_distance([1.0, 2.0, 3.0], [1.0, 2.0, 3.5]) AS distance; ----- -0.5 - -query R -select list_distance([1, 2, 3], [1, 2, 3]) AS distance; ----- -0 - -# array_distance with columns -query RRR -select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from arrays_distance_table; ----- -0 0.374165738677 NULL -5.196152422707 6.063827174318 NULL -10.392304845413 11.778794505381 NULL -15.58845726812 15.935494971917 NULL - -query RRR -select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from large_arrays_distance_table; ----- -0 0.374165738677 NULL -5.196152422707 6.063827174318 NULL -10.392304845413 11.778794505381 NULL -15.58845726812 15.935494971917 NULL - -query RRR -select array_distance(column1, column2), array_distance(column1, column3), array_distance(column1, column4) from fixed_size_arrays_distance_table; ----- -0 0.374165738677 NULL -5.196152422707 6.063827174318 NULL -10.392304845413 11.778794505381 NULL -15.58845726812 15.935494971917 NULL - - -## array_dims (aliases: `list_dims`) - -# array dims error -query error -select array_dims(1); - -# array_dims scalar function -query ??? -select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]])); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -query ??? 
-select array_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -query ??? -select array_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -# array_dims scalar function #2 -query ?? -select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)), array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2)); ----- -[1, 2, 3] [2, 5, 4] - -query ?? -select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'LargeList(List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'LargeList(List(List(Int64)))')); ----- -[1, 2, 3] [2, 5, 4] - -query ?? -select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'FixedSizeList(1, List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'FixedSizeList(2, List(List(Int64)))')); ----- -[1, 2, 3] [2, 5, 4] - -# array_dims scalar function #3 -query ?? -select array_dims(make_array()), array_dims(make_array(make_array())) ----- -NULL [1, 0] - -query ?? -select array_dims(arrow_cast(make_array(), 'LargeList(Int64)')), array_dims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) ----- -NULL [1, 0] - -# list_dims scalar function #4 (function alias `array_dims`) -query ??? -select list_dims(make_array(1, 2, 3)), list_dims(make_array([1, 2], [3, 4])), list_dims(make_array([[[[1], [2]]]])); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -query ??? 
-select list_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -query ??? -select list_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); ----- -[3] [2, 2] [1, 1, 1, 2, 1] - -# array_dims with columns -query ??? -select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; ----- -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [3] -NULL [3] [4] -[2, 2] NULL [1] -[2, 2] [3] NULL - -query ??? -select array_dims(column1), array_dims(column2), array_dims(column3) from large_arrays; ----- -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [3] -NULL [3] [4] -[2, 2] NULL [1] -[2, 2] [3] NULL - -query ??? 
-select array_dims(column1), array_dims(column2), array_dims(column3) from fixed_size_arrays; ----- -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [5] -[2, 2] [3] [5] -NULL [3] [5] -[2, 2] NULL [5] -[2, 2] [3] NULL - - -## array_ndims (aliases: `list_ndims`) - -# array_ndims scalar function #1 - -#follow PostgreSQL -query I -select - array_ndims(null); ----- -NULL - -query I -select - array_ndims([2, 3]); ----- -1 - -statement ok -CREATE TABLE array_ndims_table -AS VALUES - ([1], [1, 2, 3], [[7]], [[[[[10]]]]]), - ([2], [4, 5], [[8]], [[[[[10]]]]]), - (NUll, [6, 7], [[9]], [[[[[10]]]]]), - ([3], [6], [[9]], [[[[[10]]]]]) -; - -statement ok -CREATE TABLE large_array_ndims_table -AS SELECT - column1, - arrow_cast(column2, 'LargeList(Int64)') as column2, - arrow_cast(column3, 'LargeList(List(Int64))') as column3, - arrow_cast(column4, 'LargeList(List(List(List(List(Int64)))))') as column4 -FROM array_ndims_table; - -statement ok -CREATE TABLE fixed_array_ndims_table -AS VALUES - (arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'), arrow_cast([[7]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), - (arrow_cast([2], 'FixedSizeList(1, Int64)'), arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)'), arrow_cast([[8]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), - (null, arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), - (arrow_cast([3], 'FixedSizeList(1, Int64)'), arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')) -; - -query IIII -select - array_ndims(column1), - array_ndims(column2), - array_ndims(column3), - array_ndims(column4) -from array_ndims_table; 
----- -1 1 2 5 -1 1 2 5 -NULL 1 2 5 -1 1 2 5 - -query IIII -select - array_ndims(column1), - array_ndims(column2), - array_ndims(column3), - array_ndims(column4) -from large_array_ndims_table; ----- -1 1 2 5 -1 1 2 5 -NULL 1 2 5 -1 1 2 5 - -query IIII -select - array_ndims(column1), - array_ndims(column2), - array_ndims(column3), - array_ndims(column4) -from fixed_array_ndims_table; ----- -1 1 2 5 -1 1 2 5 -NULL 1 2 5 -1 1 2 5 - - - -statement ok -drop table array_ndims_table; - -statement ok -drop table large_array_ndims_table - -query I -select array_ndims(arrow_cast([null], 'List(List(List(Int64)))')); ----- -3 - -# array_ndims scalar function #2 -query II -select array_ndims(array_repeat(array_repeat(array_repeat(1, 3), 2), 1)), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); ----- -3 21 - -# array_ndims scalar function #3 -query II -select array_ndims(make_array()), array_ndims(make_array(make_array())) ----- -1 2 - -query II -select array_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), array_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) ----- -1 2 - -# list_ndims scalar function #4 (function alias `array_ndims`) -query III -select list_ndims(make_array(1, 2, 3)), list_ndims(make_array([1, 2], [3, 4])), list_ndims(make_array([[[[1], [2]]]])); ----- -1 2 5 - -query III -select list_ndims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_ndims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_ndims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))')); ----- -1 2 5 - -query II -select list_ndims(make_array()), list_ndims(make_array(make_array())) ----- -1 2 - -query II -select list_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), list_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))')) ----- -1 2 - -# array_ndims with columns -query III -select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arrays; ----- -2 1 1 -2 1 1 
-2 1 1 -2 1 1 -NULL 1 1 -2 NULL 1 -2 1 NULL - -query III -select array_ndims(column1), array_ndims(column2), array_ndims(column3) from large_arrays; ----- -2 1 1 -2 1 1 -2 1 1 -2 1 1 -NULL 1 1 -2 NULL 1 -2 1 NULL - -## array_has/array_has_all/array_has_any - -# If lhs is empty, return false -query B -select array_has([], 1); ----- -false - -# If rhs is Null, we returns Null -query BBB -select array_has([], null), - array_has([1, 2, 3], null), - array_has([null, 1], null); ----- -NULL NULL NULL - -# Always return false if not contained even if list has null elements -query BB -select array_has([1, null, 2], 3), - array_has([null, null, null], 3); ----- -false false - -#TODO: array_has_all and array_has_any cannot handle NULL -#query BBBB -#select array_has_any([], null), -# array_has_any([1, 2, 3], null), -# array_has_all([], null), -# array_has_all([1, 2, 3], null); -#---- -#false false false false - -query BBBBBBBBBBBB -select array_has(make_array(1,2), 1), - array_has(make_array(1,2,NULL), 1), - array_has(make_array([2,3], [3,4]), make_array(2,3)), - array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1], [2,3])), - array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([4,5], [6])), - array_has(make_array([[1], [2,3]], [[4,5], [6]]), make_array([1])), - array_has(make_array([[[1]]]), make_array([[1]])), - array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[2]])), - array_has(make_array([[[1]]], [[[1], [2]]]), make_array([[1], [2]])), - list_has(make_array(1,2,3), 4), - array_contains(make_array(1,2,3), 3), - list_contains(make_array(1,2,3), 0) -; ----- -true true true true true false true false true false true false - -query BBBBBBBBBBBB -select array_has(arrow_cast(make_array(1,2), 'LargeList(Int64)'), 1), - array_has(arrow_cast(make_array(1,2,NULL), 'LargeList(Int64)'), 1), - array_has(arrow_cast(make_array([2,3], [3,4]), 'LargeList(List(Int64))'), make_array(2,3)), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 
'LargeList(List(List(Int64)))'), make_array([1], [2,3])), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'LargeList(List(List(Int64)))'), make_array([4,5], [6])), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'LargeList(List(List(Int64)))'), make_array([1])), - array_has(arrow_cast(make_array([[[1]]]), 'LargeList(List(List(List(Int64))))'), make_array([[1]])), - array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'LargeList(List(List(List(Int64))))'), make_array([[2]])), - array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'LargeList(List(List(List(Int64))))'), make_array([[1], [2]])), - list_has(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 4), - array_contains(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 3), - list_contains(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), 0) -; ----- -true true true true true false true false true false true false - -query BBBBBBBBBBBB -select array_has(arrow_cast(make_array(1,2), 'FixedSizeList(2, Int64)'), 1), - array_has(arrow_cast(make_array(1,2,NULL), 'FixedSizeList(3, Int64)'), 1), - array_has(arrow_cast(make_array([2,3], [3,4]), 'FixedSizeList(2, List(Int64))'), make_array(2,3)), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1], [2,3])), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([4,5], [6])), - array_has(arrow_cast(make_array([[1], [2,3]], [[4,5], [6]]), 'FixedSizeList(2, List(List(Int64)))'), make_array([1])), - array_has(arrow_cast(make_array([[[1]]]), 'FixedSizeList(1, List(List(List(Int64))))'), make_array([[1]])), - array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[2]])), - array_has(arrow_cast(make_array([[[1]]], [[[1], [2]]]), 'FixedSizeList(2, List(List(List(Int64))))'), make_array([[1], [2]])), - list_has(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, 
Int64)'), 4), - array_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 3), - list_contains(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), 0) -; ----- -true true true true true false true false true false true false - -query BBB -select array_has(column1, column2), - array_has_all(column3, column4), - array_has_any(column5, column6) -from array_has_table_1D; ----- -true true true -false false false - -query BBB -select array_has(arrow_cast(column1, 'LargeList(Int64)'), column2), - array_has_all(arrow_cast(column3, 'LargeList(Int64)'), arrow_cast(column4, 'LargeList(Int64)')), - array_has_any(arrow_cast(column5, 'LargeList(Int64)'), arrow_cast(column6, 'LargeList(Int64)')) -from array_has_table_1D; ----- -true true true -false false false - -query B -select array_has(column1, column2) -from array_has_table_null; ----- -true -true -false -false -false - -# array_has([1, 3, 5], 1) -> true (array contains element) -# array_has([], 1) -> false (empty array, not null) -# array_has(null, 1) -> null (null array) -query BB -select array_has(column1, column2), array_has(null, column2) -from array_has_table_empty; ----- -true NULL -false NULL -NULL NULL - -# Test for issue: array_has should return false for empty arrays, not null -# This test demonstrates the correct behavior with COALESCE to show the distinction -# array_has([1, 3, 5], 1) -> 'true' -# array_has([], 1) -> 'false' (empty array should return false) -# array_has(null, 1) -> 'null' (null array should return null) -query ?T -SELECT column1, COALESCE(CAST(array_has(column1, column2) AS VARCHAR), 'null') -from array_has_table_empty; ----- -[1, 3, 5] true -[] false -NULL null - -query B -select array_has(column1, column2) -from fixed_size_array_has_table_1D; ----- -true -false - -query BB -select array_has_all(column3, column4), - array_has_any(column5, column6) -from fixed_size_array_has_table_1D; ----- -true true -false false - -query BBB -select array_has(column1, column2), - 
array_has_all(column3, column4), - array_has_any(column5, column6) -from array_has_table_1D_Float; ----- -true true false -false false true - -query BBB -select array_has(arrow_cast(column1, 'LargeList(Float64)'), column2), - array_has_all(arrow_cast(column3, 'LargeList(Float64)'), arrow_cast(column4, 'LargeList(Float64)')), - array_has_any(arrow_cast(column5, 'LargeList(Float64)'), arrow_cast(column6, 'LargeList(Float64)')) -from array_has_table_1D_Float; ----- -true true false -false false true - -query B -select array_has(column1, column2) -from fixed_size_array_has_table_1D_Float; ----- -true -false - -query BB -select array_has_all(column3, column4), - array_has_any(column5, column6) -from fixed_size_array_has_table_1D_Float; ----- -true true -false true - -query BBB -select array_has(column1, column2), - array_has_all(column3, column4), - array_has_any(column5, column6) -from array_has_table_1D_Boolean; ----- -false true true -true true true - -query BBB -select array_has(arrow_cast(column1, 'LargeList(Boolean)'), column2), - array_has_all(arrow_cast(column3, 'LargeList(Boolean)'), arrow_cast(column4, 'LargeList(Boolean)')), - array_has_any(arrow_cast(column5, 'LargeList(Boolean)'), arrow_cast(column6, 'LargeList(Boolean)')) -from array_has_table_1D_Boolean; ----- -false true true -true true true - -query B -select array_has(column1, column2) -from fixed_size_array_has_table_1D_Boolean; ----- -false -true - -query BB -select array_has_all(column3, column4), - array_has_any(column5, column6) -from fixed_size_array_has_table_1D_Boolean; ----- -true true -true true - -query BBBBBBBB -select array_has_all(column3, arrow_cast(column4,'LargeList(Boolean)')), - array_has_any(column5, arrow_cast(column6,'LargeList(Boolean)')), - array_has_all(column3, arrow_cast(column4,'List(Boolean)')), - array_has_any(column5, arrow_cast(column6,'List(Boolean)')), - array_has_all(arrow_cast(column3, 'LargeList(Boolean)'), column4), - array_has_any(arrow_cast(column5, 
'LargeList(Boolean)'), column6), - array_has_all(arrow_cast(column3, 'List(Boolean)'), column4), - array_has_any(arrow_cast(column5, 'List(Boolean)'), column6) -from fixed_size_array_has_table_1D_Boolean; ----- -true true true true true true true true -true true true true true true true true - -query BBB -select array_has(column1, column2), - array_has_all(column3, column4), - array_has_any(column5, column6) -from array_has_table_1D_UTF8; ----- -true true false -false false true - -query BBB -select array_has(arrow_cast(column1, 'LargeList(Utf8)'), column2), - array_has_all(arrow_cast(column3, 'LargeList(Utf8)'), arrow_cast(column4, 'LargeList(Utf8)')), - array_has_any(arrow_cast(column5, 'LargeList(Utf8)'), arrow_cast(column6, 'LargeList(Utf8)')) -from array_has_table_1D_UTF8; ----- -true true false -false false true - -query B -select array_has(column1, column2) -from fixed_size_array_has_table_1D_UTF8; ----- -true -false - -query BB -select array_has(column1, column2), - array_has_all(column3, column4) -from array_has_table_2D; ----- -false true -true false - -query BB -select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), column2), - array_has_all(arrow_cast(column3, 'LargeList(List(Int64))'), arrow_cast(column4, 'LargeList(List(Int64))')) -from array_has_table_2D; ----- -false true -true false - -query B -select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), column2) -from fixed_size_array_has_table_2D; ----- -false -false - -query B -select array_has_all(arrow_cast(column3, 'LargeList(List(Int64))'), arrow_cast(column4, 'LargeList(List(Int64))')) -from fixed_size_array_has_table_2D; ----- -true -false - -query B -select array_has_all(column1, column2) -from array_has_table_2D_float; ----- -true -false - -query B -select array_has_all(arrow_cast(column1, 'LargeList(List(Float64))'), arrow_cast(column2, 'LargeList(List(Float64))')) -from array_has_table_2D_float; ----- -true -false - -query B -select array_has_all(column1, column2) -from 
fixed_size_array_has_table_2D_float; ----- -false -false - -query B -select array_has(column1, column2) from array_has_table_3D; ----- -false -true -false -false -true -false -true - -query B -select array_has(arrow_cast(column1, 'LargeList(List(List(Int64)))'), column2) from array_has_table_3D; ----- -false -true -false -false -true -false -true - -query B -select array_has(column1, column2) from fixed_size_array_has_table_3D; ----- -false -false -false -false -true -true -true - -query BBBB -select array_has(column1, make_array(5, 6)), - array_has(column1, make_array(7, NULL)), - array_has(column2, 5.5), - array_has(column3, 'o') -from arrays; ----- -false false false true -true false true false -true false false true -false true false false -NULL NULL false false -false false NULL false -false false false NULL - -query BBBB -select array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(5, 6)), - array_has(arrow_cast(column1, 'LargeList(List(Int64))'), make_array(7, NULL)), - array_has(arrow_cast(column2, 'LargeList(Float64)'), 5.5), - array_has(arrow_cast(column3, 'LargeList(Utf8)'), 'o') -from arrays; ----- -false false false true -true false true false -true false false true -false true false false -NULL NULL false false -false false NULL false -false false false NULL - -# Row 1: [[NULL,2],[3,NULL]], [1.1,2.2,3.3], ['L','o','r','e','m'] -# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m'] -# Row 3: [[5,6],[7,8]], [7.7,8.8,9.9], ['d',NULL,'l','o','r'] -# Row 4: [[7,NULL],[9,10]], [10.1,NULL,12.2], ['s','i','t','a','b'] -# Row 5: NULL, [13.3,14.4,15.5], ['a','m','e','t','x'] -# Row 6: [[11,12],[13,14]], NULL, [',','a','b','c','d'] -# Row 7: [[15,16],[NULL,18]], [16.6,17.7,18.8], NULL -query BBBB -select array_has(column1, make_array(5, 6)), - array_has(column1, make_array(7, NULL)), - array_has(column2, 5.5), - array_has(column3, 'o') -from fixed_size_arrays; ----- -false false false true -true false true false -true false false true 
-false true false false -NULL NULL false false -false false NULL false -false false false NULL - -query BBBB -select array_has_all(make_array(1,2,3), []), - array_has_any(make_array(1,2,3), []), - array_has_all(make_array('aa','bb','cc'), []), - array_has_any(make_array('aa','bb','cc'), []) -; ----- -true false true false - -query BBBBBBBBBBBBB -select array_has_all(make_array(1,2,3), make_array(1,3)), - array_has_all(make_array(1,2,3), make_array(1,4)), - array_has_all(make_array([1,2], [3,4]), make_array([1,2])), - array_has_all(make_array([1,2], [3,4]), make_array([1,3])), - array_has_all(make_array([1,2], [3,4]), make_array([1,2], [3,4], [5,6])), - array_has_all(make_array([[1,2,3]]), make_array([[1]])), - array_has_all(make_array([[1,2,3]]), make_array([[1,2,3]])), - array_has_any(make_array(1,2,3), make_array(1,10,100)), - array_has_any(make_array(1,2,3), make_array(10,100)), - array_has_any(make_array([1,2], [3,4]), make_array([1,10], [10,4])), - array_has_any(make_array([1,2], [3,4]), make_array([10,20], [3,4])), - array_has_any(make_array([[1,2,3]]), make_array([[1,2,3], [4,5,6]])), - array_has_any(make_array([[1,2,3]]), make_array([[1,2,3]], [[4,5,6]])) -; ----- -true false true false false false true true false false true false true - -query BBBBBBBBBBBBB -select array_has_all(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(1,3), 'LargeList(Int64)')), - array_has_all(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,4), 'LargeList(Int64)')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2]), 'LargeList(List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,3]), 'LargeList(List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'LargeList(List(Int64))')), - 
array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1]]), 'LargeList(List(List(Int64)))')), - array_has_all(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))')), - array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(1,10,100), 'LargeList(Int64)')), - array_has_any(arrow_cast(make_array(1,2,3),'LargeList(Int64)'), arrow_cast(make_array(10,100),'LargeList(Int64)')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'LargeList(List(Int64))')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'LargeList(List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'LargeList(List(Int64))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'LargeList(List(List(Int64)))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'LargeList(List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'LargeList(List(List(Int64)))')) -; ----- -true false true false false false true true false false true false true - -query BBBBBBBBBBBBB -select array_has_all(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 3), 'FixedSizeList(2, Int64)')), - array_has_all(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1, 4), 'FixedSizeList(2, Int64)')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2]), 'FixedSizeList(1, List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,3]), 'FixedSizeList(1, List(Int64))')), - array_has_all(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,2], [3,4], [5,6]), 'FixedSizeList(3, List(Int64))')), - 
array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1]]), 'FixedSizeList(1, List(List(Int64)))')), - array_has_all(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))')), - array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(1,10,100), 'FixedSizeList(3, Int64)')), - array_has_any(arrow_cast(make_array(1,2,3),'FixedSizeList(3, Int64)'), arrow_cast(make_array(10, 100),'FixedSizeList(2, Int64)')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([1,10], [10,4]), 'FixedSizeList(2, List(Int64))')), - array_has_any(arrow_cast(make_array([1,2], [3,4]), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array([10,20], [3,4]), 'FixedSizeList(2, List(Int64))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3], [4,5,6]]), 'FixedSizeList(1, List(List(Int64)))')), - array_has_any(arrow_cast(make_array([[1,2,3]]), 'FixedSizeList(1, List(List(Int64)))'), arrow_cast(make_array([[1,2,3]], [[4,5,6]]), 'FixedSizeList(2, List(List(Int64)))')) -; ----- -true false true false false false true true false false true false true - -# rewrite various array_has operations to InList where the haystack is a literal list -# NB that `col in (a, b, c)` is simplified to OR if there are <= 3 elements, so we make 4-element haystack lists - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE needle IN ('7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'); ----- -1 - -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE needle IN ('7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'); ----- 
-logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE needle = ANY(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c']); ----- -1 - -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE needle = ANY(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c']); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan 
-01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], needle); ----- -1 - -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], needle); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: 
partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'LargeList(Utf8View)'), needle); ----- -1 - -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'LargeList(Utf8View)'), needle); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'FixedSizeList(4, Utf8View)'), needle); ----- -1 - -query TT -explain with test AS (SELECT 
substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has(arrow_cast(['7f4b18de3cfeb9b4ac78c381ee2ad278', 'a', 'b', 'c'], 'FixedSizeList(4, Utf8View)'), needle); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: -06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), Utf8View("c")]) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN (SET) ([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, c]), projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -query I -with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has([needle], needle); ----- -100000 - -# The optimizer does not currently eliminate the filter; -# Instead, it's rewritten as `IS NULL OR NOT NULL` due to SQL null semantics -query TT -explain with test AS (SELECT substr(md5(i::text)::text, 1, 32) as needle FROM generate_series(1, 100000) t(i)) -select count(*) from test WHERE array_has([needle], needle); ----- -logical_plan -01)Projection: count(Int64(1)) AS count(*) -02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] -03)----SubqueryAlias: test -04)------SubqueryAlias: t -05)--------Projection: 
-06)----------Filter: substr(CAST(md5(CAST(generate_series().value AS Utf8View)) AS Utf8View), Int64(1), Int64(32)) IS NOT NULL OR Boolean(NULL) -07)------------TableScan: generate_series() projection=[value] -physical_plan -01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] -02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] -03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] -05)--------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IS NOT NULL OR NULL, projection=[] -06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -07)------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192] - -# any operator -query ? -select column3 from arrays where 'L'=any(column3); ----- -[L, o, r, e, m] - -query I -select count(*) from arrays where 'L'=any(column3); ----- -1 - -query I -select count(*) from arrays where 'X'=any(column3); ----- -0 - -# any operator with comparison operators -# Use inline arrays so the test data is visible and the needle (5) -# falls within the range of some arrays but not others. -statement ok -CREATE TABLE any_op_test AS VALUES - (1, make_array(1, 2, 3)), - (2, make_array(4, 5, 6)), - (3, make_array(7, 8, 9)), - (4, make_array(3, 5, 7)); - -# 5 > ANY(arr): true when array_min < 5 -# row1: min=1 < 5 ✓, row2: min=4 < 5 ✓, row3: min=7 < 5 ✗, row4: min=3 < 5 ✓ -query I? -select column1, column2 from any_op_test where 5 > any(column2) order by column1; ----- -1 [1, 2, 3] -2 [4, 5, 6] -4 [3, 5, 7] - -# 5 >= ANY(arr): true when array_min <= 5 -# row1: min=1 <= 5 ✓, row2: min=4 <= 5 ✓, row3: min=7 <= 5 ✗, row4: min=3 <= 5 ✓ -query I? -select column1, column2 from any_op_test where 5 >= any(column2) order by column1; ----- -1 [1, 2, 3] -2 [4, 5, 6] -4 [3, 5, 7] - -# 5 < ANY(arr): true when array_max > 5 -# row1: max=3 > 5 ✗, row2: max=6 > 5 ✓, row3: max=9 > 5 ✓, row4: max=7 > 5 ✓ -query I? 
-select column1, column2 from any_op_test where 5 < any(column2) order by column1; ----- -2 [4, 5, 6] -3 [7, 8, 9] -4 [3, 5, 7] - -# 5 <= ANY(arr): true when array_max >= 5 -# row1: max=3 >= 5 ✗, row2: max=6 >= 5 ✓, row3: max=9 >= 5 ✓, row4: max=7 >= 5 ✓ -query I? -select column1, column2 from any_op_test where 5 <= any(column2) order by column1; ----- -2 [4, 5, 6] -3 [7, 8, 9] -4 [3, 5, 7] - -# 5 <> ANY(arr): true when array_min != 5 OR array_max != 5 -# row1: [1,2,3] min=1!=5 ✓, row2: [4,5,6] min=4!=5 ✓, row3: [7,8,9] min=7!=5 ✓, row4: [3,5,7] min=3!=5 ✓ -query I? -select column1, column2 from any_op_test where 5 <> any(column2) order by column1; ----- -1 [1, 2, 3] -2 [4, 5, 6] -3 [7, 8, 9] -4 [3, 5, 7] - -# For a single-element array where the element equals the needle, <> should return false -query B -select 5 <> any(make_array(5)); ----- -false - -# For a uniform array [5,5,5], <> should also return false -query B -select 5 <> any(make_array(5, 5, 5)); ----- -false - -# Empty array: all operators should return false (no elements satisfy the condition) -query B -select 5 = any(make_array()); ----- -false - -query B -select 5 <> any(make_array()); ----- -false - -query B -select 5 > any(make_array()); ----- -false - -query B -select 5 < any(make_array()); ----- -false - -query B -select 5 >= any(make_array()); ----- -false - -query B -select 5 <= any(make_array()); ----- -false - -# Mixed NULL + non-NULL array where no non-NULL element satisfies the condition -# These return false (NULLs are skipped by array_min/array_max) -query B -select 5 > any(make_array(6, NULL)); ----- -false - -query B -select 5 < any(make_array(3, NULL)); ----- -false - -query B -select 5 >= any(make_array(6, NULL)); ----- -false - -query B -select 5 <= any(make_array(3, NULL)); ----- -false - -# Mixed NULL + non-NULL array where a non-NULL element satisfies the condition -query B -select 5 > any(make_array(3, NULL)); ----- -true - -query B -select 5 < any(make_array(6, NULL)); ----- 
-true - -query B -select 5 >= any(make_array(5, NULL)); ----- -true - -query B -select 5 <= any(make_array(5, NULL)); ----- -true - -query B -select 5 <> any(make_array(3, NULL)); ----- -true - -query B -select 5 <> any(make_array(5, NULL)); ----- -false - -# All-NULL array: all operators should return false -query B -select 5 > any(make_array(NULL::INT, NULL::INT)); ----- -false - -query B -select 5 < any(make_array(NULL::INT, NULL::INT)); ----- -false - -query B -select 5 >= any(make_array(NULL::INT, NULL::INT)); ----- -false - -query B -select 5 <= any(make_array(NULL::INT, NULL::INT)); ----- -false - -query B -select 5 <> any(make_array(NULL::INT, NULL::INT)); ----- -false - -# NULL left operand: should return NULL for non-empty arrays -query B -select NULL > any(make_array(1, 2, 3)); ----- -NULL - -query B -select NULL < any(make_array(1, 2, 3)); ----- -NULL - -query B -select NULL >= any(make_array(1, 2, 3)); ----- -NULL - -query B -select NULL <= any(make_array(1, 2, 3)); ----- -NULL - -query B -select NULL <> any(make_array(1, 2, 3)); ----- -NULL - -# NULL left operand with empty array: should return false -query B -select NULL > any(make_array()); ----- -false - -# NULL array: should return NULL -query B -select 5 > any(NULL::INT[]); ----- -NULL - -query B -select 5 < any(NULL::INT[]); ----- -NULL - -query B -select 5 >= any(NULL::INT[]); ----- -NULL - -query B -select 5 <= any(NULL::INT[]); ----- -NULL - -query B -select 5 <> any(NULL::INT[]); ----- -NULL - -statement ok -DROP TABLE any_op_test; - -## array_distinct - -#TODO: https://github.com/apache/datafusion/issues/7142 -#query ? -#select array_distinct(null); -#---- -#NULL - -# test with empty row, the row that does not match the condition has row count 0 -statement ok -create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2); - -# rowsort is to ensure the order of group by is deterministic, array_sort has no effect here, since the sum() always returns single row. -query ? 
rowsort -select array_distinct([sum(a)]) from t1 where a > 100 group by b; ----- -[102] -[202] - -statement ok -drop table t1; - -query ? -select array_distinct(a) from values ([1, 2, 3]), (null), ([1, 3, 1]) as X(a); ----- -[1, 2, 3] -NULL -[1, 3] - -query ? -select array_distinct(arrow_cast(null, 'LargeList(Int64)')); ----- -NULL - -query ? -select array_distinct([]); ----- -[] - -query ? -select array_distinct([[], []]); ----- -[[]] - -query ? -select array_distinct(column1) -from array_distinct_table_1D; ----- -[1, 2, 3] -[1, 2, 3, 4, 5] -[3, 5] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_UTF8; ----- -[a, bc, def] -[a, bc, def, defg] -[defg] - -query ? -select array_distinct(column1) -from array_distinct_table_2D; ----- -[[1, 2], [3, 4], [5, 6]] -[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] -[[5, 6], NULL] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_large; ----- -[1, 2, 3] -[1, 2, 3, 4, 5] -[3, 5] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_fixed; ----- -[1, 2, 3] -[1, 2, 3, 4, 5] -[3, 5] - -query ? -select array_distinct(column1) -from array_distinct_table_1D_UTF8_fixed; ----- -[a, bc, def] -[a, bc, def, defg] -[defg] - -query ? -select array_distinct(column1) -from array_distinct_table_2D_fixed; ----- -[[1, 2], [3, 4], [5, 6]] -[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] -[[5, 6], NULL] - -## arrays_zip (aliases: `list_zip`) - -# Spark example: arrays_zip(array(1, 2, 3), array(2, 3, 4)) -query ? -select arrays_zip([1, 2, 3], [2, 3, 4]); ----- -[{1: 1, 2: 2}, {1: 2, 2: 3}, {1: 3, 2: 4}] - -# Spark example: arrays_zip(array(1, 2), array(2, 3), array(3, 4)) -query ? -select arrays_zip([1, 2], [2, 3], [3, 4]); ----- -[{1: 1, 2: 2, 3: 3}, {1: 2, 2: 3, 3: 4}] - -# basic: two integer arrays of equal length -query ? -select arrays_zip([1, 2, 3], [10, 20, 30]); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] - -# basic: two arrays with different element types (int + string) -query ? 
-select arrays_zip([1, 2, 3], ['a', 'b', 'c']); ----- -[{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}] - -# three arrays of equal length -query ? -select arrays_zip([1, 2, 3], [10, 20, 30], [100, 200, 300]); ----- -[{1: 1, 2: 10, 3: 100}, {1: 2, 2: 20, 3: 200}, {1: 3, 2: 30, 3: 300}] - -# four arrays of equal length -query ? -select arrays_zip([1], [2], [3], [4]); ----- -[{1: 1, 2: 2, 3: 3, 4: 4}] - -# mixed element types: float + boolean -query ? -select arrays_zip([1.5, 2.5], [true, false]); ----- -[{1: 1.5, 2: true}, {1: 2.5, 2: false}] - -# different length arrays: shorter array padded with NULLs -query ? -select arrays_zip([1, 2], [3, 4, 5]); ----- -[{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}] - -# different length arrays: first longer -query ? -select arrays_zip([1, 2, 3], [10]); ----- -[{1: 1, 2: 10}, {1: 2, 2: NULL}, {1: 3, 2: NULL}] - -# different length: one single element, other three elements -query ? -select arrays_zip([1], ['a', 'b', 'c']); ----- -[{1: 1, 2: a}, {1: NULL, 2: b}, {1: NULL, 2: c}] - -# empty arrays -query ? -select arrays_zip([], []); ----- -[] - -# one empty, one non-empty -query ? -select arrays_zip([], [1, 2, 3]); ----- -[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] - -# NULL elements inside arrays -query ? -select arrays_zip([1, NULL, 3], ['a', 'b', 'c']); ----- -[{1: 1, 2: a}, {1: NULL, 2: b}, {1: 3, 2: c}] - -# all NULL elements -query ? -select arrays_zip([NULL::int, NULL, NULL], [NULL::text, NULL, NULL]); ----- -[{1: NULL, 2: NULL}, {1: NULL, 2: NULL}, {1: NULL, 2: NULL}] - -# both args are NULL (entire list null) -query ? -select arrays_zip(NULL::int[], NULL::int[]); ----- -NULL - -# one arg is NULL list, other is real array -query ? -select arrays_zip(NULL::int[], [1, 2, 3]); ----- -[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}] - -# real array + NULL list -query ? -select arrays_zip([1, 2], NULL::text[]); ----- -[{1: 1, 2: NULL}, {1: 2, 2: NULL}] - -# column-level test with multiple rows -query ? 
-select arrays_zip(a, b) from (values ([1, 2], [10, 20]), ([3, 4, 5], [30]), ([6], [60, 70])) as t(a, b); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}] -[{1: 3, 2: 30}, {1: 4, 2: NULL}, {1: 5, 2: NULL}] -[{1: 6, 2: 60}, {1: NULL, 2: 70}] - -# column-level test with NULL rows -query ? -select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}] -[{1: NULL, 2: 30}, {1: NULL, 2: 40}] -[{1: 5, 2: NULL}, {1: 6, 2: NULL}] - -# column-level test with single argument -query ? -select arrays_zip(a) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); ----- -[{1: 1}, {1: 2}] -NULL -[{1: 5}, {1: 6}] - -query ? -select arrays_zip(b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); ----- -[{1: 10}, {1: 20}] -[{1: 30}, {1: 40}] -NULL - -# No input -query error Error during planning: 'arrays_zip' does not support zero arguments -select arrays_zip(); - -# Non-array input -query error DataFusion error: Execution error: arrays_zip expects array arguments, got Int64 -select arrays_zip(1, 2); - -# null input -query ? -select arrays_zip(null) ----- -NULL - -# single empty array -query ? -select arrays_zip([]) ----- -[] - - -# single array of null -query ? -select arrays_zip([null]) ----- -[{1: NULL}] - -query ? -select arrays_zip([NULL::int]) ----- -[{1: NULL}] - -query ? -select arrays_zip([NULL::int[]]) ----- -[{1: NULL}] - -# alias: list_zip -query ? -select list_zip([1, 2], [3, 4]); ----- -[{1: 1, 2: 3}, {1: 2, 2: 4}] - -# column test: total values equal (3 each) but per-row lengths differ -# a: [1] b: [10, 20] → row 0: a has 1, b has 2 -# a: [2, 3] b: [30] → row 1: a has 2, b has 1 -# total a values = 3, total b values = 3 (same!) but rows are misaligned -query ? -select arrays_zip(a, b) from (values ([1], [10, 20]), ([2, 3], [30])) as t(a, b); ----- -[{1: 1, 2: 10}, {1: NULL, 2: 20}] -[{1: 2, 2: 30}, {1: 3, 2: NULL}] - -# single element arrays -query ? 
-select arrays_zip([42], ['hello']); ----- -[{1: 42, 2: hello}] - -# single argument -query ? -select arrays_zip([1, 2, 3]); ----- -[{1: 1}, {1: 2}, {1: 3}] - -# arrays_zip with LargeList inputs -query ? -select arrays_zip( - arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), - arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] - -# arrays_zip with LargeList different lengths (padding) -query ? -select arrays_zip( - arrow_cast(make_array(1, 2), 'LargeList(Int64)'), - arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}] - -# single argument from LargeList -query ? -select arrays_zip(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')); ----- -[{1: 1}, {1: 2}, {1: 3}] - -# arrays_zip with FixedSizeList inputs -query ? -select arrays_zip( - arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), - arrow_cast(make_array(10, 20, 30), 'FixedSizeList(3, Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] - -# single argument from FixedSizeList -query ? -select arrays_zip(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')); ----- -[{1: 1}, {1: 2}, {1: 3}] - -# arrays_zip mixing List and LargeList -query ? -select arrays_zip( - [1, 2, 3], - arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] - -# arrays_zip mixing List and FixedSizeList with different lengths (padding) -query ? -select arrays_zip( - [1, 2, 3], - arrow_cast(make_array(10, 20), 'FixedSizeList(2, Int64)') -); ----- -[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: NULL}] - -# arrays_zip with LargeList and FixedSizeList mixed types -query ? -select arrays_zip( - arrow_cast(make_array(1, 2), 'LargeList(Int64)'), - arrow_cast(make_array('a', 'b'), 'FixedSizeList(2, Utf8)') -); ----- -[{1: 1, 2: a}, {1: 2, 2: b}] - -query ??? 
-select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from array_intersect_table_1D; ----- -[1] [1, 3] [1, 3] -[11] [11, 33] [11, 33] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from large_array_intersect_table_1D; ----- -[1] [1, 3] [1, 3] -[11] [11, 33] [11, 33] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from array_intersect_table_1D_Float; ----- -[1.0] [1.0, 3.0] [] -[] [2.0] [1.11] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from array_intersect_table_1D_Boolean; ----- -[] [true, false] [false] -[false] [true] [true] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from large_array_intersect_table_1D_Boolean; ----- -[] [true, false] [false] -[false] [true] [true] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from array_intersect_table_1D_UTF8; ----- -[bc] [rust, arrow] [] -[] [datafusion, rust, arrow] [rust, arrow] - -query ??? -select array_intersect(column1, column2), - array_intersect(column3, column4), - array_intersect(column5, column6) -from large_array_intersect_table_1D_UTF8; ----- -[bc] [rust, arrow] [] -[] [datafusion, rust, arrow] [rust, arrow] - -query ? -select array_intersect(column1, column2) -from array_intersect_table_1D_NULL; ----- -[2, 3] -[3] -[3] -NULL -NULL -NULL - -query ?? -select array_intersect(column1, column2), - array_intersect(column3, column4) -from array_intersect_table_2D; ----- -[] [[4, 5], [6, 7]] -[[3, 4]] [[5, 6, 7], [8, 9, 10]] - -query ?? 
-select array_intersect(column1, column2), - array_intersect(column3, column4) -from large_array_intersect_table_2D; ----- -[] [[4, 5], [6, 7]] -[[3, 4]] [[5, 6, 7], [8, 9, 10]] - - -query ? -select array_intersect(column1, column2) -from array_intersect_table_2D_float; ----- -[[1.1, 2.2], [3.3]] -[[1.1, 2.2], [3.3]] - -query ? -select array_intersect(column1, column2) -from large_array_intersect_table_2D_float; ----- -[[1.1, 2.2], [3.3]] -[[1.1, 2.2], [3.3]] - -query ? -select array_intersect(column1, column2) -from array_intersect_table_3D; ----- -[] -[[[1, 2]]] - -query ? -select array_intersect(column1, column2) -from large_array_intersect_table_3D; ----- -[] -[[[1, 2]]] - -query ?????? -SELECT array_intersect(make_array(1,2,3), make_array(2,3,4)), - array_intersect(make_array(1,3,5), make_array(2,4,6)), - array_intersect(make_array('aa','bb','cc'), make_array('cc','aa','dd')), - array_intersect(make_array(true, false), make_array(true)), - array_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)), - array_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], [3, 3], [4, 4])) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query ?????? 
-SELECT array_intersect(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(2,3,4), 'LargeList(Int64)')), - array_intersect(arrow_cast(make_array(1,3,5), 'LargeList(Int64)'), arrow_cast(make_array(2,4,6), 'LargeList(Int64)')), - array_intersect(arrow_cast(make_array('aa','bb','cc'), 'LargeList(Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'LargeList(Utf8)')), - array_intersect(arrow_cast(make_array(true, false), 'LargeList(Boolean)'), arrow_cast(make_array(true), 'LargeList(Boolean)')), - array_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'LargeList(Float64)')), - array_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'LargeList(List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'LargeList(List(Int64))')) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query ?????? -SELECT array_intersect(arrow_cast(make_array(1,2,3), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2,3,4), 'FixedSizeList(3, Int64)')), - array_intersect(arrow_cast(make_array(1,3,5), 'FixedSizeList(3, Int64)'), arrow_cast(make_array(2,4,6), 'FixedSizeList(3, Int64)')), - array_intersect(arrow_cast(make_array('aa','bb','cc'), 'FixedSizeList(3, Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'FixedSizeList(3, Utf8)')), - array_intersect(arrow_cast(make_array(true, false), 'FixedSizeList(2, Boolean)'), arrow_cast(make_array(true), 'FixedSizeList(1, Boolean)')), - array_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'FixedSizeList(3, Float64)')), - array_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'FixedSizeList(3, List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'FixedSizeList(3, List(Int64))')) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query ? -select array_intersect([], []); ----- -[] - -query ? 
-select array_intersect(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)')); ----- -[] - -query ? -select array_intersect([1, 1, 2, 2, 3, 3], null); ----- -NULL - -query ? -select array_intersect(arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)'), null); ----- -NULL - -query ? -select array_intersect(null, [1, 1, 2, 2, 3, 3]); ----- -NULL - -query ? -select array_intersect(null, arrow_cast([1, 1, 2, 2, 3, 3], 'LargeList(Int64)')); ----- -NULL - -query ? -select array_intersect([], null); ----- -NULL - -query ? -select array_intersect([[1,2,3]], [[]]); ----- -[] - -query ? -select array_intersect([[null]], [[]]); ----- -[] - -query ? -select array_intersect(arrow_cast([], 'LargeList(Int64)'), null); ----- -NULL - -query ? -select array_intersect(null, []); ----- -NULL - -query ? -select array_intersect(null, arrow_cast([], 'LargeList(Int64)')); ----- -NULL - -query ? -select array_intersect(null, null); ----- -NULL - -query ?????? -SELECT list_intersect(make_array(1,2,3), make_array(2,3,4)), - list_intersect(make_array(1,3,5), make_array(2,4,6)), - list_intersect(make_array('aa','bb','cc'), make_array('cc','aa','dd')), - list_intersect(make_array(true, false), make_array(true)), - list_intersect(make_array(1.1, 2.2, 3.3), make_array(2.2, 3.3, 4.4)), - list_intersect(make_array([1, 1], [2, 2], [3, 3]), make_array([2, 2], [3, 3], [4, 4])) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query ?????? 
-SELECT list_intersect(arrow_cast(make_array(1,2,3), 'LargeList(Int64)'), arrow_cast(make_array(2,3,4), 'LargeList(Int64)')), - list_intersect(arrow_cast(make_array(1,3,5), 'LargeList(Int64)'), arrow_cast(make_array(2,4,6), 'LargeList(Int64)')), - list_intersect(arrow_cast(make_array('aa','bb','cc'), 'LargeList(Utf8)'), arrow_cast(make_array('cc','aa','dd'), 'LargeList(Utf8)')), - list_intersect(arrow_cast(make_array(true, false), 'LargeList(Boolean)'), arrow_cast(make_array(true), 'LargeList(Boolean)')), - list_intersect(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), arrow_cast(make_array(2.2, 3.3, 4.4), 'LargeList(Float64)')), - list_intersect(arrow_cast(make_array([1, 1], [2, 2], [3, 3]), 'LargeList(List(Int64))'), arrow_cast(make_array([2, 2], [3, 3], [4, 4]), 'LargeList(List(Int64))')) -; ----- -[2, 3] [] [aa, cc] [true] [2.2, 3.3] [[2, 2], [3, 3]] - -query BBBB -select list_has_all(make_array(1,2,3), make_array(4,5,6)), - list_has_all(make_array(1,2,3), make_array(1,2)), - list_has_any(make_array(1,2,3), make_array(4,5,6)), - list_has_any(make_array(1,2,3), make_array(1,2,4)) -; ----- -false true false true - -query BBBB -select arrays_overlap(make_array(1,2,3), make_array(4,5,6)), - arrays_overlap(make_array(1,2,3), make_array(1,2,4)), - arrays_overlap(make_array(['aa']), make_array(['aa'],['bb'])), - arrays_overlap(make_array('aa',NULL), make_array('bb',NULL)) -; ----- -false true true true - -query ??? -select range(column2), - range(column1, column2), - range(column1, column2, column3) -from arrays_range; ----- -[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] [3, 4, 5, 6, 7, 8, 9] [3, 5, 7, 9] -[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 7, 10] - -query ??????????? 
-select range(5), - range(2, 5), - range(2, 10, 3), - range(10, 2, -3), - range(1, 5, -1), - range(1, -5, 1), - range(1, -5, -1), - range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH), - range(DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), - range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR), - range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) -; ----- -[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [10, 7, 4] [] [] [1, 0, -1, -2, -3, -4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02] [1989-04-01, 1990-04-01, 1991-04-01] [] - -# Ensure can coerce from other valid types -query ??????????? 
-select range(5), - range(2, 5), - range(2, 10, 3), - range(10, 2, -3), - range(arrow_cast(1, 'Int8'), 5, -1), - range(arrow_cast(1, 'Int16'), arrow_cast(-5, 'Int8'), 1), - range(arrow_cast(1, 'Int32'), arrow_cast(-5, 'Int16'), arrow_cast(-1, 'Int8')), - range(DATE '1992-09-01', DATE '1993-03-01', arrow_cast('1 MONTH', 'Interval(YearMonth)')), - range(DATE '1993-02-01', arrow_cast(DATE '1993-01-01', 'Date64'), INTERVAL '-1' DAY), - range(arrow_cast(DATE '1989-04-01', 'Date64'), DATE '1993-03-01', INTERVAL '1' YEAR), - range(arrow_cast(DATE '1993-03-01', 'Date64'), arrow_cast(DATE '1989-04-01', 'Date64'), INTERVAL '1' YEAR) -; ----- -[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [10, 7, 4] [] [] [1, 0, -1, -2, -3, -4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02] [1989-04-01, 1990-04-01, 1991-04-01] [] - -# Test range with zero step -query error DataFusion error: Execution error: step can't be 0 for function range\(start \[, stop, step\]\) -select range(1, 1, 0); - -# Test range with big steps -query ???? -select - range(-9223372036854775808, -9223372036854775808, -9223372036854775808) as c1, - range(9223372036854775807, 9223372036854775807, 9223372036854775807) as c2, - range(0, -9223372036854775808, -9223372036854775808) as c3, - range(0, 9223372036854775807, 9223372036854775807) as c4; ----- -[] [] [0] [0] - -# Test range for other edge cases -query ???????? 
-select - range(9223372036854775807, 9223372036854775807, -1) as c1, - range(9223372036854775807, 9223372036854775806, -1) as c2, - range(9223372036854775807, 9223372036854775807, 1) as c3, - range(9223372036854775806, 9223372036854775807, 1) as c4, - range(-9223372036854775808, -9223372036854775808, -1) as c5, - range(-9223372036854775807, -9223372036854775808, -1) as c6, - range(-9223372036854775808, -9223372036854775808, 1) as c7, - range(-9223372036854775808, -9223372036854775807, 1) as c8; ----- -[] [9223372036854775807] [] [9223372036854775806] [] [-9223372036854775807] [] [-9223372036854775808] - -# Test range(start, stop, step) with NULL values -query ? -select range(start, stop, step) from - (values (1), (NULL)) as start_values(start), - (values (10), (NULL)) as stop_values(stop), - (values (3), (NULL)) as step_values(step) -where start is null or stop is null or step is null ----- -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -# Test range(start, stop) with NULL values -query ? -select range(start, stop) from - (values (1), (NULL)) as start_values(start), - (values (10), (NULL)) as stop_values(stop) -where start is null or stop is null ----- -NULL -NULL -NULL - -# Test range(stop) with NULL value -query ? -select range(NULL) ----- -NULL - -## should return NULL -query ? -select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); ----- -NULL - -## should return NULL -query ? -select range(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); ----- -NULL - -query ? -select range(DATE '1992-09-01', DATE '1993-03-01', NULL); ----- -NULL - -query ? -select range(TIMESTAMP '1992-09-01', TIMESTAMP '1993-03-01', NULL); ----- -NULL - -query ? -select range(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(NULL::Date, DATE '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(DATE '1993-03-01', NULL::Date, INTERVAL '1' YEAR); ----- -NULL - -query ? 
-select generate_series(DATE '1993-02-01', DATE '1993-03-01', NULL::Interval); ----- -NULL - -query ? -select range(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select range(NULL, NULL, NULL); ----- -NULL - -query ? -select range(NULL::timestamp, NULL::timestamp, NULL); ----- -NULL - -query ? -select range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) ----- -[] - -query ? -select range(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) ----- -[] - -query ? -select range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) ----- -[] - -query ? -select range(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) ----- -[] - -query error DataFusion error: Execution error: Cannot generate date range less than 1 day\. -select range(DATE '1993-03-01', DATE '1993-03-01', INTERVAL '1' HOUR) - -query ? -select range(TIMESTAMP '1993-03-01', TIMESTAMP '1993-03-01', INTERVAL '1' HOUR) ----- -[] - -query ????????? -select generate_series(5), - generate_series(2, 5), - generate_series(2, 10, 3), - generate_series(1, 5, 1), - generate_series(5, 1, -1), - generate_series(10, 2, -3), - generate_series(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH), - generate_series(DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), - generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR) -; ----- -[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] 
- -query ? -select generate_series('2021-01-01'::timestamp, '2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR); ----- -[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] - -# Other timestamp types are coerced to nanosecond -query ? -select generate_series(arrow_cast('2021-01-01'::timestamp, 'Timestamp(s)'), '2021-01-01T15:00:00'::timestamp, INTERVAL '1' HOUR); ----- -[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] - -query ? -select generate_series('2021-01-01'::timestamp, arrow_cast('2021-01-01T15:00:00'::timestamp, 'Timestamp(µs)'), INTERVAL '1' HOUR); ----- -[2021-01-01T00:00:00, 2021-01-01T01:00:00, 2021-01-01T02:00:00, 2021-01-01T03:00:00, 2021-01-01T04:00:00, 2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00] - -query ? 
-select generate_series('2021-01-01T00:00:00EST'::timestamp, '2021-01-01T15:00:00-12:00'::timestamp, INTERVAL '1' HOUR); ----- -[2021-01-01T05:00:00, 2021-01-01T06:00:00, 2021-01-01T07:00:00, 2021-01-01T08:00:00, 2021-01-01T09:00:00, 2021-01-01T10:00:00, 2021-01-01T11:00:00, 2021-01-01T12:00:00, 2021-01-01T13:00:00, 2021-01-01T14:00:00, 2021-01-01T15:00:00, 2021-01-01T16:00:00, 2021-01-01T17:00:00, 2021-01-01T18:00:00, 2021-01-01T19:00:00, 2021-01-01T20:00:00, 2021-01-01T21:00:00, 2021-01-01T22:00:00, 2021-01-01T23:00:00, 2021-01-02T00:00:00, 2021-01-02T01:00:00, 2021-01-02T02:00:00, 2021-01-02T03:00:00] - -query ? -select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T15:00:00', 'Timestamp(Nanosecond, Some("+05:00"))'), INTERVAL '1' HOUR); ----- -[2021-01-01T00:00:00-05:00, 2021-01-01T01:00:00-05:00, 2021-01-01T02:00:00-05:00, 2021-01-01T03:00:00-05:00, 2021-01-01T04:00:00-05:00, 2021-01-01T05:00:00-05:00] - -## -5500000000 ns is -5.5 sec -query ? 
-select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), arrow_cast('2021-01-01T06:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), INTERVAL '1 HOUR 30 MINUTE -5500000000 NANOSECOND'); ----- -[2021-01-01T00:00:00-05:00, 2021-01-01T01:29:54.500-05:00, 2021-01-01T02:59:49-05:00, 2021-01-01T04:29:43.500-05:00, 2021-01-01T05:59:38-05:00] - -## mixing types for timestamps is not supported -query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature -select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), DATE '2021-01-02', INTERVAL '1' HOUR); - -## mixing types not allowed even if an argument is null -query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature -select generate_series(TIMESTAMP '1992-09-01', DATE '1993-03-01', NULL); - -query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature -select generate_series(1, '2024-01-01', '2025-01-02'); - -query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature -select generate_series('2024-01-01'::timestamp, '2025-01-02', interval '1 day'); - -## should return NULL -query ? -select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR); ----- -NULL - -## should return NULL -query ? -select generate_series(TIMESTAMP '1992-09-01', NULL, INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(DATE '1992-09-01', DATE '1993-03-01', NULL); ----- -NULL - -query ? -select generate_series(NULL, DATE '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(NULL::Date, DATE '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(DATE '1993-03-01', NULL::Date, INTERVAL '1' YEAR); ----- -NULL - -query ? 
-select generate_series(DATE '1993-02-01', DATE '1993-03-01', NULL::Interval); ----- -NULL - -query ? -select generate_series(NULL, TIMESTAMP '1993-03-01', INTERVAL '1' YEAR); ----- -NULL - -query ? -select generate_series(NULL, NULL, NULL); ----- -NULL - -query ? -select generate_series(NULL::timestamp, NULL::timestamp, NULL); ----- -NULL - -query ? -select generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR) ----- -[] - -query ? -select generate_series(TIMESTAMP '1989-04-01', TIMESTAMP '1993-03-01', INTERVAL '-1' YEAR) ----- -[] - -query ? -select generate_series(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR) ----- -[] - -query ? -select generate_series(TIMESTAMP '1993-03-01', TIMESTAMP '1989-04-01', INTERVAL '1' YEAR) ----- -[] - -query error DataFusion error: Execution error: Cannot generate date range less than 1 day. -select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '1' HOUR) - -query error DataFusion error: Execution error: Cannot generate date range less than 1 day. -select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '-1' HOUR) - -query ? -select generate_series(TIMESTAMP '2000-01-01', TIMESTAMP '2000-01-02', INTERVAL '1' HOUR) ----- -[2000-01-01T00:00:00, 2000-01-01T01:00:00, 2000-01-01T02:00:00, 2000-01-01T03:00:00, 2000-01-01T04:00:00, 2000-01-01T05:00:00, 2000-01-01T06:00:00, 2000-01-01T07:00:00, 2000-01-01T08:00:00, 2000-01-01T09:00:00, 2000-01-01T10:00:00, 2000-01-01T11:00:00, 2000-01-01T12:00:00, 2000-01-01T13:00:00, 2000-01-01T14:00:00, 2000-01-01T15:00:00, 2000-01-01T16:00:00, 2000-01-01T17:00:00, 2000-01-01T18:00:00, 2000-01-01T19:00:00, 2000-01-01T20:00:00, 2000-01-01T21:00:00, 2000-01-01T22:00:00, 2000-01-01T23:00:00, 2000-01-02T00:00:00] - -query ? 
-select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '-1' HOUR) ----- -[2000-01-02T00:00:00, 2000-01-01T23:00:00, 2000-01-01T22:00:00, 2000-01-01T21:00:00, 2000-01-01T20:00:00, 2000-01-01T19:00:00, 2000-01-01T18:00:00, 2000-01-01T17:00:00, 2000-01-01T16:00:00, 2000-01-01T15:00:00, 2000-01-01T14:00:00, 2000-01-01T13:00:00, 2000-01-01T12:00:00, 2000-01-01T11:00:00, 2000-01-01T10:00:00, 2000-01-01T09:00:00, 2000-01-01T08:00:00, 2000-01-01T07:00:00, 2000-01-01T06:00:00, 2000-01-01T05:00:00, 2000-01-01T04:00:00, 2000-01-01T03:00:00, 2000-01-01T02:00:00, 2000-01-01T01:00:00, 2000-01-01T00:00:00] - -# Test generate_series with small intervals -query ? -select generate_series('2000-01-01T00:00:00.000000001Z'::timestamp, '2000-01-01T00:00:00.00000001Z'::timestamp, INTERVAL '1' NANOSECONDS) ----- -[2000-01-01T00:00:00.000000001, 2000-01-01T00:00:00.000000002, 2000-01-01T00:00:00.000000003, 2000-01-01T00:00:00.000000004, 2000-01-01T00:00:00.000000005, 2000-01-01T00:00:00.000000006, 2000-01-01T00:00:00.000000007, 2000-01-01T00:00:00.000000008, 2000-01-01T00:00:00.000000009, 2000-01-01T00:00:00.000000010] - -# Test generate_series with zero step -query error DataFusion error: Execution error: step can't be 0 for function generate_series\(start \[, stop, step\]\) -select generate_series(1, 1, 0); - -# Test generate_series with zero step -query error DataFusion error: Execution error: Interval argument to generate_series must not be 0 -select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '0' MINUTE); - -# Test generate_series with big steps -query ???? 
-select - generate_series(-9223372036854775808, -9223372036854775808, -9223372036854775808) as c1, - generate_series(9223372036854775807, 9223372036854775807, 9223372036854775807) as c2, - generate_series(0, -9223372036854775808, -9223372036854775808) as c3, - generate_series(0, 9223372036854775807, 9223372036854775807) as c4; ----- -[-9223372036854775808] [9223372036854775807] [0, -9223372036854775808] [0, 9223372036854775807] - - -# Test generate_series for other edge cases -query ???? -select - generate_series(9223372036854775807, 9223372036854775807, -1) as c1, - generate_series(9223372036854775807, 9223372036854775807, 1) as c2, - generate_series(-9223372036854775808, -9223372036854775808, -1) as c3, - generate_series(-9223372036854775808, -9223372036854775808, 1) as c4; ----- -[9223372036854775807] [9223372036854775807] [-9223372036854775808] [-9223372036854775808] - -# Test generate_series(start, stop, step) with NULL values -query ? -select generate_series(start, stop, step) from - (values (1), (NULL)) as start_values(start), - (values (10), (NULL)) as stop_values(stop), - (values (3), (NULL)) as step_values(step) -where start is null or stop is null or step is null ----- -NULL -NULL -NULL -NULL -NULL -NULL -NULL - -# Test generate_series(start, stop) with NULL values -query ? -select generate_series(start, stop) from - (values (1), (NULL)) as start_values(start), - (values (10), (NULL)) as stop_values(stop) -where start is null or stop is null ----- -NULL -NULL -NULL - -# Test generate_series(stop) with NULL value -query ? -select generate_series(NULL) ----- -NULL - -# Test generate_series with a table of date values -statement ok -CREATE TABLE date_table( - start DATE, - stop DATE, - step INTERVAL -) AS VALUES - (DATE '1992-01-01', DATE '1993-01-02', INTERVAL '1' MONTH), - (DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY), - (DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR); - -query ? 
-select generate_series(start, stop, step) from date_table; ----- -[1992-01-01, 1992-02-01, 1992-03-01, 1992-04-01, 1992-05-01, 1992-06-01, 1992-07-01, 1992-08-01, 1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01] -[1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] -[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] - -query ? -select generate_series(start, stop, INTERVAL '1 year') from date_table; ----- -[1992-01-01, 1993-01-01] -[] -[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] - -query ? -select generate_series(start, '1993-03-01'::date, INTERVAL '1 year') from date_table; ----- -[1992-01-01, 1993-01-01] -[1993-02-01] -[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01] - -# Test generate_series with a table of timestamp values -statement ok -CREATE TABLE timestamp_table( - start TIMESTAMP, - stop TIMESTAMP, - step INTERVAL -) AS VALUES - (TIMESTAMP '1992-01-01T00:00:00', TIMESTAMP '1993-01-02T00:00:00', INTERVAL '1' MONTH), - (TIMESTAMP '1993-02-01T00:00:00', TIMESTAMP '1993-01-01T00:00:00', INTERVAL '-1' DAY), - (TIMESTAMP '1989-04-01T00:00:00', TIMESTAMP '1993-03-01T00:00:00', INTERVAL '1' YEAR); - -query ? 
-select generate_series(start, stop, step) from timestamp_table; ----- -[1992-01-01T00:00:00, 1992-02-01T00:00:00, 1992-03-01T00:00:00, 1992-04-01T00:00:00, 1992-05-01T00:00:00, 1992-06-01T00:00:00, 1992-07-01T00:00:00, 1992-08-01T00:00:00, 1992-09-01T00:00:00, 1992-10-01T00:00:00, 1992-11-01T00:00:00, 1992-12-01T00:00:00, 1993-01-01T00:00:00] -[1993-02-01T00:00:00, 1993-01-31T00:00:00, 1993-01-30T00:00:00, 1993-01-29T00:00:00, 1993-01-28T00:00:00, 1993-01-27T00:00:00, 1993-01-26T00:00:00, 1993-01-25T00:00:00, 1993-01-24T00:00:00, 1993-01-23T00:00:00, 1993-01-22T00:00:00, 1993-01-21T00:00:00, 1993-01-20T00:00:00, 1993-01-19T00:00:00, 1993-01-18T00:00:00, 1993-01-17T00:00:00, 1993-01-16T00:00:00, 1993-01-15T00:00:00, 1993-01-14T00:00:00, 1993-01-13T00:00:00, 1993-01-12T00:00:00, 1993-01-11T00:00:00, 1993-01-10T00:00:00, 1993-01-09T00:00:00, 1993-01-08T00:00:00, 1993-01-07T00:00:00, 1993-01-06T00:00:00, 1993-01-05T00:00:00, 1993-01-04T00:00:00, 1993-01-03T00:00:00, 1993-01-02T00:00:00, 1993-01-01T00:00:00] -[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] - -query ? -select generate_series(start, stop, INTERVAL '1 year') from timestamp_table; ----- -[1992-01-01T00:00:00, 1993-01-01T00:00:00] -[] -[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] - -query ? -select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; ----- -[1992-01-01T00:00:00, 1993-01-01T00:00:00] -[1993-02-01T00:00:00] -[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] - -# https://github.com/apache/datafusion/issues/11922 -query ? 
-select generate_series(start, '1993-03-01T00:00:00'::timestamp, INTERVAL '1 year') from timestamp_table; ----- -[1992-01-01T00:00:00, 1993-01-01T00:00:00] -[1993-02-01T00:00:00] -[1989-04-01T00:00:00, 1990-04-01T00:00:00, 1991-04-01T00:00:00, 1992-04-01T00:00:00] - -## array_except - -statement ok -CREATE TABLE array_except_table -AS VALUES - ([1, 2, 2, 3], [2, 3, 4]), - ([2, 3, 3], [3]), - ([3], [3, 3, 4]), - (null, [3, 4]), - ([1, 2], null), - (null, null) -; - -query ? -select array_except(column1, column2) from array_except_table; ----- -[1] -[2] -[] -NULL -NULL -NULL - -statement ok -drop table array_except_table; - -statement ok -CREATE TABLE array_except_nested_list_table -AS VALUES - ([[1, 2], [3]], [[2], [3], [4, 5]]), - ([[1, 2], [3]], [[2], [1, 2]]), - ([[1, 2], [3]], null), - (null, [[1], [2, 3], [4, 5, 6]]), - ([[1], [2, 3], [4, 5, 6]], [[2, 3], [4, 5, 6], [1]]) -; - -query ? -select array_except(column1, column2) from array_except_nested_list_table; ----- -[[1, 2]] -[[3]] -NULL -NULL -[] - -statement ok -drop table array_except_nested_list_table; - -statement ok -CREATE TABLE array_except_table_float -AS VALUES - ([1.1, 2.2, 3.3], [2.2]), - ([1.1, 2.2, 3.3], [4.4]), - ([1.1, 2.2, 3.3], [3.3, 2.2, 1.1]) -; - -query ? -select array_except(column1, column2) from array_except_table_float; ----- -[1.1, 3.3] -[1.1, 2.2, 3.3] -[] - -statement ok -drop table array_except_table_float; - -statement ok -CREATE TABLE array_except_table_ut8 -AS VALUES - (['a', 'b', 'c'], ['a']), - (['a', 'bc', 'def'], ['g', 'def']), - (['a', 'bc', 'def'], null), - (null, ['a']) -; - -query ? -select array_except(column1, column2) from array_except_table_ut8; ----- -[b, c] -[a, bc] -NULL -NULL - -statement ok -drop table array_except_table_ut8; - -statement ok -CREATE TABLE array_except_table_bool -AS VALUES - ([true, false, false], [false]), - ([true, true, true], [false]), - ([false, false, false], [true]), - ([true, false], null), - (null, [true, false]) -; - -query ? 
-select array_except(column1, column2) from array_except_table_bool; ----- -[true] -[true] -[false] -NULL -NULL - -statement ok -drop table array_except_table_bool; - -query ? -select array_except([], null); ----- -NULL - -query ? -select array_except([], []); ----- -[] - -query ? -select array_except(null, []); ----- -NULL - -query ? -select array_except(null, null) ----- -NULL - -query ? -select array_except(arrow_cast([1, 2, 3, 4], 'LargeList(Int64)'), arrow_cast([5, 6, 3, 4], 'LargeList(Int64)')); ----- -[1, 2] - -query ? -select array_except(arrow_cast([1, 2, 3, 4], 'FixedSizeList(4, Int64)'), arrow_cast([5, 6, 3, 4], 'FixedSizeList(4, Int64)')); ----- -[1, 2] - -### Array operators tests - - -## array concatenate operator - -# array concatenate operator with scalars #1 (like array_concat scalar function) -query ?? -select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# array concatenate operator with scalars #2 (like array_append scalar function) -query ??? -select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o'; ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array concatenate operator with scalars #3 (like array_prepend scalar function) -query ??? -select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array concatenate operator with scalars #4 (mixed) -query ? -select 0 || [1,2,3] || 4 || [5] || [6,7]; ----- -[0, 1, 2, 3, 4, 5, 6, 7] - -# array concatenate operator with nd-list #5 (mixed) -query ? 
-select 0 || [1,2,3] || [[4,5]] || [[6,7,8]] || [9,10]; ----- -[[0, 1, 2, 3], [4, 5], [6, 7, 8], [9, 10]] - -# array concatenate operator non-valid cases -## concat 2D with scalar is not valid -query error -select 0 || [1,2,3] || [[4,5]] || [[6,7,8]] || [9,10] || 11; - -## concat scalar with 2D is not valid -query error -select 0 || [[1,2,3]]; - -# array concatenate operator with column - -statement ok -CREATE TABLE array_concat_operator_table -AS VALUES - (0, [1, 2, 2, 3], 4, [5, 6, 5]), - (-1, [4, 5, 6], 7, [8, 1, 1]) -; - -query ? -select column1 || column2 || column3 || column4 from array_concat_operator_table; ----- -[0, 1, 2, 2, 3, 4, 5, 6, 5] -[-1, 4, 5, 6, 7, 8, 1, 1] - -statement ok -drop table array_concat_operator_table; - -## array containment operator - -# array containment operator with scalars #1 (at arrow) -query BBBBBBB -select make_array(1,2,3) @> make_array(1,3), - make_array(1,2,3) @> make_array(1,4), - make_array([1,2], [3,4]) @> make_array([1,2]), - make_array([1,2], [3,4]) @> make_array([1,3]), - make_array([1,2], [3,4]) @> make_array([1,2], [3,4], [5,6]), - make_array([[1,2,3]]) @> make_array([[1]]), - make_array([[1,2,3]]) @> make_array([[1,2,3]]); ----- -true false true false false false true - -# Make sure it is rewritten to function array_has_all() -query TT -explain select [1,2,3] @> [1,3]; ----- -logical_plan -01)Projection: Boolean(true) AS array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3))) -02)--EmptyRelation: rows=1 -physical_plan -01)ProjectionExec: expr=[true as array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3)))] -02)--PlaceholderRowExec - -# array containment operator with scalars #2 (arrow at) -query BBBBBBB -select make_array(1,3) <@ make_array(1,2,3), - make_array(1,4) <@ make_array(1,2,3), - make_array([1,2]) <@ make_array([1,2], [3,4]), - make_array([1,3]) <@ make_array([1,2], [3,4]), - make_array([1,2], [3,4], [5,6]) <@ make_array([1,2], [3,4]), - 
make_array([[1]]) <@ make_array([[1,2,3]]), - make_array([[1,2,3]]) <@ make_array([[1,2,3]]); ----- -true false true false false false true - -# Make sure it is rewritten to function array_has_all() -query TT -explain select [1,3] <@ [1,2,3]; ----- -logical_plan -01)Projection: Boolean(true) AS array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3))) -02)--EmptyRelation: rows=1 -physical_plan -01)ProjectionExec: expr=[true as array_has_all(make_array(Int64(1),Int64(2),Int64(3)),make_array(Int64(1),Int64(3)))] -02)--PlaceholderRowExec - -### Array casting tests - - -## make_array - -# make_array scalar function #1 -query ? -select make_array(1, 2.0) ----- -[1.0, 2.0] - -# make_array scalar function #2 -query ? -select make_array(null, 1.0) ----- -[NULL, 1.0] - -# make_array scalar function #3 -query ? -select make_array(1, 2.0, null, 3) ----- -[1.0, 2.0, NULL, 3.0] - -# make_array scalar function #4 -query ? -select make_array(1.0, '2', null) ----- -[1.0, 2.0, NULL] - -### FixedSizeListArray - -statement ok -CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION '../core/tests/data/fixed_size_list_array.parquet'; - -query T -select arrow_typeof(f0) from fixed_size_list_array; ----- -FixedSizeList(2 x Int64) -FixedSizeList(2 x Int64) - -query ? -select * from fixed_size_list_array; ----- -[1, 2] -[3, 4] - -query ? -select f0 from fixed_size_list_array; ----- -[1, 2] -[3, 4] - -query ? -select arrow_cast(f0, 'List(Int64)') from fixed_size_list_array; ----- -[1, 2] -[3, 4] - -query ? -select make_array(arrow_cast(f0, 'List(Int64)')) from fixed_size_list_array ----- -[[1, 2]] -[[3, 4]] - -query T -select arrow_typeof(make_array(arrow_cast(f0, 'List(Int64)'))) from fixed_size_list_array ----- -List(List(Int64)) -List(List(Int64)) - -query ? 
-select make_array(f0) from fixed_size_list_array ----- -[[1, 2]] -[[3, 4]] - -query T -select arrow_typeof(make_array(f0)) from fixed_size_list_array ----- -List(FixedSizeList(2 x Int64)) -List(FixedSizeList(2 x Int64)) - -query ? -select array_concat(column1, [7]) from arrays_values_v2; ----- -[NULL, 2, 3, 7] -[7] -[9, NULL, 10, 7] -[NULL, 1, 7] -[11, 12, 7] -[7] - -# flatten - -query ? -select flatten(NULL); ----- -NULL - -# flatten with scalar values #1 -query ??? -select flatten(make_array(1, 2, 1, 3, 2)), - flatten(make_array([1], [2, 3], [null], make_array(4, null, 5))), - flatten(make_array([[1.1]], [[2.2]], [[3.3], [4.4]])); ----- -[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] - -query ??? -select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'LargeList(Int64)')), - flatten(arrow_cast(make_array([1], null, [2, 3], [null], make_array(4, null, 5)), 'LargeList(LargeList(Int64))')), - flatten(arrow_cast(make_array([[1.1]], [[2.2]], [[3.3], [4.4]]), 'LargeList(LargeList(LargeList(Float64)))')); ----- -[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] - -query ??? 
-select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5, Int64)')), - flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, List(Int64))')), - flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'FixedSizeList(2, List(List(Float64)))')); ----- -[1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] - -query ??TT -select flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))')), - flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))')), - arrow_typeof(flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))'))), - arrow_typeof(flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))'))); ----- -[1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] LargeList(Int64) LargeList(FixedSizeList(1 x Float64)) - -# flatten with column values -query ???? -select flatten(column1), - flatten(column2), - flatten(column3), - flatten(column4) -from flatten_table; ----- -[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] -[1, 2, 3, 4, 5, 6] [[8]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - -query ???? -select flatten(column1), - flatten(column2), - flatten(column3), - flatten(column4) -from large_flatten_table; ----- -[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] -[1, 2, 3, 4, 5, 6] [[8]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - -query ???? -select flatten(column1), - flatten(column2), - flatten(column3), - flatten(column4) -from fixed_size_flatten_table; ----- -[1, 2, 3] [[1, 2, 3], [4, 5], [6]] [[[1]], [[2, 3]]] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] -[1, 2, 3, 4, 5, 6] [[8], [9, 10], [11, 12, 13]] [[[1, 2]], [[3]]] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] - -# flatten with different inner list type -query ?????? 
-select flatten(arrow_cast(make_array([1, 2], [3, 4]), 'List(FixedSizeList(2, Int64))')), - flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'List(FixedSizeList(1, List(Int64)))')), - flatten(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), - flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'LargeList(List(List(Int64)))')), - flatten(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(FixedSizeList(2, Int64))')), - flatten(arrow_cast(make_array([[1, 2]], [[3, 4]]), 'LargeList(FixedSizeList(1, List(Int64)))')) ----- -[1, 2, 3, 4] [[1, 2], [3, 4]] [1, 2, 3, 4] [[1, 2], [3, 4]] [1, 2, 3, 4] [[1, 2], [3, 4]] - -## empty (aliases: `array_empty`, `list_empty`) -# empty scalar function #1 -query B -select empty(make_array(1)); ----- -false - -query B -select empty(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -false - -query B -select empty(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); ----- -false - -# empty scalar function #2 -query B -select empty(make_array()); ----- -true - -query B -select empty(arrow_cast(make_array(), 'LargeList(Int64)')); ----- -true - -#TODO: https://github.com/apache/datafusion/issues/9158 -#query B -#select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)')); -#---- -#true - -# empty scalar function #3 -query B -select empty(make_array(NULL)); ----- -false - -query B -select empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); ----- -false - -query B -select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)')); ----- -false - -#TODO: https://github.com/apache/datafusion/issues/7142 -# empty scalar function #4 -#query B -#select empty(NULL); -#---- -#NULL - -# empty scalar function #5 -query B -select empty(column1) from arrays; ----- -false -false -false -false -NULL -false -false - -query B -select empty(arrow_cast(column1, 'LargeList(List(Int64))')) from arrays; ----- -false -false -false -false -NULL -false -false - -query B -select empty(column1) from fixed_size_arrays; ----- -false 
-false -false -false -NULL -false -false - -## array_empty (aliases: `empty`, `list_empty`) -# array_empty scalar function #1 -query B -select array_empty(make_array(1)); ----- -false - -query B -select array_empty(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -false - -# array_empty scalar function #2 -query B -select array_empty(make_array()); ----- -true - -query B -select array_empty(arrow_cast(make_array(), 'LargeList(Int64)')); ----- -true - -# array_empty scalar function #3 -query B -select array_empty(make_array(NULL)); ----- -false - -query B -select array_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); ----- -false - -## list_empty (aliases: `empty`, `array_empty`) -# list_empty scalar function #1 -query B -select list_empty(make_array(1)); ----- -false - -query B -select list_empty(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -false - -# list_empty scalar function #2 -query B -select list_empty(make_array()); ----- -true - -query B -select list_empty(arrow_cast(make_array(), 'LargeList(Int64)')); ----- -true - -# list_empty scalar function #3 -query B -select list_empty(make_array(NULL)); ----- -false - -query B -select list_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)')); ----- -false - -# string_to_array scalar function -query ? -SELECT string_to_array('abcxxxdef', 'xxx') ----- -[abc, def] - -query I -SELECT cardinality(string_to_array('', ',')) ----- -0 - -query I -SELECT cardinality(string_to_array('', '')) ----- -0 - -query I -SELECT cardinality(string_to_array('', ',', 'x')) ----- -0 - -query I -SELECT cardinality(string_to_array('', '', 'x')) ----- -0 - -query ? -SELECT string_to_array('abc', '') ----- -[abc] - -query ? -SELECT string_to_array('abc', NULL) ----- -[a, b, c] - -query ? -SELECT string_to_array('abc def', ' ', 'def') ----- -[abc, NULL] - -query ? 
-select string_to_array(e, ',') from values; ----- -[Lorem] -[ipsum] -[dolor] -[sit] -[amet] -[, ] -[consectetur] -[adipiscing] -NULL - -# karge string tests for string_to_array - -# string_to_array scalar function -query ? -SELECT string_to_array(arrow_cast('abcxxxdef', 'LargeUtf8'), 'xxx') ----- -[abc, def] - -# string_to_array scalar function -query ? -SELECT string_to_array(arrow_cast('abcxxxdef', 'LargeUtf8'), arrow_cast('xxx', 'LargeUtf8')) ----- -[abc, def] - -query ? -SELECT string_to_array(arrow_cast('abc', 'LargeUtf8'), NULL) ----- -[a, b, c] - -query ? -select string_to_array(arrow_cast(e, 'LargeUtf8'), ',') from values; ----- -[Lorem] -[ipsum] -[dolor] -[sit] -[amet] -[, ] -[consectetur] -[adipiscing] -NULL - -query ? -select string_to_array(arrow_cast(e, 'LargeUtf8'), ',', arrow_cast('Lorem', 'LargeUtf8')) from values; ----- -[NULL] -[ipsum] -[dolor] -[sit] -[amet] -[, ] -[consectetur] -[adipiscing] -NULL - -# string view tests for string_to_array - -# string_to_array scalar function -query ? -SELECT string_to_array(arrow_cast('abcxxxdef', 'Utf8View'), 'xxx') ----- -[abc, def] - -query ? -SELECT string_to_array(arrow_cast('abc', 'Utf8View'), NULL) ----- -[a, b, c] - -query ? -select string_to_array(arrow_cast(e, 'Utf8View'), ',') from values; ----- -[Lorem] -[ipsum] -[dolor] -[sit] -[amet] -[, ] -[consectetur] -[adipiscing] -NULL - -# test string_to_array aliases - -query ? -select string_to_list(e, 'm') from values; ----- -[Lore, ] -[ipsu, ] -[dolor] -[sit] -[a, et] -[,] -[consectetur] -[adipiscing] -NULL - -# string_to_array: single-char delimiter producing multiple elements -query ? -SELECT string_to_array('a,b,c', ',') ----- -[a, b, c] - -# string_to_array: delimiter not found in input -query ? -SELECT string_to_array('abc', ',') ----- -[abc] - -# string_to_array: empty string input -query ? -SELECT string_to_array('', ',') ----- -[] - -# string_to_array: null_str matching multiple elements -query ? 
-SELECT string_to_array('a,NULL,b,NULL,c', ',', 'NULL') ----- -[a, NULL, b, NULL, c] - -# string_to_array: null_str matching all elements -query ? -SELECT string_to_array('x,x,x', ',', 'x') ----- -[NULL, NULL, NULL] - -# string_to_array: null_str with empty-string delimiter -query ? -SELECT string_to_array('abc', '', 'abc') ----- -[NULL] - -# string_to_array: NULL string input -query ? -SELECT string_to_array(NULL, ',') ----- -NULL - -# string_to_array: columnar delimiter -query ?? -SELECT string_to_array('a,b,c', col1), string_to_array('a::b::c', col2) - FROM (VALUES (',', '::')) AS t(col1, col2) ----- -[a, b, c] [a, b, c] - -# string_to_array: columnar null_str -query ? -SELECT string_to_array('a,NULL,b', ',', col1) - FROM (VALUES ('NULL')) AS t(col1) ----- -[a, NULL, b] - -# string_to_array: adjacent delimiters produce empty strings -query ? -SELECT string_to_array('a,,b', ',') ----- -[a, , b] - -# string_to_array: delimiter at start and end -query ? -SELECT string_to_array(',a,b,', ',') ----- -[, a, b, ] - -# array_resize scalar function #1 -query ? -select array_resize(make_array(1, 2, 3), 1); ----- -[1] - -query ? -select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 1); ----- -[1] - -query ? -select array_resize(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 1); ----- -[1] - -# array_resize scalar function #2 -query ? -select array_resize(make_array(1, 2, 3), 5); ----- -[1, 2, 3, NULL, NULL] - -query ? -select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5); ----- -[1, 2, 3, NULL, NULL] - -query ? -select array_resize(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 5); ----- -[1, 2, 3, NULL, NULL] - -# array_resize scalar function #3 -query ? -select array_resize(make_array(1, 2, 3), 5, 4); ----- -[1, 2, 3, 4, 4] - -query ? 
-select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5, 4); ----- -[1, 2, 3, 4, 4] - -# array_resize scalar function #4 -query error -select array_resize(make_array(1, 2, 3), -5, 2); - -# array_resize scalar function #5 -query ? -select array_resize(make_array(1.1, 2.2, 3.3), 10, 9.9); ----- -[1.1, 2.2, 3.3, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9] - -query ? -select array_resize(arrow_cast(make_array(1.1, 2.2, 3.3), 'LargeList(Float64)'), 10, 9.9); ----- -[1.1, 2.2, 3.3, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9] - -# array_resize scalar function #5 -query ? -select array_resize(column1, column2, column3) from arrays_values; ----- -[NULL] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] -NULL -[] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] - -query ? 
-select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from arrays_values; ----- -[NULL] -[11, 12, 13, 14, 15, 16, 17, 18, NULL, 20, 2, 2] -[21, 22, 23, NULL, 25, 26, 27, 28, 29, 30, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] -[31, 32, 33, 34, 35, NULL, 37, 38, 39, 40, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] -NULL -[] -[51, 52, NULL, 54, 55, 56, 57, 58, 59, 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] - -# array_resize scalar function #5 -query ? -select array_resize([[1], [2], [3]], 10, [5]); ----- -[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] - -query ? -select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [5]); ----- -[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] - -# array_resize null value -query ? -select array_resize(arrow_cast(NULL, 'List(Int8)'), 1); ----- -NULL - -statement ok -CREATE TABLE array_resize_values -AS VALUES - (make_array(1, NULL, 3, 4, 5, 6, 7, 8, 9, 10), 2, 1), - (make_array(11, 12, NULL, 14, 15, 16, 17, 18, 19, 20), 5, 2), - (make_array(21, 22, 23, 24, NULL, 26, 27, 28, 29, 30), 8, 3), - (make_array(31, 32, 33, 34, 35, 36, NULL, 38, 39, 40), 12, 4), - (NULL, 3, 0), - (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6), - (make_array(51, 52, 53, 54, 55, NULL, 57, 58, 59, 60), 13, NULL), - (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 15, 7) -; - -# array_resize columnar test #1 -query ? 
-select array_resize(column1, column2, column3) from array_resize_values; ----- -[1, NULL] -[11, 12, NULL, 14, 15] -[21, 22, 23, 24, NULL, 26, 27, 28] -[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] -NULL -[] -[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] - -# array_resize columnar test #2 -query ? -select array_resize(arrow_cast(column1, 'LargeList(Int64)'), column2, column3) from array_resize_values; ----- -[1, NULL] -[11, 12, NULL, 14, 15] -[21, 22, 23, 24, NULL, 26, 27, 28] -[31, 32, 33, 34, 35, 36, NULL, 38, 39, 40, 4, 4] -NULL -[] -[51, 52, 53, 54, 55, NULL, 57, 58, 59, 60, NULL, NULL, NULL] -[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 7, 7, 7, 7, 7] - -## array_reverse -query ?? -select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1)); ----- -[3, 2, 1] [1] - -query ?? -select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)')); ----- -[3, 2, 1] [1] - -query ???? -select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), - array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')), - array_reverse(arrow_cast(make_array(1, NULL, 3), 'FixedSizeList(3, Int64)')), - array_reverse(arrow_cast(make_array(NULL, NULL, NULL), 'FixedSizeList(3, Int64)')); ----- -[3, 2, 1] [1] [3, NULL, 1] [NULL, NULL, NULL] - -query ?? -select array_reverse(NULL), array_reverse([]); ----- -NULL [] - -query ?? 
-select array_reverse(column1), column1 from arrays_values; ----- -[10, 9, 8, 7, 6, 5, 4, 3, 2, NULL] [NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10] -[20, NULL, 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, NULL, 20] -[30, 29, 28, 27, 26, 25, NULL, 23, 22, 21] [21, 22, 23, NULL, 25, 26, 27, 28, 29, 30] -[40, 39, 38, 37, NULL, 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, NULL, 37, 38, 39, 40] -NULL NULL -[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] -[60, 59, 58, 57, 56, 55, 54, NULL, 52, 51] [51, 52, NULL, 54, 55, 56, 57, 58, 59, 60] -[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] - -statement ok -CREATE TABLE test_reverse_fixed_size AS VALUES - (arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)')), - (arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)')), - (arrow_cast([NULL, 8, 9], 'FixedSizeList(3, Int64)')), - (NULL); - -query ? -SELECT array_reverse(column1) FROM test_reverse_fixed_size; ----- -[3, 2, 1] -[6, 5, 4] -[9, 8, NULL] -NULL - -statement ok -DROP TABLE test_reverse_fixed_size; - -# Test defining a table with array columns -statement ok -create table test_create_array_table( - a int[], - b text[], - -- two-dimensional array - c int[][], - d int -); - -query I -insert into test_create_array_table values - ([1, 2, 3], ['a', 'b', 'c'], [[4,6], [6,7,8]], 1); ----- -1 - -query ???I -select * from test_create_array_table; ----- -[1, 2, 3] [a, b, c] [[4, 6], [6, 7, 8]] 1 - -query T -select arrow_typeof(a) from test_create_array_table; ----- -List(Int32) - -query T -select arrow_typeof(c) from test_create_array_table; ----- -List(List(Int32)) - -# Test casting to array types -# issue: https://github.com/apache/datafusion/issues/9440 -query ??T -select [1,2,3]::int[], [['1']]::int[][], arrow_typeof([]::text[]); ----- -[1, 2, 3] [[1]] List(Utf8View) - -# test empty arrays return length -# issue: https://github.com/apache/datafusion/pull/12459 -statement ok -create table values_all_empty (a 
int[]) as values ([]), ([]); - -query B -select array_has(a, 1) from values_all_empty; ----- -false -false - -# Test create table with fixed sized array -statement ok -create table fixed_size_col_table (a int[3]) as values ([1,2,3]), ([4,5,6]); - -query T -select arrow_typeof(a) from fixed_size_col_table; ----- -FixedSizeList(3 x Int32) -FixedSizeList(3 x Int32) - -query ? rowsort -SELECT DISTINCT a FROM fixed_size_col_table ----- -[1, 2, 3] -[4, 5, 6] - -query ?I rowsort -SELECT a, count(*) FROM fixed_size_col_table GROUP BY a ----- -[1, 2, 3] 1 -[4, 5, 6] 1 - -statement error Cast error: Cannot cast to FixedSizeList\(3\): value at index 0 has length 2 -create table varying_fixed_size_col_table (a int[3]) as values ([1,2,3]), ([4,5]); - -# https://github.com/apache/datafusion/issues/16187 -# should be NULL in case of out of bounds for Null Type -query ? -select [named_struct('a', 1, 'b', null)][-2]; ----- -NULL - -statement ok -COPY (select [[true, false], [false, true]] a, [false, true] b union select [[null, null]], null) to 'test_files/scratch/array/array_has/single_file.parquet' stored as parquet; - -statement ok -CREATE EXTERNAL TABLE array_has STORED AS PARQUET location 'test_files/scratch/array/array_has/single_file.parquet'; - -query B -select array_contains(a, b) from array_has order by 1 nulls last; ----- -true -NULL - -# Expected output (once supported): -# ---- -# [5, 4, 3, 2, 1] -query error -select array_reverse(arrow_cast(make_array(1, 2, 3, 4, 5), 'ListView(Int64)')); - -### Delete tables - -statement ok -drop table values; - -statement ok -drop table values_without_nulls; - -statement ok -drop table nested_arrays; - -statement ok -drop table large_nested_arrays; - -statement ok -drop table fixed_size_nested_arrays; - -statement ok -drop table arrays; - -statement ok -drop table large_arrays; - -statement ok -drop table fixed_size_arrays; - -statement ok -drop table slices; - -statement ok -drop table fixed_slices; - -statement ok -drop table 
arrayspop; - -statement ok -drop table large_arrayspop; - -statement ok -drop table arrays_values; - -statement ok -drop table arrays_values_v2; - -statement ok -drop table large_arrays_values_v2; - -statement ok -drop table array_has_table_1D; - -statement ok -drop table array_has_table_1D_Float; - -statement ok -drop table array_has_table_1D_Boolean; - -statement ok -drop table array_has_table_1D_UTF8; - -statement ok -drop table array_has_table_2D; - -statement ok -drop table array_has_table_2D_float; - -statement ok -drop table array_has_table_3D; - -statement ok -drop table array_intersect_table_1D; - -statement ok -drop table large_array_intersect_table_1D; - -statement ok -drop table array_intersect_table_1D_Float; - -statement ok -drop table large_array_intersect_table_1D_Float; - -statement ok -drop table array_intersect_table_1D_Boolean; - -statement ok -drop table large_array_intersect_table_1D_Boolean; - -statement ok -drop table array_intersect_table_1D_UTF8; - -statement ok -drop table large_array_intersect_table_1D_UTF8; - -statement ok -drop table array_intersect_table_2D; - -statement ok -drop table large_array_intersect_table_2D; - -statement ok -drop table array_intersect_table_2D_float; - -statement ok -drop table large_array_intersect_table_2D_float; - -statement ok -drop table array_intersect_table_3D; - -statement ok -drop table large_array_intersect_table_3D; - -statement ok -drop table fixed_size_array_has_table_1D; - -statement ok -drop table fixed_size_array_has_table_1D_Float; - -statement ok -drop table fixed_size_array_has_table_1D_Boolean; - -statement ok -drop table fixed_size_array_has_table_1D_UTF8; - -statement ok -drop table fixed_size_array_has_table_2D; - -statement ok -drop table fixed_size_array_has_table_2D_float; - -statement ok -drop table fixed_size_array_has_table_3D; - -statement ok -drop table arrays_range; - -statement ok -drop table arrays_with_repeating_elements; - -statement ok -drop table 
large_arrays_with_repeating_elements; - -statement ok -drop table fixed_arrays_with_repeating_elements; - -statement ok -drop table nested_arrays_with_repeating_elements; - -statement ok -drop table large_nested_arrays_with_repeating_elements; - -statement ok -drop table fixed_size_nested_arrays_with_repeating_elements; - -statement ok -drop table flatten_table; - -statement ok -drop table large_flatten_table; - -statement ok -drop table fixed_size_flatten_table; - -statement ok -drop table arrays_values_without_nulls; - -statement ok -drop table large_arrays_values_without_nulls; - -statement ok -drop table fixed_size_arrays_values_without_nulls; - -statement ok -drop table test_create_array_table; - -statement ok -drop table values_all_empty; - -statement ok -drop table fixed_size_col_table; - -statement ok -drop table array_has; From dd908880148e63a6aed3fc95efd0f9d11a403712 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 10:57:58 +0200 Subject: [PATCH 62/83] Add table ref to ListingTableUrl --- datafusion/core/src/execution/context/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 0ec36e6152c42..27b5da6145feb 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1760,7 +1760,9 @@ impl SessionContext { provided_schema: Option, sql_definition: Option, ) -> Result<()> { - let table_path = ListingTableUrl::parse(table_path)?; + let table_ref = table_ref.into(); + let table_path = + ListingTableUrl::parse(table_path)?.with_table_ref(table_ref.clone()); let resolved_schema = match provided_schema { Some(s) => s, None => options.infer_schema(&self.state(), &table_path).await?, From 987ce88c582e061d179078efe331003358bafd9f Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 11:30:39 +0200 Subject: [PATCH 63/83] Add heapsize for table-scoped-path --- 
datafusion/common/src/heap_size.rs | 8 +++++++- .../execution/src/cache/file_statistics_cache.rs | 10 +++++----- .../execution/src/cache/list_files_cache.rs | 15 +++++++++++---- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index b744225681450..3049530550a07 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -16,7 +16,7 @@ // under the License. use crate::stats::Precision; -use crate::{ColumnStatistics, ScalarValue, Statistics}; +use crate::{ColumnStatistics, ScalarValue, Statistics, TableReference}; use arrow::array::{ Array, FixedSizeListArray, LargeListArray, LargeListViewArray, ListArray, ListViewArray, MapArray, StructArray, @@ -51,6 +51,12 @@ impl DFHeapSize for Statistics { } } +impl DFHeapSize for TableReference { + fn heap_size(&self) -> usize { + self.table().heap_size() + } +} + impl DFHeapSize for Precision { diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 1faeff4fa7a93..5a53fb4ca94e8 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -100,7 +100,7 @@ impl DefaultFileStatisticsCacheState { key: &TableScopedPath, value: CachedFileMetadata, ) -> Option { - let key_size = key.path.as_ref().heap_size(); + let key_size = key.heap_size(); let entry_size = value.heap_size(); if entry_size + key_size > self.memory_limit { @@ -111,11 +111,11 @@ impl DefaultFileStatisticsCacheState { let old_value = self.lru_queue.put(key.clone(), value); self.memory_used += entry_size; - self.memory_used += key.path.as_ref().heap_size(); + self.memory_used += key.heap_size(); if let Some(old_entry) = &old_value { self.memory_used -= old_entry.heap_size(); - self.memory_used -= key.path.as_ref().heap_size(); + self.memory_used -= key.heap_size(); } self.evict_entries(); @@ -125,7 +125,7 
@@ impl DefaultFileStatisticsCacheState { fn remove(&mut self, k: &TableScopedPath) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { - self.memory_used -= k.path.as_ref().heap_size(); + self.memory_used -= k.heap_size(); self.memory_used -= old_entry.heap_size(); Some(old_entry) } else { @@ -149,7 +149,7 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { - self.memory_used -= removed.0.path.as_ref().heap_size(); + self.memory_used -= removed.0.heap_size(); self.memory_used -= removed.1.heap_size(); } else { // cache is empty while memory_used > memory_limit, cannot happen diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index b1b8e6b500169..92aad904273d2 100644 --- a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -22,16 +22,17 @@ use std::{ time::Duration, }; -use datafusion_common::TableReference; -use datafusion_common::instant::Instant; -use object_store::{ObjectMeta, path::Path}; - use crate::cache::{ CacheAccessor, cache_manager::{CachedFileList, ListFilesCache}, lru_queue::LruQueue, }; +use datafusion_common::TableReference; +use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::instant::Instant; +use object_store::{ObjectMeta, path::Path}; + pub trait TimeProvider: Send + Sync + 'static { fn now(&self) -> Instant; } @@ -169,6 +170,12 @@ impl Default for DefaultListFilesCacheState { } } +impl DFHeapSize for TableScopedPath { + fn heap_size(&self) -> usize { + self.path.as_ref().heap_size() + self.table.heap_size() + } +} + impl DefaultListFilesCacheState { fn new(memory_limit: usize, ttl: Option) -> Self { Self { From f3c39a95f6a76a771585c31c1d034e9dd35c5230 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 11:47:34 +0200 Subject: [PATCH 64/83] Make list_entries 
table-scoped --- datafusion/execution/src/cache/cache_manager.rs | 4 +--- .../execution/src/cache/file_statistics_cache.rs | 8 +++----- datafusion/execution/src/cache/list_files_cache.rs | 12 +++++++++++- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 066876945f995..251faf7c2f8e1 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -105,7 +105,7 @@ pub trait FileStatisticsCache: fn update_cache_limit(&self, limit: usize); /// Retrieves the information about the entries currently cached. - fn list_entries(&self) -> HashMap; + fn list_entries(&self) -> HashMap; fn drop_table_entries(&self, table_ref: &Option) -> Result<()>; } @@ -137,8 +137,6 @@ pub struct FileStatisticsCacheEntry { pub statistics_size_bytes: usize, /// Whether ordering information is cached for this file. pub has_ordering: bool, - /// Reference to the table associated with this statistics entry. - pub table_reference: Option, } /// Cached file listing. 
diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 5a53fb4ca94e8..fd787f2dc7544 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -19,7 +19,6 @@ use crate::cache::cache_manager::{ CachedFileMetadata, FileStatisticsCache, FileStatisticsCacheEntry, }; use crate::cache::{CacheAccessor, TableScopedPath}; -use object_store::path::Path; use std::collections::HashMap; use std::sync::Mutex; @@ -221,13 +220,13 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { state.evict_entries(); } - fn list_entries(&self) -> HashMap { - let mut entries = HashMap::::new(); + fn list_entries(&self) -> HashMap { + let mut entries = HashMap::::new(); for entry in self.state.lock().unwrap().lru_queue.list_entries() { let path = entry.0.clone(); let cached = entry.1; entries.insert( - path.path, + path, FileStatisticsCacheEntry { object_meta: cached.meta.clone(), num_rows: cached.statistics.num_rows, @@ -235,7 +234,6 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { table_size_bytes: cached.statistics.total_byte_size, statistics_size_bytes: cached.statistics.heap_size(), has_ordering: cached.ordering.is_some(), - table_reference: path.table, }, ); } diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index 92aad904273d2..c173ec855320a 100644 --- a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -21,7 +21,7 @@ use std::{ sync::{Arc, Mutex}, time::Duration, }; - +use std::fmt::{Debug, Display, Formatter}; use crate::cache::{ CacheAccessor, cache_manager::{CachedFileList, ListFilesCache}, @@ -176,6 +176,16 @@ impl DFHeapSize for TableScopedPath { } } +impl Display for TableScopedPath { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if let Some(table) = &self.table { + 
write!(f, "({}, {})", self.path, table) + } else { + write!(f, "({})", self.path) + } + } +} + impl DefaultListFilesCacheState { fn new(memory_limit: usize, ttl: Option) -> Self { Self { From 630b44eec6e0b2d4064c3450523ee533ee34d790 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 11:57:54 +0200 Subject: [PATCH 65/83] fixup! Make list_entries table-scoped --- .../execution/src/cache/file_statistics_cache.rs | 11 +++++------ datafusion/execution/src/cache/list_files_cache.rs | 13 +++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index fd787f2dc7544..eebe8124c7cba 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -275,6 +275,7 @@ mod tests { use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use object_store::ObjectMeta; + use object_store::path::Path; use std::sync::Arc; fn create_test_meta(path: &str, size: u64) -> ObjectMeta { @@ -351,7 +352,7 @@ mod tests { table: None, }; - let entry = entries.get(&path_3.path).unwrap(); + let entry = entries.get(&path_3).unwrap(); assert_eq!(entry.object_meta.size, 2048); // Should be updated value } @@ -442,7 +443,7 @@ mod tests { // Verify list_entries shows has_ordering = true let entries = cache.list_entries(); assert_eq!(entries.len(), 1); - assert!(entries.get(&path.path).unwrap().has_ordering); + assert!(entries.get(&path).unwrap().has_ordering); } #[test] @@ -587,7 +588,7 @@ mod tests { entries, HashMap::from([ ( - Path::from("test1.parquet"), + path_1, FileStatisticsCacheEntry { object_meta: meta1, num_rows: Precision::Absent, @@ -595,11 +596,10 @@ mod tests { table_size_bytes: Precision::Absent, statistics_size_bytes: 304, has_ordering: false, - table_reference: None, } ), ( - 
Path::from("test2.parquet"), + path_2, FileStatisticsCacheEntry { object_meta: meta2, num_rows: Precision::Absent, @@ -607,7 +607,6 @@ mod tests { table_size_bytes: Precision::Absent, statistics_size_bytes: 304, has_ordering: true, - table_reference: None, } ), ]) diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index c173ec855320a..3cccf7e6421b5 100644 --- a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -15,18 +15,19 @@ // specific language governing permissions and limitations // under the License. +use crate::cache::{ + CacheAccessor, + cache_manager::{CachedFileList, ListFilesCache}, + lru_queue::LruQueue, +}; + +use std::fmt::{Debug, Display, Formatter}; use std::mem::size_of; use std::{ collections::HashMap, sync::{Arc, Mutex}, time::Duration, }; -use std::fmt::{Debug, Display, Formatter}; -use crate::cache::{ - CacheAccessor, - cache_manager::{CachedFileList, ListFilesCache}, - lru_queue::LruQueue, -}; use datafusion_common::TableReference; use datafusion_common::heap_size::DFHeapSize; From 4a52cb98c0f25f26251c354d13cd117a0e5e0865 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Thu, 23 Apr 2026 12:09:07 +0200 Subject: [PATCH 66/83] fixup! fixup! 
Make list_entries table-scoped --- datafusion/execution/src/cache/list_files_cache.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index 3cccf7e6421b5..e04bb9340b09b 100644 --- a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -180,9 +180,9 @@ impl DFHeapSize for TableScopedPath { impl Display for TableScopedPath { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { if let Some(table) = &self.table { - write!(f, "({}, {})", self.path, table) + write!(f, "{}, {}", self.path, table) } else { - write!(f, "({})", self.path) + write!(f, "{}", self.path) } } } From ddf135a7240bebae0270c98b35bc3e9a3588a2cb Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Sun, 26 Apr 2026 08:02:41 +0200 Subject: [PATCH 67/83] Improve heap size estimation for Arc --- datafusion/common/src/heap_size.rs | 331 ++++++++++-------- .../execution/src/cache/cache_manager.rs | 16 +- .../src/cache/file_statistics_cache.rs | 48 +-- .../execution/src/cache/list_files_cache.rs | 6 +- 4 files changed, 223 insertions(+), 178 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 3049530550a07..2855091c08e2a 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -27,6 +27,7 @@ use arrow::datatypes::{ }; use chrono::{DateTime, Utc}; use half::f16; +use hashbrown::HashSet; use std::collections::HashMap; use std::fmt::Debug; use std::sync::Arc; @@ -40,103 +41,110 @@ pub trait DFHeapSize { /// /// Note that the size of the type itself is not included in the result -- /// instead, that size is added by the caller (e.g. container). 
- fn heap_size(&self) -> usize; + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize; +} + +#[derive(Default)] +pub struct DFHeapSizeCtx { + seen: HashSet, } impl DFHeapSize for Statistics { - fn heap_size(&self) -> usize { - self.num_rows.heap_size() - + self.total_byte_size.heap_size() - + self.column_statistics.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.num_rows.heap_size(ctx) + + self.total_byte_size.heap_size(ctx) + + self.column_statistics.heap_size(ctx) } } impl DFHeapSize for TableReference { - fn heap_size(&self) -> usize { - self.table().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.table().heap_size(ctx) } } impl DFHeapSize for Precision { - fn heap_size(&self) -> usize { - self.get_value().map_or_else(|| 0, |v| v.heap_size()) + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.get_value().map_or_else(|| 0, |v| v.heap_size(ctx)) } } impl DFHeapSize for ColumnStatistics { - fn heap_size(&self) -> usize { - self.null_count.heap_size() - + self.max_value.heap_size() - + self.min_value.heap_size() - + self.sum_value.heap_size() - + self.distinct_count.heap_size() - + self.byte_size.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.null_count.heap_size(ctx) + + self.max_value.heap_size(ctx) + + self.min_value.heap_size(ctx) + + self.sum_value.heap_size(ctx) + + self.distinct_count.heap_size(ctx) + + self.byte_size.heap_size(ctx) } } impl DFHeapSize for ScalarValue { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { use crate::scalar::ScalarValue::*; match self { Null => 0, - Boolean(b) => b.heap_size(), - Float16(f) => f.heap_size(), - Float32(f) => f.heap_size(), - Float64(f) => f.heap_size(), - Decimal32(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - Decimal64(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - Decimal128(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - 
Decimal256(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - Int8(i) => i.heap_size(), - Int16(i) => i.heap_size(), - Int32(i) => i.heap_size(), - Int64(i) => i.heap_size(), - UInt8(u) => u.heap_size(), - UInt16(u) => u.heap_size(), - UInt32(u) => u.heap_size(), - UInt64(u) => u.heap_size(), - Utf8(u) => u.heap_size(), - Utf8View(u) => u.heap_size(), - LargeUtf8(l) => l.heap_size(), - Binary(b) => b.heap_size(), - BinaryView(b) => b.heap_size(), - FixedSizeBinary(a, b) => a.heap_size() + b.heap_size(), - LargeBinary(l) => l.heap_size(), - FixedSizeList(f) => f.heap_size(), - List(l) => l.heap_size(), - LargeList(l) => l.heap_size(), - Struct(s) => s.heap_size(), - Map(m) => m.heap_size(), - Date32(d) => d.heap_size(), - Date64(d) => d.heap_size(), - Time32Second(t) => t.heap_size(), - Time32Millisecond(t) => t.heap_size(), - Time64Microsecond(t) => t.heap_size(), - Time64Nanosecond(t) => t.heap_size(), - TimestampSecond(a, b) => a.heap_size() + b.heap_size(), - TimestampMillisecond(a, b) => a.heap_size() + b.heap_size(), - TimestampMicrosecond(a, b) => a.heap_size() + b.heap_size(), - TimestampNanosecond(a, b) => a.heap_size() + b.heap_size(), - IntervalYearMonth(i) => i.heap_size(), - IntervalDayTime(i) => i.heap_size(), - IntervalMonthDayNano(i) => i.heap_size(), - DurationSecond(d) => d.heap_size(), - DurationMillisecond(d) => d.heap_size(), - DurationMicrosecond(d) => d.heap_size(), - DurationNanosecond(d) => d.heap_size(), - Union(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - Dictionary(a, b) => a.heap_size() + b.heap_size(), - RunEndEncoded(a, b, c) => a.heap_size() + b.heap_size() + c.heap_size(), - ListView(a) => a.heap_size(), - LargeListView(a) => a.heap_size(), + Boolean(b) => b.heap_size(ctx), + Float16(f) => f.heap_size(ctx), + Float32(f) => f.heap_size(ctx), + Float64(f) => f.heap_size(ctx), + Decimal32(a, b, c) => a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx), + Decimal64(a, b, c) => a.heap_size(ctx) + 
b.heap_size(ctx) + c.heap_size(ctx), + Decimal128(a, b, c) => a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx), + Decimal256(a, b, c) => a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx), + Int8(i) => i.heap_size(ctx), + Int16(i) => i.heap_size(ctx), + Int32(i) => i.heap_size(ctx), + Int64(i) => i.heap_size(ctx), + UInt8(u) => u.heap_size(ctx), + UInt16(u) => u.heap_size(ctx), + UInt32(u) => u.heap_size(ctx), + UInt64(u) => u.heap_size(ctx), + Utf8(u) => u.heap_size(ctx), + Utf8View(u) => u.heap_size(ctx), + LargeUtf8(l) => l.heap_size(ctx), + Binary(b) => b.heap_size(ctx), + BinaryView(b) => b.heap_size(ctx), + FixedSizeBinary(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + LargeBinary(l) => l.heap_size(ctx), + FixedSizeList(f) => f.heap_size(ctx), + List(l) => l.heap_size(ctx), + LargeList(l) => l.heap_size(ctx), + Struct(s) => s.heap_size(ctx), + Map(m) => m.heap_size(ctx), + Date32(d) => d.heap_size(ctx), + Date64(d) => d.heap_size(ctx), + Time32Second(t) => t.heap_size(ctx), + Time32Millisecond(t) => t.heap_size(ctx), + Time64Microsecond(t) => t.heap_size(ctx), + Time64Nanosecond(t) => t.heap_size(ctx), + TimestampSecond(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + TimestampMillisecond(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + TimestampMicrosecond(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + TimestampNanosecond(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + IntervalYearMonth(i) => i.heap_size(ctx), + IntervalDayTime(i) => i.heap_size(ctx), + IntervalMonthDayNano(i) => i.heap_size(ctx), + DurationSecond(d) => d.heap_size(ctx), + DurationMillisecond(d) => d.heap_size(ctx), + DurationMicrosecond(d) => d.heap_size(ctx), + DurationNanosecond(d) => d.heap_size(ctx), + Union(a, b, c) => a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx), + Dictionary(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + RunEndEncoded(a, b, c) => { + a.heap_size(ctx) + b.heap_size(ctx) + c.heap_size(ctx) + } + ListView(a) => a.heap_size(ctx), + LargeListView(a) 
=> a.heap_size(ctx), } } } impl DFHeapSize for DataType { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { use DataType::*; match self { Null => 0, @@ -152,50 +160,50 @@ impl DFHeapSize for DataType { Float16 => 0, Float32 => 0, Float64 => 0, - Timestamp(t, s) => t.heap_size() + s.heap_size(), + Timestamp(t, s) => t.heap_size(ctx) + s.heap_size(ctx), Date32 => 0, Date64 => 0, - Time32(t) => t.heap_size(), - Time64(t) => t.heap_size(), - Duration(t) => t.heap_size(), - Interval(i) => i.heap_size(), + Time32(t) => t.heap_size(ctx), + Time64(t) => t.heap_size(ctx), + Duration(t) => t.heap_size(ctx), + Interval(i) => i.heap_size(ctx), Binary => 0, - FixedSizeBinary(i) => i.heap_size(), + FixedSizeBinary(i) => i.heap_size(ctx), LargeBinary => 0, BinaryView => 0, Utf8 => 0, LargeUtf8 => 0, Utf8View => 0, - List(v) => v.heap_size(), - ListView(v) => v.heap_size(), - FixedSizeList(f, i) => f.heap_size() + i.heap_size(), - LargeList(l) => l.heap_size(), - LargeListView(l) => l.heap_size(), - Struct(s) => s.heap_size(), - Union(u, m) => u.heap_size() + m.heap_size(), - Dictionary(a, b) => a.heap_size() + b.heap_size(), - Decimal32(p, s) => p.heap_size() + s.heap_size(), - Decimal64(p, s) => p.heap_size() + s.heap_size(), - Decimal128(p, s) => p.heap_size() + s.heap_size(), - Decimal256(p, s) => p.heap_size() + s.heap_size(), - Map(m, b) => m.heap_size() + b.heap_size(), - RunEndEncoded(a, b) => a.heap_size() + b.heap_size(), + List(v) => v.heap_size(ctx), + ListView(v) => v.heap_size(ctx), + FixedSizeList(f, i) => f.heap_size(ctx) + i.heap_size(ctx), + LargeList(l) => l.heap_size(ctx), + LargeListView(l) => l.heap_size(ctx), + Struct(s) => s.heap_size(ctx), + Union(u, m) => u.heap_size(ctx) + m.heap_size(ctx), + Dictionary(a, b) => a.heap_size(ctx) + b.heap_size(ctx), + Decimal32(p, s) => p.heap_size(ctx) + s.heap_size(ctx), + Decimal64(p, s) => p.heap_size(ctx) + s.heap_size(ctx), + Decimal128(p, s) => p.heap_size(ctx) + 
s.heap_size(ctx), + Decimal256(p, s) => p.heap_size(ctx) + s.heap_size(ctx), + Map(m, b) => m.heap_size(ctx) + b.heap_size(ctx), + RunEndEncoded(a, b) => a.heap_size(ctx) + b.heap_size(ctx), } } } impl DFHeapSize for Vec { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { let item_size = size_of::(); // account for the contents of the Vec (self.capacity() * item_size) + // add any heap allocations by contents - self.iter().map(|t| t.heap_size()).sum::() + self.iter().map(|t| t.heap_size(ctx)).sum::() } } impl DFHeapSize for HashMap { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { let capacity = self.capacity(); if capacity == 0 { return 0; @@ -231,86 +239,92 @@ impl DFHeapSize for HashMap { group_size + (buckets * (key_val_size + metadata_size)) - + self.keys().map(|k| k.heap_size()).sum::() - + self.values().map(|v| v.heap_size()).sum::() + + self.keys().map(|k| k.heap_size(ctx)).sum::() + + self.values().map(|v| v.heap_size(ctx)).sum::() } } impl DFHeapSize for Arc { - fn heap_size(&self) -> usize { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + let ptr = Arc::as_ptr(self) as usize; + + if !ctx.seen.insert(ptr) { + return 0; + } + // Arc stores weak and strong counts on the heap alongside an instance of T - 2 * size_of::() + self.as_ref().heap_size() + 2 * size_of::() + self.as_ref().heap_size(ctx) } } impl DFHeapSize for Arc { - fn heap_size(&self) -> usize { - 2 * size_of::() + size_of_val(self.as_ref()) + self.as_ref().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + 2 * size_of::() + size_of_val(self.as_ref()) + self.as_ref().heap_size(ctx) } } impl DFHeapSize for Fields { - fn heap_size(&self) -> usize { - self.into_iter().map(|f| f.heap_size()).sum::() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.into_iter().map(|f| f.heap_size(ctx)).sum::() } } impl DFHeapSize for StructArray { - fn heap_size(&self) -> usize { + fn 
heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for LargeListArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for LargeListViewArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for ListArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for ListViewArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for FixedSizeListArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for MapArray { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.get_array_memory_size() } } impl DFHeapSize for Arc { - fn heap_size(&self) -> usize { - 2 * size_of::() + self.as_ref().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + 2 * size_of::() + self.as_ref().heap_size(ctx) } } impl DFHeapSize for Box { - fn heap_size(&self) -> usize { - size_of::() + self.as_ref().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + size_of::() + self.as_ref().heap_size(ctx) } } impl DFHeapSize for Option { - fn heap_size(&self) -> usize { - self.as_ref().map(|inner| inner.heap_size()).unwrap_or(0) + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.as_ref().map(|inner| inner.heap_size(ctx)).unwrap_or(0) } } @@ -319,158 +333,181 @@ where A: DFHeapSize, B: DFHeapSize, { - fn heap_size(&self) -> usize { - self.0.heap_size() + self.1.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.0.heap_size(ctx) + self.1.heap_size(ctx) } } impl DFHeapSize for String { - fn 
heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.capacity() } } impl DFHeapSize for str { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { self.len() } } impl DFHeapSize for UnionFields { - fn heap_size(&self) -> usize { - self.iter().map(|f| f.0.heap_size() + f.1.heap_size()).sum() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.iter() + .map(|f| f.0.heap_size(ctx) + f.1.heap_size(ctx)) + .sum() } } impl DFHeapSize for UnionMode { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for TimeUnit { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for IntervalUnit { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for Field { - fn heap_size(&self) -> usize { - self.name().heap_size() - + self.data_type().heap_size() - + self.is_nullable().heap_size() - + self.dict_is_ordered().heap_size() - + self.metadata().heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.name().heap_size(ctx) + + self.data_type().heap_size(ctx) + + self.is_nullable().heap_size(ctx) + + self.dict_is_ordered().heap_size(ctx) + + self.metadata().heap_size(ctx) } } impl DFHeapSize for IntervalMonthDayNano { - fn heap_size(&self) -> usize { - self.days.heap_size() + self.months.heap_size() + self.nanoseconds.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.days.heap_size(ctx) + + self.months.heap_size(ctx) + + self.nanoseconds.heap_size(ctx) } } impl DFHeapSize for IntervalDayTime { - fn heap_size(&self) -> usize { - self.days.heap_size() + self.milliseconds.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.days.heap_size(ctx) + self.milliseconds.heap_size(ctx) } } impl 
DFHeapSize for DateTime { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for bool { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for u8 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for u16 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for u32 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for u64 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i8 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i16 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i32 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i64 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i128 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for i256 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for f16 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for f32 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no 
heap allocations } } impl DFHeapSize for f64 { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } impl DFHeapSize for usize { - fn heap_size(&self) -> usize { + fn heap_size(&self, _: &mut DFHeapSizeCtx) -> usize { 0 // no heap allocations } } + +#[test] +fn test_heap_size_arc_avoid_double_accounting() { + let a1 = Arc::new(vec![1, 2, 3]); + let mut ctx = DFHeapSizeCtx::default(); + let heap_size = a1.heap_size(&mut ctx); + + let a2 = Arc::clone(&a1); + let a3 = Arc::clone(&a1); + let a4 = Arc::clone(&a3); + + let mut ctx = DFHeapSizeCtx::default(); + let heap_size_with_clones = a1.heap_size(&mut ctx) + + a2.heap_size(&mut ctx) + + a3.heap_size(&mut ctx) + + a4.heap_size(&mut ctx); + + assert_eq!(heap_size, heap_size_with_clones); +} diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 251faf7c2f8e1..09861ddf6451e 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -24,7 +24,7 @@ use crate::cache::file_statistics_cache::{ use crate::cache::list_files_cache::ListFilesEntry; use crate::cache::list_files_cache::TableScopedPath; use datafusion_common::TableReference; -use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::heap_size::{DFHeapSize, DFHeapSizeCtx}; use datafusion_common::stats::Precision; use datafusion_common::{Result, Statistics}; use datafusion_physical_expr_common::sort_expr::LexOrdering; @@ -111,13 +111,13 @@ pub trait FileStatisticsCache: } impl DFHeapSize for CachedFileMetadata { - fn heap_size(&self) -> usize { - self.meta.size.heap_size() - + self.meta.last_modified.heap_size() - + self.meta.version.heap_size() - + self.meta.e_tag.heap_size() - + self.meta.location.as_ref().heap_size() - + self.statistics.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.meta.size.heap_size(ctx) + + 
self.meta.last_modified.heap_size(ctx) + + self.meta.version.heap_size(ctx) + + self.meta.e_tag.heap_size(ctx) + + self.meta.location.as_ref().heap_size(ctx) + + self.statistics.heap_size(ctx) //TODO add ordering once LexOrdering/PhysicalExpr implements DFHeapSize } } diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index eebe8124c7cba..fe9b0edccc9a2 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -25,7 +25,7 @@ use std::sync::Mutex; pub use crate::cache::DefaultFilesMetadataCache; use crate::cache::lru_queue::LruQueue; use datafusion_common::TableReference; -use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::heap_size::{DFHeapSize, DFHeapSizeCtx}; /// Default implementation of [`FileStatisticsCache`] /// @@ -99,8 +99,9 @@ impl DefaultFileStatisticsCacheState { key: &TableScopedPath, value: CachedFileMetadata, ) -> Option { - let key_size = key.heap_size(); - let entry_size = value.heap_size(); + let mut ctx = DFHeapSizeCtx::default(); + let key_size = key.heap_size(&mut ctx); + let entry_size = value.heap_size(&mut ctx); if entry_size + key_size > self.memory_limit { // Remove potential stale entry @@ -110,11 +111,11 @@ impl DefaultFileStatisticsCacheState { let old_value = self.lru_queue.put(key.clone(), value); self.memory_used += entry_size; - self.memory_used += key.heap_size(); + self.memory_used += key.heap_size(&mut ctx); if let Some(old_entry) = &old_value { - self.memory_used -= old_entry.heap_size(); - self.memory_used -= key.heap_size(); + self.memory_used -= old_entry.heap_size(&mut ctx); + self.memory_used -= key.heap_size(&mut ctx); } self.evict_entries(); @@ -124,8 +125,9 @@ impl DefaultFileStatisticsCacheState { fn remove(&mut self, k: &TableScopedPath) -> Option { if let Some(old_entry) = self.lru_queue.remove(k) { - self.memory_used -= k.heap_size(); - 
self.memory_used -= old_entry.heap_size(); + let mut ctx = DFHeapSizeCtx::default(); + self.memory_used -= k.heap_size(&mut ctx); + self.memory_used -= old_entry.heap_size(&mut ctx); Some(old_entry) } else { None @@ -148,8 +150,9 @@ impl DefaultFileStatisticsCacheState { fn evict_entries(&mut self) { while self.memory_used > self.memory_limit { if let Some(removed) = self.lru_queue.pop() { - self.memory_used -= removed.0.heap_size(); - self.memory_used -= removed.1.heap_size(); + let mut ctx = DFHeapSizeCtx::default(); + self.memory_used -= removed.0.heap_size(&mut ctx); + self.memory_used -= removed.1.heap_size(&mut ctx); } else { // cache is empty while memory_used > memory_limit, cannot happen log::error!( @@ -222,6 +225,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { fn list_entries(&self) -> HashMap { let mut entries = HashMap::::new(); + let mut ctx = DFHeapSizeCtx::default(); for entry in self.state.lock().unwrap().lru_queue.list_entries() { let path = entry.0.clone(); let cached = entry.1; @@ -232,7 +236,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache { num_rows: cached.statistics.num_rows, num_columns: cached.statistics.column_statistics.len(), table_size_bytes: cached.statistics.total_byte_size, - statistics_size_bytes: cached.statistics.heap_size(), + statistics_size_bytes: cached.statistics.heap_size(&mut ctx), has_ordering: cached.ordering.is_some(), }, ); @@ -269,6 +273,7 @@ mod tests { use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use chrono::DateTime; + use datafusion_common::heap_size::DFHeapSizeCtx; use datafusion_common::stats::Precision; use datafusion_common::{ColumnStatistics, ScalarValue, Statistics}; use datafusion_expr::ColumnarValue; @@ -619,10 +624,12 @@ mod tests { let (meta_2, value_2) = create_cached_file_metadata_with_stats("test2.parquet"); let (meta_3, value_3) = create_cached_file_metadata_with_stats("test3.parquet"); - let limit_for_2_entries 
= meta_1.location.as_ref().heap_size() - + value_1.heap_size() - + meta_2.location.as_ref().heap_size() - + value_2.heap_size(); + let mut ctx = DFHeapSizeCtx::default(); + + let limit_for_2_entries = meta_1.location.as_ref().heap_size(&mut ctx) + + value_1.heap_size(&mut ctx) + + meta_2.location.as_ref().heap_size(&mut ctx) + + value_2.heap_size(&mut ctx); // create a cache with a limit which fits exactly 2 entries let cache = DefaultFileStatisticsCache::new(limit_for_2_entries); @@ -672,11 +679,12 @@ mod tests { cache.put(&path_3, value_3.clone()); assert_eq!(cache.memory_used(), limit_for_2_entries); + let mut ctx = DFHeapSizeCtx::default(); cache.remove(&path_2); assert_eq!(cache.len(), 1); assert_eq!( cache.memory_used(), - meta_3.location.as_ref().heap_size() + value_3.heap_size() + meta_3.location.as_ref().heap_size(&mut ctx) + value_3.heap_size(&mut ctx) ); cache.clear(); @@ -687,8 +695,8 @@ mod tests { #[test] fn test_cache_rejects_entry_which_is_too_large() { let (meta, value) = create_cached_file_metadata_with_stats("test1.parquet"); - - let limit_less_than_the_entry = value.heap_size() - 1; + let mut ctx = DFHeapSizeCtx::default(); + let limit_less_than_the_entry = value.heap_size(&mut ctx) - 1; // create a cache with a size less than the entry let cache = DefaultFileStatisticsCache::new(limit_less_than_the_entry); @@ -727,8 +735,8 @@ mod tests { total_byte_size: Precision::Exact(100), column_statistics: vec![column_statistics.clone()], }; - - let object_meta = create_test_meta(file_name, stats.heap_size() as u64); + let mut ctx = DFHeapSizeCtx::default(); + let object_meta = create_test_meta(file_name, stats.heap_size(&mut ctx) as u64); let value = CachedFileMetadata::new(object_meta.clone(), Arc::new(stats.clone()), None); (object_meta, value) diff --git a/datafusion/execution/src/cache/list_files_cache.rs b/datafusion/execution/src/cache/list_files_cache.rs index e04bb9340b09b..a3cdf7c5e9110 100644 --- 
a/datafusion/execution/src/cache/list_files_cache.rs +++ b/datafusion/execution/src/cache/list_files_cache.rs @@ -30,7 +30,7 @@ use std::{ }; use datafusion_common::TableReference; -use datafusion_common::heap_size::DFHeapSize; +use datafusion_common::heap_size::{DFHeapSize, DFHeapSizeCtx}; use datafusion_common::instant::Instant; use object_store::{ObjectMeta, path::Path}; @@ -172,8 +172,8 @@ impl Default for DefaultListFilesCacheState { } impl DFHeapSize for TableScopedPath { - fn heap_size(&self) -> usize { - self.path.as_ref().heap_size() + self.table.heap_size() + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + self.path.as_ref().heap_size(ctx) + self.table.heap_size(ctx) } } From 0c6356efc6472b8b14ad11d43e65b43ea53bf2e7 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 27 Apr 2026 10:13:28 +0200 Subject: [PATCH 68/83] fixup! Improve heap size estimation for Arc --- datafusion/common/src/heap_size.rs | 80 ++++++++++++++----- .../src/cache/file_statistics_cache.rs | 1 + 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 2855091c08e2a..285241c3a6ba4 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -246,7 +246,20 @@ impl DFHeapSize for HashMap { impl DFHeapSize for Arc { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { - let ptr = Arc::as_ptr(self) as usize; + let ptr = Arc::as_ptr(self) as *const i32 as usize; + + if !ctx.seen.insert(ptr) { + return 0; + } + + // Arc stores weak and strong counts on the heap alongside an instance of T + 2 * size_of::() + self.as_ref().heap_size(ctx) + } +} + +impl DFHeapSize for Arc { + fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + let ptr = Arc::as_ptr(self) as *const i32 as usize; if !ctx.seen.insert(ptr) { return 0; @@ -259,6 +272,13 @@ impl DFHeapSize for Arc { impl DFHeapSize for Arc { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { + let ptr = 
Arc::as_ptr(self) as *const i32 as usize; + + if !ctx.seen.insert(ptr) { + return 0; + } + + // Arc stores weak and strong counts on the heap alongside an instance of T 2 * size_of::() + size_of_val(self.as_ref()) + self.as_ref().heap_size(ctx) } } @@ -310,12 +330,6 @@ impl DFHeapSize for MapArray { } } -impl DFHeapSize for Arc { - fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { - 2 * size_of::() + self.as_ref().heap_size(ctx) - } -} - impl DFHeapSize for Box { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { size_of::() + self.as_ref().heap_size(ctx) @@ -493,21 +507,45 @@ impl DFHeapSize for usize { } } -#[test] -fn test_heap_size_arc_avoid_double_accounting() { - let a1 = Arc::new(vec![1, 2, 3]); - let mut ctx = DFHeapSizeCtx::default(); - let heap_size = a1.heap_size(&mut ctx); +#[cfg(test)] +mod tests { + use super::*; - let a2 = Arc::clone(&a1); - let a3 = Arc::clone(&a1); - let a4 = Arc::clone(&a3); + #[test] + fn test_heap_size_arc_avoid_double_accounting() { + let a1 = Arc::new(vec![1, 2, 3]); + let mut ctx = DFHeapSizeCtx::default(); + let heap_size = a1.heap_size(&mut ctx); - let mut ctx = DFHeapSizeCtx::default(); - let heap_size_with_clones = a1.heap_size(&mut ctx) - + a2.heap_size(&mut ctx) - + a3.heap_size(&mut ctx) - + a4.heap_size(&mut ctx); + let a2 = Arc::clone(&a1); + let a3 = Arc::clone(&a1); + let a4 = Arc::clone(&a3); - assert_eq!(heap_size, heap_size_with_clones); + let mut ctx = DFHeapSizeCtx::default(); + let heap_size_with_clones = a1.heap_size(&mut ctx) + + a2.heap_size(&mut ctx) + + a3.heap_size(&mut ctx) + + a4.heap_size(&mut ctx); + + assert_eq!(heap_size, heap_size_with_clones); + } + + #[test] + fn test_heap_size_arc_str_avoid_double_accounting() { + let a1 = Arc::new("Hello".to_string()); + let mut ctx = DFHeapSizeCtx::default(); + let heap_size = a1.heap_size(&mut ctx); + + let a2 = Arc::clone(&a1); + let a3 = Arc::clone(&a1); + let a4 = Arc::clone(&a3); + + let mut ctx = DFHeapSizeCtx::default(); + let 
heap_size_with_clones = a1.heap_size(&mut ctx) + + a2.heap_size(&mut ctx) + + a3.heap_size(&mut ctx) + + a4.heap_size(&mut ctx); + + assert_eq!(heap_size, heap_size_with_clones); + } } diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index fe9b0edccc9a2..f7e675c9f395d 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -110,6 +110,7 @@ impl DefaultFileStatisticsCacheState { } let old_value = self.lru_queue.put(key.clone(), value); + let mut ctx = DFHeapSizeCtx::default(); self.memory_used += entry_size; self.memory_used += key.heap_size(&mut ctx); From 3995e4e5b1c845d639a26732b731f5a29df30450 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 27 Apr 2026 11:34:06 +0200 Subject: [PATCH 69/83] Update migration guide --- .../library-user-guide/upgrading/54.0.0.md | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index 34d1f7c61eaf1..411b1c8a72899 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -497,3 +497,42 @@ impl Default for MyTreeNode { } } ``` + +[20047]: https://github.com/apache/datafusion/pull/20047 + +### File statistics cache is now memory-limited and managed by the `CacheManager` + +The file statistics cache used by `ListingTable` is now memory-limited and +centrally managed through the `CacheManager`. + +To configure the cache size use the `file_statistics_cache_limit` setting: + +```sql +SET datafusion.runtime.file_statistics_cache_limit = '10MB' +``` + +To disable the file statistics cache, set the limit to 0. + +The file statistics cache is no longer created inside the `ListingTable`. +Instead, it is created within the `CacheManager` and must be passed to `ListingTable`. 
+ +**Who is affected:** + +- Users who want to limit the memory usage of the file statistics cache. +- Users who want to disable the file statistics. +- Users creating a `ListingTable` programmatically with a file statistics cache + +**Migration guide:** + +Disable the cache by setting the configuration value to 0: + +```sql +SET datafusion.runtime.file_statistics_cache_limit = '0k' +``` + +Use the file statistics cache provided by the CacheManager when initializing a new ListingTable: + +```rust,ignore +ListingTable::try_new(config)? + .with_cache(ctx.runtime_env().cache_manager.get_file_statistic_cache(),) +``` From 30da2c77b6ac4fb082fe376488f78cc7a0d7210a Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 27 Apr 2026 11:48:35 +0200 Subject: [PATCH 70/83] fixup! Update migration guide --- docs/source/library-user-guide/upgrading/54.0.0.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index 411b1c8a72899..466971cd40183 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -520,7 +520,7 @@ Instead, it is created within the `CacheManager` and must be passed to `ListingT - Users who want to limit the memory usage of the file statistics cache. - Users who want to disable the file statistics. -- Users creating a `ListingTable` programmatically with a file statistics cache +- Users creating a `ListingTable` programmatically with a file statistics cache. 
**Migration guide:** From e9c0ec9c0bdef90deebf6c8cdb2e9ceaa663aed2 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 29 Apr 2026 10:22:16 +0200 Subject: [PATCH 71/83] Improve heapsize estimation for TableReference --- datafusion/common/src/heap_size.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 285241c3a6ba4..a17e2cd713ccb 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -59,7 +59,17 @@ impl DFHeapSize for Statistics { impl DFHeapSize for TableReference { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { - self.table().heap_size(ctx) + match self { + TableReference::Bare { table } => table.heap_size(ctx), + TableReference::Partial { schema, table } => { + schema.heap_size(ctx) + table.heap_size(ctx) + } + TableReference::Full { + catalog, + schema, + table, + } => catalog.heap_size(ctx) + schema.heap_size(ctx) + table.heap_size(ctx), + } } } From 326698b7f07e07f90423fabd5a3a5f899f66fe8f Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 29 Apr 2026 10:44:50 +0200 Subject: [PATCH 72/83] Improve memory handling when inserting --- datafusion/execution/src/cache/file_statistics_cache.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index f7e675c9f395d..93d26ff340494 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -109,14 +109,14 @@ impl DefaultFileStatisticsCacheState { return None; } - let old_value = self.lru_queue.put(key.clone(), value); - let mut ctx = DFHeapSizeCtx::default(); self.memory_used += entry_size; - self.memory_used += key.heap_size(&mut ctx); + self.memory_used += key_size; + let old_value = self.lru_queue.put(key.clone(), value); if let Some(old_entry) = 
&old_value { + let mut ctx = DFHeapSizeCtx::default(); self.memory_used -= old_entry.heap_size(&mut ctx); - self.memory_used -= key.heap_size(&mut ctx); + self.memory_used -= key_size; } self.evict_entries(); From e5a10497a43f64b96f3a47eaab57cdf2dfcd7e2d Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 29 Apr 2026 10:59:41 +0200 Subject: [PATCH 73/83] Fix comments in Cache Manager --- datafusion/execution/src/cache/cache_manager.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 09861ddf6451e..b49040e9f4802 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -454,7 +454,7 @@ pub const DEFAULT_METADATA_CACHE_LIMIT: usize = 50 * 1024 * 1024; // 50M pub struct CacheManagerConfig { /// Enable caching of file statistics when listing files. /// Enabling the cache avoids repeatedly reading file statistics in a DataFusion session. - /// Default is enabled with 1MiB. Currently only Parquet files are supported. + /// Default is enabled. Currently only Parquet files are supported. pub file_statistics_cache: Option>, /// Limit of the file statistics cache, in bytes. Default: 20MiB. pub file_statistics_cache_limit: usize, @@ -464,7 +464,7 @@ pub struct CacheManagerConfig { /// are cached. /// Note that if this option is enabled, DataFusion will not see any updates to the underlying /// storage for at least `list_files_cache_ttl` duration. - /// Default is disabled. + /// Default is enabled. pub list_files_cache: Option>, /// Limit of the `list_files_cache`, in bytes. Default: 1MiB. 
pub list_files_cache_limit: usize, From bd9d05c24a71a8ec0ffc506ea3fd62e0c7cb27ab Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 29 Apr 2026 11:03:56 +0200 Subject: [PATCH 74/83] Improve upgrade guide --- docs/source/library-user-guide/upgrading/54.0.0.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index 466971cd40183..7d2e6bf05b828 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -514,13 +514,13 @@ SET datafusion.runtime.file_statistics_cache_limit = '10MB' To disable the file statistics cache, set the limit to 0. The file statistics cache is no longer created inside the `ListingTable`. -Instead, it is created within the `CacheManager` and must be passed to `ListingTable`. +Instead, it is created within the `CacheManager` and must be passed to the `ListingTable`. **Who is affected:** - Users who want to limit the memory usage of the file statistics cache. -- Users who want to disable the file statistics. -- Users creating a `ListingTable` programmatically with a file statistics cache. +- Users who want to disable the file statistics cache. +- Users who want to create a `ListingTable` programmatically with a file statistics cache. 
**Migration guide:** From 2ec39ff05892c0f69739e47fcd6d77777aa88499 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Wed, 29 Apr 2026 15:03:05 +0200 Subject: [PATCH 75/83] Fix upgrade guide --- docs/source/library-user-guide/upgrading/54.0.0.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index 7d2e6bf05b828..c2ed54110f0b8 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -530,9 +530,9 @@ Disable the cache by setting the configuration value to 0: SET datafusion.runtime.file_statistics_cache_limit = '0k' ``` -Use the file statistics cache provided by the CacheManager when initializing a new ListingTable: +Use the file statistics cache provided by the `CacheManager` when initializing a new `ListingTable`: ```rust,ignore ListingTable::try_new(config)? - .with_cache(ctx.runtime_env().cache_manager.get_file_statistic_cache(),) + .with_cache(ctx.runtime_env().cache_manager.get_file_statistic_cache()) ``` From c56eb5f0477725b2d0ec73dd768efb7cfdce6249 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Mon, 4 May 2026 16:48:02 +0200 Subject: [PATCH 76/83] Return stale entries from cache --- datafusion/execution/src/cache/file_statistics_cache.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index 93d26ff340494..e1edd0557878d 100644 --- a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -105,8 +105,7 @@ impl DefaultFileStatisticsCacheState { if entry_size + key_size > self.memory_limit { // Remove potential stale entry - self.remove(key); - return None; + return self.remove(key); } self.memory_used += entry_size; From ef64cdc4b72bf35cf620fa283b639c5c33b7b8ca Mon Sep 17 
00:00:00 2001 From: Michael Kleen Date: Mon, 4 May 2026 16:49:07 +0200 Subject: [PATCH 77/83] Fix upgrade guide --- docs/source/library-user-guide/upgrading/54.0.0.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index c2ed54110f0b8..f6891903a8332 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -508,7 +508,7 @@ centrally managed through the `CacheManager`. To configure the cache size use the `file_statistics_cache_limit` setting: ```sql -SET datafusion.runtime.file_statistics_cache_limit = '10MB' +SET datafusion.runtime.file_statistics_cache_limit = '10M' ``` To disable the file statistics cache, set the limit to 0. @@ -527,7 +527,7 @@ Instead, it is created within the `CacheManager` and must be passed to the `List Disable the cache by setting the configuration value to 0: ```sql -SET datafusion.runtime.file_statistics_cache_limit = '0k' +SET datafusion.runtime.file_statistics_cache_limit = '0K' ``` Use the file statistics cache provided by the `CacheManager` when initializing a new `ListingTable`: From 78575c156706e3045aa619ab33f60c02d7a03b76 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 5 May 2026 10:11:10 +0200 Subject: [PATCH 78/83] Fix Arc heapsize test --- datafusion/common/src/heap_size.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index a17e2cd713ccb..b50e557389f03 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -542,7 +542,7 @@ mod tests { #[test] fn test_heap_size_arc_str_avoid_double_accounting() { - let a1 = Arc::new("Hello".to_string()); + let a1: Arc = Arc::from("Hello"); let mut ctx = DFHeapSizeCtx::default(); let heap_size = a1.heap_size(&mut ctx); From cf7be58f505a0636cab1afb7670c43c4d0689ee5 Mon Sep 
17 00:00:00 2001 From: Michael Kleen Date: Tue, 5 May 2026 10:14:02 +0200 Subject: [PATCH 79/83] Remove const i32 cast from heapsize estimation --- datafusion/common/src/heap_size.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index b50e557389f03..36f998115bb22 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -256,7 +256,7 @@ impl DFHeapSize for HashMap { impl DFHeapSize for Arc { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { - let ptr = Arc::as_ptr(self) as *const i32 as usize; + let ptr = Arc::as_ptr(self) as usize; if !ctx.seen.insert(ptr) { return 0; @@ -282,7 +282,7 @@ impl DFHeapSize for Arc { impl DFHeapSize for Arc { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { - let ptr = Arc::as_ptr(self) as *const i32 as usize; + let ptr = Arc::as_ptr(self) as usize; if !ctx.seen.insert(ptr) { return 0; From fb25a1abe4440b57efbe0a7f605ad392493335a6 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 5 May 2026 12:30:15 +0200 Subject: [PATCH 80/83] Fix heapsize estimation for Arc --- datafusion/common/src/heap_size.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 36f998115bb22..7c246066a0232 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -263,7 +263,7 @@ impl DFHeapSize for Arc { } // Arc stores weak and strong counts on the heap alongside an instance of T - 2 * size_of::() + self.as_ref().heap_size(ctx) + 2 * size_of::() + size_of::() + self.as_ref().heap_size(ctx) } } From 7dabd910709e32df02817962a0dff0e4c67adac0 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 5 May 2026 12:33:43 +0200 Subject: [PATCH 81/83] Fix comment in cache_manager --- datafusion/execution/src/cache/cache_manager.rs | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index b49040e9f4802..08a8dc9fd9cda 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -496,8 +496,6 @@ impl Default for CacheManagerConfig { impl CacheManagerConfig { /// Set the cache for file statistics. - /// - /// Default is `None` (disabled). pub fn with_file_statistics_cache( mut self, cache: Option>, From 26ed54cbf5370ba676f1b744c4e0a9bb6d31e9f4 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 5 May 2026 13:35:02 +0200 Subject: [PATCH 82/83] Fix linter + clippy --- datafusion/common/src/heap_size.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/heap_size.rs b/datafusion/common/src/heap_size.rs index 7c246066a0232..edb64709d5aa4 100644 --- a/datafusion/common/src/heap_size.rs +++ b/datafusion/common/src/heap_size.rs @@ -282,7 +282,7 @@ impl DFHeapSize for Arc { impl DFHeapSize for Arc { fn heap_size(&self, ctx: &mut DFHeapSizeCtx) -> usize { - let ptr = Arc::as_ptr(self) as usize; + let ptr = Arc::as_ptr(self) as *const i32 as usize; if !ctx.seen.insert(ptr) { return 0; @@ -542,7 +542,7 @@ mod tests { #[test] fn test_heap_size_arc_str_avoid_double_accounting() { - let a1: Arc = Arc::from("Hello"); + let a1: Arc = Arc::from("Hello"); let mut ctx = DFHeapSizeCtx::default(); let heap_size = a1.heap_size(&mut ctx); From 457032af464283d3cfe7411a95a2f29b8de0e8b4 Mon Sep 17 00:00:00 2001 From: Michael Kleen Date: Tue, 5 May 2026 13:51:31 +0200 Subject: [PATCH 83/83] Adapt test according to heapsize estimation changes --- datafusion/execution/src/cache/file_statistics_cache.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/src/cache/file_statistics_cache.rs b/datafusion/execution/src/cache/file_statistics_cache.rs index e1edd0557878d..12f0bb1b8af88 100644 --- 
a/datafusion/execution/src/cache/file_statistics_cache.rs +++ b/datafusion/execution/src/cache/file_statistics_cache.rs @@ -599,7 +599,7 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 304, + statistics_size_bytes: 360, has_ordering: false, } ), @@ -610,7 +610,7 @@ mod tests { num_rows: Precision::Absent, num_columns: 1, table_size_bytes: Precision::Absent, - statistics_size_bytes: 304, + statistics_size_bytes: 360, has_ordering: true, } ),