diff --git a/vortex-cuda/cub/src/lib.rs b/vortex-cuda/cub/src/lib.rs index d7666a46f55..706373255aa 100644 --- a/vortex-cuda/cub/src/lib.rs +++ b/vortex-cuda/cub/src/lib.rs @@ -17,8 +17,6 @@ use std::path::PathBuf; use std::sync::OnceLock; -use vortex_cuda_macros::cuda_tests; - /// Raw FFI type definitions and dynamically-loaded function pointers from bindgen. #[allow( non_upper_case_globals, @@ -60,11 +58,11 @@ pub fn cub_library() -> Result<&'static sys::CubLibrary, CubError> { .map_err(|e| CubError::LibraryLoadError(e.clone())) } -#[cuda_tests] +#[cfg(test)] mod tests { use crate::filter; - #[test] + #[vortex_cuda_macros::test] fn test_filter_temp_size_u64() -> Result<(), crate::CubError> { let temp_bytes = filter::filter_get_temp_size_u64(1000)?; // CUB requires some temporary storage @@ -72,14 +70,14 @@ mod tests { Ok(()) } - #[test] + #[vortex_cuda_macros::test] fn test_filter_temp_size_f64() -> Result<(), crate::CubError> { let temp_bytes = filter::filter_get_temp_size_f64(10000)?; assert!(temp_bytes > 0); Ok(()) } - #[test] + #[vortex_cuda_macros::test] fn test_filter_temp_size_zero_items() -> Result<(), crate::CubError> { // Just verify the call doesn't fail with zero items let _temp_bytes = filter::filter_get_temp_size_u8(0)?; diff --git a/vortex-cuda/macros/src/lib.rs b/vortex-cuda/macros/src/lib.rs index afe26bfc99e..722664db915 100644 --- a/vortex-cuda/macros/src/lib.rs +++ b/vortex-cuda/macros/src/lib.rs @@ -18,10 +18,10 @@ //! #[cuda_not_available] //! fn fallback_function() { /* ... */ } //! -//! // Only compiled in test builds when CUDA is available -//! #[cuda_test] -//! mod tests { -//! // ... +//! // Ignore tests when CUDA is not available +//! #[crate::test] +//! async fn my_test() { +//! ... //! } //! ``` @@ -30,7 +30,6 @@ use std::sync::LazyLock; use proc_macro::TokenStream; use quote::quote; -use syn::Item; use syn::parse_macro_input; /// Cached result of nvcc availability check. @@ -61,17 +60,39 @@ pub fn cuda_not_available(_attr: TokenStream, item: TokenStream) -> TokenStream } } -/// Conditionally compiles the annotated item only in test builds when CUDA is available. +/// Test attribute to ignore tests if CUDA isn't available. Supports both sync and async tests (using tokio). +/// +/// Must be named `test` to work with frameworks like `rstest`. #[proc_macro_attribute] -pub fn cuda_tests(_attr: TokenStream, item: TokenStream) -> TokenStream { +pub fn test(_attr: TokenStream, item: TokenStream) -> TokenStream { + let item = parse_macro_input!(item as syn::ItemFn); if *NVCC_AVAILABLE { - let item = parse_macro_input!(item as Item); - quote! { - #[cfg(test)] - #item + if item.sig.asyncness.is_some() { + quote! { + #[tokio::test] + #item + } + } else { + quote! { + #[test] + #item + } } .into() } else { - TokenStream::new() + if item.sig.asyncness.is_some() { + quote! { + #[tokio::test] + #[ignore] + #item + } + } else { + quote! { + #[test] + #[ignore] + #item + } + } + .into() } } diff --git a/vortex-cuda/nvcomp/src/lib.rs b/vortex-cuda/nvcomp/src/lib.rs index 650f06069ff..2f9fd00deb9 100644 --- a/vortex-cuda/nvcomp/src/lib.rs +++ b/vortex-cuda/nvcomp/src/lib.rs @@ -35,7 +35,6 @@ mod error; pub mod zstd; pub use error::NvcompError; -use vortex_cuda_macros::cuda_tests; /// The loaded nvcomp library instance. static NVCOMP_LIB: OnceLock> = OnceLock::new(); @@ -66,13 +65,12 @@ pub fn nvcomp_library() -> Result<&'static sys::NvcompLibrary, NvcompError> { .as_ref() .map_err(|e| NvcompError::LibraryLoadError(e.clone())) } - -#[cuda_tests] +#[cfg(test)] mod tests { use crate::zstd; /// Test that we can call nvcompBatchedZstdDecompressGetTempSizeAsync. - #[test] + #[vortex_cuda_macros::test] fn test_get_decompress_temp_size() { let num_chunks = 10; let max_uncompressed_chunk_bytes = 65536; // 64KB recommended chunk size diff --git a/vortex-cuda/src/arrow/canonical.rs b/vortex-cuda/src/arrow/canonical.rs index 3a6d0b3d666..2dc35b1f360 100644 --- a/vortex-cuda/src/arrow/canonical.rs +++ b/vortex-cuda/src/arrow/canonical.rs @@ -18,7 +18,6 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::error::vortex_ensure; use vortex::extension::datetime::AnyTemporal; -use vortex_cuda_macros::cuda_tests; use crate::CudaExecutionCtx; use crate::arrow::ArrowArray; @@ -275,8 +274,7 @@ unsafe extern "C" fn release_array(array: *mut ArrowArray) { } } -#[cuda_tests] -#[allow(clippy::unwrap_used)] +#[cfg(test)] mod tests { use rstest::rstest; use vortex::array::IntoArray; @@ -308,7 +306,7 @@ mod tests { #[case::i64(PrimitiveArray::from_iter(0i64..10).into_array(), 10)] #[case::f32(PrimitiveArray::from_iter([1.0f32, 2.0, 3.0]).into_array(), 3)] #[case::f64(PrimitiveArray::from_iter([1.0f64, 2.0, 3.0]).into_array(), 3)] - #[tokio::test] + #[crate::test] async fn test_export_primitive( #[case] array: vortex::array::ArrayRef, #[case] expected_len: i64, @@ -330,7 +328,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_export_null() -> VortexResult<()> { let mut ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -346,7 +344,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_export_decimal() -> VortexResult<()> { let mut ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -365,7 +363,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_export_temporal() -> VortexResult<()> { let mut ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -388,7 +386,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_export_varbinview() -> VortexResult<()> { let mut ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -413,7 +411,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_export_struct() -> VortexResult<()> { let mut ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); diff --git a/vortex-cuda/src/dynamic_dispatch/mod.rs b/vortex-cuda/src/dynamic_dispatch/mod.rs index 25de9da0473..a2a431f1490 100644 --- a/vortex-cuda/src/dynamic_dispatch/mod.rs +++ b/vortex-cuda/src/dynamic_dispatch/mod.rs @@ -19,8 +19,6 @@ #![allow(non_snake_case)] #![allow(clippy::cast_possible_truncation)] -use vortex_cuda_macros::cuda_tests; - mod plan_builder; pub use plan_builder::build_plan; @@ -187,8 +185,7 @@ impl DynamicDispatchPlan { } } -#[cuda_tests] -#[allow(clippy::cast_possible_truncation)] +#[cfg(test)] mod tests { use std::sync::Arc; @@ -232,11 +229,11 @@ mod tests { .map(|i| ((i as u64) % (max_val + 1)) as u32) .collect(); let primitive = PrimitiveArray::new(Buffer::from(values), NonNullable); - BitPackedArray::encode(&primitive.to_array(), bit_width) + BitPackedArray::encode(&primitive.into_array(), bit_width) .vortex_expect("failed to create BitPacked array") } - #[test] + #[crate::test] fn test_max_scalar_ops() -> VortexResult<()> { let bit_width: u8 = 6; let len = 2050; @@ -273,7 +270,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_plan_structure() { // Stage 0: input dict values (BP→FoR) into smem[0..256) // Stage 1: output codes (BP→FoR→DICT) into smem[256..2304), gather from smem[0] @@ -321,7 +318,7 @@ mod tests { Ok((ptr, device_buf)) } - #[test] + #[crate::test] fn test_load_for_zigzag_alp() -> VortexResult<()> { // Max scalar ops depth with LOAD source: LOAD → FoR → ZigZag → ALP // (Exercises all four scalar op types without DICT) @@ -422,7 +419,7 @@ mod tests { Ok(unsafe { std::mem::transmute::, Vec>(actual) }) } - #[test] + #[crate::test] fn test_bitpacked() -> VortexResult<()> { let bit_width: u8 = 10; let len = 3000; @@ -433,7 +430,7 @@ mod tests { let bp = make_bitpacked_array_u32(bit_width, len); let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&bp.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&bp.into_array(), &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan)?; assert_eq!(actual, expected); @@ -441,7 +438,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_for_bitpacked() -> VortexResult<()> { let bit_width: u8 = 6; let len = 3000; @@ -457,7 +454,7 @@ mod tests { let for_arr = FoRArray::try_new(bp.into_array(), Scalar::from(reference))?; let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&for_arr.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&for_arr.into_array(), &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan)?; assert_eq!(actual, expected); @@ -465,7 +462,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_runend() -> VortexResult<()> { let ends: Vec = vec![1000, 2000, 3000]; let values: Vec = vec![10, 20, 30]; @@ -482,7 +479,7 @@ mod tests { let re = RunEndArray::new(ends_arr, values_arr); let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&re.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&re.into_array(), &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan)?; assert_eq!(actual, expected); @@ -490,7 +487,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_dict_for_bp_values_bp_codes() -> VortexResult<()> { // Dict where both codes and values are BitPacked+FoR. let dict_reference = 1_000_000u32; @@ -504,17 +501,17 @@ mod tests { // BitPack+FoR the dict values let dict_prim = PrimitiveArray::new(Buffer::from(dict_residuals), NonNullable); - let dict_bp = BitPackedArray::encode(&dict_prim.to_array(), 6)?; + let dict_bp = BitPackedArray::encode(&dict_prim.into_array(), 6)?; let dict_for = FoRArray::try_new(dict_bp.into_array(), Scalar::from(dict_reference))?; // BitPack the codes let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let codes_bp = BitPackedArray::encode(&codes_prim.to_array(), 6)?; + let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), 6)?; let dict = DictArray::try_new(codes_bp.into_array(), dict_for.into_array())?; let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&dict.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&dict.into_array(), &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan)?; assert_eq!(actual, expected); @@ -522,7 +519,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_alp_for_bitpacked() -> VortexResult<()> { // ALP(FoR(BitPacked)): encode each layer, then reassemble the tree // bottom-up because encode() methods produce flat outputs. @@ -545,7 +542,7 @@ mod tests { ); let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&tree.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&tree.into_array(), &cuda_ctx)?; let actual = run_dispatch_plan_f32(&cuda_ctx, len, &plan)?; assert_eq!(actual, floats); @@ -553,7 +550,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_zigzag_bitpacked() -> VortexResult<()> { // ZigZag(BitPacked): unpack then zigzag-decode. let bit_width: u8 = 4; @@ -569,11 +566,11 @@ mod tests { .collect(); let prim = PrimitiveArray::new(Buffer::from(raw), NonNullable); - let bp = BitPackedArray::encode(&prim.to_array(), bit_width)?; + let bp = BitPackedArray::encode(&prim.into_array(), bit_width)?; let zz = ZigZagArray::try_new(bp.into_array())?; let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&zz.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&zz.into_array(), &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan)?; assert_eq!(actual, expected); @@ -581,7 +578,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_for_runend() -> VortexResult<()> { // FoR(RunEnd): expand runs then add constant. let ends: Vec = vec![500, 1000, 1500, 2000, 2500, 3000]; @@ -601,7 +598,7 @@ mod tests { let for_arr = FoRArray::try_new(re.into_array(), Scalar::from(reference))?; let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&for_arr.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&for_arr.into_array(), &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan)?; assert_eq!(actual, expected); @@ -609,7 +606,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_for_dict() -> VortexResult<()> { // FoR(Dict(codes=Primitive, values=Primitive)): gather then add constant. let dict_values: Vec = vec![100, 200, 300, 400]; @@ -629,7 +626,7 @@ mod tests { let for_arr = FoRArray::try_new(dict.into_array(), Scalar::from(reference))?; let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&for_arr.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&for_arr.into_array(), &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan)?; assert_eq!(actual, expected); @@ -637,7 +634,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_dict_for_bp_codes() -> VortexResult<()> { // Dict(codes=FoR(BitPacked), values=primitive) let dict_values: Vec = (0..8).map(|i| i * 1000 + 7).collect(); @@ -649,14 +646,14 @@ mod tests { // BitPack codes, then wrap in FoR (reference=0 so values unchanged) let bit_width: u8 = 3; let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let codes_bp = BitPackedArray::encode(&codes_prim.to_array(), bit_width)?; + let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), bit_width)?; let codes_for = FoRArray::try_new(codes_bp.into_array(), Scalar::from(0u32))?; let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); let dict = DictArray::try_new(codes_for.into_array(), values_prim.into_array())?; let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&dict.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&dict.into_array(), &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan)?; assert_eq!(actual, expected); @@ -664,7 +661,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_dict_primitive_values_bp_codes() -> VortexResult<()> { let dict_values: Vec = vec![100, 200, 300, 400]; let dict_size = dict_values.len(); @@ -674,13 +671,13 @@ mod tests { let bit_width: u8 = 2; let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let codes_bp = BitPackedArray::encode(&codes_prim.to_array(), bit_width)?; + let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), bit_width)?; let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); let dict = DictArray::try_new(codes_bp.into_array(), values_prim.into_array())?; let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (plan, _bufs) = build_plan(&dict.to_array(), &cuda_ctx)?; + let (plan, _bufs) = build_plan(&dict.into_array(), &cuda_ctx)?; let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan)?; assert_eq!(actual, expected); diff --git a/vortex-cuda/src/kernel/arrays/constant.rs b/vortex-cuda/src/kernel/arrays/constant.rs index 9fcab51a654..bebb7eae662 100644 --- a/vortex-cuda/src/kernel/arrays/constant.rs +++ b/vortex-cuda/src/kernel/arrays/constant.rs @@ -26,7 +26,6 @@ use vortex::dtype::NativePType; use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::error::vortex_err; -use vortex_cuda_macros::cuda_tests; use crate::CudaDeviceBuffer; use crate::executor::CudaExecute; @@ -190,7 +189,7 @@ where ))) } -#[cuda_tests] +#[cfg(test)] mod tests { use rstest::rstest; use vortex::array::IntoArray; @@ -221,7 +220,7 @@ mod tests { #[case::i64(make_constant_array(-1000000i64, 2050))] #[case::f32(make_constant_array(1.23f32, 2050))] #[case::f64(make_constant_array(4.56789f64, 2050))] - #[tokio::test] + #[crate::test] async fn test_cuda_constant_materialization( #[case] constant_array: ConstantArray, ) -> VortexResult<()> { @@ -231,7 +230,7 @@ mod tests { let cpu_result = constant_array.to_canonical()?; let gpu_result = ConstantNumericExecutor - .execute(constant_array.to_array(), &mut cuda_ctx) + .execute(constant_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU materialization failed") .into_host() @@ -243,7 +242,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_constant_empty_array() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -252,7 +251,7 @@ mod tests { let cpu_result = constant_array.to_canonical()?; let gpu_result = ConstantNumericExecutor - .execute(constant_array.to_array(), &mut cuda_ctx) + .execute(constant_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU materialization failed") .into_host() @@ -264,7 +263,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_constant_small_array() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -274,7 +273,7 @@ mod tests { let cpu_result = constant_array.to_canonical()?; let gpu_result = ConstantNumericExecutor - .execute(constant_array.to_array(), &mut cuda_ctx) + .execute(constant_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU materialization failed") .into_host() diff --git a/vortex-cuda/src/kernel/arrays/dict.rs b/vortex-cuda/src/kernel/arrays/dict.rs index b4abd7e9691..334ca9caf03 100644 --- a/vortex-cuda/src/kernel/arrays/dict.rs +++ b/vortex-cuda/src/kernel/arrays/dict.rs @@ -29,7 +29,6 @@ use vortex::dtype::NativePType; use vortex::error::VortexExpect; use vortex::error::VortexResult; use vortex::error::vortex_bail; -use vortex_cuda_macros::cuda_tests; use crate::CudaBufferExt; use crate::CudaDeviceBuffer; @@ -301,7 +300,7 @@ async fn execute_dict_varbinview( })) } -#[cuda_tests] +#[cfg(test)] mod tests { use vortex::array::IntoArray; use vortex::array::arrays::DecimalArray; @@ -329,7 +328,7 @@ mod tests { )) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_u32_values_u8_codes() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -349,7 +348,7 @@ mod tests { // Execute on CUDA let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_primitive(); @@ -361,7 +360,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_u64_values_u16_codes() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -384,7 +383,7 @@ mod tests { // Execute on CUDA let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_primitive(); @@ -396,7 +395,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_i32_values_u32_codes() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -416,7 +415,7 @@ mod tests { // Execute on CUDA let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_primitive(); @@ -427,7 +426,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_large_array() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -447,7 +446,7 @@ mod tests { // Execute on CUDA let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_primitive(); @@ -459,7 +458,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_values_with_validity() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -491,7 +490,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_codes_with_validity() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -517,7 +516,7 @@ mod tests { // Execute on CUDA let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_primitive(); @@ -528,7 +527,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_both_with_validity() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -561,7 +560,7 @@ mod tests { // Execute on CUDA let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_primitive(); @@ -572,7 +571,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_i64_values_with_validity() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -606,7 +605,7 @@ mod tests { // Execute on CUDA let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_primitive(); @@ -617,7 +616,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_all_valid_matches_baseline() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -639,7 +638,7 @@ mod tests { // Execute on CUDA let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_primitive(); @@ -660,7 +659,7 @@ mod tests { )) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_decimal_i8_values() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -678,7 +677,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_decimal(); @@ -688,7 +687,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_decimal_i16_values() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -706,7 +705,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_decimal(); @@ -716,7 +715,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_decimal_i32_values() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -734,7 +733,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_decimal(); @@ -744,7 +743,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_decimal_i64_values() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -765,7 +764,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_decimal(); @@ -775,7 +774,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_decimal_i128_values() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -801,7 +800,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_decimal(); @@ -819,7 +818,7 @@ mod tests { .into_varbinview()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_string_values_u8_codes() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -834,7 +833,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_varbinview(); @@ -844,7 +843,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_string_values_u16_codes() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -859,7 +858,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_varbinview(); @@ -869,7 +868,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_string_max_inlined_12_bytes() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -886,7 +885,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_varbinview(); @@ -896,7 +895,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_string_outlined_views() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -916,7 +915,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_varbinview(); @@ -926,7 +925,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_string_empty_strings() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -941,7 +940,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_varbinview(); @@ -951,7 +950,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_string_values_with_validity() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -967,7 +966,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_varbinview(); @@ -977,7 +976,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_string_outlined_with_validity() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -1000,7 +999,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_varbinview(); @@ -1010,7 +1009,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_dict_decimal_i256_values() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -1036,7 +1035,7 @@ mod tests { let baseline = dict_array.to_canonical()?; let cuda_result = DictExecutor - .execute(dict_array.to_array(), &mut cuda_ctx) + .execute(dict_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_decimal(); diff --git a/vortex-cuda/src/kernel/encodings/alp.rs b/vortex-cuda/src/kernel/encodings/alp.rs index 9aabdea8658..a47cb24c42d 100644 --- a/vortex-cuda/src/kernel/encodings/alp.rs +++ b/vortex-cuda/src/kernel/encodings/alp.rs @@ -23,7 +23,6 @@ use vortex::encodings::alp::match_each_alp_float_ptype; use vortex::error::VortexResult; use vortex::error::vortex_ensure; use vortex::error::vortex_err; -use vortex_cuda_macros::cuda_tests; use crate::CudaBufferExt; use crate::CudaDeviceBuffer; @@ -116,7 +115,7 @@ where ))) } -#[cuda_tests] +#[cfg(test)] mod tests { use vortex::array::IntoArray; use vortex::array::arrays::PrimitiveArray; @@ -134,7 +133,7 @@ mod tests { use crate::CanonicalCudaExt; use crate::session::CudaSession; - #[tokio::test] + #[crate::test] async fn test_cuda_alp_decompression_f32() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -166,7 +165,7 @@ mod tests { let cpu_result = alp_array.to_canonical()?.into_array(); let gpu_result = ALPExecutor - .execute(alp_array.to_array(), &mut cuda_ctx) + .execute(alp_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() diff --git a/vortex-cuda/src/kernel/encodings/bitpacked.rs b/vortex-cuda/src/kernel/encodings/bitpacked.rs index 54d1ef65fe0..ff078411ea1 100644 --- a/vortex-cuda/src/kernel/encodings/bitpacked.rs +++ b/vortex-cuda/src/kernel/encodings/bitpacked.rs @@ -23,7 +23,6 @@ use vortex::encodings::fastlanes::unpack_iter::BitPacked; use vortex::error::VortexResult; use vortex::error::vortex_ensure; use vortex::error::vortex_err; -use vortex_cuda_macros::cuda_tests; use crate::CudaBufferExt; use crate::CudaDeviceBuffer; @@ -166,7 +165,7 @@ where ))) } -#[cuda_tests] +#[cfg(test)] mod tests { use futures::executor::block_on; use rstest::rstest; @@ -191,6 +190,7 @@ mod tests { #[case::u32((0u16..128u16).cycle().take(2048), 6)] #[case::u16((0u32..128u32).cycle().take(2048), 6)] #[case::u16((0u64..128u64).cycle().take(2048), 6)] + #[crate::test] fn test_patched( #[case] iter: impl Iterator, #[case] bw: u8, @@ -201,14 +201,14 @@ mod tests { let array = PrimitiveArray::new(iter.collect::>(), NonNullable); // Last two items should be patched - let bp_with_patches = BitPackedArray::encode(&array.to_array(), bw)?; + let bp_with_patches = BitPackedArray::encode(&array.into_array(), bw)?; assert!(bp_with_patches.patches().is_some()); let cpu_result = bp_with_patches.to_canonical()?.into_array(); let gpu_result = block_on(async { BitPackedExecutor - .execute(bp_with_patches.to_array(), &mut cuda_ctx) + .execute(bp_with_patches.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -221,7 +221,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_patches() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -232,14 +232,14 @@ mod tests { ); // Last two items should be patched - let bp_with_patches = BitPackedArray::encode(&array.to_array(), 9)?; + let bp_with_patches = BitPackedArray::encode(&array.into_array(), 9)?; assert!(bp_with_patches.patches().is_some()); let cpu_result = bp_with_patches.to_canonical()?.into_array(); let gpu_result = block_on(async { BitPackedExecutor - .execute(bp_with_patches.to_array(), &mut cuda_ctx) + .execute(bp_with_patches.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -260,6 +260,7 @@ mod tests { #[case::bw_5(5)] #[case::bw_6(6)] #[case::bw_7(7)] + #[crate::test] fn test_cuda_bitunpack_u8(#[case] bit_width: u8) -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -273,13 +274,13 @@ mod tests { NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.to_array(), bit_width) + let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) .vortex_expect("operation should succeed in test"); let cpu_result = bitpacked_array.to_canonical()?; let gpu_result = block_on(async { BitPackedExecutor - .execute(bitpacked_array.to_array(), &mut cuda_ctx) + .execute(bitpacked_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -308,6 +309,7 @@ mod tests { #[case::bw_13(13)] #[case::bw_14(14)] #[case::bw_15(15)] + #[crate::test] fn test_cuda_bitunpack_u16(#[case] bit_width: u8) -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -321,13 +323,13 @@ mod tests { NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.to_array(), bit_width) + let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) .vortex_expect("operation should succeed in test"); let cpu_result = bitpacked_array.to_canonical()?; let gpu_result = block_on(async { BitPackedExecutor - .execute(bitpacked_array.to_array(), &mut cuda_ctx) + .execute(bitpacked_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -372,6 +374,7 @@ mod tests { #[case::bw_29(29)] #[case::bw_30(30)] #[case::bw_31(31)] + #[crate::test] fn test_cuda_bitunpack_u32(#[case] bit_width: u8) -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -385,13 +388,13 @@ mod tests { NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.to_array(), bit_width) + let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) .vortex_expect("operation should succeed in test"); let cpu_result = bitpacked_array.to_canonical()?; let gpu_result = block_on(async { BitPackedExecutor - .execute(bitpacked_array.to_array(), &mut cuda_ctx) + .execute(bitpacked_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -468,6 +471,7 @@ mod tests { #[case::bw_61(61)] #[case::bw_62(62)] #[case::bw_63(63)] + #[crate::test] fn test_cuda_bitunpack_u64(#[case] bit_width: u8) -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -481,12 +485,12 @@ mod tests { NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.to_array(), bit_width) + let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) .vortex_expect("operation should succeed in test"); let cpu_result = bitpacked_array.to_canonical()?; let gpu_result = block_on(async { BitPackedExecutor - .execute(bitpacked_array.to_array(), &mut cuda_ctx) + .execute(bitpacked_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -499,7 +503,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn test_cuda_bitunpack_sliced() -> VortexResult<()> { let bit_width = 32; let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) @@ -514,7 +518,7 @@ mod tests { NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.to_array(), bit_width) + let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) .vortex_expect("operation should succeed in test"); let slice_ref = bitpacked_array.clone().into_array().slice(67..3969)?; let mut exec_ctx = ExecutionCtx::new(VortexSession::empty().with::()); diff --git a/vortex-cuda/src/kernel/encodings/date_time_parts.rs b/vortex-cuda/src/kernel/encodings/date_time_parts.rs index e68e8f320fc..1f99ab341f3 100644 --- a/vortex-cuda/src/kernel/encodings/date_time_parts.rs +++ b/vortex-cuda/src/kernel/encodings/date_time_parts.rs @@ -28,7 +28,6 @@ use vortex::error::vortex_err; use vortex::extension::datetime::TimeUnit; use vortex::extension::datetime::Timestamp; use vortex::scalar::Scalar; -use vortex_cuda_macros::cuda_tests; use crate::CudaBufferExt; use crate::CudaDeviceBuffer; @@ -199,7 +198,7 @@ where )) } -#[cuda_tests] +#[cfg(test)] mod tests { use rstest::rstest; use vortex::array::IntoArray; @@ -272,7 +271,7 @@ mod tests { vec![123456789i64, 0, 0], TimeUnit::Nanoseconds )] - #[tokio::test] + #[crate::test] async fn test_cuda_datetimeparts_decompression( #[case] days: Vec, #[case] seconds: Vec, @@ -286,7 +285,7 @@ mod tests { let cpu_result = dtp_array.to_canonical()?; let gpu_result = DateTimePartsExecutor - .execute(dtp_array.to_array(), &mut cuda_ctx) + .execute(dtp_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -298,7 +297,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_datetimeparts_large_array() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -312,7 +311,7 @@ mod tests { let cpu_result = dtp_array.to_canonical()?; let gpu_result = DateTimePartsExecutor - .execute(dtp_array.to_array(), &mut cuda_ctx) + .execute(dtp_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -324,7 +323,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_datetimeparts_with_nulls() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -360,7 +359,7 @@ mod tests { let cpu_result = dtp_array.to_canonical()?; let gpu_result = DateTimePartsExecutor - .execute(dtp_array.to_array(), &mut cuda_ctx) + .execute(dtp_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() diff --git a/vortex-cuda/src/kernel/encodings/decimal_byte_parts.rs b/vortex-cuda/src/kernel/encodings/decimal_byte_parts.rs index 2615f0a67d9..d7ba349dc97 100644 --- a/vortex-cuda/src/kernel/encodings/decimal_byte_parts.rs +++ b/vortex-cuda/src/kernel/encodings/decimal_byte_parts.rs @@ -13,7 +13,6 @@ use vortex::encodings::decimal_byte_parts::DecimalBytePartsArrayParts; use vortex::encodings::decimal_byte_parts::DecimalBytePartsVTable; use vortex::error::VortexResult; use vortex::error::vortex_bail; -use vortex_cuda_macros::cuda_tests; use crate::CudaExecutionCtx; use crate::executor::CudaArrayExt; @@ -52,7 +51,7 @@ impl CudaExecute for DecimalBytePartsExecutor { } } -#[cuda_tests] +#[cfg(test)] mod tests { use rstest::rstest; use vortex::array::IntoArray; @@ -73,7 +72,7 @@ mod tests { #[case::i16_p10_s2(Buffer::from(vec![100i16, 200, 300, 400, 500]), 10, 2)] #[case::i32_p18_s4(Buffer::from(vec![100i32, 200, 300, 400, 500]), 18, 4)] #[case::i64_p38_s6(Buffer::from(vec![100i64, 200, 300, 400, 500]), 38, 6)] - #[tokio::test] + #[crate::test] async fn test_decimal_byte_parts_gpu_decode( #[case] encoded: Buffer, #[case] precision: u8, @@ -92,7 +91,7 @@ mod tests { let cpu_result = dbp_array.to_canonical().vortex_expect("CPU canonicalize"); let gpu_result = DecimalBytePartsExecutor - .execute(dbp_array.to_array(), &mut cuda_ctx) + .execute(dbp_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decode"); diff --git a/vortex-cuda/src/kernel/encodings/for_.rs b/vortex-cuda/src/kernel/encodings/for_.rs index aef5202e2ab..019b6fb4952 100644 --- a/vortex-cuda/src/kernel/encodings/for_.rs +++ b/vortex-cuda/src/kernel/encodings/for_.rs @@ -23,7 +23,6 @@ use vortex::error::VortexExpect; use vortex::error::VortexResult; use vortex::error::vortex_ensure; use vortex::error::vortex_err; -use vortex_cuda_macros::cuda_tests; use crate::CudaBufferExt; use crate::executor::CudaArrayExt; @@ -119,7 +118,7 @@ where ))) } -#[cuda_tests] +#[cfg(test)] mod tests { use rstest::rstest; use vortex::array::IntoArray; @@ -151,7 +150,7 @@ mod tests { #[case::u16(make_for_array((0..2050).map(|i| (i % 2050) as u16).collect(), 1000u16))] #[case::u32(make_for_array((0..2050).map(|i| (i % 2050) as u32).collect(), 100000u32))] #[case::u64(make_for_array((0..2050).map(|i| (i % 2050) as u64).collect(), 1000000u64))] - #[tokio::test] + #[crate::test] async fn test_cuda_for_decompression(#[case] for_array: FoRArray) -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -159,7 +158,7 @@ mod tests { let cpu_result = for_array.to_canonical()?; let gpu_result = FoRExecutor - .execute(for_array.to_array(), &mut cuda_ctx) + .execute(for_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -171,7 +170,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_signed_ffor() { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -187,7 +186,7 @@ mod tests { let cpu_result = for_array.to_canonical().unwrap(); let gpu_result = FoRExecutor - .execute(for_array.to_array(), &mut cuda_ctx) + .execute(for_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() diff --git a/vortex-cuda/src/kernel/encodings/runend.rs b/vortex-cuda/src/kernel/encodings/runend.rs index ad22f555c50..f7c8fc28088 100644 --- a/vortex-cuda/src/kernel/encodings/runend.rs +++ b/vortex-cuda/src/kernel/encodings/runend.rs @@ -26,7 +26,6 @@ use vortex::error::vortex_bail; use vortex::error::vortex_ensure; use vortex::error::vortex_err; use vortex::scalar::Scalar; -use vortex_cuda_macros::cuda_tests; use crate::CudaBufferExt; use crate::CudaDeviceBuffer; @@ -157,8 +156,7 @@ async fn decode_runend_typed VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -202,7 +200,7 @@ mod tests { let cpu_result = runend_array.to_canonical()?; let gpu_result = RunEndExecutor - .execute(runend_array.to_array(), &mut cuda_ctx) + .execute(runend_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -214,7 +212,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_runend_large_array() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -224,7 +222,7 @@ mod tests { let total_len = num_runs * run_length; let ends: Vec = (1..=num_runs).map(|i| (i * run_length) as u64).collect(); - let values: Vec = (0..num_runs).map(|i| i as i32).collect(); + let values: Vec = (0..num_runs).map(|i| i32::try_from(i).unwrap()).collect(); let runend_array = make_runend_array(ends, values); assert_eq!(runend_array.len(), total_len); @@ -232,7 +230,7 @@ mod tests { let cpu_result = runend_array.to_canonical()?; let gpu_result = RunEndExecutor - .execute(runend_array.to_array(), &mut cuda_ctx) + .execute(runend_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -244,7 +242,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_runend_single_run() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -254,7 +252,7 @@ mod tests { let cpu_result = runend_array.to_canonical()?; let gpu_result = RunEndExecutor - .execute(runend_array.to_array(), &mut cuda_ctx) + .execute(runend_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() @@ -266,7 +264,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_runend_many_small_runs() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -281,7 +279,7 @@ mod tests { let cpu_result = runend_array.to_canonical()?; let gpu_result = RunEndExecutor - .execute(runend_array.to_array(), &mut cuda_ctx) + .execute(runend_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() diff --git a/vortex-cuda/src/kernel/encodings/sequence.rs b/vortex-cuda/src/kernel/encodings/sequence.rs index 312d7ef1475..abe53cdd18f 100644 --- a/vortex-cuda/src/kernel/encodings/sequence.rs +++ b/vortex-cuda/src/kernel/encodings/sequence.rs @@ -18,7 +18,6 @@ use vortex::encodings::sequence::SequenceArrayParts; use vortex::encodings::sequence::SequenceVTable; use vortex::error::VortexResult; use vortex::error::vortex_err; -use vortex_cuda_macros::cuda_tests; use crate::CudaDeviceBuffer; use crate::CudaExecutionCtx; @@ -82,7 +81,7 @@ async fn execute_typed( ))) } -#[cuda_tests] +#[cfg(test)] mod tests { use futures::executor::block_on; use rstest::rstest; @@ -104,6 +103,7 @@ mod tests { #[case::u16(10u16, 2u16, 100)] #[case::u32(10u32, 2u32, 1000)] #[case::u64(100u64, 20u64, 500)] + #[crate::test] fn test_sequence>( #[case] base: T, #[case] multiplier: T, diff --git a/vortex-cuda/src/kernel/encodings/zigzag.rs b/vortex-cuda/src/kernel/encodings/zigzag.rs index f3ca80c2aa4..f18be9d2b76 100644 --- a/vortex-cuda/src/kernel/encodings/zigzag.rs +++ b/vortex-cuda/src/kernel/encodings/zigzag.rs @@ -19,7 +19,6 @@ use vortex::encodings::zigzag::ZigZagVTable; use vortex::error::VortexResult; use vortex::error::vortex_ensure; use vortex::error::vortex_err; -use vortex_cuda_macros::cuda_tests; use crate::CudaBufferExt; use crate::executor::CudaArrayExt; @@ -96,7 +95,7 @@ where ))) } -#[cuda_tests] +#[cfg(test)] mod tests { use vortex::array::IntoArray; use vortex::array::arrays::PrimitiveArray; @@ -111,7 +110,7 @@ mod tests { use crate::CanonicalCudaExt; use crate::session::CudaSession; - #[tokio::test] + #[crate::test] async fn test_cuda_zigzag_decompression_u32() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -127,7 +126,7 @@ mod tests { let cpu_result = zigzag_array.to_canonical()?; let gpu_result = ZigZagExecutor - .execute(zigzag_array.to_array(), &mut cuda_ctx) + .execute(zigzag_array.into_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed") .into_host() diff --git a/vortex-cuda/src/kernel/encodings/zstd.rs b/vortex-cuda/src/kernel/encodings/zstd.rs index e70047f61b3..7c8caba89a7 100644 --- a/vortex-cuda/src/kernel/encodings/zstd.rs +++ b/vortex-cuda/src/kernel/encodings/zstd.rs @@ -31,7 +31,6 @@ use vortex::error::VortexExpect; use vortex::error::VortexResult; use vortex::error::vortex_err; use vortex::mask::AllOr; -use vortex_cuda_macros::cuda_tests; use vortex_nvcomp::sys::nvcompStatus_t; use vortex_nvcomp::zstd as nvcomp_zstd; @@ -341,7 +340,7 @@ async fn decode_zstd(array: ZstdArray, ctx: &mut CudaExecutionCtx) -> VortexResu } } -#[cuda_tests] +#[cfg(test)] mod tests { use vortex::array::IntoArray; use vortex::array::arrays::VarBinViewArray; @@ -353,7 +352,7 @@ mod tests { use super::*; use crate::session::CudaSession; - #[tokio::test] + #[crate::test] async fn test_cuda_zstd_decompression_utf8() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -378,7 +377,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_zstd_decompression_multiple_frames() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -413,7 +412,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_zstd_decompression_sliced() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); diff --git a/vortex-cuda/src/kernel/encodings/zstd_buffers.rs b/vortex-cuda/src/kernel/encodings/zstd_buffers.rs index 6396f73681c..bc218c841c0 100644 --- a/vortex-cuda/src/kernel/encodings/zstd_buffers.rs +++ b/vortex-cuda/src/kernel/encodings/zstd_buffers.rs @@ -20,7 +20,6 @@ use vortex::encodings::zstd::ZstdBuffersArray; use vortex::encodings::zstd::ZstdBuffersVTable; use vortex::error::VortexResult; use vortex::error::vortex_err; -use vortex_cuda_macros::cuda_tests; use vortex_nvcomp::sys; use vortex_nvcomp::sys::nvcompStatus_t; use vortex_nvcomp::zstd as nvcomp_zstd; @@ -218,7 +217,7 @@ async fn validate_decompress_results( Ok(()) } -#[cuda_tests] +#[cfg(test)] mod tests { use vortex::array::IntoArray; use vortex::array::arrays::PrimitiveArray; @@ -233,7 +232,7 @@ mod tests { use crate::CanonicalCudaExt; use crate::session::CudaSession; - #[tokio::test] + #[crate::test] async fn test_cuda_zstd_buffers_decompression_primitive() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); @@ -252,7 +251,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_cuda_zstd_buffers_decompression_varbinview() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); diff --git a/vortex-cuda/src/kernel/filter/decimal.rs b/vortex-cuda/src/kernel/filter/decimal.rs index a42d51b9f04..8da8ed17b12 100644 --- a/vortex-cuda/src/kernel/filter/decimal.rs +++ b/vortex-cuda/src/kernel/filter/decimal.rs @@ -9,7 +9,6 @@ use vortex::dtype::NativeDecimalType; use vortex::error::VortexResult; use vortex::mask::Mask; use vortex_cub::filter::CubFilterable; -use vortex_cuda_macros::cuda_tests; use crate::CudaExecutionCtx; use crate::kernel::filter::filter_sized; @@ -37,7 +36,7 @@ pub(super) async fn filter_decimal VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create CUDA execution context"); diff --git a/vortex-cuda/src/kernel/filter/primitive.rs b/vortex-cuda/src/kernel/filter/primitive.rs index 5019ff9b770..32d6746de8b 100644 --- a/vortex-cuda/src/kernel/filter/primitive.rs +++ b/vortex-cuda/src/kernel/filter/primitive.rs @@ -9,7 +9,6 @@ use vortex::dtype::NativePType; use vortex::error::VortexResult; use vortex::mask::Mask; use vortex_cub::filter::CubFilterable; -use vortex_cuda_macros::cuda_tests; use crate::CudaExecutionCtx; use crate::kernel::filter::filter_sized; @@ -37,7 +36,7 @@ where ))) } -#[cuda_tests] +#[cfg(test)] mod tests { use rstest::rstest; use vortex::array::IntoArray; @@ -79,7 +78,7 @@ mod tests { PrimitiveArray::from_iter([1u32, 2, 3, 4, 5]), Mask::from_iter([false, false, false, false, false]) )] - #[tokio::test] + #[crate::test] async fn test_gpu_filter( #[case] input: PrimitiveArray, #[case] mask: Mask, @@ -104,7 +103,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[crate::test] async fn test_gpu_filter_large_array() -> VortexResult<()> { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create CUDA execution context"); diff --git a/vortex-cuda/src/kernel/filter/varbinview.rs b/vortex-cuda/src/kernel/filter/varbinview.rs index f5e205f07aa..e9994a375f1 100644 --- a/vortex-cuda/src/kernel/filter/varbinview.rs +++ b/vortex-cuda/src/kernel/filter/varbinview.rs @@ -6,7 +6,6 @@ use vortex::array::arrays::VarBinViewArray; use vortex::array::arrays::VarBinViewArrayParts; use vortex::error::VortexResult; use vortex::mask::Mask; -use vortex_cuda_macros::cuda_tests; use crate::CudaExecutionCtx; use crate::kernel::filter::filter_sized; @@ -37,7 +36,7 @@ pub(super) async fn filter_varbinview( ))) } -#[cuda_tests] +#[cfg(test)] mod tests { use rstest::rstest; use vortex::array::IntoArray; @@ -65,7 +64,7 @@ mod tests { ), Mask::from_iter([true, true, true, true, true, true, true, true, false]) )] - #[tokio::test] + #[crate::test] async fn test_gpu_filter_strings( #[case] input: VarBinViewArray, #[case] mask: Mask, diff --git a/vortex-cuda/src/kernel/mod.rs b/vortex-cuda/src/kernel/mod.rs index 221307cdbc4..93ffd768df5 100644 --- a/vortex-cuda/src/kernel/mod.rs +++ b/vortex-cuda/src/kernel/mod.rs @@ -20,7 +20,6 @@ use tracing::trace; use vortex::error::VortexResult; use vortex::error::vortex_err; use vortex::utils::aliases::dash_map::DashMap; -use vortex_cuda_macros::cuda_tests; mod arrays; mod encodings; @@ -282,9 +281,8 @@ impl KernelLoader { } } -#[cuda_tests] +#[cfg(test)] mod tests { - #![allow(clippy::expect_used)] use cudarc::driver::CudaContext; use cudarc::driver::PushKernelArg; @@ -297,7 +295,7 @@ mod tests { /// This test launches a special config_check kernel that reports the kernel-side /// constants, then verifies they match the Rust-side constants used in /// `launch_cuda_kernel_impl`. - #[test] + #[crate::test] fn test_kernel_config_matches_rust_config() { // These must match the constants in launch_cuda_kernel_impl const THREADS_PER_BLOCK: u32 = 64; diff --git a/vortex-cuda/src/kernel/patches/mod.rs b/vortex-cuda/src/kernel/patches/mod.rs index 79654907ce6..38b2248ef08 100644 --- a/vortex-cuda/src/kernel/patches/mod.rs +++ b/vortex-cuda/src/kernel/patches/mod.rs @@ -19,7 +19,6 @@ use vortex::array::vtable::ValidityHelper; use vortex::dtype::NativePType; use vortex::error::VortexResult; use vortex::error::vortex_ensure; -use vortex_cuda_macros::cuda_tests; use crate::CudaBufferExt; use crate::CudaDeviceBuffer; @@ -98,7 +97,7 @@ pub(crate) async fn execute_patches< Ok(target) } -#[cuda_tests] +#[cfg(test)] mod tests { use std::sync::Arc; @@ -135,7 +134,7 @@ mod tests { #[case::i64(0_i64)] #[case::f32(0_f32)] #[case::f64(0_f64)] - #[tokio::test] + #[crate::test] async fn test_patches(#[case] _v: Values) { tokio::join!( full_test_case::(), @@ -203,7 +202,7 @@ mod tests { fn force_cast(array: PrimitiveArray) -> PrimitiveArray { array - .to_array() + .into_array() .cast(DType::Primitive(T::PTYPE, Nullability::NonNullable)) .unwrap() .to_primitive() diff --git a/vortex-cuda/src/kernel/patches/types.rs b/vortex-cuda/src/kernel/patches/types.rs index b8a2157ff3d..2e6d429bb23 100644 --- a/vortex-cuda/src/kernel/patches/types.rs +++ b/vortex-cuda/src/kernel/patches/types.rs @@ -235,7 +235,7 @@ mod tests { use crate::kernel::patches::types::transpose; - #[test] + #[crate::test] fn test_transpose_patches() { let patch_values = buffer![0u32, 10, 20, 30, 40, 50, 60, 70, 80]; diff --git a/vortex-cuda/src/lib.rs b/vortex-cuda/src/lib.rs index 9f3d6df94f2..665b9e30c62 100644 --- a/vortex-cuda/src/lib.rs +++ b/vortex-cuda/src/lib.rs @@ -74,6 +74,8 @@ use vortex::encodings::zigzag::ZigZagVTable; #[cfg(feature = "unstable_encodings")] use vortex::encodings::zstd::ZstdBuffersVTable; use vortex::encodings::zstd::ZstdVTable; +#[cfg(test)] +use vortex_cuda_macros::test; pub use vortex_nvcomp as nvcomp; use crate::kernel::SequenceExecutor; diff --git a/vortex-cuda/src/pinned.rs b/vortex-cuda/src/pinned.rs index ae6c24cd40d..c48cf98624b 100644 --- a/vortex-cuda/src/pinned.rs +++ b/vortex-cuda/src/pinned.rs @@ -14,7 +14,6 @@ use vortex::error::VortexResult; use vortex::error::vortex_err; use vortex::error::vortex_panic; use vortex::utils::aliases::hash_map::HashMap; -use vortex_cuda_macros::cuda_tests; use crate::CudaDeviceBuffer; use crate::stream::VortexCudaStream; @@ -351,7 +350,7 @@ impl Drop for PooledPinnedBuffer { } } -#[cuda_tests] +#[cfg(test)] mod tests { use std::sync::Arc; @@ -372,7 +371,7 @@ mod tests { Ok((pool, stream)) } - #[test] + #[crate::test] fn transfer_to_device_round_trip() -> VortexResult<()> { let (pool, stream) = setup()?; let data: Vec = (0..=255u8).collect(); @@ -387,7 +386,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn transfer_puts_buffer_inflight() -> VortexResult<()> { let (pool, stream) = setup()?; @@ -411,7 +410,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn pool_reclaims_after_transfer_completes() -> VortexResult<()> { let (pool, stream) = setup()?; @@ -444,7 +443,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn drop_returns_buffer_to_pool() -> VortexResult<()> { let (pool, _stream) = setup()?; @@ -466,7 +465,7 @@ mod tests { Ok(()) } - #[test] + #[crate::test] fn transfer_consumes_inner_so_drop_is_noop() -> VortexResult<()> { let (pool, stream) = setup()?;