Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions .github/actions/setup-rust/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ description: "Toolchain setup and Initial compilation"

inputs:
repo-token:
description: "Deprecated: no longer used. Protoc is now downloaded directly from GitHub releases CDN."
description: "GitHub token for accessing the repository (typically secrets.GITHUB_TOKEN)"
required: false
default: ""
default: "${{ github.token }}"
toolchain:
description: "optional override for the toolchain version (e.g. nightly)"
required: false
Expand Down Expand Up @@ -50,9 +50,23 @@ runs:
- name: Rust Compile Cache
if: inputs.enable-sccache == 'true'
uses: mozilla-actions/sccache-action@v0.0.9
with:
version: "v0.14.0"

- name: Install Protoc (for lance-encoding build step)
if: runner.os != 'Windows'
uses: ./.github/actions/setup-protoc
uses: arduino/setup-protoc@v3
with:
version: "29.3"
repo-token: ${{ inputs.repo-token }}

- name: Install Ninja (for DuckDB build system)
uses: seanmiddleditch/gha-setup-ninja@master

- name: Install Sweep
shell: bash
if: ${{ inputs.timestamp == 'true' && github.ref_name == 'develop' }}
run: cargo install cargo-sweep

- name: Timestamp Cache
shell: bash
if: ${{ inputs.timestamp == 'true' && github.ref_name == 'develop' }}
run: cargo sweep --stamp
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ __pycache__/
# Distribution / packaging
.Python
build/
ninja-build/
develop-eggs/
dist/
downloads/
Expand Down
51 changes: 41 additions & 10 deletions vortex-duckdb/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ static DUCKDB_VERSION: Lazy<DuckDBVersion> = Lazy::new(|| {
parse_version(&version)
} else {
// The default DuckDB version to use when DUCKDB_VERSION env var is not set.
DuckDBVersion::Release("1.4.2".to_owned())
DuckDBVersion::Release("1.5.0".to_owned())
}
});

Expand Down Expand Up @@ -275,7 +275,15 @@ fn extract_duckdb_source(source_dir: &Path) -> Result<PathBuf, Box<dyn std::erro
}

/// Build DuckDB from source. Used for commit hashes or when VX_DUCKDB_DEBUG is set.
fn build_duckdb(duckdb_source_dir: &Path) -> Result<PathBuf, Box<dyn std::error::Error>> {
fn build_duckdb(
duckdb_source_dir: &Path,
version: &DuckDBVersion,
debug: bool,
) -> Result<PathBuf, Box<dyn std::error::Error>> {
let build_type = match debug {
true => "debug",
false => "release",
};
// Check for ninja
if Command::new("ninja").arg("--version").output().is_err() {
return Err(
Expand All @@ -285,10 +293,12 @@ fn build_duckdb(duckdb_source_dir: &Path) -> Result<PathBuf, Box<dyn std::error:

let inner_dir_name = DUCKDB_VERSION.archive_inner_dir_name();
let duckdb_repo_dir = duckdb_source_dir.join(&inner_dir_name);
let build_dir = duckdb_repo_dir.join("build").join("debug");
let build_dir = duckdb_repo_dir.join("build").join(build_type);

// Check if already built
let lib_dir = build_dir.join("src");
let lib_dir_str = lib_dir.display();
println!("cargo:info=Checking if DuckDB is already built in {lib_dir_str}",);

let already_built = lib_dir.join("libduckdb.dylib").exists()
|| lib_dir.join("libduckdb.so").exists()
|| lib_dir
Expand All @@ -309,12 +319,26 @@ fn build_duckdb(duckdb_source_dir: &Path) -> Result<PathBuf, Box<dyn std::error:
("1", "0")
};

let mut envs = vec![
("GEN", "ninja"),
("DISABLE_SANITIZER", asan_option),
("THREADSAN", tsan_option),
("BUILD_SHELL", "false"),
("BUILD_UNITTESTS", "false"),
("ENABLE_UNITTEST_CPP_TESTS", "false"),
];

// If we're building from a commit (likely a pre-release), we need to
// build extensions statically. Otherwise DuckDB tries to load them
// from an http endpoint with version 0.0.1 (all non-tagged builds)
// which doesn't exist. httpfs also requires CURL dev headers
if matches!(version, DuckDBVersion::Commit(_)) {
envs.push(("BUILD_EXTENSIONS", "httpfs;parquet;tpch;tpcds;jemalloc"));
};

let output = Command::new("make")
.current_dir(&duckdb_repo_dir)
.env("GEN", "ninja")
.env("DISABLE_SANITIZER", asan_option)
.env("THREADSAN", tsan_option)
.arg("debug")
.envs(envs)
.output()?;

if !output.status.success() {
Expand Down Expand Up @@ -398,15 +422,21 @@ fn main() {
drop(fs::remove_dir_all(&duckdb_symlink));
std::os::unix::fs::symlink(&extracted_source_path, &duckdb_symlink).unwrap();

// Determine whether to build from source or use prebuilt libraries
let use_debug_build =
env::var("VX_DUCKDB_DEBUG").is_ok_and(|v| matches!(v.as_str(), "1" | "true"));
println!("cargo:info=DuckDB debug build: {use_debug_build}");

let library_path = if use_debug_build || !DUCKDB_VERSION.is_release() {
// Build from source for:
// - Commit hashes (no prebuilt available)
// - When VX_DUCKDB_DEBUG=1 (user wants debug build)
build_duckdb(&extracted_source_path).unwrap()
match build_duckdb(&extracted_source_path, &DUCKDB_VERSION, use_debug_build) {
Ok(path) => path,
Err(err) => {
println!("cargo:error={err}");
panic!("duckdb build failed");
}
}
} else {
// Download prebuilt libraries for release versions
let archive_path = download_duckdb_lib_archive().unwrap();
Expand Down Expand Up @@ -494,6 +524,7 @@ fn main() {
.file("cpp/file_system.cpp")
.file("cpp/logical_type.cpp")
.file("cpp/object_cache.cpp")
.file("cpp/reusable_dict.cpp")
.file("cpp/replacement_scan.cpp")
.file("cpp/scalar_function.cpp")
.file("cpp/table_filter.cpp")
Expand Down
11 changes: 6 additions & 5 deletions vortex-duckdb/cpp/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ duckdb_state duckdb_vx_get_config_value(duckdb_config config, const char *key, d

std::string key_str(key);

// First check set_variables (the primary location for config values)
auto set_it = db_config->options.set_variables.find(key_str);
if (set_it != db_config->options.set_variables.end()) {
// First check set_variable_defaults (the primary location for config values)
auto set_it = db_config->options.set_variable_defaults.find(key_str);
if (set_it != db_config->options.set_variable_defaults.end()) {
*out_value = reinterpret_cast<duckdb_value>(new Value(set_it->second));
return DuckDBSuccess;
}
Expand Down Expand Up @@ -75,8 +75,9 @@ int duckdb_vx_config_has_key(duckdb_config config, const char *key) {

std::string key_str(key);

// Check if the key exists in set_variables (primary location)
if (db_config->options.set_variables.find(key_str) != db_config->options.set_variables.end()) {
// Check if the key exists in set_variable_defaults (primary location)
if (db_config->options.set_variable_defaults.find(key_str) !=
db_config->options.set_variable_defaults.end()) {
return 1;
}

Expand Down
1 change: 1 addition & 0 deletions vortex-duckdb/cpp/include/duckdb_vx.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "duckdb_vx/file_system.h"
#include "duckdb_vx/logical_type.h"
#include "duckdb_vx/object_cache.h"
#include "duckdb_vx/reusable_dict.h"
#include "duckdb_vx/replacement_scan.h"
#include "duckdb_vx/scalar_function.h"
#include "duckdb_vx/table_filter.h"
Expand Down
1 change: 1 addition & 0 deletions vortex-duckdb/cpp/include/duckdb_vx/object_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ typedef void (*duckdb_vx_deleter_fn)(void *ptr);
void duckdb_vx_object_cache_put(duckdb_vx_object_cache object_cache,
const char *key,
void *value,
uint64_t estimated_size,
duckdb_vx_deleter_fn deleter);

// Fetches the key from the object cache, returning nullptr if the key is not present.
Expand Down
35 changes: 35 additions & 0 deletions vortex-duckdb/cpp/include/duckdb_vx/reusable_dict.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#pragma once

#include "duckdb.h"
#include "duckdb_vx/error.h"

#ifdef __cplusplus /* If compiled as C++, use C ABI */
extern "C" {
#endif

/// Opaque, pointer-sized handle to a reusable dictionary.
typedef struct duckdb_vx_reusable_dict_ *duckdb_vx_reusable_dict;

/// Creates a new reusable dictionary from a logical type and size.
/// The returned dictionary can be used with duckdb_vx_vector_dictionary_reusable.
/// The handle refers to a heap allocation; release it with duckdb_vx_reusable_dict_destroy.
duckdb_vx_reusable_dict duckdb_vx_reusable_dict_create(duckdb_logical_type logical_type, idx_t size);

/// Destroys the reusable dictionary referenced by `*dict`.
/// Passing a null pointer, or a pointer to a null handle, is a no-op.
void duckdb_vx_reusable_dict_destroy(duckdb_vx_reusable_dict *dict);

/// Clones the reusable dictionary.
/// The clone is a separate handle that refers to the same underlying dictionary buffer as `dict`;
/// it must be destroyed independently of the original.
duckdb_vx_reusable_dict duckdb_vx_reusable_dict_clone(duckdb_vx_reusable_dict dict);

/// Get the internal vector of the reusable dictionary.
/// Despite the `set` in the name, this is an output-parameter getter: it writes a handle to the
/// dictionary's own vector (not a copy) into `*out_vector`.
void duckdb_vx_reusable_dict_set_vector(duckdb_vx_reusable_dict reusable, duckdb_vector *out_vector);

/// Creates a dictionary vector using a reusable dictionary and a selection vector.
/// `vector` is the vector that is turned into a dictionary vector over `reusable`, with `sel_vec`
/// supplying the selection.
void duckdb_vx_vector_dictionary_reusable(duckdb_vector vector,
duckdb_vx_reusable_dict reusable,
duckdb_selection_vector sel_vec);

#ifdef __cplusplus /* End C ABI */
}
#endif
4 changes: 3 additions & 1 deletion vortex-duckdb/cpp/include/duckdb_vx/table_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ typedef enum DUCKDB_VX_TABLE_FILTER_TYPE {
DUCKDB_VX_TABLE_FILTER_TYPE_OPTIONAL_FILTER = 6, // executing filter is not required for query correctness
DUCKDB_VX_TABLE_FILTER_TYPE_IN_FILTER = 7, // col IN (C1, C2, C3, ...)
DUCKDB_VX_TABLE_FILTER_TYPE_DYNAMIC_FILTER = 8, // dynamic filters can be updated at run-time
DUCKDB_VX_TABLE_FILTER_TYPE_EXPRESSION_FILTER = 9 // an arbitrary expression
DUCKDB_VX_TABLE_FILTER_TYPE_EXPRESSION_FILTER = 9, // an arbitrary expression
DUCKDB_VX_TABLE_FILTER_TYPE_BLOOM_FILTER =
10 // a probabilistic filter that can test whether a value is in a set of other value
} duckdb_vx_table_filter_type;

typedef struct duckdb_vx_table_filter_set_ *duckdb_vx_table_filter_set;
Expand Down
2 changes: 0 additions & 2 deletions vortex-duckdb/cpp/include/duckdb_vx/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ void duckdb_vx_vector_dictionary(duckdb_vector ffi_vector,
duckdb_selection_vector ffi_sel_vec,
idx_t count);

void duckdb_vx_set_dictionary_vector_id(duckdb_vector dict, const char *id, unsigned int id_len);

void duckdb_vx_set_dictionary_vector_length(duckdb_vector dict, unsigned int len);

// Add the buffer to the string vector (basically, keep it alive as long as the vector).
Expand Down
12 changes: 10 additions & 2 deletions vortex-duckdb/cpp/object_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@ namespace vortex {
class OpaqueWrapper : public duckdb::ObjectCacheEntry {
public:
duckdb::unique_ptr<void, duckdb_vx_deleter_fn> ptr;
duckdb::optional_idx estimated_size;

explicit OpaqueWrapper(void *p, duckdb_vx_deleter_fn del) : ptr(p, del) {
explicit OpaqueWrapper(void *p, duckdb::optional_idx estimated_size, duckdb_vx_deleter_fn del)
: ptr(p, del), estimated_size(estimated_size) {
}
~OpaqueWrapper() override = default;

duckdb::optional_idx GetEstimatedCacheMemory() const override {
return estimated_size;
}

duckdb::string GetObjectType() override {
return "vortex_opaque_wrapper";
}
Expand All @@ -32,9 +38,11 @@ class OpaqueWrapper : public duckdb::ObjectCacheEntry {
extern "C" void duckdb_vx_object_cache_put(duckdb_vx_object_cache cache,
const char *key,
void *value,
uint64_t estimated_size,
duckdb_vx_deleter_fn deleter) {
auto object_cache = reinterpret_cast<duckdb::ObjectCache *>(cache);
auto wrapper = duckdb::make_shared_ptr<vortex::OpaqueWrapper>(value, deleter);
auto wrapper =
duckdb::make_shared_ptr<vortex::OpaqueWrapper>(value, duckdb::optional_idx(estimated_size), deleter);
object_cache->Put(std::string(key), wrapper);
}

Expand Down
50 changes: 50 additions & 0 deletions vortex-duckdb/cpp/reusable_dict.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#include "duckdb/common/types/vector.hpp"
#include "duckdb_vx.h"

using namespace duckdb;

// buffer_ptr is shared_ptr, two pointers long, but duckdb_vx_reusable_dict is
// one pointer long, so we need a wrapper.
using Buffer = buffer_ptr<VectorChildBuffer>;
/// Heap-allocated holder for a reusable dictionary buffer.
///
/// The C handle `duckdb_vx_reusable_dict` is a single pointer, so it points at one of these
/// wrappers rather than at the (two-pointer-wide) shared buffer pointer itself.
struct ReusableDict {
    Buffer buffer;

    /// Stores `buffer` in the wrapper. Marked `explicit` so that a `Buffer` is never converted
    /// into a `ReusableDict` implicitly.
    explicit ReusableDict(Buffer buffer) : buffer(std::move(buffer)) {
    }
};

/// Builds a reusable dictionary for the given logical type and size and returns it as an opaque
/// handle.
///
/// `ffi_type` is reinterpreted as a `LogicalType *` and dereferenced without a null check. The
/// returned handle points at a heap-allocated `ReusableDict` that the caller is responsible for
/// releasing via `duckdb_vx_reusable_dict_destroy`.
extern "C" duckdb_vx_reusable_dict duckdb_vx_reusable_dict_create(duckdb_logical_type ffi_type, idx_t size) {
    const auto *logical_type = reinterpret_cast<LogicalType *>(ffi_type);
    // Ownership of the wrapper passes to the caller through the opaque handle.
    auto *wrapper = new ReusableDict(DictionaryVector::CreateReusableDictionary(*logical_type, size));
    return reinterpret_cast<duckdb_vx_reusable_dict>(wrapper);
}

/// Frees the `ReusableDict` wrapper behind `*dict` and resets the caller's handle to null.
///
/// A null `dict`, or a `dict` that points at a null handle, is ignored. Because the handle is
/// cleared after deletion, calling this twice on the same handle variable is harmless rather than
/// a double free.
extern "C" void duckdb_vx_reusable_dict_destroy(duckdb_vx_reusable_dict *dict) {
    if (dict == nullptr || *dict == nullptr) {
        return;
    }
    delete reinterpret_cast<ReusableDict *>(*dict);
    // Clear the caller's handle so it can no longer be used (or destroyed) after the free.
    *dict = nullptr;
}

/// Returns a new handle whose wrapper holds a copy of the source's buffer pointer.
///
/// Only the buffer pointer is copied, so both handles refer to the same underlying buffer. The new
/// wrapper is a separate heap allocation. `dict` is dereferenced without a null check.
extern "C" duckdb_vx_reusable_dict duckdb_vx_reusable_dict_clone(duckdb_vx_reusable_dict dict) {
    const auto *source = reinterpret_cast<ReusableDict *>(dict);
    auto *copy = new ReusableDict(source->buffer);
    return reinterpret_cast<duckdb_vx_reusable_dict>(copy);
}

/// Stores in `*out_vector` a handle to the `data` member of the wrapped dictionary buffer.
///
/// The address written is that of the buffer's own member, not of a copy. Neither `reusable` nor
/// `out_vector` is checked for null.
extern "C" void duckdb_vx_reusable_dict_set_vector(duckdb_vx_reusable_dict reusable,
                                                   duckdb_vector *out_vector) {
    auto &dict = *reinterpret_cast<ReusableDict *>(reusable);
    auto *inner = &dict.buffer->data;
    *out_vector = reinterpret_cast<duckdb_vector>(inner);
}

/// Calls `Vector::Dictionary` on `ffi_vector`, passing the buffer wrapped by `reusable` and the
/// selection vector behind `ffi_sel_vec`.
///
/// All three handles are reinterpreted and dereferenced without null checks.
extern "C" void duckdb_vx_vector_dictionary_reusable(duckdb_vector ffi_vector,
                                                     duckdb_vx_reusable_dict reusable,
                                                     duckdb_selection_vector ffi_sel_vec) {
    auto &target = *reinterpret_cast<Vector *>(ffi_vector);
    auto &selection = *reinterpret_cast<SelectionVector *>(ffi_sel_vec);
    auto &dict = *reinterpret_cast<ReusableDict *>(reusable);
    target.Dictionary(dict.buffer, selection);
}
5 changes: 0 additions & 5 deletions vortex-duckdb/cpp/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@ extern "C" void duckdb_vx_vector_dictionary(duckdb_vector ffi_vector,
vector->Dictionary(*dict, dictionary_size, *sel_vec, count);
}

extern "C" void duckdb_vx_set_dictionary_vector_id(duckdb_vector dict, const char *id, unsigned int id_len) {
auto ddict = reinterpret_cast<duckdb::Vector *>(dict);
DictionaryVector::SetDictionaryId(*ddict, std::string(id, id_len));
}

extern "C" void duckdb_vx_set_dictionary_vector_length(duckdb_vector dict, unsigned int len) {
auto ddict = reinterpret_cast<duckdb::Vector *>(dict);
ddict->GetBuffer()->Cast<DictionaryBuffer>().SetDictionarySize(len);
Expand Down
2 changes: 0 additions & 2 deletions vortex-duckdb/include/vortex.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ extern "C" {
#include "duckdb.h"


#define DUCKDB_STANDARD_VECTOR_SIZE 2048

/**
* Global symbol visibility in the Vortex extension:
* - Rust functions use C ABI with "_rust" suffix (e.g., vortex_init_rust)
Expand Down
3 changes: 3 additions & 0 deletions vortex-duckdb/src/convert/table_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,5 +120,8 @@ pub fn try_from_table_filter(
// TODO(ngates): figure out which column ID DuckDB is using for the expression.
vortex_bail!("expression table filter is not supported: {}", expr);
}
TableFilterClass::Bloom => {
vortex_bail!("bloom filter table filter is not supported")
}
}))
}
2 changes: 2 additions & 0 deletions vortex-duckdb/src/duckdb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ mod logical_type;
mod macro_;
mod object_cache;
mod query_result;
mod reusable_dict;
mod scalar_function;
mod selection_vector;
mod table_filter;
Expand All @@ -39,6 +40,7 @@ pub use file_system::*;
pub use logical_type::*;
pub use object_cache::*;
pub use query_result::*;
pub use reusable_dict::*;
pub use scalar_function::*;
pub use selection_vector::*;
pub use table_filter::*;
Expand Down
7 changes: 6 additions & 1 deletion vortex-duckdb/src/duckdb/object_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,16 @@ impl ObjectCacheRef {
.vortex_expect("object cache key should be valid C string");
let opaque_ptr = Box::into_raw(Box::new(entry));

// Pass 0 to allow eviction by DuckDB, u64::MAX to prevent eviction, otherwise provide an
// estimate of the size of the object in bytes.
let estimated_size: u64 = 0;

unsafe {
cpp::duckdb_vx_object_cache_put(
self.as_ptr(),
key_cstr.as_ptr(),
opaque_ptr.cast(),
opaque_ptr as *mut c_void,
estimated_size,
Some(rust_box_deleter::<T>),
);
}
Expand Down
Loading
Loading