From 24619ed5b2ae80a6e4cecf3debbaebfb3980fe19 Mon Sep 17 00:00:00 2001 From: Max Romagnoli Date: Wed, 1 Apr 2026 08:07:10 +0100 Subject: [PATCH 1/3] feat: r field metadata --- r/R/arrowExports.R | 20 +++++++++++++-- r/R/field.R | 47 +++++++++++++++++++++++++++++------ r/src/arrowExports.cpp | 46 +++++++++++++++++++++++++++++++--- r/src/field.cpp | 42 +++++++++++++++++++++++++++++-- r/tests/testthat/test-field.R | 27 ++++++++++++++++++++ 5 files changed, 167 insertions(+), 15 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 778d2420231e..fe99b91b38db 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1240,8 +1240,8 @@ Field__name <- function(field) { .Call(`_arrow_Field__name`, field) } -Field__Equals <- function(field, other) { - .Call(`_arrow_Field__Equals`, field, other) +Field__Equals <- function(field, other, check_metadata) { + .Call(`_arrow_Field__Equals`, field, other, check_metadata) } Field__nullable <- function(field) { @@ -1252,6 +1252,22 @@ Field__type <- function(field) { .Call(`_arrow_Field__type`, field) } +Field__HasMetadata <- function(field) { + .Call(`_arrow_Field__HasMetadata`, field) +} + +Field__metadata <- function(field) { + .Call(`_arrow_Field__metadata`, field) +} + +Field__WithMetadata <- function(field, metadata) { + .Call(`_arrow_Field__WithMetadata`, field, metadata) +} + +Field__RemoveMetadata <- function(field) { + .Call(`_arrow_Field__RemoveMetadata`, field) +} + fs___FileInfo__type <- function(x) { .Call(`_arrow_fs___FileInfo__type`, x) } diff --git a/r/R/field.R b/r/R/field.R index 6ae18e06c1c7..f1a9c8a2eac1 100644 --- a/r/R/field.R +++ b/r/R/field.R @@ -26,7 +26,18 @@ #' @section Methods: #' #' - `f$ToString()`: convert to a string -#' - `f$Equals(other)`: test for equality. More naturally called as `f == other` +#' - `f$Equals(other, check_metadata = FALSE)`: test for equality. +#' More naturally called as `f == other` +#' - `f$WithMetadata(metadata)`: returns a new `Field` with the key-value +#' `metadata` set. Note that all list elements in `metadata` will be coerced +#' to `character`. +#' - `f$RemoveMetadata()`: returns a new `Field` without metadata. +#' +#' @section Active bindings: +#' +#' - `$HasMetadata`: logical: does this `Field` have extra metadata? +#' - `$metadata`: returns the key-value metadata as a named list, or `NULL` +#' if no metadata is set. #' #' @name Field #' @rdname Field-class @@ -38,8 +49,15 @@ Field <- R6Class( ToString = function() { prettier_dictionary_type(Field__ToString(self)) }, - Equals = function(other, ...) { - inherits(other, "Field") && Field__Equals(self, other) + Equals = function(other, check_metadata = FALSE, ...) { + inherits(other, "Field") && Field__Equals(self, other, isTRUE(check_metadata)) + }, + WithMetadata = function(metadata = NULL) { + metadata <- prepare_key_value_metadata(metadata) + Field__WithMetadata(self, metadata) + }, + RemoveMetadata = function() { + Field__RemoveMetadata(self) }, export_to_c = function(ptr) ExportField(self, ptr) ), @@ -52,14 +70,27 @@ Field <- R6Class( }, type = function() { Field__type(self) + }, + HasMetadata = function() { + Field__HasMetadata(self) + }, + metadata = function() { + if (self$HasMetadata) { + as.list(Field__metadata(self)) + } else { + NULL + } } ) ) -Field$create <- function(name, type, metadata, nullable = TRUE) { +Field$create <- function(name, type, metadata = NULL, nullable = TRUE) { assert_that(inherits(name, "character"), length(name) == 1L) type <- as_type(type, name) - assert_that(missing(metadata), msg = "metadata= is currently ignored") - Field__initialize(enc2utf8(name), type, nullable) + f <- Field__initialize(enc2utf8(name), type, nullable) + if (!is.null(metadata)) { + f <- f$WithMetadata(metadata) + } + f } #' @include arrowExports.R Field$import_from_c <- ImportField @@ -68,11 +99,13 @@ Field$import_from_c <- ImportField #' #' @param name field name #' @param type logical type, instance of [DataType] -#' @param metadata currently ignored +#' @param metadata a named character vector or list to attach as field metadata. +#' All values will be coerced to `character`. #' @param nullable TRUE if field is nullable #' #' @examples #' field("x", int32()) +#' field("x", int32(), metadata = list(key = "value")) #' @rdname Field #' @seealso [Field] #' @export diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 5edf62d77244..0291f1a9da07 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -3219,12 +3219,13 @@ BEGIN_CPP11 END_CPP11 } // field.cpp -bool Field__Equals(const std::shared_ptr& field, const std::shared_ptr& other); -extern "C" SEXP _arrow_Field__Equals(SEXP field_sexp, SEXP other_sexp){ +bool Field__Equals(const std::shared_ptr& field, const std::shared_ptr& other, bool check_metadata); +extern "C" SEXP _arrow_Field__Equals(SEXP field_sexp, SEXP other_sexp, SEXP check_metadata_sexp){ BEGIN_CPP11 arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type other(other_sexp); - return cpp11::as_sexp(Field__Equals(field, other)); + arrow::r::Input::type check_metadata(check_metadata_sexp); + return cpp11::as_sexp(Field__Equals(field, other, check_metadata)); END_CPP11 } // field.cpp @@ -3243,6 +3244,39 @@ BEGIN_CPP11 return cpp11::as_sexp(Field__type(field)); END_CPP11 } +// field.cpp +bool Field__HasMetadata(const std::shared_ptr& field); +extern "C" SEXP _arrow_Field__HasMetadata(SEXP field_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type field(field_sexp); + return cpp11::as_sexp(Field__HasMetadata(field)); +END_CPP11 +} +// field.cpp +cpp11::writable::list Field__metadata(const std::shared_ptr& field); +extern "C" SEXP _arrow_Field__metadata(SEXP field_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type field(field_sexp); + return cpp11::as_sexp(Field__metadata(field)); +END_CPP11 +} +// field.cpp +std::shared_ptr Field__WithMetadata(const std::shared_ptr& field, cpp11::strings metadata); +extern "C" SEXP _arrow_Field__WithMetadata(SEXP field_sexp, SEXP metadata_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type field(field_sexp); + arrow::r::Input::type metadata(metadata_sexp); + return cpp11::as_sexp(Field__WithMetadata(field, metadata)); +END_CPP11 +} +// field.cpp +std::shared_ptr Field__RemoveMetadata(const std::shared_ptr& field); +extern "C" SEXP _arrow_Field__RemoveMetadata(SEXP field_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type field(field_sexp); + return cpp11::as_sexp(Field__RemoveMetadata(field)); +END_CPP11 +} // filesystem.cpp fs::FileType fs___FileInfo__type(const std::shared_ptr& x); extern "C" SEXP _arrow_fs___FileInfo__type(SEXP x_sexp){ @@ -6009,9 +6043,13 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, - { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2}, + { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 3}, { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, + { "_arrow_Field__HasMetadata", (DL_FUNC) &_arrow_Field__HasMetadata, 1}, + { "_arrow_Field__metadata", (DL_FUNC) &_arrow_Field__metadata, 1}, + { "_arrow_Field__WithMetadata", (DL_FUNC) &_arrow_Field__WithMetadata, 2}, + { "_arrow_Field__RemoveMetadata", (DL_FUNC) &_arrow_Field__RemoveMetadata, 1}, { "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, { "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, { "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, diff --git a/r/src/field.cpp b/r/src/field.cpp index 87c9f3e2f3c0..bb7e3b979ff0 100644 --- a/r/src/field.cpp +++ b/r/src/field.cpp @@ -18,6 +18,7 @@ #include "./arrow_types.h" #include +#include // [[arrow::export]] std::shared_ptr Field__initialize( @@ -38,8 +39,45 @@ std::string Field__name(const std::shared_ptr& field) { // [[arrow::export]] bool Field__Equals(const std::shared_ptr& field, - const std::shared_ptr& other) { - return field->Equals(other); + const std::shared_ptr& other, bool check_metadata) { + return field->Equals(other, check_metadata); +} + +// [[arrow::export]] +bool Field__HasMetadata(const std::shared_ptr& field) { + return field->HasMetadata(); +} + +// [[arrow::export]] +cpp11::writable::list Field__metadata(const std::shared_ptr& field) { + auto meta = field->metadata(); + int64_t n = 0; + if (field->HasMetadata()) { + n = meta->size(); + } + cpp11::writable::list out(n); + std::vector names_out(n); + for (int i = 0; i < n; i++) { + out[i] = cpp11::as_sexp(meta->value(i)); + names_out[i] = meta->key(i); + } + out.names() = names_out; + return out; +} + +// [[arrow::export]] +std::shared_ptr Field__WithMetadata( + const std::shared_ptr& field, cpp11::strings metadata) { + auto values = cpp11::as_cpp>(metadata); + auto names = cpp11::as_cpp>(metadata.attr("names")); + auto kv = std::make_shared(std::move(names), std::move(values)); + return field->WithMetadata(std::move(kv)); +} + +// [[arrow::export]] +std::shared_ptr Field__RemoveMetadata( + const std::shared_ptr& field) { + return field->RemoveMetadata(); } // [[arrow::export]] diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R index 6cef48ad541f..2ffa1191b01f 100644 --- a/r/tests/testthat/test-field.R +++ b/r/tests/testthat/test-field.R @@ -63,3 +63,30 @@ test_that("Field to C-interface", { # must clean up the pointer or we leak delete_arrow_schema(ptr) }) + +test_that("Field metadata", { + x <- field("x", int32()) + expect_false(x$HasMetadata) + expect_null(x$metadata) + + x_meta <- field("x", int32(), metadata = list(key = "value")) + expect_true(x_meta$HasMetadata) + expect_identical(x_meta$metadata, list(key = "value")) + + x_meta2 <- x$WithMetadata(list(key = "value")) + expect_true(x_meta2$HasMetadata) + expect_identical(x_meta2$metadata, list(key = "value")) + + x_no_meta <- x_meta$RemoveMetadata() + expect_false(x_no_meta$HasMetadata) + expect_null(x_no_meta$metadata) +}) + +test_that("Field$Equals with check_metadata", { + x <- field("x", int32()) + x_meta <- field("x", int32(), metadata = list(key = "value")) + + expect_true(x$Equals(x_meta)) + expect_false(x$Equals(x_meta, check_metadata = TRUE)) + expect_true(x == x_meta) +}) From 85674d3c0f5ea2dec889fb05a8d5ac0cb9ac0234 Mon Sep 17 00:00:00 2001 From: Max Romagnoli Date: Fri, 3 Apr 2026 09:05:25 +0200 Subject: [PATCH 2/3] precommit --- r/src/field.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/src/field.cpp b/r/src/field.cpp index bb7e3b979ff0..ddfcd17b98dd 100644 --- a/r/src/field.cpp +++ b/r/src/field.cpp @@ -70,7 +70,8 @@ std::shared_ptr Field__WithMetadata( const std::shared_ptr& field, cpp11::strings metadata) { auto values = cpp11::as_cpp>(metadata); auto names = cpp11::as_cpp>(metadata.attr("names")); - auto kv = std::make_shared(std::move(names), std::move(values)); + auto kv = + std::make_shared(std::move(names), std::move(values)); return field->WithMetadata(std::move(kv)); } From 8535707aa6c7e87d674152c41b343109be162022 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Mon, 6 Apr 2026 10:56:24 +0100 Subject: [PATCH 3/3] Fix spacing lint --- r/src/field.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/field.cpp b/r/src/field.cpp index ddfcd17b98dd..22a87bfbf211 100644 --- a/r/src/field.cpp +++ b/r/src/field.cpp @@ -71,7 +71,7 @@ std::shared_ptr Field__WithMetadata( auto values = cpp11::as_cpp>(metadata); auto names = cpp11::as_cpp>(metadata.attr("names")); auto kv = - std::make_shared(std::move(names), std::move(values)); + std::make_shared(std::move(names), std::move(values)); return field->WithMetadata(std::move(kv)); }