diff --git a/r/R/util.R b/r/R/util.R
index c63e1ee5459..cb98358b432 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -196,6 +196,21 @@ repeat_value_as_array <- function(object, n) {
 }
 
 handle_csv_read_error <- function(msg, call, schema) {
+  # A column whose first block of data is entirely missing gets inferred as
+  # null, so later non-missing values fail to convert. Add a hint that names
+  # the relevant option and the ways to work around it.
+  if (grepl("conversion error to null", msg)) {
+    msg <- c(
+      msg,
+      i = paste(
+        "Column type was inferred as null because the first block of data",
+        "contained only missing values. Try a larger `block_size` (see",
+        "`?csv_read_options`) or specify the column types explicitly."
+      )
+    )
+    abort(msg, call = call)
+  }
+
   if (grepl("conversion error", msg) && inherits(schema, "Schema")) {
     msg <- c(
       msg,
diff --git a/r/tests/testthat/test-dataset-csv.R b/r/tests/testthat/test-dataset-csv.R
index 749d1672ac5..5cf8e23b097 100644
--- a/r/tests/testthat/test-dataset-csv.R
+++ b/r/tests/testthat/test-dataset-csv.R
@@ -711,3 +711,23 @@ test_that("open_dataset() with `decimal_point` argument", {
     tibble(x = 1.2, y = "c")
   )
 })
+
+test_that("more informative error when column inferred as null due to sparse data (GH-35806)", {
+  tf <- tempfile()
+  on.exit(unlink(tf))
+
+  # 100 rows with an empty `y`, then one row with a string value: with a tiny
+  # block size the first block holds only missing `y` values.
+  writeLines(c("x,y", paste0(1:100, ",")), tf)
+  write("101,foo", tf, append = TRUE)
+
+  expect_error(
+    open_dataset(
+      tf,
+      format = "csv",
+      read_options = csv_read_options(block_size = 100L)
+    ) |>
+      collect(),
+    "inferred as null"
+  )
+})