diff --git a/R/table_columns.R b/R/table_columns.R index 80cadef..0c853b5 100644 --- a/R/table_columns.R +++ b/R/table_columns.R @@ -96,14 +96,16 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, tbl, pg_width, pg_height, margins, overflow_action = c("error", "warn"), validate_overflow = TRUE, - floor_overrides = NULL) { + floor_overrides = NULL, + cache = NULL) { overflow_action <- match.arg(overflow_action) strategy <- tbl$col_split_strategy %||% "balanced" # Shared setup: compute natural widths, resolve relative weights, # auto-detect wrap eligibility, and measure col_cont_label_half_w. setup <- .resolve_natural_widths( - resolved_cols, data, content_width_in, tbl, pg_width, pg_height, margins + resolved_cols, data, content_width_in, tbl, pg_width, pg_height, margins, + cache = cache ) # Dispatch. Each strategy returns list(resolved_cols, col_groups). @@ -162,7 +164,8 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, # The scratch device used for text measurement is opened, used, and # closed inside this function so neither strategy has to manage it. .resolve_natural_widths <- function(resolved_cols, data, content_width_in, - tbl, pg_width, pg_height, margins) { + tbl, pg_width, pg_height, margins, + cache = NULL) { n_cols <- length(resolved_cols) n_grp <- length(tbl$group_vars) min_in <- .width_in(tbl$min_col_width) @@ -171,6 +174,7 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, .width_in(cell_pad[["left"]]) na_str <- tbl$na_string max_rows <- tbl$max_measure_rows + hdr_gp_key <- paste0("header_row_lh", tbl$line_height) scratch_file <- tempfile(fileext = ".pdf") grDevices::pdf(scratch_file, width = pg_width, height = pg_height) @@ -198,8 +202,12 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, .resolve_table_gp(tbl$gp, "header_row"), tbl$line_height ) parts <- .split_col_strings(data[[cs$col]], cs$label, na_str, max_rows) - w_data <- .measure_max_string_width(parts$data, cell_gp) - w_hdr <- .measure_max_string_width(parts$header, hdr_gp) + cell_key <- paste0(if (cs$is_group_col) "group_col" else "data_row", + "_lh", tbl$line_height) + w_data <- .measure_max_string_width(parts$data, cell_gp, + gp_key = cell_key, cache = cache) + w_hdr <- .measure_max_string_width(parts$header, hdr_gp, + gp_key = hdr_gp_key, cache = cache) max(min_in, max(w_data, w_hdr) + h_pad_in) } }, numeric(1L)) diff --git a/R/table_draw.R b/R/table_draw.R index 197c78a..21936d5 100644 --- a/R/table_draw.R +++ b/R/table_draw.R @@ -66,7 +66,8 @@ build_table_grob <- function(row_page, col_group_idx, n_group_cols, cell_heights_in_mat = NULL, cont_row_h_in = NULL, is_first_col_page = TRUE, - is_last_col_page = TRUE) { + is_last_col_page = TRUE, + clip_width_caches = NULL) { # Subset to display columns for this page page_cols <- resolved_cols[col_group_idx] @@ -81,6 +82,11 @@ build_table_grob <- function(row_page, col_group_idx, n_group_cols, cont_row_h_in = cont_row_h_in, # cached from paginate phase is_first_col_page = is_first_col_page, # FALSE when prior col pages exist is_last_col_page = is_last_col_page, # FALSE when more col pages follow + clip_width_caches = clip_width_caches, # shared across all pages of + # this tfl_table; NULL means + # drawDetails will create + # per-page envs (e.g. grobs + # assembled outside pipeline) cl = "tfl_table_grob" ) } @@ -300,12 +306,22 @@ drawDetails.tfl_table_grob <- function(x, recording) { }) # Per-column clip-width memo: many cells in a column share identical text - # (numeric formats like "5.1", category labels), and the clip-width - # computation otherwise re-measures each one. Cache scoped to this - # drawDetails call and to one column, so a single (text -> width) key is - # enough. - clip_width_cache_by_col <- lapply(page_cols, function(cs) { - new.env(hash = TRUE, parent = emptyenv()) + # (numeric formats like "5.1", category labels). When the parent + # tfl_table_to_pagelist() built a shared cache list, reuse those envs -- + # they persist across every row-page and col-group of this table, so the + # cache hits accumulate over all pages. Otherwise fall back to per-page + # envs (e.g. for grobs built outside the normal pipeline). + clip_width_cache_by_col <- if (!is.null(x$clip_width_caches)) { + lapply(x$col_group_idx, function(k) x$clip_width_caches[[k]]) + } else { + lapply(page_cols, function(cs) new.env(hash = TRUE, parent = emptyenv())) + } + + # Pre-extract and pre-format each column's cell strings for the rows on + # this page. Replaces a per-cell `.fmt_cell(data[[cs$col]][i], na_str)` + # with a single vectorised `.fmt_cell_vec()` per column. + cell_strs_by_col <- lapply(page_cols, function(cs) { + .fmt_cell_vec(data[[cs$col]][rows], na_str) }) # Hoist row/group-rule gpars too -- they don't change between rows. Only @@ -358,9 +374,8 @@ drawDetails.tfl_table_grob <- function(x, recording) { # Draw data row for (j in seq_len(n_disp_cols)) { - cs <- page_cols[[j]] - raw_val <- data[[cs$col]][i] - cell_str <- .fmt_cell(raw_val, na_str) + cs <- page_cols[[j]] + cell_str <- cell_strs_by_col[[j]][[ri]] # Group repeat suppression and span detection clip_h <- row_h @@ -590,10 +605,38 @@ drawDetails.tfl_table_grob <- function(x, recording) { # call; the optional per-column cache lets the caller deduplicate. text_w <- .measure_text_width_in(text, gp, width_cache) needed <- text_w + h_lft_in + h_rgt_in - bleed_tol_in <- 0.05 - clip_w <- min(col_width_in + bleed_tol_in, max(col_width_in, needed)) - # Clip to column width by using a clipping viewport + # X position in parent-viewport inches. + x_in <- if (identical(align, "left")) { + x_left + h_lft_in + } else if (identical(align, "right")) { + x_right - h_rgt_in + } else { + (x_left + x_right) / 2 + } + y_in <- y_top_in + v_top_in + + if (needed <= col_width_in) { + # Fast path: the text fits inside its column, so the column width + # already clips by construction. Draw directly into the parent + # viewport and skip the per-cell clip viewport push/pop entirely -- + # for tables of all-fits cells (numeric columns, short categoricals) + # this saves a viewport per cell. + grid::grid.text( + label = text, + x = grid::unit(x_in / vp_w, "npc"), + y = grid::unit(1 - y_in / vp_h, "npc"), + just = just, + gp = gp + ) + return(invisible(NULL)) + } + + # Slow path: text exceeds the column. Push a clipping viewport so the + # overflow is clipped at the column edge plus a small tolerance, + # instead of bleeding into the neighbour. + bleed_tol_in <- 0.05 + clip_w <- min(col_width_in + bleed_tol_in, needed) vp_clip <- grid::viewport( x = grid::unit(x_left / vp_w, "npc"), y = grid::unit(1 - (y_top_in + row_h) / vp_h, "npc"), @@ -604,18 +647,17 @@ drawDetails.tfl_table_grob <- function(x, recording) { ) grid::pushViewport(vp_clip) - # Re-express x, y relative to clip viewport - vp_w2 <- .width_in(grid::unit(1, "npc")) - vp_h2 <- .height_in(grid::unit(1, "npc")) - + # vp_clip was just constructed with width = clip_w inches and + # height = row_h inches, so npc=1 inside it is exactly that many inches -- + # no need to convertUnit() to recover those numbers. x_local_in <- if (identical(align, "left")) { h_lft_in } else if (identical(align, "right")) { - vp_w2 - h_rgt_in + clip_w - h_rgt_in } else { - vp_w2 / 2 + clip_w / 2 } - y_local_in <- vp_h2 - v_top_in + y_local_in <- row_h - v_top_in grid::grid.text( label = text, diff --git a/R/table_pagelist.R b/R/table_pagelist.R index bb2583f..2a40fc4 100644 --- a/R/table_pagelist.R +++ b/R/table_pagelist.R @@ -150,6 +150,15 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, wrap_extra_pad_in <- if (!is.null(tbl$wrap_extra_padding)) { .height_in(tbl$wrap_extra_padding) } else 0 + + # Per-pagination text-dimension cache. Spans the natural-width pass, + # the row-height pass, and the header-height pass. Each (gp_key, string) + # is constructed via grid::textGrob exactly once; both dimensions are read + # from that grob and cached together. Different passes typically need + # only one dimension, so the cache amortises the other for free. All + # those passes open PDF scratch devices with the same pg_width/pg_height + # so font metrics are stable across the cache's lifetime. + text_dim_cache <- new.env(hash = TRUE, parent = emptyenv()) strategy <- tbl$col_split_strategy %||% "balanced" max_retries <- as.integer(tbl$row_overflow_max_retries %||% 5L) use_retry_loop <- identical(strategy, "balanced") && max_retries > 0L @@ -178,14 +187,16 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, header_row_h <- if (tbl$show_col_names) { .measure_header_row_height(resolved_cols, tbl$gp, tbl$cell_padding, tbl$line_height, breaks = breaks, - wrap_extra_pad_in = wrap_extra_pad_in) + wrap_extra_pad_in = wrap_extra_pad_in, + cache = text_dim_cache) } else 0 cell_h_mat <- measure_row_heights_tbl( tbl$data, resolved_cols, tbl$gp, tbl$cell_padding, tbl$na_string, tbl$line_height, tbl$max_measure_rows, breaks = breaks, - wrap_extra_pad_in = wrap_extra_pad_in + wrap_extra_pad_in = wrap_extra_pad_in, + cache = text_dim_cache ) cont_row_h <- max( @@ -217,7 +228,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, col_result <- compute_col_widths( resolved_cols_0, tbl$data, cw, tbl, pg_width, pg_height, margins, overflow_action = overflow_action, - floor_overrides = floor_overrides + floor_overrides = floor_overrides, + cache = text_dim_cache ) resolved_cols <- col_result$resolved_cols col_groups <- col_result$col_groups @@ -239,7 +251,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, resolved_cols_0, tbl$data, cw_adj, tbl, pg_width, pg_height, margins, overflow_action = overflow_action, validate_overflow = FALSE, - floor_overrides = floor_overrides + floor_overrides = floor_overrides, + cache = text_dim_cache ) resolved_cols <- col_result$resolved_cols col_groups <- col_result$col_groups @@ -287,6 +300,15 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, pages <- vector("list", n_rp * n_cg) idx <- 1L + # Shared per-resolved_cols clip-width cache. Every page-grob built below + # holds a reference to the SAME list of envs, so .draw_cell_text() can + # reuse measurements across pages of the same tfl_table (one entry per + # unique cell text per column). Envs are reference-typed, so memory is + # not duplicated per grob. + clip_width_caches <- lapply(seq_along(resolved_cols), function(k) { + new.env(hash = TRUE, parent = emptyenv()) + }) + for (rp in seq_len(n_rp)) { for (cg in seq_len(n_cg)) { grob <- build_table_grob( @@ -298,7 +320,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, cell_heights_in_mat = cell_h_mat, cont_row_h_in = cont_row_h, is_first_col_page = (cg == 1L), - is_last_col_page = (cg == n_cg) + is_last_col_page = (cg == n_cg), + clip_width_caches = clip_width_caches ) page_spec <- list(content = grob) pages[[idx]] <- page_spec diff --git a/R/table_rows.R b/R/table_rows.R index 6af93e1..7c71d23 100644 --- a/R/table_rows.R +++ b/R/table_rows.R @@ -26,7 +26,8 @@ #' @keywords internal measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, na_string, line_height, max_measure_rows, - breaks = NULL, wrap_extra_pad_in = 0) { + breaks = NULL, wrap_extra_pad_in = 0, + cache = NULL) { n_rows <- nrow(data) n_cols <- length(resolved_cols) v_pad_in <- .height_in(cell_padding[["top"]]) + @@ -34,17 +35,12 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, h_lft_in <- .width_in(cell_padding[["left"]]) h_rgt_in <- .width_in(cell_padding[["right"]]) - # Memoised per-cell-text-height function: (string, gp_key) -> height_in - memo <- new.env(hash = TRUE, parent = emptyenv()) - .memo_str_height <- function(s, gp_key, gp) { - key <- paste0(gp_key, "\x01", s) - if (!exists(key, envir = memo, inherits = FALSE)) { - grob <- grid::textGrob(s, gp = gp) - h <- .height_in(grid::grobHeight(grob)) - assign(key, h, envir = memo) - } - get(key, envir = memo, inherits = FALSE) - } + # When the caller supplied a shared cache, reuse it across columns so that + # any (string, gp) measured during the natural-width pass earlier in + # pagination is served from the cache here. Otherwise create a local + # height-only memo (preserves the pre-cache behaviour for the few callers + # that exercise this function directly). + if (is.null(cache)) cache <- new.env(hash = TRUE, parent = emptyenv()) # Limit rows sampled for height estimation sample_rows <- if (is.finite(max_measure_rows) && n_rows > max_measure_rows) { @@ -83,7 +79,7 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, cell_str } nlines <- max(1L, length(strsplit(display_str, "\n", fixed = TRUE)[[1L]])) - h_grob <- .memo_str_height(display_str, gp_key, cell_gp) + h_grob <- .measure_text_dims_in(display_str, cell_gp, gp_key, cache)$h h_line <- nlines * .height_in(grid::stringHeight("M")) extra <- if (nlines > 1L) wrap_extra_pad_in else 0 cell_h_mat[i, j] <- max(h_grob, h_line) + v_pad_in + extra diff --git a/R/table_utils.R b/R/table_utils.R index b81b17c..379f478 100644 --- a/R/table_utils.R +++ b/R/table_utils.R @@ -52,13 +52,15 @@ # between the header row and the first data row is more obvious. .measure_header_row_height <- function(resolved_cols, gp_tbl, cell_padding, line_height, breaks = NULL, - wrap_extra_pad_in = 0) { + wrap_extra_pad_in = 0, + cache = NULL) { v_pad_in <- .height_in(cell_padding[["top"]]) + .height_in(cell_padding[["bottom"]]) h_lft_in <- .width_in(cell_padding[["left"]]) h_rgt_in <- .width_in(cell_padding[["right"]]) hdr_gp <- .gp_with_lineheight(.resolve_table_gp(gp_tbl, "header_row"), line_height) + gp_key <- paste0("header_row_lh", line_height) max(vapply(resolved_cols, function(cs) { label <- cs$label @@ -67,8 +69,7 @@ h_lft_in + h_rgt_in, hdr_gp, breaks) } nlines <- max(1L, length(strsplit(label, "\n", fixed = TRUE)[[1L]])) - grob <- grid::textGrob(label, gp = hdr_gp) - h_grob <- .height_in(grid::grobHeight(grob)) + h_grob <- .measure_text_dims_in(label, hdr_gp, gp_key, cache)$h h_line <- nlines * .height_in(grid::stringHeight("M")) extra <- if (nlines > 1L) wrap_extra_pad_in else 0 max(h_grob, h_line) + extra @@ -255,15 +256,53 @@ # Text measurement helpers # --------------------------------------------------------------------------- +# Look up or measure (width, height) for `s` under `gp`, caching both when +# an environment is supplied. `gp_key` is a stable structural key for `gp` +# (e.g. paste0("data_row_lh", line_height)) so callers using the same gp +# share entries without hashing the gpar field-by-field per lookup. +# +# When the cache misses we build the textGrob once and read BOTH dimensions +# from it -- consolidating what would otherwise be two separate textGrob +# constructions if a later caller needs the other dimension of the same +# (gp, string). Each construction re-runs grid's gpar validation, which +# is the dominant cost; avoiding the duplicate is the point. +.measure_text_dims_in <- function(s, gp, gp_key, cache = NULL) { + if (!nzchar(s)) return(list(w = 0, h = 0)) + if (!is.null(cache)) { + key <- paste0(gp_key, "\x01", s) + if (exists(key, envir = cache, inherits = FALSE)) { + return(get(key, envir = cache, inherits = FALSE)) + } + } + g <- grid::textGrob(s, gp = gp) + out <- list(w = .width_in(grid::grobWidth(g)), + h = .height_in(grid::grobHeight(g))) + if (!is.null(cache)) assign(key, out, envir = cache) + out +} + # Measure the maximum rendered text width (in inches) for a vector of strings. # Uses textGrob rather than stringWidth() because stringWidth() does not # accept a gp argument in all grid versions. -.measure_max_string_width <- function(strings, gp) { +# +# When a `cache` env and `gp_key` are supplied, lookups go through the +# consolidated (string -> (w, h)) cache from `.measure_text_dims_in()` so a +# later height query for the same (string, gp) reuses the textGrob. +.measure_max_string_width <- function(strings, gp, gp_key = NULL, + cache = NULL) { if (length(strings) == 0L) return(0) # Dedupe up front: real-world callers pass cell-string vectors where the # same value typically appears in many rows (e.g. category labels, NA # strings), so this saves grid round-trips with no behaviour change. uniq <- unique(strings) + if (!is.null(cache) && !is.null(gp_key)) { + return(max(vapply(uniq, function(s) { + lines <- strsplit(s, "\n", fixed = TRUE)[[1L]] + max(vapply(lines, + function(ln) .measure_text_dims_in(ln, gp, gp_key, cache)$w, + numeric(1L))) + }, numeric(1L)))) + } max(vapply(uniq, function(s) { lines <- strsplit(s, "\n", fixed = TRUE)[[1L]] max(vapply(lines, function(ln) { diff --git a/R/wrap.R b/R/wrap.R index 7fd9acf..289ebf0 100644 --- a/R/wrap.R +++ b/R/wrap.R @@ -112,21 +112,25 @@ wrap_breaks_default <- function() { n <- length(chars) if (n == 0L) return(list()) - tokens <- vector("list", n) # over-allocate; trim at end - k <- 0L - cur_buf <- character(n) - cur_n <- 0L - pending <- "" # lead for the next emitted token + # Track each token by its [start, end] position in `s` rather than + # accumulating characters and pasting on flush. `substr(s, st, ed)` is + # one C call per token; the previous `paste(cur_buf[seq_len(cur_n)], + # collapse = "")` allocated and joined cur_n elements per token. + tokens <- vector("list", n) # over-allocate; trim at end + k <- 0L + cur_start <- 0L # 0 means "no token in progress" + cur_end <- 0L + pending <- "" flush <- function() { - if (cur_n > 0L) { + if (cur_start > 0L) { k <<- k + 1L tokens[[k]] <<- list( - text = paste(cur_buf[seq_len(cur_n)], collapse = ""), + text = substr(s, cur_start, cur_end), lead = pending ) - cur_n <<- 0L - pending <<- "" + cur_start <<- 0L + pending <<- "" } } @@ -136,13 +140,13 @@ wrap_breaks_default <- function() { flush() pending <- ch } else if (length(keep_chars) > 0L && ch %in% keep_chars) { - cur_n <- cur_n + 1L - cur_buf[cur_n] <- ch + if (cur_start == 0L) cur_start <- i + cur_end <- i flush() pending <- "" } else { - cur_n <- cur_n + 1L - cur_buf[cur_n] <- ch + if (cur_start == 0L) cur_start <- i + cur_end <- i } } flush() diff --git a/design/DECISIONS.md b/design/DECISIONS.md index f9260e4..b6c6f8a 100644 --- a/design/DECISIONS.md +++ b/design/DECISIONS.md @@ -1483,3 +1483,272 @@ never touches `drawDetails.tfl_table_grob`. **Verification:** - Full `devtools::test()` green before and after. - `examples/bench_compare.R` reproduces the table above. + +--- + +## D-45: Fast-path cell drawing, position-based tokenizer, and per-page formatting + +**Decision:** Three independent, profile-driven changes layered on top of +D-43/D-44: + +1. **Fast path in `.draw_cell_text()`** — when the measured text width + plus horizontal padding is no greater than the column width, draw + directly in the parent viewport instead of pushing a per-cell + clipping viewport. The clip viewport (which created+pushed+popped + per cell) is still used in the slow path, where text might bleed. +2. **Drop redundant `convertUnit` calls in `.draw_cell_text()`'s clip + path** — the clip viewport is constructed with explicit + `unit(clip_w, "inches")` / `unit(row_h, "inches")` dimensions, so + the post-push `vp_w2 <- .width_in(unit(1, "npc"))` / + `vp_h2 <- .height_in(unit(1, "npc"))` calls were re-measuring known + values. Replaced with the literals. +3. **Position-based tokenizer in `.tokenize_for_wrap()`** — replaced the + per-token `paste(cur_buf[seq_len(cur_n)], collapse = "")` with + `substr(s, cur_start, cur_end)`. One C call per token instead of + `n` element accumulations + `paste`. +4. **Per-page formatting hoist in `drawDetails`** — replaced per-cell + `.fmt_cell(data[[cs$col]][i], na_str)` with a single + `.fmt_cell_vec(data[[cs$col]][rows], na_str)` per column built before + the row loop. + +**Context (profiling, round 3):** Post-D-44, the `core_paginate` +Rprof showed `table_draw.R:403` (the `.draw_cell_text()` call site) +accumulating **73% of total time**. Drilling into `.draw_cell_text()`: + +| Line | Self self.pct | Total total.pct | What | +|------|--------------|-----------------|------| +| 597 (measure) | 0.73% | 13.76% | text-width measurement (cached, but still per cell) | +| 603 (vp_clip) | 1.17% | 13.03% | viewport creation (4 `grid::unit()` + `viewport()`) | +| 611 (pushViewport) | — | — | pushing the clip vp | +| 625 (grid.text) | 0.29% | 29.14% | the actual text draw | +| 633 (popViewport) | 0.15% | 6.44% | popping | + +The viewport push/pop pair (~20% of total) is only meaningful when text +overflows the column. For all-numeric tables (iris) and post-wrap +cells, the text always fits and the clip is redundant. + +**Measured improvement** (medians, 15 iterations per core scenario, 3 +for `wrap_demos`, 5 independent runs averaged; baseline = main + D-43 + +D-44 at `7c9484c`): + +| Scenario | Before | After | Δ | +|-----------------|-----------|-----------|-------------------------| +| `core_small` | 146 ms | 111 ms | **~24% ↓** | +| `core_wrap` | 206 ms | 209 ms | within run-to-run noise | +| `core_paginate` | 397 ms | 270 ms | **~32% ↓** | +| `figure_multi` | 318 ms | 330 ms | within noise | +| `wrap_demos` | 2.97 s | 2.80 s | **~6% ↓** | + +`core_paginate` (the iris-heavy draw scenario) and `core_small` +(short table going through the same draw loop) get the biggest gains. +`wrap_demos` had high run-to-run variance with only n=3 iterations; +averaged across 5 independent runs it shows a steady ~6% reduction. +`core_wrap` is unchanged because its bottleneck is height-balance +measurement, not cell drawing. `figure_multi` doesn't touch +`drawDetails.tfl_table_grob`. + +**Alternatives considered and rejected:** +- *Regex-based vectorized tokenizer* — `regmatches`/`strsplit`-driven + reimplementation would shave more time but the algorithm has subtle + drop-vs-keep_before semantics (multiple consecutive drops collapse to + one lead, keep_before chars stay with the preceding token). The + position-based change keeps the existing algorithm verbatim and + preserves the comment-level explanation. Faster vectorization was + not worth the risk-of-regression. +- *Pre-computed parent-viewport coordinates passed into + `.draw_cell_text()`* — would push more work into the drawDetails + body. The current shape (compute once inside `.draw_cell_text()`) is + shorter and the per-call overhead is now tiny. +- *Skipping the text-width measurement entirely when `nchar(text)` is + small enough* — would require a font-aware upper bound on per-char + width; fragile across font sizes and families. The cached + measurement is already cheap on cache hit. + +**Files touched:** +- `R/table_draw.R`: + - `drawDetails.tfl_table_grob`: hoisted `cell_strs_by_col` (vectorised + formatting), and the existing hoists from D-44 are unchanged. + - `.draw_cell_text`: added fast/slow branch on `needed <= col_width_in`, + removed the two post-push `convertUnit` calls in the slow path. +- `R/wrap.R`: + - `.tokenize_for_wrap`: track `cur_start`/`cur_end` positions and + emit text via `substr()` instead of accumulating + pasting. + +**Verification:** +- Full `devtools::test()` green before and after. +- `examples/bench_compare.R` reproduces the table above (averaging over + multiple runs is needed for `wrap_demos`'s lower-iteration sample). + +--- + +## D-46: Cross-page clip-width cache shared across all pages of one tfl_table + +**Decision:** Construct one list of clip-width cache envs (one per column +in `resolved_cols`) at the top of `tfl_table_to_pagelist()` and pass it +into every `build_table_grob()` call. Every page-grob built for the +same table holds a reference to the same env list, so +`drawDetails.tfl_table_grob()` can reuse cached measurements across all +row-page x col-group combinations. + +**Context:** D-44 introduced a per-page clip-width cache. Profiling and +focused benchmarking showed that for tables spanning many pages, the +same cell text typically appears on every page (numeric formats, +visit labels, category codes), so each page was re-measuring the same +strings. A single env shared across pages eliminates that +inter-page duplication while preserving the intra-page hits D-44 +already provided. + +**Change:** +- `R/table_pagelist.R` -- inside `tfl_table_to_pagelist()` (and the + table1/flextable equivalents that call `build_table_grob` similarly), + `clip_width_caches <- lapply(seq_along(resolved_cols), function(k) + new.env(...))` is built once, then threaded to every + `build_table_grob()` invocation. +- `R/table_draw.R` -- `build_table_grob()` gains a `clip_width_caches` + argument stored as a grob slot. `drawDetails.tfl_table_grob()` now + prefers `x$clip_width_caches[x$col_group_idx]` over creating fresh + per-page envs, falling back to the old behaviour when the slot is + absent (e.g. for grobs built outside the normal pipeline). + +The cache is keyed by column index into `resolved_cols` (stable across +col-group splits), not by the local `j` index in `page_cols` (which +re-numbers per col-group). + +**Measured improvement** (medians, 30 iterations; baseline = round-3 +HEAD at `7fcf208` = D-45 shipped): + +| Scenario | Before | After | Δ | +|-----------------|-----------|-----------|-------------| +| `iris5p` (150 rows / 5 pages) | 264 ms | 258 ms | ~2% (modest) | +| `big_df` (500 rows / ~17 pages, 4 cols incl. repeating categoricals) | 1560 ms | 1330 ms | **~15% ↓** | + +The gain scales with page count and amount of inter-page duplication. +Short single-page tables see negligible benefit (the per-page cache +already caught everything). Realistic multi-page clinical listings +hit the design sweet spot. + +**Alternatives considered and rejected:** +- *Cache spanning pagination (scratch device) + drawing (render device)* + -- the documented re-measurement in `.draw_cell_text()` exists + because font metrics can legitimately differ between the scratch + PDF and the render device (e.g. knitr PNG vs PDF for preview mode). + A cross-device cache would risk inaccurate placement, which the + project owner explicitly excluded. +- *Consolidated (width, height) cache in pagination* -- would save + ~half the gpar-validation overhead in `.measure_max_string_width()` + + `.memo_str_height()` by sharing one textGrob's measurements + between Pass 1 (widths) and Pass 2 (heights). Independent of this + change; worth pursuing only if benchmarks justify the threading + cost. Deferred. +- *Package-level env keyed by tbl-object identity* -- zero plumbing, + but a global mutable env is harder to reason about (parallelism, + leaked entries). The explicit list-threaded approach is clearer. + +**Files touched:** +- `R/table_pagelist.R` -- one new `clip_width_caches` construction + block in `.tfl_table_to_pagelist_default()` and the corresponding + pass-through in the `build_table_grob()` call. +- `R/table_draw.R` -- `build_table_grob()` accepts and stores the + cache; `drawDetails.tfl_table_grob()` reads it via + `x$clip_width_caches`. + +**Verification:** +- Full `devtools::test()` green. +- `examples/bench_focused.R` (n=30) reproduces the `iris5p` / + `big_df` table above. + +--- + +## D-47: Consolidated width+height text-dimension cache during pagination + +**Decision:** Replace the two separate per-call height memos with a single +`(gp_key, string) -> list(w, h)` cache that lives for one +`.tfl_table_to_pagelist_default()` call. Every textGrob built for +measurement during pagination populates both dimensions; subsequent +lookups for either dimension reuse the existing entry. + +**Context:** Profiling after D-46 showed that pagination still constructed +two textGrobs for every unique cell string -- one in +`.measure_max_string_width()` (Pass 1, widths) and one in +`measure_row_heights_tbl()` (Pass 2, heights). Each construction re-runs +grid's `validGP` / `set.gpar` chain, the dominant per-call cost. Pass 1 +and Pass 2 use the same gpar for matching column categories +(`data_row`, `group_col`, `header_row`), so the same (gp, string) shows +up twice in pagination work. + +**Change:** +- `R/table_utils.R` -- new `.measure_text_dims_in(s, gp, gp_key, cache)` + helper that builds the textGrob once, reads both dimensions, and + caches the pair. Existing `.measure_max_string_width()` and + `.measure_header_row_height()` now accept an optional cache and + delegate per-string measurement to the helper when provided. +- `R/table_rows.R` -- `measure_row_heights_tbl()` accepts a cache and + replaces its inline `.memo_str_height()` closure with the same helper. +- `R/table_columns.R` -- `compute_col_widths()` and `.resolve_natural_widths()` + thread a `cache` argument; the natural-width pass builds the + appropriate structural `gp_key` for cell-vs-header gpars and passes + it down. +- `R/table_pagelist.R` -- `.tfl_table_to_pagelist_default()` creates one + `text_dim_cache <- new.env(hash = TRUE, parent = emptyenv())` at the + top and passes it to `compute_col_widths()`, + `.measure_header_row_height()`, and `measure_row_heights_tbl()`. + +The `gp_key` namespace matches what `measure_row_heights_tbl()` already +used internally (e.g. `"data_row_lh1.2"`, `"group_col_lh1.2"`, +`"header_row_lh1.2"`). All callers that share a category resolve to the +same key, so width-then-height (Pass 1 -> Pass 2) on the same `(category, +string)` is a cache hit. + +**Why crossing scratch-device boundaries is safe here:** the multiple +PDF scratch devices opened during pagination +(`.resolve_natural_widths`, `.run_pagination_iter`) all use identical +`(pg_width, pg_height)` settings, identical fonts, and identical R +session state. PDF device font metrics are deterministic given those +inputs, so values measured on one scratch device are equal to what +the next scratch device would produce. The cache does NOT cross into +the render-device drawing phase -- that boundary still goes through +`.draw_cell_text()`'s separate re-measurement (D-44/D-46 territory). + +**Measured improvement** (n=30 iterations, baseline = round-3 HEAD at +`61bd26a` = D-46 shipped; min-of-mins across 3 independent runs to +suppress system-load noise): + +| Scenario | Before (min) | After (min) | Δ | +|----------|--------------|-------------|------------| +| `iris5p` (150 rows / 5 pages) | 329 ms | 255 ms | **~22% ↓** | +| `big_df` (500 rows / ~17 pages, 4 cols) | 1.45 s | 1.36 s | ~6% ↓ | + +`iris5p` is the targeted scenario for pagination-side optimisation: +relatively small data, many measurement-heavy passes. `big_df` is +dominated by drawing rather than measurement (D-44/D-46 territory), so +the pagination cache helps less in relative terms. + +**Alternatives considered and rejected:** +- *Caching width and height under separate keys* -- requires the cache + consumer to ask for the right dimension and stores two entries per + unique string. Consolidated `(w, h)` tuple is one entry, one + textGrob construction, and any pass that has either dim gets the + other for free. +- *Hashing the full gpar object as the cache key* -- gpars carry many + fields; per-lookup hashing costs more than the structural-key + approach the codebase already uses internally. Callers pre-compute + `gp_key` once per gpar. +- *Threading the cache further down into `.compute_col_min_widths()` + and `.height_balance_widths_impl()`* -- those have their own + per-call caches keyed differently (per-token, per-(j, width)). + Bringing them into the unified cache would require harmonising key + schemes and is left for a follow-up if profile data justifies it. + +**Files touched:** +- `R/table_utils.R` -- new `.measure_text_dims_in()` helper; cache-aware + `.measure_max_string_width()` and `.measure_header_row_height()`. +- `R/table_rows.R` -- cache-aware `measure_row_heights_tbl()`. +- `R/table_columns.R` -- cache-aware `compute_col_widths()` and + `.resolve_natural_widths()`. +- `R/table_pagelist.R` -- cache construction in + `.tfl_table_to_pagelist_default()` and threading into the three + pagination measurement entry points. + +**Verification:** +- Full `devtools::test()` green. +- `examples/bench_focused.R` (n=30) reproduces the table above. diff --git a/examples/bench_focused.R b/examples/bench_focused.R new file mode 100644 index 0000000..1ab6602 --- /dev/null +++ b/examples/bench_focused.R @@ -0,0 +1,41 @@ +# examples/bench_focused.R +# +# Focused benchmark on the scenarios most likely to show cross-page cache +# benefit: tfl_table(iris) (5 pages) and a 500-row synthetic table. + +suppressPackageStartupMessages({ + devtools::load_all(quiet = TRUE) +}) + +big_df <- data.frame( + subject = sprintf("Subject %03d", 1:500), + visit = rep(c("Baseline", "Week 1", "Week 4", "Week 12"), length.out = 500), + result = round(rnorm(500, 100, 15), 1), + flag = sample(c("Y", "N", "?"), 500, replace = TRUE) +) + +scenarios <- list( + iris5p = function() { + out <- tempfile(fileext = ".pdf") + export_tfl(tfl_table(iris), file = out) + unlink(out) + }, + big_df = function() { + out <- tempfile(fileext = ".pdf") + export_tfl(tfl_table(big_df), file = out) + unlink(out) + } +) + +for (name in names(scenarios)) { + fn <- scenarios[[name]] + invisible(fn()) # warmup + bm <- bench::mark(fn(), iterations = 30L, check = FALSE, + filter_gc = FALSE, memory = FALSE) + cat(sprintf("%-10s min=%-9s median=%-9s iqr=%s n=%d\n", + name, + format(bm$min), + format(bm$median), + format(bm$median - bm$min), + bm$n_itr)) +}